Merge pull request #15 from w-okada/dev

Dev
2022-08-31 15:22:10 +09:00 · 2022-08-31 15:22:10 +09:00 · 92765157ce
commit 92765157ce
parent c3832a4006 634d1cbe79
12 changed files with 782 additions and 10 deletions
--- a/VoiceChangerDemo.ipynb
+++ b/VoiceChangerDemo.ipynb
@ -0,0 +1,580 @@
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "VoiceChangerDemo",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyN+8irLJYUFlwMPzvHMSJof",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "gpuClass": "standard"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/w-okada/voice-changer/blob/dev/VoiceChangerDemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "57p7pA1Qb5wa"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!nvidia-smi"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vV1t7PBRm-o6",
        "outputId": "60fc80b2-a39e-4840-88c1-0d8d483a36ca"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Wed Aug 31 06:14:56 2022       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   72C    P8    12W /  70W |      0MiB / 15109MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "CONFIG=\"/content/drive/MyDrive/VoiceChanger/config.json\"\n",
        "MODEL=\"/content/drive/MyDrive/VoiceChanger/G_326000.pth\""
      ],
      "metadata": {
        "id": "nSXATMWYb4Ik"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "2wxD-gRSMU5R",
        "outputId": "83bb80fa-9ced-43e2-a304-d53a3501b142"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!git clone https://github.com/w-okada/voice-changer.git\n",
        "%cd voice-changer/demo/\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "86wTFmqsNMnD",
        "outputId": "3fc68f14-b6b7-48bb-e285-5bed78e74f26"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'voice-changer'...\n",
            "remote: Enumerating objects: 266, done.\u001b[K\n",
            "remote: Counting objects: 100% (266/266), done.\u001b[K\n",
            "remote: Compressing objects: 100% (189/189), done.\u001b[K\n",
            "remote: Total 266 (delta 123), reused 194 (delta 65), pack-reused 0\u001b[K\n",
            "Receiving objects: 100% (266/266), 19.11 MiB | 35.44 MiB/s, done.\n",
            "Resolving deltas: 100% (123/123), done.\n",
            "/content/voice-changer/demo\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!git checkout dev\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CBsogR-zWH4r",
        "outputId": "f4c9737b-831d-4938-d387-caf07693030e"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Branch 'dev' set up to track remote branch 'dev' from 'origin'.\n",
            "Switched to a new branch 'dev'\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!mkdir -p ../frontend/dist\n",
        "!cp -r ../docs/* ../frontend/dist/\n",
        "!ls ../frontend/dist\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "uCEKf3_JNoyq",
        "outputId": "746e1946-5c3a-49af-df26-d86149f8adb1"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "assets\t  coffee.png   index.html  index.js.LICENSE.txt\n",
            "audiolet  favicon.ico  index.js\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!cp ../template/setting_colab.json ../frontend/dist/assets/setting.json"
      ],
      "metadata": {
        "id": "Bn4kV8TgXp8i"
      },
      "execution_count": 11,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!cat ../frontend/dist/assets/setting.json"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pjxPsOOaXXTj",
        "outputId": "1bf85102-87ed-462c-e732-cffb878d95f3"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{\n",
            "    \"app_title\": \"voice-changer\",\n",
            "    \"majar_mode\": \"colab\",\n",
            "    \"voice_changer_server_url\": \"http://localhost:8080/test\",\n",
            "    \"sample_rate\": 48000,\n",
            "    \"buffer_size\": 1024,\n",
            "    \"prefix_chunk_size\": 24,\n",
            "    \"chunk_size\": 24,\n",
            "    \"speaker_ids\": [100, 107, 101, 102, 103],\n",
            "    \"speaker_names\": [\"ずんだもん\", \"user\", \"そら\", \"めたん\", \"つぐみ\"],\n",
            "    \"src_id\": 107,\n",
            "    \"dst_id\": 100,\n",
            "    \"vf_enable\": true,\n",
            "    \"voice_changer_mode\": \"realtime\",\n",
            "    \"gpu\": 0,\n",
            "    \"available_gpus\": [-1, 0, 1, 2, 3, 4],\n",
            "    \"avatar\": {\n",
            "        \"motion_capture_face\": true,\n",
            "        \"motion_capture_upperbody\": true,\n",
            "        \"lip_overwrite_with_voice\": true,\n",
            "        \"avatar_url\": \"./assets/vrm/zundamon/zundamon.vrm\",\n",
            "        \"backgournd_image_url\": \"./assets/images/bg_natural_sougen.jpg\",\n",
            "        \"background_color\": \"#0000dd\",\n",
            "        \"chroma_key\": \"#0000dd\",\n",
            "        \"avatar_canvas_size\": [1280, 720],\n",
            "        \"screen_canvas_size\": [1280, 720]\n",
            "    },\n",
            "    \"advance\": {\n",
            "        \"avatar_draw_skip_rate\": 3,\n",
            "        \"screen_draw_skip_rate\": 3,\n",
            "        \"visualizer_draw_skip_rate\": 3,\n",
            "        \"cross_fade_lower_value\": 0.1,\n",
            "        \"cross_fade_overlap_rate\": 0.03\n",
            "    }\n",
            "}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# 手作業\n",
        "\n",
        "・configとモデルをdemoフォルダにコピー\n",
        "\n",
        "・docsをfrontendに変更\n",
        "\n",
        "・setting.jsonをfrontendにコピー\n"
      ],
      "metadata": {
        "id": "8Na2PbLZSWgZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!apt-get install -y espeak libsndfile1-dev\n",
        "!pip install flask\n",
        "!pip install python-socketio\n",
        "!pip install eventlet\n",
        "!pip install unidecode\n",
        "!pip install phonemizer\n",
        "!pip install retry\n",
        "!pip install flask\n",
        "!pip install flask_cors\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "LwZAAuqxX7yY",
        "outputId": "c67b2741-7a1e-448d-abf9-7b8d8f5e3d15"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Reading package lists... Done\n",
            "Building dependency tree       \n",
            "Reading state information... Done\n",
            "libsndfile1-dev is already the newest version (1.0.28-4ubuntu0.18.04.2).\n",
            "The following package was automatically installed and is no longer required:\n",
            "  libnvidia-common-460\n",
            "Use 'apt autoremove' to remove it.\n",
            "The following additional packages will be installed:\n",
            "  espeak-data libespeak1 libportaudio2 libsonic0\n",
            "The following NEW packages will be installed:\n",
            "  espeak espeak-data libespeak1 libportaudio2 libsonic0\n",
            "0 upgraded, 5 newly installed, 0 to remove and 20 not upgraded.\n",
            "Need to get 1,219 kB of archives.\n",
            "After this operation, 3,031 kB of additional disk space will be used.\n",
            "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudio2 amd64 19.6.0-1 [64.6 kB]\n",
            "Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libsonic0 amd64 0.2.0-6 [13.4 kB]\n",
            "Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak-data amd64 1.48.04+dfsg-5 [934 kB]\n",
            "Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libespeak1 amd64 1.48.04+dfsg-5 [145 kB]\n",
            "Get:5 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak amd64 1.48.04+dfsg-5 [61.6 kB]\n",
            "Fetched 1,219 kB in 1s (1,636 kB/s)\n",
            "Selecting previously unselected package libportaudio2:amd64.\n",
            "(Reading database ... 155676 files and directories currently installed.)\n",
            "Preparing to unpack .../libportaudio2_19.6.0-1_amd64.deb ...\n",
            "Unpacking libportaudio2:amd64 (19.6.0-1) ...\n",
            "Selecting previously unselected package libsonic0:amd64.\n",
            "Preparing to unpack .../libsonic0_0.2.0-6_amd64.deb ...\n",
            "Unpacking libsonic0:amd64 (0.2.0-6) ...\n",
            "Selecting previously unselected package espeak-data:amd64.\n",
            "Preparing to unpack .../espeak-data_1.48.04+dfsg-5_amd64.deb ...\n",
            "Unpacking espeak-data:amd64 (1.48.04+dfsg-5) ...\n",
            "Selecting previously unselected package libespeak1:amd64.\n",
            "Preparing to unpack .../libespeak1_1.48.04+dfsg-5_amd64.deb ...\n",
            "Unpacking libespeak1:amd64 (1.48.04+dfsg-5) ...\n",
            "Selecting previously unselected package espeak.\n",
            "Preparing to unpack .../espeak_1.48.04+dfsg-5_amd64.deb ...\n",
            "Unpacking espeak (1.48.04+dfsg-5) ...\n",
            "Setting up libportaudio2:amd64 (19.6.0-1) ...\n",
            "Setting up espeak-data:amd64 (1.48.04+dfsg-5) ...\n",
            "Setting up libsonic0:amd64 (0.2.0-6) ...\n",
            "Setting up libespeak1:amd64 (1.48.04+dfsg-5) ...\n",
            "Setting up espeak (1.48.04+dfsg-5) ...\n",
            "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n",
            "Processing triggers for libc-bin (2.27-3ubuntu1.5) ...\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: flask in /usr/local/lib/python3.7/dist-packages (1.1.4)\n",
            "Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from flask) (7.1.2)\n",
            "Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from flask) (2.11.3)\n",
            "Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from flask) (1.1.0)\n",
            "Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from flask) (1.0.1)\n",
            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->flask) (2.0.1)\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting python-socketio\n",
            "  Downloading python_socketio-5.7.1-py3-none-any.whl (56 kB)\n",
            "\u001b[K     |████████████████████████████████| 56 kB 5.0 MB/s \n",
            "\u001b[?25hCollecting bidict>=0.21.0\n",
            "  Downloading bidict-0.22.0-py3-none-any.whl (36 kB)\n",
            "Collecting python-engineio>=4.3.0\n",
            "  Downloading python_engineio-4.3.4-py3-none-any.whl (52 kB)\n",
            "\u001b[K     |████████████████████████████████| 52 kB 2.0 MB/s \n",
            "\u001b[?25hInstalling collected packages: python-engineio, bidict, python-socketio\n",
            "Successfully installed bidict-0.22.0 python-engineio-4.3.4 python-socketio-5.7.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting eventlet\n",
            "  Downloading eventlet-0.33.1-py2.py3-none-any.whl (226 kB)\n",
            "\u001b[K     |████████████████████████████████| 226 kB 33.3 MB/s \n",
            "\u001b[?25hCollecting dnspython>=1.15.0\n",
            "  Downloading dnspython-2.2.1-py3-none-any.whl (269 kB)\n",
            "\u001b[K     |████████████████████████████████| 269 kB 52.5 MB/s \n",
            "\u001b[?25hRequirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from eventlet) (1.15.0)\n",
            "Requirement already satisfied: greenlet>=0.3 in /usr/local/lib/python3.7/dist-packages (from eventlet) (1.1.3)\n",
            "Installing collected packages: dnspython, eventlet\n",
            "Successfully installed dnspython-2.2.1 eventlet-0.33.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting unidecode\n",
            "  Downloading Unidecode-1.3.4-py3-none-any.whl (235 kB)\n",
            "\u001b[K     |████████████████████████████████| 235 kB 28.6 MB/s \n",
            "\u001b[?25hInstalling collected packages: unidecode\n",
            "Successfully installed unidecode-1.3.4\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting phonemizer\n",
            "  Downloading phonemizer-3.2.1-py3-none-any.whl (90 kB)\n",
            "\u001b[K     |████████████████████████████████| 90 kB 9.5 MB/s \n",
            "\u001b[?25hCollecting segments\n",
            "  Downloading segments-2.2.1-py2.py3-none-any.whl (15 kB)\n",
            "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from phonemizer) (1.1.0)\n",
            "Collecting dlinfo\n",
            "  Downloading dlinfo-1.2.1-py3-none-any.whl (3.6 kB)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from phonemizer) (4.1.1)\n",
            "Requirement already satisfied: attrs>=18.1 in /usr/local/lib/python3.7/dist-packages (from phonemizer) (22.1.0)\n",
            "Collecting csvw>=1.5.6\n",
            "  Downloading csvw-3.1.1-py2.py3-none-any.whl (56 kB)\n",
            "\u001b[K     |████████████████████████████████| 56 kB 5.7 MB/s \n",
            "\u001b[?25hRequirement already satisfied: regex in /usr/local/lib/python3.7/dist-packages (from segments->phonemizer) (2022.6.2)\n",
            "Collecting clldutils>=1.7.3\n",
            "  Downloading clldutils-3.12.0-py2.py3-none-any.whl (197 kB)\n",
            "\u001b[K     |████████████████████████████████| 197 kB 63.8 MB/s \n",
            "\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from clldutils>=1.7.3->segments->phonemizer) (2.8.2)\n",
            "Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.7/dist-packages (from clldutils>=1.7.3->segments->phonemizer) (0.8.10)\n",
            "Collecting colorlog\n",
            "  Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)\n",
            "Collecting colorama\n",
            "  Downloading colorama-0.4.5-py2.py3-none-any.whl (16 kB)\n",
            "Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (4.3.3)\n",
            "Collecting rdflib\n",
            "  Downloading rdflib-6.2.0-py3-none-any.whl (500 kB)\n",
            "\u001b[K     |████████████████████████████████| 500 kB 53.6 MB/s \n",
            "\u001b[?25hRequirement already satisfied: babel in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (2.10.3)\n",
            "Collecting language-tags\n",
            "  Downloading language_tags-1.1.0-py2.py3-none-any.whl (210 kB)\n",
            "\u001b[K     |████████████████████████████████| 210 kB 65.4 MB/s \n",
            "\u001b[?25hCollecting rfc3986<2\n",
            "  Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n",
            "Requirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (3.0.1)\n",
            "Collecting isodate\n",
            "  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n",
            "\u001b[K     |████████████████████████████████| 41 kB 763 kB/s \n",
            "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (2.23.0)\n",
            "Requirement already satisfied: pytz>=2015.7 in /usr/local/lib/python3.7/dist-packages (from babel->csvw>=1.5.6->segments->phonemizer) (2022.2.1)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from isodate->csvw>=1.5.6->segments->phonemizer) (1.15.0)\n",
            "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (0.18.1)\n",
            "Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (5.9.0)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (4.12.0)\n",
            "Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources>=1.4.0->jsonschema->csvw>=1.5.6->segments->phonemizer) (3.8.1)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer) (57.4.0)\n",
            "Requirement already satisfied: pyparsing in /usr/local/lib/python3.7/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer) (3.0.9)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (3.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (2022.6.15)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (2.10)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (1.24.3)\n",
            "Installing collected packages: isodate, rfc3986, rdflib, language-tags, colorama, csvw, colorlog, clldutils, segments, dlinfo, phonemizer\n",
            "Successfully installed clldutils-3.12.0 colorama-0.4.5 colorlog-6.7.0 csvw-3.1.1 dlinfo-1.2.1 isodate-0.6.1 language-tags-1.1.0 phonemizer-3.2.1 rdflib-6.2.0 rfc3986-1.5.0 segments-2.2.1\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting retry\n",
            "  Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n",
            "Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.7/dist-packages (from retry) (4.4.2)\n",
            "Requirement already satisfied: py<2.0.0,>=1.4.26 in /usr/local/lib/python3.7/dist-packages (from retry) (1.11.0)\n",
            "Installing collected packages: retry\n",
            "Successfully installed retry-0.9.2\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: flask in /usr/local/lib/python3.7/dist-packages (1.1.4)\n",
            "Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from flask) (1.1.0)\n",
            "Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from flask) (2.11.3)\n",
            "Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from flask) (1.0.1)\n",
            "Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from flask) (7.1.2)\n",
            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->flask) (2.0.1)\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting flask_cors\n",
            "  Downloading Flask_Cors-3.0.10-py2.py3-none-any.whl (14 kB)\n",
            "Requirement already satisfied: Flask>=0.9 in /usr/local/lib/python3.7/dist-packages (from flask_cors) (1.1.4)\n",
            "Requirement already satisfied: Six in /usr/local/lib/python3.7/dist-packages (from flask_cors) (1.15.0)\n",
            "Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (1.1.0)\n",
            "Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (2.11.3)\n",
            "Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (1.0.1)\n",
            "Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (7.1.2)\n",
            "Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->Flask>=0.9->flask_cors) (2.0.1)\n",
            "Installing collected packages: flask-cors\n",
            "Successfully installed flask-cors-3.0.10\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "get_ipython().system_raw(f'python3 serverFlask.py 8082 {CONFIG} {MODEL} >foo 2>&1 &')"
      ],
      "metadata": {
        "id": "iNOAB7zISI6J"
      },
      "execution_count": 14,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!cat foo"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "chu06KpAjEK6",
        "outputId": "887c2d50-c49f-4a22-f0d0-8a3667511466"
      },
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[2022-08-31 06:17:58,669] INFO in serverFlask: INITIALIZE MODEL\n",
            "[2022-08-31 06:18:08,764] INFO in utils: Loaded checkpoint '/content/drive/MyDrive/VoiceChanger/G_326000.pth' (iteration 1136)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from google.colab import output\n",
        "\n",
        "output.serve_kernel_port_as_window(8082)"
      ],
      "metadata": {
        "id": "nkRjZm95l87C",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 34
        },
        "outputId": "abf57f92-5cb6-4325-b64a-095d42f561d5"
      },
      "execution_count": 27,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ],
            "application/javascript": [
              "(async (port, path, text, element) => {\n",
              "    if (!google.colab.kernel.accessAllowed) {\n",
              "      return;\n",
              "    }\n",
              "    element.appendChild(document.createTextNode(''));\n",
              "    const url = await google.colab.kernel.proxyPort(port);\n",
              "    const anchor = document.createElement('a');\n",
              "    anchor.href = new URL(path, url).toString();\n",
              "    anchor.target = '_blank';\n",
              "    anchor.setAttribute('data-href', url + path);\n",
              "    anchor.textContent = text;\n",
              "    element.appendChild(anchor);\n",
              "  })(8082, \"/\", \"https://localhost:8082/\", window.element)"
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "! ls ../frontend/dist/index.html"
      ],
      "metadata": {
        "id": "DKWni4moSyzO",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b5635a1e-6ac6-41db-a706-dc3e5fb866a5"
      },
      "execution_count": 23,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "../frontend/dist/index.html\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [],
      "metadata": {
        "id": "3hwJmseXZhJY"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
 }
--- a/demo/dummy.wav
+++ b/demo/dummy.wav
--- a/demo/serverFlask.py
+++ b/demo/serverFlask.py
@ -0,0 +1,136 @@
 from flask import Flask, request, Markup, abort, jsonify
 from flask_cors import CORS
 import logging
 from logging.config import dictConfig
 import sys
 import base64
 import torch
 import numpy as np
 from scipy.io.wavfile import write, read
 from datetime import datetime
 import traceback
 import struct
 sys.path.append("mod")
 sys.path.append("mod/text")
 import utils
 from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate
 from models import SynthesizerTrn
 from text.symbols import symbols
 # Logging setup: every record at INFO and above goes to Flask's WSGI error
 # stream, formatted as "[time] LEVEL in module: message".
 # NOTE(review): this runs before the Flask app object is created below;
 # keep that order unless the Flask logging docs say otherwise.
 dictConfig({
    'version': 1,
    'formatters': {'default': {
        'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
    }},
    'handlers': {'wsgi': {
        'class': 'logging.StreamHandler',
        'stream': 'ext://flask.logging.wsgi_errors_stream',
        'formatter': 'default'
    }},
    'root': {
        'level': 'INFO',
        'handlers': ['wsgi']
    }
 })
 # The files under ../frontend/dist are served as static content from the URL root.
 app = Flask(__name__, static_folder="../frontend/dist", static_url_path='/')
 # CORS is enabled for every route ("/*") and every origin ("*").
 CORS(app, resources={r"/*": {"origins": "*"}}) 
 class VoiceChanger():
    """Holds a SynthesizerTrn generator and converts audio between speaker ids."""
    def __init__(self, config, model):
        """Build the generator from a hyper-parameter file and load its weights.

        Args:
            config: path handed to utils.get_hparams_from_file.
            model: checkpoint path handed to utils.load_checkpoint.
        """
        self.hps = utils.get_hparams_from_file(config)
        self.net_g = SynthesizerTrn(
                len(symbols),
                self.hps.data.filter_length // 2 + 1,
                self.hps.train.segment_size // self.hps.data.hop_length,
                n_speakers=self.hps.data.n_speakers,
                **self.hps.model)
        self.net_g.eval()
        # Number of CUDA devices visible to torch; 0 forces the CPU path in on_request.
        self.gpu_num = torch.cuda.device_count()
        print("GPU_NUM:", self.gpu_num)
        utils.load_checkpoint(model, self.net_g, None)
    def on_request(self, gpu, srcId, dstId, timestamp, wav):
        """Convert one chunk of audio from speaker srcId to speaker dstId.

        Args:
            gpu: CUDA device index; a negative value (or no CUDA device
                being available) selects the CPU.
            srcId: source speaker id.
            dstId: target speaker id.
            timestamp: accepted for interface compatibility; not used here.
            wav: little-endian 16-bit PCM bytes, or the sentinel 0, in which
                case the samples are read from "dummy.wav" instead.

        Returns:
            numpy int16 array holding the converted audio.

        Raises:
            Exception: whatever the conversion pipeline raised. It is logged
                and re-raised unchanged; previously the handler fell through
                to an unbound ``audio1`` and raised UnboundLocalError, which
                hid the real cause from the caller.
        """
        if wav == 0:
            # Sentinel call: use the bundled sample file as input.
            _, unpackedData = read("dummy.wav")
        else:
            sample_count = len(wav) // struct.calcsize('<h')
            unpackedData = np.array(struct.unpack('<%sh' % sample_count, wav))
            # Keep a copy of the last received chunk on disk (written at 24000 Hz).
            write("logs/received_data.wav", 24000, unpackedData.astype(np.int16))
        # One device object replaces the former duplicated CPU / GPU branches.
        use_cpu = gpu < 0 or self.gpu_num == 0
        device = torch.device("cpu") if use_cpu else torch.device("cuda", gpu)
        try:
            with torch.no_grad():
                dataset = TextAudioSpeakerLoader("dummy.txt", self.hps.data, no_use_textfile=True)
                data = dataset.get_audio_text_speaker_pair([unpackedData, srcId, "a"])
                data = TextAudioSpeakerCollate()([data])
                x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [t.to(device) for t in data]
                sid_tgt1 = torch.LongTensor([dstId]).to(device)
                converted = self.net_g.to(device).voice_conversion(
                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)
                audio1 = (converted[0][0, 0].data * self.hps.data.max_wav_value).cpu().float().numpy()
        except Exception as e:
            print("VC PROCESSING!!!! EXCEPTION!!!", e)
            print(traceback.format_exc())
            # Propagate the real failure instead of continuing without a result.
            raise
        return audio1.astype(np.int16)
@app.route('/test', methods=['GET', 'POST'])
def test():
    """HTTP endpoint for voice conversion.

    GET:  echoes the ``query`` URL argument (empty string when absent).
    POST: expects a JSON body with ``gpu``, ``srcId``, ``dstId``,
          ``timestamp`` (integers or integer strings) and ``buffer``
          (base64-encoded audio bytes). The decoded audio is passed to
          ``voiceChanger.on_request`` and the result is returned as JSON with
          the request parameters and ``changedVoiceBase64``.

    Any other method is rejected with 400. A failure while handling the
    request is logged and returned as the error text with status 500.
    """
    # Checked outside the try block: abort() raises an HTTPException, which the
    # blanket ``except Exception`` below would otherwise swallow and turn into
    # a 200 response.
    if request.method not in ('GET', 'POST'):
        return abort(400)

    try:
        if request.method == 'GET':
            return request.args.get('query', '')

        print("POST REQUEST PROCESSING....")
        payload = request.json
        gpu = int(payload['gpu'])
        srcId = int(payload['srcId'])
        dstId = int(payload['dstId'])
        timestamp = int(payload['timestamp'])
        wav = base64.b64decode(payload['buffer'])

        changedVoice = voiceChanger.on_request(gpu, srcId, dstId, timestamp, wav)
        changedVoiceBase64 = base64.b64encode(changedVoice).decode('utf-8')

        data = {
            "gpu":gpu,
            "srcId":srcId,
            "dstId":dstId,
            "timestamp":timestamp,
            "changedVoiceBase64":changedVoiceBase64
        }
        return jsonify(data)
    except Exception as e:
        print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
        print(traceback.format_exc())
        # Report the failure with an error status so clients can tell it apart
        # from a successful response.
        return str(e), 500
 if __name__ == '__main__':
    # Command line: <port> <config file> <model checkpoint>
    args = sys.argv
    PORT, CONFIG, MODEL = args[1], args[2], args[3]

    app.logger.info('INITIALIZE MODEL')
    voiceChanger = VoiceChanger(CONFIG, MODEL)
    # Run one conversion on the dummy input (wav == 0) before serving requests.
    voiceChanger.on_request(0, 0, 0, 0, 0)

    app.logger.info('START APP')
    # NOTE(review): debug=True is combined with a listener on all interfaces;
    # confirm this is intended outside of local demos.
    app.run(debug=True, host='0.0.0.0', port=PORT)
--- a/demo/serverSIO.py
+++ b/demo/serverSIO.py
@ -17,7 +17,7 @@ from text.symbols import symbols
-class MyCustomNamespace(socketio.Namespace): # 名前空間を設定するクラス
+class MyCustomNamespace(socketio.Namespace): 
    def __init__(self, namespace, config, model):
        super().__init__(namespace)
        self.hps =utils.get_hparams_from_file(config)
@ -36,7 +36,7 @@ class MyCustomNamespace(socketio.Namespace): # 名前空間を設定するクラ
        print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid))
        # print('[{}] connet env : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , environ))
-    def on_request_message(self, sid, msg): # 送信してきたクライアントだけにメッセージを送る関数
+    def on_request_message(self, sid, msg): 
        # print("MESSGaa", msg)
        gpu = int(msg[0])
        srcId = int(msg[1])
@ -88,9 +88,9 @@ if __name__ == '__main__':
    print(f"start... PORT:{PORT}, CONFIG:{CONFIG}, MODEL:{MODEL}")    
    # sio = socketio.Server(cors_allowed_origins='http://localhost:8080') 
    sio = socketio.Server(cors_allowed_origins='*') 
-    sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL)) # 名前空間を設定
+    sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL)) 
    app = socketio.WSGIApp(sio,static_files={
        '': '../frontend/dist',
-    }) # wsgiサーバーミドルウェア生成
+    }) 
-    eventlet.wsgi.server(eventlet.listen(('0.0.0.0',int(PORT))), app) # wsgiサーバー起動
+    eventlet.wsgi.server(eventlet.listen(('0.0.0.0',int(PORT))), app) 
--- a/demo/setupFlask.sh
+++ b/demo/setupFlask.sh
@ -0,0 +1,14 @@
 #!/bin/bash
 # Prepare the working directory and start the Flask voice-changer server.
 # Usage: setupFlask.sh <config> <model>
 echo "config: $1"
 echo "model: $2"
 # Copy everything under /resources into the current directory.
 cp -r /resources/* .
 # If a setting.json is now present, publish it to the frontend assets.
 if [[ -e ./setting.json ]]; then
  cp ./setting.json ../frontend/dist/assets/setting.json
 fi
 pip install flask
 pip install flask_cors
 # Arguments are quoted so paths containing spaces or glob characters are
 # passed to the server unchanged.
 python3 serverFlask.py 8080 "$1" "$2"
--- a/docs/assets/setting.json
+++ b/docs/assets/setting.json
@ -1,8 +1,10 @@
 {
    "app_title": "voice-changer",
    "majar_mode": "docker",
    "voice_changer_server_url": "http://localhost:8080/test",
    "sample_rate": 48000,
    "buffer_size": 1024,
    "prefix_chunk_size": 24,
    "chunk_size": 24,
    "speaker_ids": [100, 107, 101, 102, 103],
    "speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"],
--- a/docs/audiolet/index.js
+++ b/docs/audiolet/index.js
@ -0,0 +1 @@
 (() => {
   "use strict";

   // Length of each Float32Array frame queued for playback.
   const FRAME_LENGTH = 128;

   /** Views a message payload as 16-bit samples and scales them to floats. */
   const toFloat32 = (payload) => {
     const pcm = new Int16Array(payload);
     const floats = new Float32Array(pcm.length);
     pcm.forEach((sample, index) => {
       // Int16Array elements never reach 32768, so only the `/ 32767` arm is
       // taken; the expression is kept exactly as in the original build.
       floats[index] = sample >= 32768 ? -(65536 - sample) / 32768 : sample / 32767;
     });
     return floats;
   };

   /**
    * Blends `previous` into `current` in place: weight 0 for the first third,
    * a linear ramp (clamped to 1) from there, weight 1 past two thirds.
    * Does nothing when there is no previous data or the lengths differ.
    */
   const crossFade = (previous, current, rampDivisor) => {
     if (!previous || previous.length !== current.length) {
       return;
     }
     const third = previous.length / 3;
     for (let i = 0; i < previous.length; i++) {
       let weight;
       if (i < third) {
         weight = 0;
       } else if (i > third * 2) {
         weight = 1;
       } else {
         weight = Math.min((i - third) / (previous.length / rampDivisor), 1);
       }
       current[i] = previous[i] * (1 - weight) + current[i] * weight;
     }
   };

   /** Once the queue holds more than `limit` frames, drops all but the last two. */
   const truncateQueue = (queue, limit) => {
     if (queue.length <= limit) {
       return;
     }
     console.log("Buffer truncated");
     while (queue.length > 2) {
       queue.shift();
     }
   };

   /**
    * Doubles the sample count by inserting the average of each sample and its
    * successor, and pushes every completely filled frame onto the queue.
    */
   const enqueueFrames = (queue, samples) => {
     let frame;
     for (let i = 0; i < samples.length; i++) {
       const pos = (2 * i) % FRAME_LENGTH;
       if (pos === 0) {
         frame = new Float32Array(FRAME_LENGTH);
       }
       const sample = samples[i];
       const next = i + 1 < samples.length ? samples[i + 1] : samples[i];
       frame[pos] = sample;
       frame[pos + 1] = (sample + next) / 2;
       if (frame.length === pos + 2) {
         queue.push(frame);
       }
     }
   };

   /** Audio worklet that plays back frames received through its message port. */
   class VoicePlayerWorkletProcessor extends AudioWorkletProcessor {
     initialized = false;
     playBuffer = [];
     deltaChunkSize = 24;
     bufferSize = 1024;
     prevF32Data = null;

     constructor() {
       super();
       this.initialized = true;
       this.port.onmessage = this.handleMessage.bind(this);
     }

     /**
      * Port message handler. A message with a truthy `deltaSize` only updates
      * `deltaChunkSize`; any other message carries audio in `data`.
      */
     handleMessage(event) {
       if (event.data.deltaSize) {
         this.deltaChunkSize = event.data.deltaSize;
         return;
       }

       const incoming = toFloat32(event.data.data);
       const overlap = (this.deltaChunkSize * this.bufferSize) / 2;
       const previousTail = this.prevF32Data
         ? this.prevF32Data.slice(this.prevF32Data.length - overlap)
         : null;
       const segment = incoming.slice(
         incoming.length - (this.deltaChunkSize * this.bufferSize * 2) / 2,
         incoming.length - overlap,
       );

       crossFade(previousTail, segment, 3);
       truncateQueue(this.playBuffer, 50);
       enqueueFrames(this.playBuffer, segment);
       this.prevF32Data = incoming;
     }

     /**
      * Alternative handler that is not wired to the port in this file: it
      * cross-fades the second half of the previous message into the first
      * half of the new one.
      */
     handleMessage_(event) {
       const incoming = toFloat32(event.data.data);
       const previousHalf = this.prevF32Data
         ? this.prevF32Data.slice(this.prevF32Data.length / 2)
         : null;
       const segment = incoming.slice(0, incoming.length / 2);

       crossFade(previousHalf, segment, 100);
       truncateQueue(this.playBuffer, 100);
       enqueueFrames(this.playBuffer, segment);
       this.prevF32Data = incoming;
     }

     /** Copies the next queued frame into the first channel of the first output. */
     process(inputs, outputs, parameters) {
       if (!this.initialized) {
         console.log("worklet_process not ready");
         return true;
       }
       if (this.playBuffer.length === 0) {
         console.log("no play buffer");
         return true;
       }
       const frame = this.playBuffer.shift();
       outputs[0][0].set(frame);
       return true;
     }
   }

   registerProcessor("voice-player-worklet-processor", VoicePlayerWorkletProcessor);
 })();
--- a/docs/index.js
+++ b/docs/index.js
--- a/start2.sh
+++ b/start2.sh
@ -2,7 +2,7 @@
 # 参考:https://programwiz.org/2022/03/22/how-to-write-shell-script-for-option-parsing/
-DOCKER_IMAGE=dannadori/voice-changer:20220829_110113
+DOCKER_IMAGE=dannadori/voice-changer:20220831_151141
 TENSORBOARD_PORT=6006
 VOICE_CHANGER_PORT=8080
--- a/template/setting.json
+++ b/template/setting.json
@ -1,8 +1,10 @@
 {
    "app_title": "voice-changer",
    "majar_mode": "docker",
    "voice_changer_server_url": "http://localhost:8080/test",
    "sample_rate": 48000,
    "buffer_size": 1024,
    "prefix_chunk_size": 24,
    "chunk_size": 24,
    "speaker_ids": [100, 107, 101, 102, 103],
    "speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"],
@ -11,7 +13,7 @@
    "vf_enable": true,
    "voice_changer_mode": "realtime",
    "gpu": 0,
-    "available_gpus": [-1, 0, 1, 2, 3, 4, 5, 100, 200],
+    "available_gpus": [-1, 0, 1, 2, 3, 4],
    "avatar": {
        "motion_capture_face": true,
        "motion_capture_upperbody": true,
@ -26,6 +28,8 @@
    "advance": {
        "avatar_draw_skip_rate": 3,
        "screen_draw_skip_rate": 3,
-        "visualizer_draw_skip_rate": 3
+        "visualizer_draw_skip_rate": 3,
        "cross_fade_lower_value": 0.1,
        "cross_fade_overlap_rate": 0.03
    }
 }
--- a/template/setting_colab.json
+++ b/template/setting_colab.json
@ -0,0 +1,35 @@
 {
    "app_title": "voice-changer",
    "majar_mode": "colab",
    "voice_changer_server_url": "http://localhost:8080/test",
    "sample_rate": 48000,
    "buffer_size": 1024,
    "prefix_chunk_size": 24,
    "chunk_size": 24,
    "speaker_ids": [100, 107, 101, 102, 103],
    "speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"],
    "src_id": 107,
    "dst_id": 100,
    "vf_enable": true,
    "voice_changer_mode": "realtime",
    "gpu": 0,
    "available_gpus": [-1, 0, 1, 2, 3, 4],
    "avatar": {
        "motion_capture_face": true,
        "motion_capture_upperbody": true,
        "lip_overwrite_with_voice": true,
        "avatar_url": "./assets/vrm/zundamon/zundamon.vrm",
        "backgournd_image_url": "./assets/images/bg_natural_sougen.jpg",
        "background_color": "#0000dd",
        "chroma_key": "#0000dd",
        "avatar_canvas_size": [1280, 720],
        "screen_canvas_size": [1280, 720]
    },
    "advance": {
        "avatar_draw_skip_rate": 3,
        "screen_draw_skip_rate": 3,
        "visualizer_draw_skip_rate": 3,
        "cross_fade_lower_value": 0.1,
        "cross_fade_overlap_rate": 0.03
    }
 }
--- a/trainer/Dockerfile
+++ b/trainer/Dockerfile
@ -1,4 +1,4 @@
-FROM dannadori/voice-changer-internal:20220829_105932 as front
+FROM dannadori/voice-changer-internal:20220831_150941 as front
 FROM debian:bullseye-slim as base
 ARG DEBIAN_FRONTEND=noninteractive
		`@ -0,0 +1 @@`
							(() => {
								"use strict";

								/**
								 * Audio worklet that plays back frames received through its message port.
								 *
								 * This copy had lost every `*` operator (for example `2e%128` and
								 * `s[e](1-t)`), which made it unparsable; the multiplications are restored.
								 */
								class VoicePlayerWorkletProcessor extends AudioWorkletProcessor {
									initialized = false;
									playBuffer = [];
									deltaChunkSize = 24;
									bufferSize = 1024;
									prevF32Data = null;

									constructor() {
										super();
										this.initialized = true;
										this.port.onmessage = this.handleMessage.bind(this);
									}

									/**
									 * Port message handler. A message with a truthy `deltaSize` only updates
									 * `deltaChunkSize`; any other message carries 16-bit audio in `data`.
									 */
									handleMessage(event) {
										if (event.data.deltaSize) {
											this.deltaChunkSize = event.data.deltaSize;
											return;
										}
										const pcm = new Int16Array(event.data.data);
										const f32Data = new Float32Array(pcm.length);
										pcm.forEach((sample, index) => {
											f32Data[index] = sample >= 32768 ? -(65536 - sample) / 32768 : sample / 32767;
										});

										// Tail of the previous message that overlaps the segment played now.
										let prevTail = this.prevF32Data
											? this.prevF32Data.slice(this.prevF32Data.length - this.deltaChunkSize * this.bufferSize / 2)
											: null;
										const segment = f32Data.slice(
											f32Data.length - this.deltaChunkSize * this.bufferSize * 2 / 2,
											f32Data.length - this.deltaChunkSize * this.bufferSize / 2
										);
										if (prevTail?.length !== segment.length) {
											prevTail = null;
										}
										if (prevTail) {
											// Cross-fade: previous data for the first third, a linear ramp in
											// the middle third, new data for the last third.
											for (let i = 0; i < prevTail.length; i++) {
												let rate = 0;
												if (i < prevTail.length / 3) {
													rate = 0;
												} else if (i > prevTail.length / 3 * 2) {
													rate = 1;
												} else {
													const offset = i - prevTail.length / 3;
													rate = Math.min(offset / (prevTail.length / 3), 1);
												}
												const fromPrev = prevTail[i] * (1 - rate);
												const fromCur = segment[i] * rate;
												segment[i] = fromPrev + fromCur;
											}
										}

										if (this.playBuffer.length > 50) {
											console.log("Buffer truncated");
											while (this.playBuffer.length > 2) {
												this.playBuffer.shift();
											}
										}

										// Double the sample count by inserting the average of each sample
										// and its successor; queue every completely filled 128-sample frame.
										let frame;
										for (let i = 0; i < segment.length; i++) {
											const pos = 2 * i % 128;
											if (pos === 0) {
												frame = new Float32Array(128);
											}
											const cur = segment[i];
											const next = i + 1 < segment.length ? segment[i + 1] : segment[i];
											frame[pos] = cur;
											frame[pos + 1] = (cur + next) / 2;
											if (frame.length === pos + 2) {
												this.playBuffer.push(frame);
											}
										}
										this.prevF32Data = f32Data;
									}

									/**
									 * Alternative handler that is not wired to the port in this file: it
									 * cross-fades the second half of the previous message into the first
									 * half of the new one.
									 */
									handleMessage_(event) {
										const pcm = new Int16Array(event.data.data);
										const f32Data = new Float32Array(pcm.length);
										pcm.forEach((sample, index) => {
											f32Data[index] = sample >= 32768 ? -(65536 - sample) / 32768 : sample / 32767;
										});

										let prevHalf = this.prevF32Data
											? this.prevF32Data.slice(this.prevF32Data.length / 2)
											: null;
										const segment = f32Data.slice(0, f32Data.length / 2);
										if (prevHalf?.length !== segment.length) {
											prevHalf = null;
										}
										if (prevHalf) {
											for (let i = 0; i < prevHalf.length; i++) {
												let rate = 0;
												if (i < prevHalf.length / 3) {
													rate = 0;
												} else if (i > prevHalf.length / 3 * 2) {
													rate = 1;
												} else {
													const offset = i - prevHalf.length / 3;
													rate = Math.min(offset / (prevHalf.length / 100), 1);
												}
												const fromPrev = prevHalf[i] * (1 - rate);
												const fromCur = segment[i] * rate;
												segment[i] = fromPrev + fromCur;
											}
										}

										if (this.playBuffer.length > 100) {
											console.log("Buffer truncated");
											while (this.playBuffer.length > 2) {
												this.playBuffer.shift();
											}
										}

										let frame;
										for (let i = 0; i < segment.length; i++) {
											const pos = 2 * i % 128;
											if (pos === 0) {
												frame = new Float32Array(128);
											}
											const cur = segment[i];
											const next = i + 1 < segment.length ? segment[i + 1] : segment[i];
											frame[pos] = cur;
											frame[pos + 1] = (cur + next) / 2;
											if (frame.length === pos + 2) {
												this.playBuffer.push(frame);
											}
										}
										this.prevF32Data = f32Data;
									}

									/** Copies the next queued frame into the first channel of the first output. */
									process(inputs, outputs, parameters) {
										if (!this.initialized) {
											console.log("worklet_process not ready");
											return true;
										}
										if (this.playBuffer.length === 0) {
											console.log("no play buffer");
											return true;
										}
										const frame = this.playBuffer.shift();
										outputs[0][0].set(frame);
										return true;
									}
								}

								registerProcessor("voice-player-worklet-processor", VoicePlayerWorkletProcessor);
							})();