Merge pull request #15 from w-okada/dev

Dev
This commit is contained in:
w-okada 2022-08-31 15:22:10 +09:00 committed by GitHub
commit 92765157ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 782 additions and 10 deletions

580
VoiceChangerDemo.ipynb Normal file
View File

@ -0,0 +1,580 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "VoiceChangerDemo",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyN+8irLJYUFlwMPzvHMSJof",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/w-okada/voice-changer/blob/dev/VoiceChangerDemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "57p7pA1Qb5wa"
}
},
{
"cell_type": "code",
"source": [
"!nvidia-smi"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vV1t7PBRm-o6",
"outputId": "60fc80b2-a39e-4840-88c1-0d8d483a36ca"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Wed Aug 31 06:14:56 2022 \n",
"+-----------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 460.32.03 Driver Version: 460.32.03 CUDA Version: 11.2 |\n",
"|-------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|===============================+======================+======================|\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 72C P8 12W / 70W | 0MiB / 15109MiB | 0% Default |\n",
"| | | N/A |\n",
"+-------------------------------+----------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=============================================================================|\n",
"| No running processes found |\n",
"+-----------------------------------------------------------------------------+\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"CONFIG=\"/content/drive/MyDrive/VoiceChanger/config.json\"\n",
"MODEL=\"/content/drive/MyDrive/VoiceChanger/G_326000.pth\""
],
"metadata": {
"id": "nSXATMWYb4Ik"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2wxD-gRSMU5R",
"outputId": "83bb80fa-9ced-43e2-a304-d53a3501b142"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
],
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "code",
"source": [
"!git clone https://github.com/w-okada/voice-changer.git\n",
"%cd voice-changer/demo/\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "86wTFmqsNMnD",
"outputId": "3fc68f14-b6b7-48bb-e285-5bed78e74f26"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'voice-changer'...\n",
"remote: Enumerating objects: 266, done.\u001b[K\n",
"remote: Counting objects: 100% (266/266), done.\u001b[K\n",
"remote: Compressing objects: 100% (189/189), done.\u001b[K\n",
"remote: Total 266 (delta 123), reused 194 (delta 65), pack-reused 0\u001b[K\n",
"Receiving objects: 100% (266/266), 19.11 MiB | 35.44 MiB/s, done.\n",
"Resolving deltas: 100% (123/123), done.\n",
"/content/voice-changer/demo\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!git checkout dev\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CBsogR-zWH4r",
"outputId": "f4c9737b-831d-4938-d387-caf07693030e"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Branch 'dev' set up to track remote branch 'dev' from 'origin'.\n",
"Switched to a new branch 'dev'\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!mkdir -p ../frontend/dist\n",
"!cp -r ../docs/* ../frontend/dist/\n",
"!ls ../frontend/dist\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "uCEKf3_JNoyq",
"outputId": "746e1946-5c3a-49af-df26-d86149f8adb1"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"assets\t coffee.png index.html index.js.LICENSE.txt\n",
"audiolet favicon.ico index.js\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!cp ../template/setting_colab.json ../frontend/dist/assets/setting.json"
],
"metadata": {
"id": "Bn4kV8TgXp8i"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!cat ../frontend/dist/assets/setting.json"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pjxPsOOaXXTj",
"outputId": "1bf85102-87ed-462c-e732-cffb878d95f3"
},
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\n",
" \"app_title\": \"voice-changer\",\n",
" \"majar_mode\": \"colab\",\n",
" \"voice_changer_server_url\": \"http://localhost:8080/test\",\n",
" \"sample_rate\": 48000,\n",
" \"buffer_size\": 1024,\n",
" \"prefix_chunk_size\": 24,\n",
" \"chunk_size\": 24,\n",
" \"speaker_ids\": [100, 107, 101, 102, 103],\n",
" \"speaker_names\": [\"ずんだもん\", \"user\", \"そら\", \"めたん\", \"つぐみ\"],\n",
" \"src_id\": 107,\n",
" \"dst_id\": 100,\n",
" \"vf_enable\": true,\n",
" \"voice_changer_mode\": \"realtime\",\n",
" \"gpu\": 0,\n",
" \"available_gpus\": [-1, 0, 1, 2, 3, 4],\n",
" \"avatar\": {\n",
" \"motion_capture_face\": true,\n",
" \"motion_capture_upperbody\": true,\n",
" \"lip_overwrite_with_voice\": true,\n",
" \"avatar_url\": \"./assets/vrm/zundamon/zundamon.vrm\",\n",
" \"backgournd_image_url\": \"./assets/images/bg_natural_sougen.jpg\",\n",
" \"background_color\": \"#0000dd\",\n",
" \"chroma_key\": \"#0000dd\",\n",
" \"avatar_canvas_size\": [1280, 720],\n",
" \"screen_canvas_size\": [1280, 720]\n",
" },\n",
" \"advance\": {\n",
" \"avatar_draw_skip_rate\": 3,\n",
" \"screen_draw_skip_rate\": 3,\n",
" \"visualizer_draw_skip_rate\": 3,\n",
" \"cross_fade_lower_value\": 0.1,\n",
" \"cross_fade_overlap_rate\": 0.03\n",
" }\n",
"}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# 手作業\n",
"\n",
"・configとモデルをdemoフォルダにコピー\n",
"\n",
"・docsをfrontendに変更\n",
"\n",
"・setting.jsonをfrontendにコピー\n"
],
"metadata": {
"id": "8Na2PbLZSWgZ"
}
},
{
"cell_type": "code",
"source": [
"!apt-get install -y espeak libsndfile1-dev\n",
"!pip install flask\n",
"!pip install python-socketio\n",
"!pip install eventlet\n",
"!pip install unidecode\n",
"!pip install phonemizer\n",
"!pip install retry\n",
"!pip install flask\n",
"!pip install flask_cors\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LwZAAuqxX7yY",
"outputId": "c67b2741-7a1e-448d-abf9-7b8d8f5e3d15"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Reading package lists... Done\n",
"Building dependency tree \n",
"Reading state information... Done\n",
"libsndfile1-dev is already the newest version (1.0.28-4ubuntu0.18.04.2).\n",
"The following package was automatically installed and is no longer required:\n",
" libnvidia-common-460\n",
"Use 'apt autoremove' to remove it.\n",
"The following additional packages will be installed:\n",
" espeak-data libespeak1 libportaudio2 libsonic0\n",
"The following NEW packages will be installed:\n",
" espeak espeak-data libespeak1 libportaudio2 libsonic0\n",
"0 upgraded, 5 newly installed, 0 to remove and 20 not upgraded.\n",
"Need to get 1,219 kB of archives.\n",
"After this operation, 3,031 kB of additional disk space will be used.\n",
"Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudio2 amd64 19.6.0-1 [64.6 kB]\n",
"Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libsonic0 amd64 0.2.0-6 [13.4 kB]\n",
"Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak-data amd64 1.48.04+dfsg-5 [934 kB]\n",
"Get:4 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libespeak1 amd64 1.48.04+dfsg-5 [145 kB]\n",
"Get:5 http://archive.ubuntu.com/ubuntu bionic/universe amd64 espeak amd64 1.48.04+dfsg-5 [61.6 kB]\n",
"Fetched 1,219 kB in 1s (1,636 kB/s)\n",
"Selecting previously unselected package libportaudio2:amd64.\n",
"(Reading database ... 155676 files and directories currently installed.)\n",
"Preparing to unpack .../libportaudio2_19.6.0-1_amd64.deb ...\n",
"Unpacking libportaudio2:amd64 (19.6.0-1) ...\n",
"Selecting previously unselected package libsonic0:amd64.\n",
"Preparing to unpack .../libsonic0_0.2.0-6_amd64.deb ...\n",
"Unpacking libsonic0:amd64 (0.2.0-6) ...\n",
"Selecting previously unselected package espeak-data:amd64.\n",
"Preparing to unpack .../espeak-data_1.48.04+dfsg-5_amd64.deb ...\n",
"Unpacking espeak-data:amd64 (1.48.04+dfsg-5) ...\n",
"Selecting previously unselected package libespeak1:amd64.\n",
"Preparing to unpack .../libespeak1_1.48.04+dfsg-5_amd64.deb ...\n",
"Unpacking libespeak1:amd64 (1.48.04+dfsg-5) ...\n",
"Selecting previously unselected package espeak.\n",
"Preparing to unpack .../espeak_1.48.04+dfsg-5_amd64.deb ...\n",
"Unpacking espeak (1.48.04+dfsg-5) ...\n",
"Setting up libportaudio2:amd64 (19.6.0-1) ...\n",
"Setting up espeak-data:amd64 (1.48.04+dfsg-5) ...\n",
"Setting up libsonic0:amd64 (0.2.0-6) ...\n",
"Setting up libespeak1:amd64 (1.48.04+dfsg-5) ...\n",
"Setting up espeak (1.48.04+dfsg-5) ...\n",
"Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n",
"Processing triggers for libc-bin (2.27-3ubuntu1.5) ...\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: flask in /usr/local/lib/python3.7/dist-packages (1.1.4)\n",
"Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from flask) (7.1.2)\n",
"Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from flask) (2.11.3)\n",
"Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from flask) (1.1.0)\n",
"Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from flask) (1.0.1)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->flask) (2.0.1)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting python-socketio\n",
" Downloading python_socketio-5.7.1-py3-none-any.whl (56 kB)\n",
"\u001b[K |████████████████████████████████| 56 kB 5.0 MB/s \n",
"\u001b[?25hCollecting bidict>=0.21.0\n",
" Downloading bidict-0.22.0-py3-none-any.whl (36 kB)\n",
"Collecting python-engineio>=4.3.0\n",
" Downloading python_engineio-4.3.4-py3-none-any.whl (52 kB)\n",
"\u001b[K |████████████████████████████████| 52 kB 2.0 MB/s \n",
"\u001b[?25hInstalling collected packages: python-engineio, bidict, python-socketio\n",
"Successfully installed bidict-0.22.0 python-engineio-4.3.4 python-socketio-5.7.1\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting eventlet\n",
" Downloading eventlet-0.33.1-py2.py3-none-any.whl (226 kB)\n",
"\u001b[K |████████████████████████████████| 226 kB 33.3 MB/s \n",
"\u001b[?25hCollecting dnspython>=1.15.0\n",
" Downloading dnspython-2.2.1-py3-none-any.whl (269 kB)\n",
"\u001b[K |████████████████████████████████| 269 kB 52.5 MB/s \n",
"\u001b[?25hRequirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from eventlet) (1.15.0)\n",
"Requirement already satisfied: greenlet>=0.3 in /usr/local/lib/python3.7/dist-packages (from eventlet) (1.1.3)\n",
"Installing collected packages: dnspython, eventlet\n",
"Successfully installed dnspython-2.2.1 eventlet-0.33.1\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting unidecode\n",
" Downloading Unidecode-1.3.4-py3-none-any.whl (235 kB)\n",
"\u001b[K |████████████████████████████████| 235 kB 28.6 MB/s \n",
"\u001b[?25hInstalling collected packages: unidecode\n",
"Successfully installed unidecode-1.3.4\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting phonemizer\n",
" Downloading phonemizer-3.2.1-py3-none-any.whl (90 kB)\n",
"\u001b[K |████████████████████████████████| 90 kB 9.5 MB/s \n",
"\u001b[?25hCollecting segments\n",
" Downloading segments-2.2.1-py2.py3-none-any.whl (15 kB)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from phonemizer) (1.1.0)\n",
"Collecting dlinfo\n",
" Downloading dlinfo-1.2.1-py3-none-any.whl (3.6 kB)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from phonemizer) (4.1.1)\n",
"Requirement already satisfied: attrs>=18.1 in /usr/local/lib/python3.7/dist-packages (from phonemizer) (22.1.0)\n",
"Collecting csvw>=1.5.6\n",
" Downloading csvw-3.1.1-py2.py3-none-any.whl (56 kB)\n",
"\u001b[K |████████████████████████████████| 56 kB 5.7 MB/s \n",
"\u001b[?25hRequirement already satisfied: regex in /usr/local/lib/python3.7/dist-packages (from segments->phonemizer) (2022.6.2)\n",
"Collecting clldutils>=1.7.3\n",
" Downloading clldutils-3.12.0-py2.py3-none-any.whl (197 kB)\n",
"\u001b[K |████████████████████████████████| 197 kB 63.8 MB/s \n",
"\u001b[?25hRequirement already satisfied: python-dateutil in /usr/local/lib/python3.7/dist-packages (from clldutils>=1.7.3->segments->phonemizer) (2.8.2)\n",
"Requirement already satisfied: tabulate>=0.7.7 in /usr/local/lib/python3.7/dist-packages (from clldutils>=1.7.3->segments->phonemizer) (0.8.10)\n",
"Collecting colorlog\n",
" Downloading colorlog-6.7.0-py2.py3-none-any.whl (11 kB)\n",
"Collecting colorama\n",
" Downloading colorama-0.4.5-py2.py3-none-any.whl (16 kB)\n",
"Requirement already satisfied: jsonschema in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (4.3.3)\n",
"Collecting rdflib\n",
" Downloading rdflib-6.2.0-py3-none-any.whl (500 kB)\n",
"\u001b[K |████████████████████████████████| 500 kB 53.6 MB/s \n",
"\u001b[?25hRequirement already satisfied: babel in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (2.10.3)\n",
"Collecting language-tags\n",
" Downloading language_tags-1.1.0-py2.py3-none-any.whl (210 kB)\n",
"\u001b[K |████████████████████████████████| 210 kB 65.4 MB/s \n",
"\u001b[?25hCollecting rfc3986<2\n",
" Downloading rfc3986-1.5.0-py2.py3-none-any.whl (31 kB)\n",
"Requirement already satisfied: uritemplate>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (3.0.1)\n",
"Collecting isodate\n",
" Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)\n",
"\u001b[K |████████████████████████████████| 41 kB 763 kB/s \n",
"\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from csvw>=1.5.6->segments->phonemizer) (2.23.0)\n",
"Requirement already satisfied: pytz>=2015.7 in /usr/local/lib/python3.7/dist-packages (from babel->csvw>=1.5.6->segments->phonemizer) (2022.2.1)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from isodate->csvw>=1.5.6->segments->phonemizer) (1.15.0)\n",
"Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (0.18.1)\n",
"Requirement already satisfied: importlib-resources>=1.4.0 in /usr/local/lib/python3.7/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (5.9.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from jsonschema->csvw>=1.5.6->segments->phonemizer) (4.12.0)\n",
"Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources>=1.4.0->jsonschema->csvw>=1.5.6->segments->phonemizer) (3.8.1)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer) (57.4.0)\n",
"Requirement already satisfied: pyparsing in /usr/local/lib/python3.7/dist-packages (from rdflib->csvw>=1.5.6->segments->phonemizer) (3.0.9)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (2022.6.15)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->csvw>=1.5.6->segments->phonemizer) (1.24.3)\n",
"Installing collected packages: isodate, rfc3986, rdflib, language-tags, colorama, csvw, colorlog, clldutils, segments, dlinfo, phonemizer\n",
"Successfully installed clldutils-3.12.0 colorama-0.4.5 colorlog-6.7.0 csvw-3.1.1 dlinfo-1.2.1 isodate-0.6.1 language-tags-1.1.0 phonemizer-3.2.1 rdflib-6.2.0 rfc3986-1.5.0 segments-2.2.1\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting retry\n",
" Downloading retry-0.9.2-py2.py3-none-any.whl (8.0 kB)\n",
"Requirement already satisfied: decorator>=3.4.2 in /usr/local/lib/python3.7/dist-packages (from retry) (4.4.2)\n",
"Requirement already satisfied: py<2.0.0,>=1.4.26 in /usr/local/lib/python3.7/dist-packages (from retry) (1.11.0)\n",
"Installing collected packages: retry\n",
"Successfully installed retry-0.9.2\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: flask in /usr/local/lib/python3.7/dist-packages (1.1.4)\n",
"Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from flask) (1.1.0)\n",
"Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from flask) (2.11.3)\n",
"Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from flask) (1.0.1)\n",
"Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from flask) (7.1.2)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->flask) (2.0.1)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting flask_cors\n",
" Downloading Flask_Cors-3.0.10-py2.py3-none-any.whl (14 kB)\n",
"Requirement already satisfied: Flask>=0.9 in /usr/local/lib/python3.7/dist-packages (from flask_cors) (1.1.4)\n",
"Requirement already satisfied: Six in /usr/local/lib/python3.7/dist-packages (from flask_cors) (1.15.0)\n",
"Requirement already satisfied: itsdangerous<2.0,>=0.24 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (1.1.0)\n",
"Requirement already satisfied: Jinja2<3.0,>=2.10.1 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (2.11.3)\n",
"Requirement already satisfied: Werkzeug<2.0,>=0.15 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (1.0.1)\n",
"Requirement already satisfied: click<8.0,>=5.1 in /usr/local/lib/python3.7/dist-packages (from Flask>=0.9->flask_cors) (7.1.2)\n",
"Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2<3.0,>=2.10.1->Flask>=0.9->flask_cors) (2.0.1)\n",
"Installing collected packages: flask-cors\n",
"Successfully installed flask-cors-3.0.10\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"get_ipython().system_raw(f'python3 serverFlask.py 8082 {CONFIG} {MODEL} >foo 2>&1 &')"
],
"metadata": {
"id": "iNOAB7zISI6J"
},
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!cat foo"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "chu06KpAjEK6",
"outputId": "887c2d50-c49f-4a22-f0d0-8a3667511466"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[2022-08-31 06:17:58,669] INFO in serverFlask: INITIALIZE MODEL\n",
"[2022-08-31 06:18:08,764] INFO in utils: Loaded checkpoint '/content/drive/MyDrive/VoiceChanger/G_326000.pth' (iteration 1136)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from google.colab import output\n",
"\n",
"output.serve_kernel_port_as_window(8082)"
],
"metadata": {
"id": "nkRjZm95l87C",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
},
"outputId": "abf57f92-5cb6-4325-b64a-095d42f561d5"
},
"execution_count": 27,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"<IPython.core.display.Javascript object>"
],
"application/javascript": [
"(async (port, path, text, element) => {\n",
" if (!google.colab.kernel.accessAllowed) {\n",
" return;\n",
" }\n",
" element.appendChild(document.createTextNode(''));\n",
" const url = await google.colab.kernel.proxyPort(port);\n",
" const anchor = document.createElement('a');\n",
" anchor.href = new URL(path, url).toString();\n",
" anchor.target = '_blank';\n",
" anchor.setAttribute('data-href', url + path);\n",
" anchor.textContent = text;\n",
" element.appendChild(anchor);\n",
" })(8082, \"/\", \"https://localhost:8082/\", window.element)"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"! ls ../frontend/dist/index.html"
],
"metadata": {
"id": "DKWni4moSyzO",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b5635a1e-6ac6-41db-a706-dc3e5fb866a5"
},
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"../frontend/dist/index.html\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "3hwJmseXZhJY"
},
"execution_count": null,
"outputs": []
}
]
}

BIN
demo/dummy.wav Executable file

Binary file not shown.

136
demo/serverFlask.py Executable file
View File

@ -0,0 +1,136 @@
from flask import Flask, request, Markup, abort, jsonify
from flask_cors import CORS
import logging
from logging.config import dictConfig
import sys
import base64
import torch
import numpy as np
from scipy.io.wavfile import write, read
from datetime import datetime
import traceback
import struct
sys.path.append("mod")
sys.path.append("mod/text")
import utils
from data_utils import TextAudioSpeakerLoader, TextAudioSpeakerCollate
from models import SynthesizerTrn
from text.symbols import symbols
# Logging setup: a single StreamHandler bound to Flask's WSGI error stream is
# attached to the root logger, which is set to INFO.
_LOG_FORMATTERS = {
    'default': {
        'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
    },
}
_LOG_HANDLERS = {
    'wsgi': {
        'class': 'logging.StreamHandler',
        'stream': 'ext://flask.logging.wsgi_errors_stream',
        'formatter': 'default',
    },
}
dictConfig({
    'version': 1,
    'formatters': _LOG_FORMATTERS,
    'handlers': _LOG_HANDLERS,
    'root': {'level': 'INFO', 'handlers': ['wsgi']},
})

# Files under ../frontend/dist are served as static content from the URL root.
app = Flask(__name__, static_folder="../frontend/dist", static_url_path='/')

# Accept cross-origin requests from any origin on every route.
CORS(app, resources={r"/*": {"origins": "*"}})
class VoiceChanger():
    """Holds a SynthesizerTrn generator and converts audio between speaker ids."""

    def __init__(self, config, model):
        """Build the generator from a hyper-parameter file and load its weights.

        Args:
            config: path handed to ``utils.get_hparams_from_file``.
            model: checkpoint path handed to ``utils.load_checkpoint``.
        """
        self.hps = utils.get_hparams_from_file(config)
        self.net_g = SynthesizerTrn(
            len(symbols),
            self.hps.data.filter_length // 2 + 1,
            self.hps.train.segment_size // self.hps.data.hop_length,
            n_speakers=self.hps.data.n_speakers,
            **self.hps.model)
        self.net_g.eval()
        # Number of CUDA devices visible to torch; 0 forces the CPU path.
        self.gpu_num = torch.cuda.device_count()
        print("GPU_NUM:", self.gpu_num)
        utils.load_checkpoint(model, self.net_g, None)

    def _to_device(self, obj, gpu):
        """Move a tensor or module to the CPU (gpu < 0 or no CUDA device) or to CUDA device ``gpu``."""
        if gpu < 0 or self.gpu_num == 0:
            return obj.cpu()
        return obj.cuda(gpu)

    def on_request(self, gpu, srcId, dstId, timestamp, wav):
        """Convert one chunk of audio from speaker ``srcId`` to speaker ``dstId``.

        Args:
            gpu: CUDA device index; a negative value (or no CUDA device being
                available) selects the CPU.
            srcId: source speaker id.
            dstId: target speaker id.
            timestamp: accepted for the caller's bookkeeping; not used here.
            wav: bytes of little-endian int16 samples, or ``0`` to run the
                conversion on ``dummy.wav`` instead.

        Returns:
            numpy int16 array with the converted samples. If the conversion
            raises, the error is printed and an empty int16 array is returned.

        Side effects:
            When ``wav`` is not ``0`` the received samples are written to
            ``logs/received_data.wav`` (24000 Hz).
        """
        if wav == 0:
            _, data = read("dummy.wav")
            unpackedData = data
        else:
            sample_count = len(wav) // struct.calcsize('<h')
            unpackedData = np.array(struct.unpack('<%sh' % sample_count, wav))
            write("logs/received_data.wav", 24000, unpackedData.astype(np.int16))

        # Pre-assign the result so a failed conversion yields empty audio instead
        # of an UnboundLocalError after the except block.
        audio1 = np.zeros(0, dtype=np.float32)
        try:
            with torch.no_grad():
                dataset = TextAudioSpeakerLoader("dummy.txt", self.hps.data, no_use_textfile=True)
                data = dataset.get_audio_text_speaker_pair([unpackedData, srcId, "a"])
                data = TextAudioSpeakerCollate()([data])
                x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
                    self._to_device(t, gpu) for t in data
                ]
                sid_tgt1 = self._to_device(torch.LongTensor([dstId]), gpu)
                net_g = self._to_device(self.net_g, gpu)
                converted = net_g.voice_conversion(
                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_tgt1)[0][0, 0].data
                # Scale by max_wav_value before the int16 cast below.
                audio1 = (converted * self.hps.data.max_wav_value).cpu().float().numpy()
        except Exception as e:
            print("VC PROCESSING!!!! EXCEPTION!!!", e)
            print(traceback.format_exc())

        return audio1.astype(np.int16)
@app.route('/test', methods=['GET', 'POST'])
def test():
    """HTTP endpoint for voice conversion.

    GET:  echoes the ``query`` URL parameter (empty string if absent).
    POST: expects a JSON body with ``gpu``, ``srcId``, ``dstId``, ``timestamp``
          (integers or integer strings) and ``buffer`` (base64 of the raw audio
          bytes). Responds with JSON echoing those four values plus
          ``changedVoiceBase64``, the base64 of the converted int16 samples.

    Error responses carry the exception text with status 400 when the payload
    is malformed and 500 when the conversion itself fails.
    """
    if request.method == 'GET':
        return request.args.get('query', '')
    if request.method != 'POST':
        # Kept outside any try/except so the HTTPException raised by abort()
        # reaches Flask instead of being swallowed.
        return abort(400)

    print("POST REQUEST PROCESSING....")
    try:
        payload = request.json
        gpu = int(payload['gpu'])
        srcId = int(payload['srcId'])
        dstId = int(payload['dstId'])
        timestamp = int(payload['timestamp'])
        buffer = payload['buffer']
        wav = base64.b64decode(buffer)
    except (KeyError, TypeError, ValueError) as e:
        # Missing key, non-JSON body (payload is None), non-integer field or
        # invalid base64 (binascii.Error is a ValueError): client error.
        print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
        print(traceback.format_exc())
        return str(e), 400

    try:
        changedVoice = voiceChanger.on_request(gpu, srcId, dstId, timestamp, wav)
        changedVoiceBase64 = base64.b64encode(changedVoice).decode('utf-8')
        data = {
            "gpu": gpu,
            "srcId": srcId,
            "dstId": dstId,
            "timestamp": timestamp,
            "changedVoiceBase64": changedVoiceBase64
        }
        return jsonify(data)
    except Exception as e:
        print("REQUEST PROCESSING!!!! EXCEPTION!!!", e)
        print(traceback.format_exc())
        return str(e), 500
if __name__ == '__main__':
    # Usage: python3 serverFlask.py PORT CONFIG MODEL
    import os

    args = sys.argv
    if len(args) < 4:
        print("usage: {} PORT CONFIG MODEL".format(args[0]), file=sys.stderr)
        sys.exit(1)
    PORT = int(args[1])
    CONFIG = args[2]
    MODEL = args[3]

    app.logger.info('INITIALIZE MODEL')
    voiceChanger = VoiceChanger(CONFIG, MODEL)
    # Run one conversion on dummy.wav (wav=0) before accepting requests.
    voiceChanger.on_request(0, 0, 0, 0, 0)

    app.logger.info('START APP')
    # The server listens on every interface, so Flask debug mode (interactive
    # debugger plus reloader, which re-executes this script and therefore loads
    # the model a second time) is off unless VOICE_CHANGER_DEBUG=1 is set.
    DEBUG = os.environ.get("VOICE_CHANGER_DEBUG", "") == "1"
    app.run(debug=DEBUG, host='0.0.0.0', port=PORT)

View File

@ -17,7 +17,7 @@ from text.symbols import symbols
class MyCustomNamespace(socketio.Namespace): # 名前空間を設定するクラス class MyCustomNamespace(socketio.Namespace):
def __init__(self, namespace, config, model): def __init__(self, namespace, config, model):
super().__init__(namespace) super().__init__(namespace)
self.hps =utils.get_hparams_from_file(config) self.hps =utils.get_hparams_from_file(config)
@ -36,7 +36,7 @@ class MyCustomNamespace(socketio.Namespace): # 名前空間を設定するクラ
print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid)) print('[{}] connet sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid))
# print('[{}] connet env : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , environ)) # print('[{}] connet env : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , environ))
def on_request_message(self, sid, msg): # 送信してきたクライアントだけにメッセージを送る関数 def on_request_message(self, sid, msg):
# print("MESSGaa", msg) # print("MESSGaa", msg)
gpu = int(msg[0]) gpu = int(msg[0])
srcId = int(msg[1]) srcId = int(msg[1])
@ -88,9 +88,9 @@ if __name__ == '__main__':
print(f"start... PORT:{PORT}, CONFIG:{CONFIG}, MODEL:{MODEL}") print(f"start... PORT:{PORT}, CONFIG:{CONFIG}, MODEL:{MODEL}")
# sio = socketio.Server(cors_allowed_origins='http://localhost:8080') # sio = socketio.Server(cors_allowed_origins='http://localhost:8080')
sio = socketio.Server(cors_allowed_origins='*') sio = socketio.Server(cors_allowed_origins='*')
sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL)) # 名前空間を設定 sio.register_namespace(MyCustomNamespace('/test', CONFIG, MODEL))
app = socketio.WSGIApp(sio,static_files={ app = socketio.WSGIApp(sio,static_files={
'': '../frontend/dist', '': '../frontend/dist',
}) # wsgiサーバーミドルウェア生成 })
eventlet.wsgi.server(eventlet.listen(('0.0.0.0',int(PORT))), app) # wsgiサーバー起動 eventlet.wsgi.server(eventlet.listen(('0.0.0.0',int(PORT))), app)

14
demo/setupFlask.sh Executable file
View File

@ -0,0 +1,14 @@
#!/bin/bash
# Prepare the working directory and start the Flask demo server on port 8080.
#
# Usage: setupFlask.sh CONFIG MODEL
#   CONFIG  path to the model configuration file passed to serverFlask.py
#   MODEL   path to the model checkpoint passed to serverFlask.py

if [[ $# -lt 2 ]]; then
    echo "usage: $0 CONFIG MODEL" >&2
    exit 1
fi

echo "config: $1"
echo "model: $2"

# Copy everything from /resources into the current directory.
cp -r /resources/* .

# A setting.json among the copied files overrides the frontend's bundled one.
if [[ -e ./setting.json ]]; then
    cp ./setting.json ../frontend/dist/assets/setting.json
fi

pip install flask
pip install flask_cors

# Quote the paths so values containing spaces reach the server as one argument each.
python3 serverFlask.py 8080 "$1" "$2"

View File

@ -1,8 +1,10 @@
{ {
"app_title": "voice-changer", "app_title": "voice-changer",
"majar_mode": "docker",
"voice_changer_server_url": "http://localhost:8080/test", "voice_changer_server_url": "http://localhost:8080/test",
"sample_rate": 48000, "sample_rate": 48000,
"buffer_size": 1024, "buffer_size": 1024,
"prefix_chunk_size": 24,
"chunk_size": 24, "chunk_size": 24,
"speaker_ids": [100, 107, 101, 102, 103], "speaker_ids": [100, 107, 101, 102, 103],
"speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"], "speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"],

1
docs/audiolet/index.js Executable file
View File

@ -0,0 +1 @@
/*
 * NOTE(review): this looks like minified build output; edit the original
 * source rather than this file — TODO confirm where that source lives.
 *
 * Defines an AudioWorkletProcessor subclass and registers it under the name
 * "voice-player-worklet-processor".
 *
 * - constructor: binds handleMessage to this.port.onmessage.
 * - handleMessage(e):
 *     * a message with e.data.deltaSize only updates deltaChunkSize;
 *     * otherwise e.data.data is read as Int16 samples and scaled to Float32;
 *     * a slice of the new samples is cross-faded with the tail of the
 *       previous message (first third: previous only, middle third: linear
 *       ramp, last third: new only), skipped when the slice lengths differ;
 *     * if more than 50 frames are queued, the queue is cut down to 2;
 *     * every sample is emitted twice (the sample, then the midpoint to the
 *       next sample) into 128-sample Float32Array frames pushed on playBuffer.
 * - handleMessage_(e): an alternative variant of the above; it is not the
 *   handler bound in the constructor.
 * - process(inputs, outputs): shifts one frame off playBuffer into
 *   outputs[0][0]; logs and outputs nothing new when the queue is empty.
 */
(()=>{"use strict";class e extends AudioWorkletProcessor{initialized=!1;playBuffer=[];deltaChunkSize=24;bufferSize=1024;constructor(){super(),this.initialized=!0,this.port.onmessage=this.handleMessage.bind(this)}prevF32Data=null;handleMessage(e){if(e.data.deltaSize)return void(this.deltaChunkSize=e.data.deltaSize);const t=e.data.data,l=new Int16Array(t),n=new Float32Array(l.length);l.forEach(((e,t)=>{const l=e>=32768?-(65536-e)/32768:e/32767;n[t]=l}));let s=this.prevF32Data?this.prevF32Data.slice(this.prevF32Data.length-this.deltaChunkSize*this.bufferSize/2):null;const h=n.slice(n.length-this.deltaChunkSize*this.bufferSize*2/2,n.length-this.deltaChunkSize*this.bufferSize/2);if(s?.length!==h.length&&(s=null),s)for(let e=0;e<s.length;e++){let t=0;if(e<s.length/3)t=0;else if(e>s.length/3*2)t=1;else{const l=e-s.length/3;t=Math.min(l/(s.length/3),1)}const l=s[e]*(1-t),n=h[e]*t;h[e]=l+n}if(this.playBuffer.length>50)for(console.log("Buffer truncated");this.playBuffer.length>2;)this.playBuffer.shift();let i;for(let e=0;e<h.length;e++){const t=2*e%128;0===t&&(i=new Float32Array(128));const l=h[e],n=e+1<h.length?h[e+1]:h[e];i[t]=l,i[t+1]=(l+n)/2,i.length===t+2&&this.playBuffer.push(i)}this.prevF32Data=n}handleMessage_(e){const t=e.data.data,l=new Int16Array(t),n=new Float32Array(l.length);l.forEach(((e,t)=>{const l=e>=32768?-(65536-e)/32768:e/32767;n[t]=l}));let s=this.prevF32Data?this.prevF32Data.slice(this.prevF32Data.length/2):null;const h=n.slice(0,n.length/2);if(s?.length!==h.length&&(s=null),s)for(let e=0;e<s.length;e++){let t=0;if(e<s.length/3)t=0;else if(e>s.length/3*2)t=1;else{const l=e-s.length/3;t=Math.min(l/(s.length/100),1)}const l=s[e]*(1-t),n=h[e]*t;h[e]=l+n}if(this.playBuffer.length>100)for(console.log("Buffer truncated");this.playBuffer.length>2;)this.playBuffer.shift();let i;for(let e=0;e<h.length;e++){const t=2*e%128;0===t&&(i=new Float32Array(128));const 
l=h[e],n=e+1<h.length?h[e+1]:h[e];i[t]=l,i[t+1]=(l+n)/2,i.length===t+2&&this.playBuffer.push(i)}this.prevF32Data=n}process(e,t,l){if(!this.initialized)return console.log("worklet_process not ready"),!0;if(0===this.playBuffer.length)return console.log("no play buffer"),!0;const n=this.playBuffer.shift();return t[0][0].set(n),!0}}registerProcessor("voice-player-worklet-processor",e)})();

File diff suppressed because one or more lines are too long

View File

@ -2,7 +2,7 @@
# 参考:https://programwiz.org/2022/03/22/how-to-write-shell-script-for-option-parsing/ # 参考:https://programwiz.org/2022/03/22/how-to-write-shell-script-for-option-parsing/
DOCKER_IMAGE=dannadori/voice-changer:20220829_110113 DOCKER_IMAGE=dannadori/voice-changer:20220831_151141
TENSORBOARD_PORT=6006 TENSORBOARD_PORT=6006
VOICE_CHANGER_PORT=8080 VOICE_CHANGER_PORT=8080

View File

@ -1,8 +1,10 @@
{ {
"app_title": "voice-changer", "app_title": "voice-changer",
"majar_mode": "docker",
"voice_changer_server_url": "http://localhost:8080/test", "voice_changer_server_url": "http://localhost:8080/test",
"sample_rate": 48000, "sample_rate": 48000,
"buffer_size": 1024, "buffer_size": 1024,
"prefix_chunk_size": 24,
"chunk_size": 24, "chunk_size": 24,
"speaker_ids": [100, 107, 101, 102, 103], "speaker_ids": [100, 107, 101, 102, 103],
"speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"], "speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"],
@ -11,7 +13,7 @@
"vf_enable": true, "vf_enable": true,
"voice_changer_mode": "realtime", "voice_changer_mode": "realtime",
"gpu": 0, "gpu": 0,
"available_gpus": [-1, 0, 1, 2, 3, 4, 5, 100, 200], "available_gpus": [-1, 0, 1, 2, 3, 4],
"avatar": { "avatar": {
"motion_capture_face": true, "motion_capture_face": true,
"motion_capture_upperbody": true, "motion_capture_upperbody": true,
@ -26,6 +28,8 @@
"advance": { "advance": {
"avatar_draw_skip_rate": 3, "avatar_draw_skip_rate": 3,
"screen_draw_skip_rate": 3, "screen_draw_skip_rate": 3,
"visualizer_draw_skip_rate": 3 "visualizer_draw_skip_rate": 3,
"cross_fade_lower_value": 0.1,
"cross_fade_overlap_rate": 0.03
} }
} }

View File

@ -0,0 +1,35 @@
{
"app_title": "voice-changer",
"majar_mode": "colab",
"voice_changer_server_url": "http://localhost:8080/test",
"sample_rate": 48000,
"buffer_size": 1024,
"prefix_chunk_size": 24,
"chunk_size": 24,
"speaker_ids": [100, 107, 101, 102, 103],
"speaker_names": ["ずんだもん", "user", "そら", "めたん", "つぐみ"],
"src_id": 107,
"dst_id": 100,
"vf_enable": true,
"voice_changer_mode": "realtime",
"gpu": 0,
"available_gpus": [-1, 0, 1, 2, 3, 4],
"avatar": {
"motion_capture_face": true,
"motion_capture_upperbody": true,
"lip_overwrite_with_voice": true,
"avatar_url": "./assets/vrm/zundamon/zundamon.vrm",
"backgournd_image_url": "./assets/images/bg_natural_sougen.jpg",
"background_color": "#0000dd",
"chroma_key": "#0000dd",
"avatar_canvas_size": [1280, 720],
"screen_canvas_size": [1280, 720]
},
"advance": {
"avatar_draw_skip_rate": 3,
"screen_draw_skip_rate": 3,
"visualizer_draw_skip_rate": 3,
"cross_fade_lower_value": 0.1,
"cross_fade_overlap_rate": 0.03
}
}

View File

@ -1,4 +1,4 @@
FROM dannadori/voice-changer-internal:20220829_105932 as front FROM dannadori/voice-changer-internal:20220831_150941 as front
FROM debian:bullseye-slim as base FROM debian:bullseye-slim as base
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive