335 lines
14 KiB
Plaintext
335 lines
14 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "view-in-github",
|
|
"colab_type": "text"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/w-okada/voice-changer/blob/v.2/w_okada's_Voice_Changer_version_2_x.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "wNCGmSXbfZRr"
|
|
},
|
|
"source": [
|
|
"### w-okada's Voice Changer version 2.x | **Google Colab**\n",
|
|
"\n",
|
|
"## READ ME - VERY IMPORTANT\n",
|
|
"This is an attempt to run [Realtime Voice Changer](https://github.com/w-okada/voice-changer) on Google Colab, still not perfect but is totally usable, you can use the following settings for better results:\n",
|
|
"\n",
|
|
"If you're using an index: `f0: RMVPE_ONNX | Chunk: 24000 or higher | Extra: 7680`\\\n",
|
|
"If you're not using an index: `f0: RMVPE_ONNX | Chunk: 24000 or higher | Extra: 7680`\\\n",
|
|
"**Don't forget to select your Colab GPU in the GPU field (<b>Tesla T4</b>, for free users)**\n",
|
|
"> Seems that PTH models perform better than ONNX for now; you can still try ONNX models and see if they satisfy you\n",
|
|
"\n",
|
|
"\n",
|
|
"*You can always [click here](https://rentry.co/VoiceChangerGuide#gpu-chart-for-known-working-chunkextra) to check if these settings are up-to-date*\n",
|
|
"<br><br>\n",
|
|
"\n",
|
|
"---\n",
|
|
"\n",
|
|
"### Always use Colab GPU (**VERY VERY VERY IMPORTANT!**)\n",
|
|
"You need to use a Colab GPU so the Voice Changer can work faster and better\\\n",
|
|
"Use the menu above and click on **Runtime** » **Change runtime type** » **Hardware accelerator** to select a GPU (**T4 is the free one**)\n",
|
|
"\n",
|
|
"---\n",
|
|
"\n",
|
|
"\n",
|
|
"# **Credits and Support**\n",
|
|
"Realtime Voice Changer by [w-okada](https://github.com/w-okada)\\\n",
|
|
"Colab files updated by [rafacasari](https://github.com/Rafacasari)\\\n",
|
|
"Recommended settings by [Raven](https://github.com/ravencutie21)\\\n",
|
|
"Modified again by [Hina](https://github.com/HinaBl)\\\n",
|
|
"Enable FCPE by [TheTrustedComputer](https://github.com/TheTrustedComputer)\n",
|
|
"\n",
|
|
"Need help? [AI Hub Discord](https://discord.gg/aihub) » ***#help-realtime-vc***\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {
|
|
"id": "W2GYWTHWmRIY",
|
|
"outputId": "e4ce3296-81f9-4004-daf0-219a2977077b",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"cellView": "form"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"GPU is available\n",
|
|
"GPU Name: Tesla T4\n",
|
|
"Welcome to ColabMod\n",
|
|
"Mounted at /content/drive\n",
|
|
"Checking latest version...\n",
|
|
"current_version_hash: eae1b7592ec7c44b88730d2044b7bd9a\n",
|
|
"latest_version_hash : de736169a1c6213e63479eda109c1baf\n",
|
|
"hash not match -> download latest version\n",
|
|
" % Total % Received % Xferd Average Speed Time Time Time Current\n",
|
|
" Dload Upload Total Spent Left Speed\n",
|
|
"100 1161 100 1161 0 0 6715 0 --:--:-- --:--:-- --:--:-- 6750\n",
|
|
"100 3028M 100 3028M 0 0 23.8M 0 0:02:07 0:02:07 --:--:-- 53.9M\n",
|
|
"Download is done.\n",
|
|
"/content/drive/MyDrive/vcclient\n",
|
|
"Installing modules... Install is done.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"#=================Updated=================\n",
|
|
"# @title **[1]** Clone repository and install dependencies\n",
|
|
"# @markdown This first step will download the latest version of Voice Changer and install the dependencies. **It can take some time to complete.(~5min)**\n",
|
|
"\n",
|
|
"#@markdown ---\n",
|
|
"# @markdown By using Google Drive, you can avoid re-downloading already downloaded versions.\n",
|
|
"\n",
|
|
"\n",
|
|
"import os\n",
|
|
"import time\n",
|
|
"import subprocess\n",
|
|
"import threading\n",
|
|
"import shutil\n",
|
|
"import base64\n",
|
|
"import codecs\n",
|
|
"import torch\n",
|
|
"import sys\n",
|
|
"\n",
|
|
"from typing import Literal, TypeAlias\n",
|
|
"\n",
|
|
"Mode: TypeAlias = Literal[\"elf\", \"zip\"]\n",
|
|
"mode:Mode=\"elf\"\n",
|
|
"\n",
|
|
"# Configs\n",
|
|
"Run_Cell=0\n",
|
|
"Use_Drive=True #@param {type:\"boolean\"}\n",
|
|
"\n",
|
|
"current_version_hash=None\n",
|
|
"latest_version_hash=None\n",
|
|
"\n",
|
|
"# Check GPU\n",
|
|
"if torch.cuda.is_available():\n",
|
|
" print(\"GPU is available\")\n",
|
|
" print(\"GPU Name:\", torch.cuda.get_device_name(0))\n",
|
|
"else:\n",
|
|
" print(\"GPU is not available\")\n",
|
|
" # sys.exit(\"No GPU available. Change runtime.\")\n",
|
|
"\n",
|
|
"\n",
|
|
"notebook_env=0\n",
|
|
"if os.path.exists('/content'):\n",
|
|
" notebook_env=1\n",
|
|
" print(\"Welcome to ColabMod\")\n",
|
|
" from google.colab import drive\n",
|
|
"\n",
|
|
"elif os.path.exists('/kaggle/working'):\n",
|
|
" notebook_env=2\n",
|
|
" print(\"Welcome to Kaggle Mod\")\n",
|
|
"else:\n",
|
|
" notebook_env=3\n",
|
|
" print(\"Welcome!\")\n",
|
|
"\n",
|
|
"from IPython.display import clear_output, Javascript\n",
|
|
"\n",
|
|
"if notebook_env==1 and Use_Drive==True:\n",
|
|
" work_dir = \"/content/drive/MyDrive/vcclient\"\n",
|
|
" if not os.path.exists('/content/drive'):\n",
|
|
" drive.mount('/content/drive')\n",
|
|
"\n",
|
|
" if not os.path.exists(work_dir):\n",
|
|
" !mkdir -p {work_dir}\n",
|
|
"\n",
|
|
" print(\"Checking latest version...\")\n",
|
|
" if os.path.exists(f'{work_dir}/latest_hash.txt'):\n",
|
|
" current_version_hash = open(f'{work_dir}/latest_hash.txt').read().strip()\n",
|
|
" else:\n",
|
|
" current_version_hash = None\n",
|
|
"\n",
|
|
" !curl -s -L https://huggingface.co/wok000/vcclient000_colab/resolve/main/latest_hash.txt -o latest_hash.txt\n",
|
|
" latest_version_hash = open('latest_hash.txt').read().strip()\n",
|
|
"\n",
|
|
" print(f\"current_version_hash: {current_version_hash}\")\n",
|
|
" print(f\"latest_version_hash : {latest_version_hash}\")\n",
|
|
"\n",
|
|
" if current_version_hash != latest_version_hash:\n",
|
|
" print(f\"hash not match -> download latest version\")\n",
|
|
"\n",
|
|
" latest_hash_path=f'{work_dir}/latest_hash.txt'\n",
|
|
"\n",
|
|
" if mode == \"elf\":\n",
|
|
" !curl -L https://huggingface.co/wok000/vcclient000_colab/resolve/main/vcclient_latest_for_colab -o {work_dir}/vcclient_latest_for_colab\n",
|
|
" elif mode == \"zip\":\n",
|
|
" !curl -L https://huggingface.co/wok000/vcclient000_colab/resolve/main/vcclient_latest_for_colab.zip -o {work_dir}/vcclient_latest_for_colab.zip\n",
|
|
"\n",
|
|
" !cp latest_hash.txt {latest_hash_path}\n",
|
|
" print(\"Download is done.\")\n",
|
|
" else:\n",
|
|
" print(\"hash matched. skip download\")\n",
|
|
"\n",
|
|
"else:\n",
|
|
" work_dir = \"/content\"\n",
|
|
" print(\"Downloading the latest vcclient... \")\n",
|
|
" !curl -s -L https://huggingface.co/wok000/vcclient000_colab/resolve/main/latest_hash.txt -o latest_hash.txt\n",
|
|
" latest_version_hash = open('latest_hash.txt').read().strip()\n",
|
|
"\n",
|
|
" if mode == \"elf\":\n",
|
|
" !curl -L https://huggingface.co/wok000/vcclient000_colab/resolve/main/vcclient_latest_for_colab -o {work_dir}/vcclient_latest_for_colab\n",
|
|
" elif mode == \"zip\":\n",
|
|
" !curl -L https://huggingface.co/wok000/vcclient000_colab/resolve/main/vcclient_latest_for_colab.zip -o {work_dir}/vcclient_latest_for_colab.zip\n",
|
|
"\n",
|
|
" print(\"Download is done.\")\n",
|
|
"\n",
|
|
"if current_version_hash != latest_version_hash and mode == \"zip\":\n",
|
|
" print(f\"Unzip vcclient to {latest_version_hash} ... \")\n",
|
|
" !cd {work_dir} && unzip -q vcclient_latest_for_colab.zip -d {latest_version_hash}\n",
|
|
" print(f\"Unzip is done.\")\n",
|
|
"\n",
|
|
"if mode == \"elf\":\n",
|
|
" %cd {work_dir}\n",
|
|
" !chmod 0700 vcclient_latest_for_colab\n",
|
|
"elif mode == \"zip\":\n",
|
|
" %cd {work_dir}/{latest_version_hash}/main\n",
|
|
" !chmod 0700 main\n",
|
|
"\n",
|
|
"print(\"Installing modules... \",end=\"\")\n",
|
|
"!sudo apt-get install -y libportaudio2 > /dev/null 2>&1\n",
|
|
"!pip install pyngrok > /dev/null 2>&1\n",
|
|
"print(\"Install is done.\")\n",
|
|
"\n",
|
|
"Run_Cell=1\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {
|
|
"id": "s7mYqKtW6VOI",
|
|
"outputId": "e86ad79b-cb62-4ea5-a8d9-dc7b9ca9e6ed",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 148
|
|
},
|
|
"cellView": "form"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"--------- SERVER READY! ---------\n",
|
|
"Your server is available. elapsed: 173sec\n",
|
|
"https://2bj201er26o-496ff2e9c6d22116-8003-colab.googleusercontent.com/\n",
|
|
"---------------------------------\n"
|
|
]
|
|
},
|
|
{
|
|
"output_type": "display_data",
|
|
"data": {
|
|
"text/plain": [
|
|
"<IPython.lib.display.Audio object>"
|
|
],
|
|
"text/html": [
|
|
"\n",
|
|
" <audio controls=\"controls\" autoplay=\"autoplay\">\n",
|
|
" <source src=\"https://huggingface.co/wok000/voices/resolve/main/vcclient001_vctk229_gpt-sovits_vcclient-ready.wav\" type=\"audio/x-wav\" />\n",
|
|
" Your browser does not support the audio element.\n",
|
|
" </audio>\n",
|
|
" "
|
|
]
|
|
},
|
|
"metadata": {}
|
|
}
|
|
],
|
|
"source": [
|
|
"# @title **[2]** Start server\n",
|
|
"# @markdown This cell will start the server, the first time that you run it will download the models, so it can take a while (2~4 minutes)\n",
|
|
"\n",
|
|
"#@markdown - Options:\n",
|
|
"ClearConsole = True # @param {type:\"boolean\"}\n",
|
|
"Play_Notification = True # @param {type:\"boolean\"}\n",
|
|
"\n",
|
|
"PORT=8003\n",
|
|
"\n",
|
|
"LOG_FILE = f\"/content/LOG_FILE_{PORT}\"\n",
|
|
"\n",
|
|
"from IPython.display import Audio, display\n",
|
|
"def play_notification_sound(url):\n",
|
|
" display(Audio(url=url, autoplay=True))\n",
|
|
"\n",
|
|
"from google.colab.output import eval_js\n",
|
|
"\n",
|
|
"\n",
|
|
"if mode == \"elf\":\n",
|
|
" # !LD_LIBRARY_PATH=/usr/lib64-nvidia:/usr/lib/x86_64-linux-gnu ./vcclient_latest_for_colab cui --port {PORT} --no_cui true &\n",
|
|
"\n",
|
|
" get_ipython().system_raw(f'LD_LIBRARY_PATH=/usr/lib64-nvidia:/usr/lib/x86_64-linux-gnu ./vcclient_latest_for_colab cui --port {PORT} --no_cui true >{LOG_FILE} 2>&1 &')\n",
|
|
"elif mode == \"zip\":\n",
|
|
" !LD_LIBRARY_PATH=/usr/lib64-nvidia:/usr/lib/x86_64-linux-gnu ./main cui --port {PORT} --no_cui true &\n",
|
|
"\n",
|
|
"\n",
|
|
"import socket\n",
|
|
"def wait_for_server():\n",
|
|
" elapsed_time = 0\n",
|
|
" start_time = time.time()\n",
|
|
"\n",
|
|
"\n",
|
|
" while True:\n",
|
|
" time.sleep(1)\n",
|
|
" sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n",
|
|
" result = sock.connect_ex(('127.0.0.1', PORT))\n",
|
|
" if result == 0:\n",
|
|
" break\n",
|
|
" sock.close()\n",
|
|
" # 時刻を出力\n",
|
|
" current_time = time.time()\n",
|
|
" elapsed_time = int(current_time - start_time)\n",
|
|
" clear_output(wait=True)\n",
|
|
" print(f\"Waiting for server... elapsed: {elapsed_time}sec\")\n",
|
|
" try:\n",
|
|
" with open(LOG_FILE, 'r') as f:\n",
|
|
" lines = f.readlines()[-5:]\n",
|
|
" for line in lines:\n",
|
|
" print(line.strip())\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
"\n",
|
|
" if ClearConsole:\n",
|
|
" clear_output()\n",
|
|
" print(\"--------- SERVER READY! ---------\")\n",
|
|
" print(f\"Your server is available. elapsed: {elapsed_time}sec\")\n",
|
|
" proxy = eval_js( \"google.colab.kernel.proxyPort(\" + str(PORT) + \")\" )\n",
|
|
" print(f\"{proxy}\")\n",
|
|
" print(\"---------------------------------\")\n",
|
|
" if Play_Notification==True:\n",
|
|
" play_notification_sound('https://huggingface.co/wok000/voices/resolve/main/vcclient001_vctk229_gpt-sovits_vcclient-ready.wav')\n",
|
|
"wait_for_server()\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": [],
|
|
"gpuType": "T4",
|
|
"authorship_tag": "ABX9TyO4xBCjHQ5g0B28Cfbnr1eo",
|
|
"include_colab_link": true
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
},
|
|
"accelerator": "GPU"
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
} |