WIP:common sample

wataru 2023-06-16 18:04:33 +09:00
parent 24b55116f2
commit 435699d387
7 changed files with 31 additions and 303 deletions

View File

@@ -107,9 +107,9 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
             "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
             "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
         ], [
-            ("TokinaShigure_o", {"useIndex": True}),
-            ("KikotoMahiro_o", {"useIndex": False}),
-            ("Amitaro_o", {"useIndex": False}),
+            # ("TokinaShigure_o", {"useIndex": True}),
+            # ("KikotoMahiro_o", {"useIndex": False}),
+            # ("Amitaro_o", {"useIndex": False}),
            ("Tsukuyomi-chan_o", {"useIndex": False}),
         ]
     elif mode == "testOfficial":

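Note: this hunk trims the initial sample set of `getSampleJsonAndModelIds` down to a single entry. As orientation, a minimal sketch of the return shape suggested by the hunk (a list of sample-catalog JSON URLs paired with a list of `(sampleId, options)` tuples; everything outside the visible lines is an assumption):

    # Hypothetical sketch of the return shape suggested by the hunk above.
    def getSampleJsonAndModelIds_sketch():
        sample_jsons = [
            "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
            "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
        ]
        model_ids = [
            ("Tsukuyomi-chan_o", {"useIndex": False}),  # the only entry left enabled
        ]
        return sample_jsons, model_ids
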
View File

@@ -1,28 +0,0 @@
-from const import EnumInferenceTypes, EnumEmbedderTypes
-
-from dataclasses import dataclass
-
-
-@dataclass
-class ModelSlot:
-    modelFile: str = ""
-    indexFile: str = ""
-    defaultTune: int = 0
-    defaultIndexRatio: int = 1
-    defaultProtect: float = 0.5
-    isONNX: bool = False
-    modelType: str = EnumInferenceTypes.pyTorchRVC.value
-    samplingRate: int = -1
-    f0: bool = True
-    embChannels: int = 256
-    embOutputLayer: int = 9
-    useFinalProj: bool = True
-    deprecated: bool = False
-    embedder: str = EnumEmbedderTypes.hubert.value
-
-    name: str = ""
-    description: str = ""
-    credit: str = ""
-    termsOfUseUrl: str = ""
-    sampleId: str = ""
-    iconFile: str = ""

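Note: the deleted file is apparently `voice_changer/RVC/ModelSlot.py`; its import is dropped from every other file in this commit in favor of `data.ModelSlot`. Judging from the new `from data.ModelSlot import RVCModelSlot, loadAllSlotInfo` import and the `slotInfo.voiceChangerType = "RVC"` assignment in the `loadModel` hunk below, the class appears to move there as `RVCModelSlot`. A hedged sketch of that relocated slot (fields mirror the deleted class; the actual `data/ModelSlot.py` is not shown in this commit, and enum-backed defaults are replaced with assumed literals so the sketch is self-contained):

    # Hypothetical sketch of the relocated slot in data/ModelSlot.py.
    from dataclasses import dataclass

    @dataclass
    class RVCModelSlot:
        voiceChangerType: str = "RVC"   # new field, set explicitly in loadModel below
        modelFile: str = ""
        indexFile: str = ""
        defaultTune: int = 0
        defaultIndexRatio: int = 1
        defaultProtect: float = 0.5
        isONNX: bool = False
        modelType: str = "pyTorchRVC"   # EnumInferenceTypes.pyTorchRVC.value; literal assumed
        samplingRate: int = -1
        f0: bool = True
        embChannels: int = 256
        embOutputLayer: int = 9
        useFinalProj: bool = True
        deprecated: bool = False
        embedder: str = "hubert"        # EnumEmbedderTypes.hubert.value; literal assumed
        name: str = ""
        description: str = ""
        credit: str = ""
        termsOfUseUrl: str = ""
        sampleId: str = ""
        iconFile: str = ""
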
View File

@@ -1,10 +1,11 @@
 from const import EnumEmbedderTypes, EnumInferenceTypes
-from voice_changer.RVC.ModelSlot import ModelSlot
 import torch
 import onnxruntime
 import json
+
+from data.ModelSlot import ModelSlot
 
 
 def _setInfoByPytorch(slot: ModelSlot):
     cpt = torch.load(slot.modelFile, map_location="cpu")
@@ -15,22 +16,14 @@ def _setInfoByPytorch(slot: ModelSlot):
         slot.f0 = True if cpt["f0"] == 1 else False
         version = cpt.get("version", "v1")
         if version is None or version == "v1":
-            slot.modelType = (
-                EnumInferenceTypes.pyTorchRVC.value
-                if slot.f0
-                else EnumInferenceTypes.pyTorchRVCNono.value
-            )
+            slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
             slot.embChannels = 256
             slot.embOutputLayer = 9
             slot.useFinalProj = True
             slot.embedder = EnumEmbedderTypes.hubert.value
             print("[Voice Changer] Official Model(pyTorch) : v1")
         else:
-            slot.modelType = (
-                EnumInferenceTypes.pyTorchRVCv2.value
-                if slot.f0
-                else EnumInferenceTypes.pyTorchRVCv2Nono.value
-            )
+            slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
             slot.embChannels = 768
             slot.embOutputLayer = 12
             slot.useFinalProj = False
@@ -40,37 +33,21 @@ def _setInfoByPytorch(slot: ModelSlot):
     else:
         # DDPN RVC
         slot.f0 = True if cpt["f0"] == 1 else False
-        slot.modelType = (
-            EnumInferenceTypes.pyTorchWebUI.value
-            if slot.f0
-            else EnumInferenceTypes.pyTorchWebUINono.value
-        )
+        slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
         slot.embChannels = cpt["config"][17]
-        slot.embOutputLayer = (
-            cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
-        )
+        slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
         if slot.embChannels == 256:
             slot.useFinalProj = True
         else:
             slot.useFinalProj = False
 
         # Display the DDPN model info
-        if (
-            slot.embChannels == 256
-            and slot.embOutputLayer == 9
-            and slot.useFinalProj is True
-        ):
+        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
             print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
-        elif (
-            slot.embChannels == 768
-            and slot.embOutputLayer == 12
-            and slot.useFinalProj is False
-        ):
+        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
             print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
         else:
-            print(
-                f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
-            )
+            print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
 
         slot.embedder = cpt["embedder_name"]
         if slot.embedder.endswith("768"):
@@ -91,9 +68,7 @@ def _setInfoByPytorch(slot: ModelSlot):
 
 
 def _setInfoByONNX(slot: ModelSlot):
-    tmp_onnx_session = onnxruntime.InferenceSession(
-        slot.modelFile, providers=["CPUExecutionProvider"]
-    )
+    tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
     modelmeta = tmp_onnx_session.get_modelmeta()
     try:
         metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
@@ -101,16 +76,8 @@ def _setInfoByONNX(slot: ModelSlot):
         # slot.modelType = metadata["modelType"]
         slot.embChannels = metadata["embChannels"]
-        slot.embOutputLayer = (
-            metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
-        )
-        slot.useFinalProj = (
-            metadata["useFinalProj"]
-            if "useFinalProj" in metadata
-            else True
-            if slot.embChannels == 256
-            else False
-        )
+        slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
+        slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
 
         if slot.embChannels == 256:
             slot.useFinalProj = True
@@ -118,22 +85,12 @@ def _setInfoByONNX(slot: ModelSlot):
             slot.useFinalProj = False
 
         # Display the ONNX model info
-        if (
-            slot.embChannels == 256
-            and slot.embOutputLayer == 9
-            and slot.useFinalProj is True
-        ):
+        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
             print("[Voice Changer] ONNX Model: Official v1 like")
-        elif (
-            slot.embChannels == 768
-            and slot.embOutputLayer == 12
-            and slot.useFinalProj is False
-        ):
+        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
             print("[Voice Changer] ONNX Model: Official v2 like")
         else:
-            print(
-                f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
-            )
+            print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
 
         if "embedder" not in metadata:
             slot.embedder = EnumEmbedderTypes.hubert.value
@@ -149,11 +106,7 @@ def _setInfoByONNX(slot: ModelSlot):
         # raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
 
         slot.f0 = metadata["f0"]
-        slot.modelType = (
-            EnumInferenceTypes.onnxRVC.value
-            if slot.f0
-            else EnumInferenceTypes.onnxRVCNono.value
-        )
+        slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
         slot.samplingRate = metadata["samplingRate"]
         slot.deprecated = False

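Note: the hunks in this file are mostly mechanical (wrapped expressions collapsed into one-liners) plus the import switch to `data.ModelSlot`. As a usage sketch, both generators mutate a slot in place from model-file metadata; the imports below are taken from lines that appear verbatim elsewhere in this commit (the `ModelSlotGenerator` path comes from the deleted SampleDownloader), and the final `print` is illustrative only:

    # Hypothetical call pattern, inferred from the loadModel hunk below.
    from data.ModelSlot import ModelSlot
    from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch

    slot = ModelSlot(modelFile="model.pth")
    if slot.modelFile.endswith(".onnx"):
        _setInfoByONNX(slot)     # reads custom_metadata_map["metadata"] from the ONNX file
    else:
        _setInfoByPytorch(slot)  # reads checkpoint keys such as "version", "f0", "config"
    print(slot.modelType, slot.embChannels, slot.embOutputLayer, slot.embedder)
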
View File

@@ -5,10 +5,8 @@ from typing import cast
 import numpy as np
 import torch
 import torchaudio
-from data.ModelSlot import loadAllSlotInfo
+from data.ModelSlot import RVCModelSlot, loadAllSlotInfo
 from utils.downloader.SampleDownloader import getSampleInfos
-from voice_changer.RVC.ModelSlot import ModelSlot
-from voice_changer.RVC.SampleDownloader import downloadModelFiles
 
 
 # avoiding parse arg error in RVC
@@ -102,41 +100,16 @@ class RVC:
     def loadModel(self, props: LoadModelParams):
         target_slot_idx = props.slot
         params = props.params
-        slotInfo: ModelSlot = ModelSlot()
+        slotInfo: RVCModelSlot = RVCModelSlot()
         print("loadModel", params)
 
-        # When a sample is specified, download it and fabricate the metadata
-        if len(params["sampleId"]) > 0:
-            sampleId = params["sampleId"]
-            sampleInfo = self.getSampleInfo(sampleId)
-            useIndex = params["rvcIndexDownload"]
-
-            if sampleInfo is None:
-                print("[Voice Changer] sampleInfo is None")
-                return
-            modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
-            slotInfo.modelFile = modelPath
-            if indexPath is not None:
-                slotInfo.indexFile = indexPath
-            if iconPath is not None:
-                slotInfo.iconFile = iconPath
-            slotInfo.sampleId = sampleInfo.id
-            slotInfo.credit = sampleInfo.credit
-            slotInfo.description = sampleInfo.description
-            slotInfo.name = sampleInfo.name
-            slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
-            # slotInfo.samplingRate = sampleInfo.sampleRate
-            # slotInfo.modelType = sampleInfo.modelType
-            # slotInfo.f0 = sampleInfo.f0
-        else:
-            slotInfo.modelFile = params["files"]["rvcModel"]
-            slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
-
+        slotInfo.modelFile = params["files"]["rvcModel"]
+        slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
         slotInfo.defaultTune = params["defaultTune"]
         slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
         slotInfo.defaultProtect = params["defaultProtect"]
+        slotInfo.voiceChangerType = "RVC"
         slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
 
         if slotInfo.isONNX:

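Note: after this change `loadModel` no longer branches on `sampleId`; the sample-download path is gone and only local file paths are read. A placeholder payload showing the keys the simplified method consumes, assembled from the hunk above (all values are illustrative):

    # Placeholder props.params payload for the simplified loadModel.
    params = {
        "files": {
            "rvcModel": "/path/to/model.pth",    # required
            "rvcIndex": "/path/to/model.index",  # optional; indexFile falls back to None
        },
        "defaultTune": 0,
        "defaultIndexRatio": 1.0,
        "defaultProtect": 0.5,
    }
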
View File

@@ -1,164 +0,0 @@
-from concurrent.futures import ThreadPoolExecutor
-from dataclasses import asdict
-import os
-from const import RVC_MODEL_DIRNAME, TMP_DIR
-from Downloader import download, download_no_tqdm
-from ModelSample import RVCModelSample, getModelSamples
-import json
-
-from voice_changer.RVC.ModelSlot import ModelSlot
-from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
-
-
-def checkRvcModelExist(model_dir: str):
-    rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
-    if not os.path.exists(rvcModelDir):
-        return False
-    return True
-
-
-def downloadInitialSampleModels(sampleJsons: list[str], sampleModelIds: list[str], model_dir: str):
-    sampleModels = getModelSamples(sampleJsons, "RVC")
-    if sampleModels is None:
-        return
-
-    downloadParams = []
-    slot_count = 0
-    line_num = 0
-    for initSampleId in sampleModelIds:
-        # Search for the initial sample
-        match = False
-        for sample in sampleModels:
-            if sample.id == initSampleId[0]:
-                match = True
-                break
-        if match is False:
-            print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
-            continue
-
-        # If it was found...
-        slotInfo: ModelSlot = ModelSlot()
-        # sampleParams: Any = {"files": {}}
-
-        slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
-        os.makedirs(slotDir, exist_ok=True)
-        modelFilePath = os.path.join(
-            slotDir,
-            os.path.basename(sample.modelUrl),
-        )
-        downloadParams.append(
-            {
-                "url": sample.modelUrl,
-                "saveTo": modelFilePath,
-                "position": line_num,
-            }
-        )
-        slotInfo.modelFile = modelFilePath
-        line_num += 1
-
-        if initSampleId[1] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
-            indexPath = os.path.join(
-                slotDir,
-                os.path.basename(sample.indexUrl),
-            )
-            downloadParams.append(
-                {
-                    "url": sample.indexUrl,
-                    "saveTo": indexPath,
-                    "position": line_num,
-                }
-            )
-            slotInfo.indexFile = indexPath
-            line_num += 1
-
-        if hasattr(sample, "icon") and sample.icon != "":
-            iconPath = os.path.join(
-                slotDir,
-                os.path.basename(sample.icon),
-            )
-            downloadParams.append(
-                {
-                    "url": sample.icon,
-                    "saveTo": iconPath,
-                    "position": line_num,
-                }
-            )
-            slotInfo.iconFile = iconPath
-            line_num += 1
-
-        slotInfo.sampleId = sample.id
-        slotInfo.credit = sample.credit
-        slotInfo.description = sample.description
-        slotInfo.name = sample.name
-        slotInfo.termsOfUseUrl = sample.termsOfUseUrl
-        slotInfo.defaultTune = 0
-        slotInfo.defaultIndexRatio = 0.0
-        slotInfo.defaultProtect = 0.5
-        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
-
-        # At this point the files have not been downloaded yet
-        # if slotInfo.isONNX:
-        #     _setInfoByONNX(slotInfo)
-        # else:
-        #     _setInfoByPytorch(slotInfo)
-        json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
-        slot_count += 1
-
-    # Download
-    print("[Voice Changer] Downloading model files...")
-    with ThreadPoolExecutor() as pool:
-        pool.map(download, downloadParams)
-
-    # Generate metadata
-    print("[Voice Changer] Generating metadata...")
-    for slotId in range(slot_count):
-        slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
-        jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
-        slotInfo = ModelSlot(**jsonDict)
-        if slotInfo.isONNX:
-            _setInfoByONNX(slotInfo)
-        else:
-            _setInfoByPytorch(slotInfo)
-        json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
-
-
-def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
-    downloadParams = []
-
-    modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
-    downloadParams.append(
-        {
-            "url": sampleInfo.modelUrl,
-            "saveTo": modelPath,
-            "position": 0,
-        }
-    )
-
-    indexPath = None
-    if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
-        print("[Voice Changer] Download sample with index.")
-        indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
-        downloadParams.append(
-            {
-                "url": sampleInfo.indexUrl,
-                "saveTo": indexPath,
-                "position": 1,
-            }
-        )
-
-    iconPath = None
-    if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
-        iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
-        downloadParams.append(
-            {
-                "url": sampleInfo.icon,
-                "saveTo": iconPath,
-                "position": 2,
-            }
-        )
-
-    print("[Voice Changer] Downloading model files...", end="")
-    with ThreadPoolExecutor() as pool:
-        pool.map(download_no_tqdm, downloadParams)
-    print("")
-    return modelPath, indexPath, iconPath

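Note: the deleted module is apparently `voice_changer/RVC/SampleDownloader.py`, judging from the import removed from `RVC.py` above; that file now imports `getSampleInfos` from the shared `utils.downloader.SampleDownloader` instead. For reference, the deleted code queued work items of this shape for the `download` / `download_no_tqdm` helpers (values are placeholders; `position` is presumably the progress-bar row):

    # Shape of one download work item, per the deleted code above.
    item = {
        "url": "https://huggingface.co/.../model.pth",  # placeholder URL
        "saveTo": "/tmp/model.pth",                     # placeholder destination path
        "position": 0,                                  # likely the tqdm bar position
    }
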
View File

@@ -4,7 +4,7 @@ import torch
 from onnxsim import simplify
 import onnx
 from const import TMP_DIR, EnumInferenceTypes
-from voice_changer.RVC.ModelSlot import ModelSlot
+from data.ModelSlot import ModelSlot
 from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
 from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
     SynthesizerTrnMs256NSFsid_ONNX,
@@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
     modelFile = modelSlot.modelFile
 
     output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
-    output_file_simple = (
-        os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
-    )
+    output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
     output_path = os.path.join(TMP_DIR, output_file)
     output_path_simple = os.path.join(TMP_DIR, output_file_simple)
     metadata = {
@@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
     if gpuMomory > 0:
         _export2onnx(modelFile, output_path, output_path_simple, True, metadata)
     else:
-        print(
-            "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
-        )
+        print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
         _export2onnx(modelFile, output_path, output_path_simple, False, metadata)
 
     return output_file_simple

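Note: aside from the import switch to `data.ModelSlot`, this file only collapses wrapped lines. A hedged usage sketch of `export2onnx` (the signature and return value come from the hunks above; the exporter's module path is an assumption, as only the `voice_changer.RVC.onnxExporter` package is visible in this diff):

    from data.ModelSlot import ModelSlot  # import path per this commit
    from voice_changer.RVC.onnxExporter.export2onnx import export2onnx  # module path assumed

    slot = ModelSlot(modelFile="model.pth")
    onnx_name = export2onnx(gpu=0, modelSlot=slot)  # writes <name>.onnx and <name>_simple.onnx under TMP_DIR
    print(onnx_name)  # e.g. "model_simple.onnx"
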
View File

@@ -1,8 +1,8 @@
 import os
 import traceback
 import faiss
+from data.ModelSlot import RVCModelSlot
 
-from voice_changer.RVC.ModelSlot import ModelSlot
 from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
 from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
 from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
@@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
 from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
 
 
-def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
+def createPipeline(modelSlot: RVCModelSlot, gpu: int, f0Detector: str):
     dev = DeviceManager.get_instance().getDevice(gpu)
     half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
 
     # Create the inferencer
     try:
-        inferencer = InferencerManager.getInferencer(
-            modelSlot.modelType, modelSlot.modelFile, gpu
-        )
+        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelSlot.modelFile, gpu)
     except Exception as e:
         print("[Voice Changer] exception! loading inferencer", e)
         traceback.print_exc()
@@ -54,7 +52,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
     return pipeline
 
 
-def _loadIndex(modelSlot: ModelSlot):
+def _loadIndex(modelSlot: RVCModelSlot):
     # Load the index
     print("[Voice Changer] Loading index...")
     # None when no file is specified
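Note: `createPipeline` and `_loadIndex` now take the shared `RVCModelSlot`. A hedged call sketch (parameter names come from the signature above; the module path and the pitch-extractor value are guesses not confirmed by this diff):

    from data.ModelSlot import RVCModelSlot
    from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline  # module path assumed

    slot = RVCModelSlot(modelFile="model.pth", indexFile="model.index")
    pipeline = createPipeline(modelSlot=slot, gpu=0, f0Detector="harvest")  # "harvest" is a guess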