WIP:common sample

wataru 2023-06-16 18:04:33 +09:00
parent 24b55116f2
commit 435699d387
7 changed files with 31 additions and 303 deletions

View File

@@ -107,9 +107,9 @@ def getSampleJsonAndModelIds(mode: RVCSampleMode):
             "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
             "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
         ], [
-            ("TokinaShigure_o", {"useIndex": True}),
-            ("KikotoMahiro_o", {"useIndex": False}),
-            ("Amitaro_o", {"useIndex": False}),
+            # ("TokinaShigure_o", {"useIndex": True}),
+            # ("KikotoMahiro_o", {"useIndex": False}),
+            # ("Amitaro_o", {"useIndex": False}),
            ("Tsukuyomi-chan_o", {"useIndex": False}),
         ]
     elif mode == "testOfficial":

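Note: this hunk trims the initial sample set of `getSampleJsonAndModelIds` down to a single entry. As orientation, a minimal sketch of the return shape suggested by the hunk (a list of sample-catalog JSON URLs paired with a list of `(sampleId, options)` tuples; everything outside the visible lines is an assumption):

    # Hypothetical sketch of the return shape suggested by the hunk above.
    def getSampleJsonAndModelIds_sketch():
        sample_jsons = [
            "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_t2.json",
            "https://huggingface.co/wok000/vcclient_model/raw/main/samples_0003_o2.json",
        ]
        model_ids = [
            ("Tsukuyomi-chan_o", {"useIndex": False}),  # the only entry left enabled
        ]
        return sample_jsons, model_ids
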
View File

@@ -1,28 +0,0 @@
-from const import EnumInferenceTypes, EnumEmbedderTypes
-
-from dataclasses import dataclass
-
-
-@dataclass
-class ModelSlot:
-    modelFile: str = ""
-    indexFile: str = ""
-    defaultTune: int = 0
-    defaultIndexRatio: int = 1
-    defaultProtect: float = 0.5
-    isONNX: bool = False
-    modelType: str = EnumInferenceTypes.pyTorchRVC.value
-    samplingRate: int = -1
-    f0: bool = True
-    embChannels: int = 256
-    embOutputLayer: int = 9
-    useFinalProj: bool = True
-    deprecated: bool = False
-    embedder: str = EnumEmbedderTypes.hubert.value
-
-    name: str = ""
-    description: str = ""
-    credit: str = ""
-    termsOfUseUrl: str = ""
-    sampleId: str = ""
-    iconFile: str = ""

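Note: the deleted file is apparently `voice_changer/RVC/ModelSlot.py`; its import is dropped from every other file in this commit in favor of `data.ModelSlot`. Judging from the new `from data.ModelSlot import RVCModelSlot, loadAllSlotInfo` import and the `slotInfo.voiceChangerType = "RVC"` assignment in the `loadModel` hunk below, the class appears to move there as `RVCModelSlot`. A hedged sketch of that relocated slot (fields mirror the deleted class; the actual `data/ModelSlot.py` is not shown in this commit, and enum-backed defaults are replaced with assumed literals so the sketch is self-contained):

    # Hypothetical sketch of the relocated slot in data/ModelSlot.py.
    from dataclasses import dataclass

    @dataclass
    class RVCModelSlot:
        voiceChangerType: str = "RVC"   # new field, set explicitly in loadModel below
        modelFile: str = ""
        indexFile: str = ""
        defaultTune: int = 0
        defaultIndexRatio: int = 1
        defaultProtect: float = 0.5
        isONNX: bool = False
        modelType: str = "pyTorchRVC"   # EnumInferenceTypes.pyTorchRVC.value; literal assumed
        samplingRate: int = -1
        f0: bool = True
        embChannels: int = 256
        embOutputLayer: int = 9
        useFinalProj: bool = True
        deprecated: bool = False
        embedder: str = "hubert"        # EnumEmbedderTypes.hubert.value; literal assumed
        name: str = ""
        description: str = ""
        credit: str = ""
        termsOfUseUrl: str = ""
        sampleId: str = ""
        iconFile: str = ""
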
View File

@@ -1,10 +1,11 @@
 from const import EnumEmbedderTypes, EnumInferenceTypes
-from voice_changer.RVC.ModelSlot import ModelSlot
 import torch
 import onnxruntime
 import json
+
+from data.ModelSlot import ModelSlot
 
 
 def _setInfoByPytorch(slot: ModelSlot):
     cpt = torch.load(slot.modelFile, map_location="cpu")
@@ -15,22 +16,14 @@ def _setInfoByPytorch(slot: ModelSlot):
         slot.f0 = True if cpt["f0"] == 1 else False
         version = cpt.get("version", "v1")
         if version is None or version == "v1":
-            slot.modelType = (
-                EnumInferenceTypes.pyTorchRVC.value
-                if slot.f0
-                else EnumInferenceTypes.pyTorchRVCNono.value
-            )
+            slot.modelType = EnumInferenceTypes.pyTorchRVC.value if slot.f0 else EnumInferenceTypes.pyTorchRVCNono.value
             slot.embChannels = 256
             slot.embOutputLayer = 9
             slot.useFinalProj = True
             slot.embedder = EnumEmbedderTypes.hubert.value
             print("[Voice Changer] Official Model(pyTorch) : v1")
         else:
-            slot.modelType = (
-                EnumInferenceTypes.pyTorchRVCv2.value
-                if slot.f0
-                else EnumInferenceTypes.pyTorchRVCv2Nono.value
-            )
+            slot.modelType = EnumInferenceTypes.pyTorchRVCv2.value if slot.f0 else EnumInferenceTypes.pyTorchRVCv2Nono.value
             slot.embChannels = 768
             slot.embOutputLayer = 12
             slot.useFinalProj = False
@@ -40,37 +33,21 @@ def _setInfoByPytorch(slot: ModelSlot):
     else:
         # DDPN RVC
         slot.f0 = True if cpt["f0"] == 1 else False
-        slot.modelType = (
-            EnumInferenceTypes.pyTorchWebUI.value
-            if slot.f0
-            else EnumInferenceTypes.pyTorchWebUINono.value
-        )
+        slot.modelType = EnumInferenceTypes.pyTorchWebUI.value if slot.f0 else EnumInferenceTypes.pyTorchWebUINono.value
         slot.embChannels = cpt["config"][17]
-        slot.embOutputLayer = (
-            cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
-        )
+        slot.embOutputLayer = cpt["embedder_output_layer"] if "embedder_output_layer" in cpt else 9
         if slot.embChannels == 256:
             slot.useFinalProj = True
         else:
             slot.useFinalProj = False
 
         # Display the DDPN model info
-        if (
-            slot.embChannels == 256
-            and slot.embOutputLayer == 9
-            and slot.useFinalProj is True
-        ):
+        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
             print("[Voice Changer] DDPN Model(pyTorch) : Official v1 like")
-        elif (
-            slot.embChannels == 768
-            and slot.embOutputLayer == 12
-            and slot.useFinalProj is False
-        ):
+        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
             print("[Voice Changer] DDPN Model(pyTorch): Official v2 like")
         else:
-            print(
-                f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
-            )
+            print(f"[Voice Changer] DDPN Model(pyTorch): ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
 
         slot.embedder = cpt["embedder_name"]
         if slot.embedder.endswith("768"):
@@ -91,9 +68,7 @@ def _setInfoByPytorch(slot: ModelSlot):
 
 
 def _setInfoByONNX(slot: ModelSlot):
-    tmp_onnx_session = onnxruntime.InferenceSession(
-        slot.modelFile, providers=["CPUExecutionProvider"]
-    )
+    tmp_onnx_session = onnxruntime.InferenceSession(slot.modelFile, providers=["CPUExecutionProvider"])
     modelmeta = tmp_onnx_session.get_modelmeta()
     try:
         metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
@@ -101,16 +76,8 @@ def _setInfoByONNX(slot: ModelSlot):
         # slot.modelType = metadata["modelType"]
         slot.embChannels = metadata["embChannels"]
-        slot.embOutputLayer = (
-            metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
-        )
-        slot.useFinalProj = (
-            metadata["useFinalProj"]
-            if "useFinalProj" in metadata
-            else True
-            if slot.embChannels == 256
-            else False
-        )
+        slot.embOutputLayer = metadata["embOutputLayer"] if "embOutputLayer" in metadata else 9
+        slot.useFinalProj = metadata["useFinalProj"] if "useFinalProj" in metadata else True if slot.embChannels == 256 else False
 
         if slot.embChannels == 256:
             slot.useFinalProj = True
@@ -118,22 +85,12 @@ def _setInfoByONNX(slot: ModelSlot):
             slot.useFinalProj = False
 
         # Display the ONNX model info
-        if (
-            slot.embChannels == 256
-            and slot.embOutputLayer == 9
-            and slot.useFinalProj is True
-        ):
+        if slot.embChannels == 256 and slot.embOutputLayer == 9 and slot.useFinalProj is True:
             print("[Voice Changer] ONNX Model: Official v1 like")
-        elif (
-            slot.embChannels == 768
-            and slot.embOutputLayer == 12
-            and slot.useFinalProj is False
-        ):
+        elif slot.embChannels == 768 and slot.embOutputLayer == 12 and slot.useFinalProj is False:
             print("[Voice Changer] ONNX Model: Official v2 like")
         else:
-            print(
-                f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}"
-            )
+            print(f"[Voice Changer] ONNX Model: ch:{slot.embChannels}, L:{slot.embOutputLayer}, FP:{slot.useFinalProj}")
 
         if "embedder" not in metadata:
             slot.embedder = EnumEmbedderTypes.hubert.value
@@ -149,11 +106,7 @@ def _setInfoByONNX(slot: ModelSlot):
         # raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
 
         slot.f0 = metadata["f0"]
-        slot.modelType = (
-            EnumInferenceTypes.onnxRVC.value
-            if slot.f0
-            else EnumInferenceTypes.onnxRVCNono.value
-        )
+        slot.modelType = EnumInferenceTypes.onnxRVC.value if slot.f0 else EnumInferenceTypes.onnxRVCNono.value
         slot.samplingRate = metadata["samplingRate"]
         slot.deprecated = False

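Note: the hunks in this file are mostly mechanical (wrapped expressions collapsed into one-liners) plus the import switch to `data.ModelSlot`. As a usage sketch, both generators mutate a slot in place from model-file metadata; the imports below are taken from lines that appear verbatim elsewhere in this commit (the `ModelSlotGenerator` path comes from the deleted SampleDownloader), and the final `print` is illustrative only:

    # Hypothetical call pattern, inferred from the loadModel hunk below.
    from data.ModelSlot import ModelSlot
    from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch

    slot = ModelSlot(modelFile="model.pth")
    if slot.modelFile.endswith(".onnx"):
        _setInfoByONNX(slot)     # reads custom_metadata_map["metadata"] from the ONNX file
    else:
        _setInfoByPytorch(slot)  # reads checkpoint keys such as "version", "f0", "config"
    print(slot.modelType, slot.embChannels, slot.embOutputLayer, slot.embedder)
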
View File

@@ -5,10 +5,8 @@ from typing import cast
 import numpy as np
 import torch
 import torchaudio
-from data.ModelSlot import loadAllSlotInfo
+from data.ModelSlot import RVCModelSlot, loadAllSlotInfo
 from utils.downloader.SampleDownloader import getSampleInfos
-from voice_changer.RVC.ModelSlot import ModelSlot
-from voice_changer.RVC.SampleDownloader import downloadModelFiles
 
 
 # avoiding parse arg error in RVC
@@ -102,41 +100,16 @@ class RVC:
     def loadModel(self, props: LoadModelParams):
         target_slot_idx = props.slot
         params = props.params
-        slotInfo: ModelSlot = ModelSlot()
+        slotInfo: RVCModelSlot = RVCModelSlot()
         print("loadModel", params)
 
-        # When a sample is specified, download it and fabricate the metadata
-        if len(params["sampleId"]) > 0:
-            sampleId = params["sampleId"]
-            sampleInfo = self.getSampleInfo(sampleId)
-            useIndex = params["rvcIndexDownload"]
-
-            if sampleInfo is None:
-                print("[Voice Changer] sampleInfo is None")
-                return
-            modelPath, indexPath, iconPath = downloadModelFiles(sampleInfo, useIndex)
-            slotInfo.modelFile = modelPath
-            if indexPath is not None:
-                slotInfo.indexFile = indexPath
-            if iconPath is not None:
-                slotInfo.iconFile = iconPath
-            slotInfo.sampleId = sampleInfo.id
-            slotInfo.credit = sampleInfo.credit
-            slotInfo.description = sampleInfo.description
-            slotInfo.name = sampleInfo.name
-            slotInfo.termsOfUseUrl = sampleInfo.termsOfUseUrl
-            # slotInfo.samplingRate = sampleInfo.sampleRate
-            # slotInfo.modelType = sampleInfo.modelType
-            # slotInfo.f0 = sampleInfo.f0
-        else:
-            slotInfo.modelFile = params["files"]["rvcModel"]
-            slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
-
+        slotInfo.modelFile = params["files"]["rvcModel"]
+        slotInfo.indexFile = params["files"]["rvcIndex"] if "rvcIndex" in params["files"] else None
         slotInfo.defaultTune = params["defaultTune"]
         slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
         slotInfo.defaultProtect = params["defaultProtect"]
+        slotInfo.voiceChangerType = "RVC"
         slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
 
         if slotInfo.isONNX:

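Note: after this change `loadModel` no longer branches on `sampleId`; the sample-download path is gone and only local file paths are read. A placeholder payload showing the keys the simplified method consumes, assembled from the hunk above (all values are illustrative):

    # Placeholder props.params payload for the simplified loadModel.
    params = {
        "files": {
            "rvcModel": "/path/to/model.pth",    # required
            "rvcIndex": "/path/to/model.index",  # optional; indexFile falls back to None
        },
        "defaultTune": 0,
        "defaultIndexRatio": 1.0,
        "defaultProtect": 0.5,
    }
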
View File

@@ -1,164 +0,0 @@
-from concurrent.futures import ThreadPoolExecutor
-from dataclasses import asdict
-import os
-from const import RVC_MODEL_DIRNAME, TMP_DIR
-from Downloader import download, download_no_tqdm
-from ModelSample import RVCModelSample, getModelSamples
-import json
-
-from voice_changer.RVC.ModelSlot import ModelSlot
-from voice_changer.RVC.ModelSlotGenerator import _setInfoByONNX, _setInfoByPytorch
-
-
-def checkRvcModelExist(model_dir: str):
-    rvcModelDir = os.path.join(model_dir, RVC_MODEL_DIRNAME)
-    if not os.path.exists(rvcModelDir):
-        return False
-    return True
-
-
-def downloadInitialSampleModels(sampleJsons: list[str], sampleModelIds: list[str], model_dir: str):
-    sampleModels = getModelSamples(sampleJsons, "RVC")
-    if sampleModels is None:
-        return
-
-    downloadParams = []
-    slot_count = 0
-    line_num = 0
-    for initSampleId in sampleModelIds:
-        # Search for the initial sample
-        match = False
-        for sample in sampleModels:
-            if sample.id == initSampleId[0]:
-                match = True
-                break
-        if match is False:
-            print(f"[Voice Changer] initiail sample not found. {initSampleId[0]}")
-            continue
-
-        # If it was found...
-        slotInfo: ModelSlot = ModelSlot()
-        # sampleParams: Any = {"files": {}}
-
-        slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slot_count))
-        os.makedirs(slotDir, exist_ok=True)
-        modelFilePath = os.path.join(
-            slotDir,
-            os.path.basename(sample.modelUrl),
-        )
-        downloadParams.append(
-            {
-                "url": sample.modelUrl,
-                "saveTo": modelFilePath,
-                "position": line_num,
-            }
-        )
-        slotInfo.modelFile = modelFilePath
-        line_num += 1
-
-        if initSampleId[1] is True and hasattr(sample, "indexUrl") and sample.indexUrl != "":
-            indexPath = os.path.join(
-                slotDir,
-                os.path.basename(sample.indexUrl),
-            )
-            downloadParams.append(
-                {
-                    "url": sample.indexUrl,
-                    "saveTo": indexPath,
-                    "position": line_num,
-                }
-            )
-            slotInfo.indexFile = indexPath
-            line_num += 1
-
-        if hasattr(sample, "icon") and sample.icon != "":
-            iconPath = os.path.join(
-                slotDir,
-                os.path.basename(sample.icon),
-            )
-            downloadParams.append(
-                {
-                    "url": sample.icon,
-                    "saveTo": iconPath,
-                    "position": line_num,
-                }
-            )
-            slotInfo.iconFile = iconPath
-            line_num += 1
-
-        slotInfo.sampleId = sample.id
-        slotInfo.credit = sample.credit
-        slotInfo.description = sample.description
-        slotInfo.name = sample.name
-        slotInfo.termsOfUseUrl = sample.termsOfUseUrl
-        slotInfo.defaultTune = 0
-        slotInfo.defaultIndexRatio = 0.0
-        slotInfo.defaultProtect = 0.5
-        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
-
-        # At this point the files have not been downloaded yet
-        # if slotInfo.isONNX:
-        #     _setInfoByONNX(slotInfo)
-        # else:
-        #     _setInfoByPytorch(slotInfo)
-        json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
-        slot_count += 1
-
-    # Download
-    print("[Voice Changer] Downloading model files...")
-    with ThreadPoolExecutor() as pool:
-        pool.map(download, downloadParams)
-
-    # Generate metadata
-    print("[Voice Changer] Generating metadata...")
-    for slotId in range(slot_count):
-        slotDir = os.path.join(model_dir, RVC_MODEL_DIRNAME, str(slotId))
-        jsonDict = json.load(open(os.path.join(slotDir, "params.json")))
-        slotInfo = ModelSlot(**jsonDict)
-        if slotInfo.isONNX:
-            _setInfoByONNX(slotInfo)
-        else:
-            _setInfoByPytorch(slotInfo)
-        json.dump(asdict(slotInfo), open(os.path.join(slotDir, "params.json"), "w"))
-
-
-def downloadModelFiles(sampleInfo: RVCModelSample, useIndex: bool = True):
-    downloadParams = []
-
-    modelPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.modelUrl))
-    downloadParams.append(
-        {
-            "url": sampleInfo.modelUrl,
-            "saveTo": modelPath,
-            "position": 0,
-        }
-    )
-
-    indexPath = None
-    if useIndex is True and hasattr(sampleInfo, "indexUrl") and sampleInfo.indexUrl != "":
-        print("[Voice Changer] Download sample with index.")
-        indexPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.indexUrl))
-        downloadParams.append(
-            {
-                "url": sampleInfo.indexUrl,
-                "saveTo": indexPath,
-                "position": 1,
-            }
-        )
-
-    iconPath = None
-    if hasattr(sampleInfo, "icon") and sampleInfo.icon != "":
-        iconPath = os.path.join(TMP_DIR, os.path.basename(sampleInfo.icon))
-        downloadParams.append(
-            {
-                "url": sampleInfo.icon,
-                "saveTo": iconPath,
-                "position": 2,
-            }
-        )
-
-    print("[Voice Changer] Downloading model files...", end="")
-    with ThreadPoolExecutor() as pool:
-        pool.map(download_no_tqdm, downloadParams)
-    print("")
-    return modelPath, indexPath, iconPath

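Note: the deleted module is apparently `voice_changer/RVC/SampleDownloader.py`, judging from the import removed from `RVC.py` above; that file now imports `getSampleInfos` from the shared `utils.downloader.SampleDownloader` instead. For reference, the deleted code queued work items of this shape for the `download` / `download_no_tqdm` helpers (values are placeholders; `position` is presumably the progress-bar row):

    # Shape of one download work item, per the deleted code above.
    item = {
        "url": "https://huggingface.co/.../model.pth",  # placeholder URL
        "saveTo": "/tmp/model.pth",                     # placeholder destination path
        "position": 0,                                  # likely the tqdm bar position
    }
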
View File

@@ -4,7 +4,7 @@ import torch
 from onnxsim import simplify
 import onnx
 from const import TMP_DIR, EnumInferenceTypes
-from voice_changer.RVC.ModelSlot import ModelSlot
+from data.ModelSlot import ModelSlot
 from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
 from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
     SynthesizerTrnMs256NSFsid_ONNX,
@@ -30,9 +30,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
     modelFile = modelSlot.modelFile
 
     output_file = os.path.splitext(os.path.basename(modelFile))[0] + ".onnx"
-    output_file_simple = (
-        os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
-    )
+    output_file_simple = os.path.splitext(os.path.basename(modelFile))[0] + "_simple.onnx"
     output_path = os.path.join(TMP_DIR, output_file)
     output_path_simple = os.path.join(TMP_DIR, output_file_simple)
     metadata = {
@@ -52,9 +50,7 @@ def export2onnx(gpu: int, modelSlot: ModelSlot):
     if gpuMomory > 0:
         _export2onnx(modelFile, output_path, output_path_simple, True, metadata)
     else:
-        print(
-            "[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled."
-        )
+        print("[Voice Changer] Warning!!! onnx export with float32. maybe size is doubled.")
         _export2onnx(modelFile, output_path, output_path_simple, False, metadata)
 
     return output_file_simple

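Note: aside from the import switch to `data.ModelSlot`, this file only collapses wrapped lines. A hedged usage sketch of `export2onnx` (the signature and return value come from the hunks above; the exporter's module path is an assumption, as only the `voice_changer.RVC.onnxExporter` package is visible in this diff):

    from data.ModelSlot import ModelSlot  # import path per this commit
    from voice_changer.RVC.onnxExporter.export2onnx import export2onnx  # module path assumed

    slot = ModelSlot(modelFile="model.pth")
    onnx_name = export2onnx(gpu=0, modelSlot=slot)  # writes <name>.onnx and <name>_simple.onnx under TMP_DIR
    print(onnx_name)  # e.g. "model_simple.onnx"
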
View File

@@ -1,8 +1,8 @@
 import os
 import traceback
 import faiss
+from data.ModelSlot import RVCModelSlot
 
-from voice_changer.RVC.ModelSlot import ModelSlot
 from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
 from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
 from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
@@ -10,15 +10,13 @@ from voice_changer.RVC.pipeline.Pipeline import Pipeline
 from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
 
 
-def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
+def createPipeline(modelSlot: RVCModelSlot, gpu: int, f0Detector: str):
     dev = DeviceManager.get_instance().getDevice(gpu)
     half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
 
     # Create the inferencer
     try:
-        inferencer = InferencerManager.getInferencer(
-            modelSlot.modelType, modelSlot.modelFile, gpu
-        )
+        inferencer = InferencerManager.getInferencer(modelSlot.modelType, modelSlot.modelFile, gpu)
     except Exception as e:
         print("[Voice Changer] exception! loading inferencer", e)
         traceback.print_exc()
@@ -54,7 +52,7 @@ def createPipeline(modelSlot: ModelSlot, gpu: int, f0Detector: str):
     return pipeline
 
 
-def _loadIndex(modelSlot: ModelSlot):
+def _loadIndex(modelSlot: RVCModelSlot):
     # Load the index
     print("[Voice Changer] Loading index...")
     # None when no file is specified
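Note: `createPipeline` and `_loadIndex` now take the shared `RVCModelSlot`. A hedged call sketch (parameter names come from the signature above; the module path and the pitch-extractor value are guesses not confirmed by this diff):

    from data.ModelSlot import RVCModelSlot
    from voice_changer.RVC.pipeline.PipelineGenerator import createPipeline  # module path assumed

    slot = RVCModelSlot(modelFile="model.pth", indexFile="model.index")
    pipeline = createPipeline(modelSlot=slot, gpu=0, f0Detector="harvest")  # "harvest" is a guess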