WIP: Japanese Hubert
This commit is contained in:
parent
bfb2de9ea1
commit
48846aad7f
@ -8,6 +8,11 @@ class NoModeLoadedException(Exception):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class HalfPrecisionChangingException(Exception):
    """Signal a failure related to half-precision handling.

    The VC pipeline raises this when feature extraction fails with a
    RuntimeError whose message mentions "half".
    """

    def __str__(self):
        # repr() is intentional: the message stays quoted, exactly like the
        # sibling ONNXInputArgumentException in this module.
        return repr("HalfPrecision related exception.")
|
||||||
|
|
||||||
|
|
||||||
class ONNXInputArgumentException(Exception):
|
class ONNXInputArgumentException(Exception):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr("ONNX received invalid argument.")
|
return repr("ONNX received invalid argument.")
|
||||||
|
@ -48,6 +48,15 @@ def _setInfoByPytorch(slot: ModelSlot, file: str):
|
|||||||
if slot.embedder.endswith("768"):
|
if slot.embedder.endswith("768"):
|
||||||
slot.embedder = slot.embedder[:-3]
|
slot.embedder = slot.embedder[:-3]
|
||||||
|
|
||||||
|
if slot.embedder == EnumEmbedderTypes.hubert.value:
|
||||||
|
slot.embedder = EnumEmbedderTypes.hubert
|
||||||
|
elif slot.embedder == EnumEmbedderTypes.contentvec.value:
|
||||||
|
slot.embedder = EnumEmbedderTypes.contentvec
|
||||||
|
elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
|
||||||
|
slot.embedder = EnumEmbedderTypes.hubert_jp
|
||||||
|
else:
|
||||||
|
raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
||||||
|
|
||||||
slot.samplingRate = cpt["config"][-1]
|
slot.samplingRate = cpt["config"][-1]
|
||||||
|
|
||||||
del cpt
|
del cpt
|
||||||
@ -63,9 +72,18 @@ def _setInfoByONNX(slot: ModelSlot, file: str):
|
|||||||
|
|
||||||
slot.modelType = metadata["modelType"]
|
slot.modelType = metadata["modelType"]
|
||||||
slot.embChannels = metadata["embChannels"]
|
slot.embChannels = metadata["embChannels"]
|
||||||
slot.embedder = (
|
|
||||||
metadata["embedder"] if "embedder" in metadata else EnumEmbedderTypes.hubert
|
if "embedder" not in metadata:
|
||||||
)
|
slot.embedder = EnumEmbedderTypes.hubert
|
||||||
|
elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
|
||||||
|
slot.embedder = EnumEmbedderTypes.hubert
|
||||||
|
elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
|
||||||
|
slot.embedder = EnumEmbedderTypes.contentvec
|
||||||
|
elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
|
||||||
|
slot.embedder = EnumEmbedderTypes.hubert_jp
|
||||||
|
else:
|
||||||
|
raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
|
||||||
|
|
||||||
slot.f0 = metadata["f0"]
|
slot.f0 = metadata["f0"]
|
||||||
slot.modelType = (
|
slot.modelType = (
|
||||||
EnumInferenceTypes.onnxRVC if slot.f0 else EnumInferenceTypes.onnxRVCNono
|
EnumInferenceTypes.onnxRVC if slot.f0 else EnumInferenceTypes.onnxRVCNono
|
||||||
@ -73,7 +91,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str):
|
|||||||
slot.samplingRate = metadata["samplingRate"]
|
slot.samplingRate = metadata["samplingRate"]
|
||||||
slot.deprecated = False
|
slot.deprecated = False
|
||||||
|
|
||||||
except:
|
except Exception as e:
|
||||||
slot.modelType = EnumInferenceTypes.onnxRVC
|
slot.modelType = EnumInferenceTypes.onnxRVC
|
||||||
slot.embChannels = 256
|
slot.embChannels = 256
|
||||||
slot.embedder = EnumEmbedderTypes.hubert
|
slot.embedder = EnumEmbedderTypes.hubert
|
||||||
@ -81,6 +99,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str):
|
|||||||
slot.samplingRate = 48000
|
slot.samplingRate = 48000
|
||||||
slot.deprecated = True
|
slot.deprecated = True
|
||||||
|
|
||||||
|
print("[Voice Changer] setInfoByONNX", e)
|
||||||
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
|
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
|
||||||
print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
|
print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
|
||||||
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
|
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
from voice_changer.RVC.ModelSlot import ModelSlot
|
||||||
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
|
||||||
|
|
||||||
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
|
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
|
||||||
@ -22,7 +23,6 @@ import resampy
|
|||||||
from voice_changer.RVC.MergeModel import merge_model
|
from voice_changer.RVC.MergeModel import merge_model
|
||||||
from voice_changer.RVC.MergeModelRequest import MergeModelRequest
|
from voice_changer.RVC.MergeModelRequest import MergeModelRequest
|
||||||
from voice_changer.RVC.ModelSlotGenerator import generateModelSlot
|
from voice_changer.RVC.ModelSlotGenerator import generateModelSlot
|
||||||
from Exceptions import NoModeLoadedException
|
|
||||||
from voice_changer.RVC.RVCSettings import RVCSettings
|
from voice_changer.RVC.RVCSettings import RVCSettings
|
||||||
from voice_changer.RVC.embedder.Embedder import Embedder
|
from voice_changer.RVC.embedder.Embedder import Embedder
|
||||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||||
@ -42,7 +42,7 @@ import torch
|
|||||||
import traceback
|
import traceback
|
||||||
import faiss
|
import faiss
|
||||||
|
|
||||||
from const import TMP_DIR, UPLOAD_DIR
|
from const import TMP_DIR, UPLOAD_DIR, EnumEmbedderTypes
|
||||||
|
|
||||||
|
|
||||||
from voice_changer.RVC.custom_vc_infer_pipeline import VC
|
from voice_changer.RVC.custom_vc_infer_pipeline import VC
|
||||||
@ -56,34 +56,29 @@ providers = [
|
|||||||
|
|
||||||
|
|
||||||
class RVC:
|
class RVC:
|
||||||
audio_buffer: AudioInOut | None = None
|
initialLoad: bool = True
|
||||||
|
settings: RVCSettings = RVCSettings()
|
||||||
|
|
||||||
embedder: Embedder | None = None
|
embedder: Embedder | None = None
|
||||||
inferencer: Inferencer | None = None
|
inferencer: Inferencer | None = None
|
||||||
|
|
||||||
pitchExtractor: PitchExtractor | None = None
|
pitchExtractor: PitchExtractor | None = None
|
||||||
deviceManager = DeviceManager.get_instance()
|
deviceManager = DeviceManager.get_instance()
|
||||||
|
|
||||||
|
audio_buffer: AudioInOut | None = None
|
||||||
|
prevVol: float = 0
|
||||||
|
params: VoiceChangerParams
|
||||||
|
currentSlot: int = -1
|
||||||
|
needSwitch: bool = False
|
||||||
|
|
||||||
def __init__(self, params: VoiceChangerParams):
|
def __init__(self, params: VoiceChangerParams):
|
||||||
self.initialLoad = True
|
|
||||||
self.settings = RVCSettings()
|
|
||||||
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
|
self.pitchExtractor = PitchExtractorManager.getPitchExtractor(
|
||||||
self.settings.f0Detector
|
self.settings.f0Detector
|
||||||
)
|
)
|
||||||
|
|
||||||
self.feature_file = None
|
|
||||||
self.index_file = None
|
|
||||||
|
|
||||||
self.prevVol = 0
|
|
||||||
self.params = params
|
self.params = params
|
||||||
|
|
||||||
self.currentSlot = -1
|
|
||||||
print("RVC initialization: ", params)
|
print("RVC initialization: ", params)
|
||||||
|
|
||||||
def loadModel(self, props: LoadModelParams):
|
def loadModel(self, props: LoadModelParams):
|
||||||
"""
|
|
||||||
loadModelはスロットへのエントリ(推論向けにはロードしない)。
|
|
||||||
例外的に、まだ一つも推論向けにロードされていない場合と稼働中スロットの場合は、ロードする。
|
|
||||||
"""
|
|
||||||
self.is_half = props.isHalf
|
|
||||||
target_slot_idx = props.slot
|
target_slot_idx = props.slot
|
||||||
params_str = props.params
|
params_str = props.params
|
||||||
params = json.loads(params_str)
|
params = json.loads(params_str)
|
||||||
@ -94,167 +89,175 @@ class RVC:
|
|||||||
f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}",
|
f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}",
|
||||||
asdict(modelSlot),
|
asdict(modelSlot),
|
||||||
)
|
)
|
||||||
|
"""
|
||||||
|
[Voice Changer] RVC new model is uploaded,0 {'pyTorchModelFile': 'upload_dir/0/kurage.pth', 'onnxModelFile': None, 'featureFile': None, 'indexFile': None, 'defaultTrans': 16, 'isONNX': False, 'modelType': <EnumInferenceTypes.pyTorchWebUI: 'pyTorchWebUI'>, 'samplingRate': 48000, 'f0': True, 'embChannels': 768, 'deprecated': False, 'embedder': 'hubert-base-japanese'}
|
||||||
|
"""
|
||||||
|
|
||||||
# 初回のみロード
|
# 初回のみロード
|
||||||
if self.initialLoad or target_slot_idx == self.currentSlot:
|
if self.initialLoad:
|
||||||
self.prepareModel(target_slot_idx)
|
self.prepareModel(target_slot_idx)
|
||||||
self.settings.modelSlotIndex = target_slot_idx
|
self.settings.modelSlotIndex = target_slot_idx
|
||||||
# self.currentSlot = self.settings.modelSlotIndex
|
|
||||||
self.switchModel()
|
self.switchModel()
|
||||||
self.initialLoad = False
|
self.initialLoad = False
|
||||||
|
elif target_slot_idx == self.currentSlot:
|
||||||
|
self.prepareModel(target_slot_idx)
|
||||||
|
self.needSwitch = True
|
||||||
|
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
|
|
||||||
# def _getDevice(self):
|
def createPipeline(self, modelSlot: ModelSlot):
    """Build the inferencer and embedder for the given model slot.

    The target device and the half-precision flag are both resolved from
    ``self.settings.gpu`` through ``self.deviceManager``.

    Args:
        modelSlot: Slot describing the model files, model type and embedder.

    Returns:
        A ``(inferencer, embedder)`` tuple.

    Raises:
        RuntimeError: If ``modelSlot.embedder`` is not a known embedder type.
        Exception: Whatever the inferencer/embedder loaders raise; the error
            is logged and then re-raised unchanged.
    """
    dev = self.deviceManager.getDevice(self.settings.gpu)
    half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)

    # Resolve the model file name (inferencer).
    inferencerFilename = (
        modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile
    )

    # Resolve the model file name (embedder).
    if modelSlot.embedder == EnumEmbedderTypes.hubert:
        embedderFilename = self.params.hubert_base
    elif modelSlot.embedder == EnumEmbedderTypes.contentvec:
        embedderFilename = self.params.content_vec_500
    elif modelSlot.embedder == EnumEmbedderTypes.hubert_jp:
        embedderFilename = self.params.hubert_base_jp
    else:
        # Single formatted message: passing the type as a second positional
        # argument made str(exc) render as a tuple.
        raise RuntimeError(
            "[Voice Changer] Exception loading embedder failed. "
            f"unknown type: {modelSlot.embedder}"
        )

    # Create the inferencer.
    try:
        inferencer = InferencerManager.getInferencer(
            modelSlot.modelType,
            inferencerFilename,
            half,
            dev,
        )
    except Exception as e:
        print("[Voice Changer] exception! loading inferencer", e)
        traceback.print_exc()
        # Re-raise: swallowing the error here left `inferencer` unbound and
        # the return statement failed with an unrelated UnboundLocalError.
        raise

    # Create the embedder.
    try:
        embedder = EmbedderManager.getEmbedder(
            modelSlot.embedder,
            embedderFilename,
            half,
            dev,
        )
    except Exception as e:
        print("[Voice Changer] exception! loading embedder", e)
        traceback.print_exc()
        # Same reasoning as above: surface the real cause to the caller.
        raise

    return inferencer, embedder
|
||||||
|
|
||||||
|
def loadIndex(self, modelSlot: ModelSlot):
    """Load the faiss index and the feature array for a model slot.

    Args:
        modelSlot: Slot whose ``indexFile`` and ``featureFile`` are read.

    Returns:
        ``(index, feature)`` on success, or ``(None, None)`` when either file
        is unspecified, missing on disk, or fails to load.
    """
    print("[Voice Changer] Loading index...")

    # No file specified -> run without an index.
    if modelSlot.featureFile is None or modelSlot.indexFile is None:
        return None, None

    # A file is specified but does not exist -> run without an index.
    if not os.path.exists(modelSlot.featureFile) or not os.path.exists(
        modelSlot.indexFile
    ):
        return None, None

    try:
        index = faiss.read_index(modelSlot.indexFile)
        feature = np.load(modelSlot.featureFile)
    except Exception:
        # Best-effort: a broken index must not prevent the model from being
        # used. Catch Exception only, so KeyboardInterrupt/SystemExit still
        # propagate (the previous bare `except:` trapped those as well).
        print("[Voice Changer] load index failed. Use no index.")
        traceback.print_exc()
        return None, None

    return index, feature
|
||||||
|
|
||||||
def prepareModel(self, slot: int):
|
def prepareModel(self, slot: int):
|
||||||
if slot < 0:
|
if slot < 0:
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
print("[Voice Changer] Prepare Model of slot:", slot)
|
print("[Voice Changer] Prepare Model of slot:", slot)
|
||||||
modelSlot = self.settings.modelSlots[slot]
|
modelSlot = self.settings.modelSlots[slot]
|
||||||
filename = (
|
|
||||||
modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile
|
|
||||||
)
|
|
||||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
|
||||||
|
|
||||||
# Inferencerのロード
|
# Inferencer, embedderのロード
|
||||||
inferencer = InferencerManager.getInferencer(
|
inferencer, embedder = self.createPipeline(modelSlot)
|
||||||
modelSlot.modelType,
|
|
||||||
filename,
|
|
||||||
self.settings.isHalf,
|
|
||||||
dev,
|
|
||||||
)
|
|
||||||
self.next_inferencer = inferencer
|
self.next_inferencer = inferencer
|
||||||
|
self.next_embedder = embedder
|
||||||
|
|
||||||
# Indexのロード
|
# Indexのロード
|
||||||
print("[Voice Changer] Loading index...")
|
index, feature = self.loadIndex(modelSlot)
|
||||||
if modelSlot.featureFile is not None and modelSlot.indexFile is not None:
|
self.next_index = index
|
||||||
if (
|
self.next_feature = feature
|
||||||
os.path.exists(modelSlot.featureFile) is True
|
|
||||||
and os.path.exists(modelSlot.indexFile) is True
|
|
||||||
):
|
|
||||||
try:
|
|
||||||
self.next_index = faiss.read_index(modelSlot.indexFile)
|
|
||||||
self.next_feature = np.load(modelSlot.featureFile)
|
|
||||||
except:
|
|
||||||
print("[Voice Changer] load index failed. Use no index.")
|
|
||||||
traceback.print_exc()
|
|
||||||
self.next_index = self.next_feature = None
|
|
||||||
else:
|
|
||||||
print("[Voice Changer] Index file is not found. Use no index.")
|
|
||||||
self.next_index = self.next_feature = None
|
|
||||||
else:
|
|
||||||
self.next_index = self.next_feature = None
|
|
||||||
|
|
||||||
|
# その他の設定
|
||||||
self.next_trans = modelSlot.defaultTrans
|
self.next_trans = modelSlot.defaultTrans
|
||||||
self.next_samplingRate = modelSlot.samplingRate
|
self.next_samplingRate = modelSlot.samplingRate
|
||||||
self.next_embedder = modelSlot.embedder
|
|
||||||
self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
|
self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
|
||||||
|
self.needSwitch = True
|
||||||
print("[Voice Changer] Prepare done.")
|
print("[Voice Changer] Prepare done.")
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
|
|
||||||
def switchModel(self):
|
def switchModel(self):
|
||||||
print("[Voice Changer] Switching model..")
|
print("[Voice Changer] Switching model..")
|
||||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
self.embedder = self.next_embedder
|
||||||
|
|
||||||
# embedderはモデルによらず再利用できる可能性が高いので、Switchのタイミングでこちらで取得
|
|
||||||
try:
|
|
||||||
self.embedder = EmbedderManager.getEmbedder(
|
|
||||||
self.next_embedder,
|
|
||||||
self.params.hubert_base,
|
|
||||||
True,
|
|
||||||
dev,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print("[Voice Changer] load hubert error", e)
|
|
||||||
traceback.print_exc()
|
|
||||||
|
|
||||||
self.inferencer = self.next_inferencer
|
self.inferencer = self.next_inferencer
|
||||||
self.feature = self.next_feature
|
self.feature = self.next_feature
|
||||||
self.index = self.next_index
|
self.index = self.next_index
|
||||||
self.settings.tran = self.next_trans
|
self.settings.tran = self.next_trans
|
||||||
self.settings.framework = self.next_framework
|
|
||||||
self.settings.modelSamplingRate = self.next_samplingRate
|
self.settings.modelSamplingRate = self.next_samplingRate
|
||||||
|
self.settings.framework = self.next_framework
|
||||||
|
|
||||||
self.next_net_g = None
|
|
||||||
self.next_onnx_session = None
|
|
||||||
print(
|
print(
|
||||||
"[Voice Changer] Switching model..done",
|
"[Voice Changer] Switching model..done",
|
||||||
)
|
)
|
||||||
|
|
||||||
def update_settings(self, key: str, val: int | float | str):
|
def update_settings(self, key: str, val: int | float | str):
|
||||||
# if key == "onnxExecutionProvider" and self.onnx_session is not None:
|
|
||||||
# if val == "CUDAExecutionProvider":
|
|
||||||
# if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
|
|
||||||
# self.settings.gpu = 0
|
|
||||||
# provider_options = [{"device_id": self.settings.gpu}]
|
|
||||||
# self.onnx_session.set_providers(
|
|
||||||
# providers=[val], provider_options=provider_options
|
|
||||||
# )
|
|
||||||
# if hasattr(self, "hubert_onnx"):
|
|
||||||
# self.hubert_onnx.set_providers(
|
|
||||||
# providers=[val], provider_options=provider_options
|
|
||||||
# )
|
|
||||||
# else:
|
|
||||||
# self.onnx_session.set_providers(providers=[val])
|
|
||||||
# if hasattr(self, "hubert_onnx"):
|
|
||||||
# self.hubert_onnx.set_providers(providers=[val])
|
|
||||||
# elif key == "onnxExecutionProvider" and self.onnx_session is None:
|
|
||||||
# print("Onnx is not enabled. Please load model.")
|
|
||||||
# return False
|
|
||||||
if key in self.settings.intData:
|
if key in self.settings.intData:
|
||||||
|
# 設定前処理
|
||||||
val = cast(int, val)
|
val = cast(int, val)
|
||||||
# if (
|
|
||||||
# key == "gpu"
|
|
||||||
# and val >= 0
|
|
||||||
# and val < self.gpu_num
|
|
||||||
# and self.onnx_session is not None
|
|
||||||
# ):
|
|
||||||
# providers = self.onnx_session.get_providers()
|
|
||||||
# print("Providers:", providers)
|
|
||||||
# if "CUDAExecutionProvider" in providers:
|
|
||||||
# provider_options = [{"device_id": self.settings.gpu}]
|
|
||||||
# self.onnx_session.set_providers(
|
|
||||||
# providers=["CUDAExecutionProvider"],
|
|
||||||
# provider_options=provider_options,
|
|
||||||
# )
|
|
||||||
if key == "modelSlotIndex":
|
if key == "modelSlotIndex":
|
||||||
if int(val) < 0:
|
if val < 0:
|
||||||
return True
|
return True
|
||||||
# self.switchModel(int(val))
|
val = val % 1000 # Quick hack for same slot is selected
|
||||||
val = int(val) % 1000 # Quick hack for same slot is selected
|
|
||||||
self.prepareModel(val)
|
self.prepareModel(val)
|
||||||
self.currentSlot = -1
|
self.needSwitch = True
|
||||||
setattr(self.settings, key, int(val))
|
|
||||||
|
# 設定
|
||||||
|
setattr(self.settings, key, val)
|
||||||
|
|
||||||
|
if key == "gpu" and self.embedder is not None:
|
||||||
|
dev = self.deviceManager.getDevice(val)
|
||||||
|
half = self.deviceManager.halfPrecisionAvailable(val)
|
||||||
|
|
||||||
|
# half-precisionの使用可否が変わるときは作り直し
|
||||||
|
if (
|
||||||
|
self.inferencer is not None
|
||||||
|
and self.inferencer.isHalf == half
|
||||||
|
and self.embedder.isHalf == half
|
||||||
|
):
|
||||||
|
print(
|
||||||
|
"NOT NEED CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!",
|
||||||
|
half,
|
||||||
|
)
|
||||||
|
self.embedder.setDevice(dev)
|
||||||
|
self.inferencer.setDevice(dev)
|
||||||
|
else:
|
||||||
|
print("CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", half)
|
||||||
|
self.prepareModel(self.settings.modelSlotIndex)
|
||||||
elif key in self.settings.floatData:
|
elif key in self.settings.floatData:
|
||||||
setattr(self.settings, key, float(val))
|
setattr(self.settings, key, float(val))
|
||||||
elif key in self.settings.strData:
|
elif key in self.settings.strData:
|
||||||
setattr(self.settings, key, str(val))
|
setattr(self.settings, key, str(val))
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def get_info(self):
|
def get_info(self):
|
||||||
data = asdict(self.settings)
|
data = asdict(self.settings)
|
||||||
|
|
||||||
# data["onnxExecutionProviders"] = (
|
|
||||||
# self.onnx_session.get_providers() if self.onnx_session is not None else []
|
|
||||||
# )
|
|
||||||
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
|
|
||||||
for f in files:
|
|
||||||
if data[f] is not None and os.path.exists(data[f]):
|
|
||||||
data[f] = os.path.basename(data[f])
|
|
||||||
else:
|
|
||||||
data[f] = ""
|
|
||||||
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def get_processing_sampling_rate(self):
|
def get_processing_sampling_rate(self):
|
||||||
@ -295,118 +298,6 @@ class RVC:
|
|||||||
|
|
||||||
return (self.audio_buffer, convertSize, vol)
|
return (self.audio_buffer, convertSize, vol)
|
||||||
|
|
||||||
def _onnx_inference(self, data):
|
|
||||||
if hasattr(self, "onnx_session") is False or self.onnx_session is None:
|
|
||||||
print("[Voice Changer] No onnx session.")
|
|
||||||
raise NoModeLoadedException("ONNX")
|
|
||||||
|
|
||||||
if self.settings.gpu < 0 or self.gpu_num == 0:
|
|
||||||
dev = torch.device("cpu")
|
|
||||||
else:
|
|
||||||
dev = torch.device("cuda", index=self.settings.gpu)
|
|
||||||
|
|
||||||
# self.hubert_model = self.hubert_model.to(dev)
|
|
||||||
self.embedder = self.embedder.to(dev)
|
|
||||||
|
|
||||||
audio = data[0]
|
|
||||||
convertSize = data[1]
|
|
||||||
vol = data[2]
|
|
||||||
|
|
||||||
audio = resampy.resample(audio, self.settings.modelSamplingRate, 16000)
|
|
||||||
|
|
||||||
if vol < self.settings.silentThreshold:
|
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
|
||||||
|
|
||||||
with torch.no_grad():
|
|
||||||
repeat = 3 if self.is_half else 1
|
|
||||||
repeat *= self.settings.rvcQuality # 0 or 3
|
|
||||||
vc = VC(
|
|
||||||
self.settings.modelSamplingRate,
|
|
||||||
torch.device("cuda:0"),
|
|
||||||
self.is_half,
|
|
||||||
repeat,
|
|
||||||
)
|
|
||||||
sid = 0
|
|
||||||
f0_up_key = self.settings.tran
|
|
||||||
f0_method = self.settings.f0Detector
|
|
||||||
index_rate = self.settings.indexRatio
|
|
||||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
|
||||||
|
|
||||||
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
|
||||||
audio_out = vc.pipeline(
|
|
||||||
# self.hubert_model,
|
|
||||||
self.embedder,
|
|
||||||
self.onnx_session,
|
|
||||||
self.pitchExtractor,
|
|
||||||
sid,
|
|
||||||
audio,
|
|
||||||
f0_up_key,
|
|
||||||
f0_method,
|
|
||||||
self.index,
|
|
||||||
self.feature,
|
|
||||||
index_rate,
|
|
||||||
if_f0,
|
|
||||||
silence_front=self.settings.extraConvertSize
|
|
||||||
/ self.settings.modelSamplingRate,
|
|
||||||
embChannels=embChannels,
|
|
||||||
)
|
|
||||||
result = audio_out * np.sqrt(vol)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _pyTorch_inference(self, data):
|
|
||||||
# if hasattr(self, "net_g") is False or self.net_g is None:
|
|
||||||
# print(
|
|
||||||
# "[Voice Changer] No pyTorch session.",
|
|
||||||
# hasattr(self, "net_g"),
|
|
||||||
# self.net_g,
|
|
||||||
# )
|
|
||||||
# raise NoModeLoadedException("pytorch")
|
|
||||||
|
|
||||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
|
||||||
self.embedder = self.embedder.to(dev)
|
|
||||||
self.inferencer = self.inferencer.to(dev)
|
|
||||||
|
|
||||||
audio = data[0]
|
|
||||||
convertSize = data[1]
|
|
||||||
vol = data[2]
|
|
||||||
|
|
||||||
audio = resampy.resample(audio, self.settings.modelSamplingRate, 16000)
|
|
||||||
|
|
||||||
if vol < self.settings.silentThreshold:
|
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
|
||||||
|
|
||||||
repeat = 3 if self.is_half else 1
|
|
||||||
repeat *= self.settings.rvcQuality # 0 or 3
|
|
||||||
vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat)
|
|
||||||
sid = 0
|
|
||||||
f0_up_key = self.settings.tran
|
|
||||||
f0_method = self.settings.f0Detector
|
|
||||||
index_rate = self.settings.indexRatio
|
|
||||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
|
||||||
|
|
||||||
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
|
||||||
audio_out = vc.pipeline(
|
|
||||||
self.embedder,
|
|
||||||
self.inferencer,
|
|
||||||
self.pitchExtractor,
|
|
||||||
sid,
|
|
||||||
audio,
|
|
||||||
f0_up_key,
|
|
||||||
f0_method,
|
|
||||||
self.index,
|
|
||||||
self.feature,
|
|
||||||
index_rate,
|
|
||||||
if_f0,
|
|
||||||
silence_front=self.settings.extraConvertSize
|
|
||||||
/ self.settings.modelSamplingRate,
|
|
||||||
embChannels=embChannels,
|
|
||||||
)
|
|
||||||
|
|
||||||
result = audio_out * np.sqrt(vol)
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def inference(self, data):
|
def inference(self, data):
|
||||||
if self.settings.modelSlotIndex < 0:
|
if self.settings.modelSlotIndex < 0:
|
||||||
print(
|
print(
|
||||||
@ -415,15 +306,17 @@ class RVC:
|
|||||||
self.currentSlot,
|
self.currentSlot,
|
||||||
)
|
)
|
||||||
raise NoModeLoadedException("model_common")
|
raise NoModeLoadedException("model_common")
|
||||||
|
if self.needSwitch:
|
||||||
if self.currentSlot != self.settings.modelSlotIndex:
|
|
||||||
print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}")
|
print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}")
|
||||||
self.currentSlot = self.settings.modelSlotIndex
|
self.currentSlot = self.settings.modelSlotIndex
|
||||||
self.switchModel()
|
self.switchModel()
|
||||||
|
self.needSwitch = False
|
||||||
|
|
||||||
dev = self.deviceManager.getDevice(self.settings.gpu)
|
dev = self.deviceManager.getDevice(self.settings.gpu)
|
||||||
self.embedder = self.embedder.to(dev)
|
half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu)
|
||||||
self.inferencer = self.inferencer.to(dev)
|
|
||||||
|
# self.embedder = self.embedder.setDevice(dev)
|
||||||
|
# self.inferencer = self.inferencer.setDevice(dev)
|
||||||
|
|
||||||
audio = data[0]
|
audio = data[0]
|
||||||
convertSize = data[1]
|
convertSize = data[1]
|
||||||
@ -434,16 +327,16 @@ class RVC:
|
|||||||
if vol < self.settings.silentThreshold:
|
if vol < self.settings.silentThreshold:
|
||||||
return np.zeros(convertSize).astype(np.int16)
|
return np.zeros(convertSize).astype(np.int16)
|
||||||
|
|
||||||
repeat = 3 if self.is_half else 1
|
repeat = 3 if half else 1
|
||||||
repeat *= self.settings.rvcQuality # 0 or 3
|
repeat *= self.settings.rvcQuality # 0 or 3
|
||||||
vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat)
|
vc = VC(self.settings.modelSamplingRate, dev, half, repeat)
|
||||||
sid = 0
|
sid = 0
|
||||||
f0_up_key = self.settings.tran
|
f0_up_key = self.settings.tran
|
||||||
f0_method = self.settings.f0Detector
|
|
||||||
index_rate = self.settings.indexRatio
|
index_rate = self.settings.indexRatio
|
||||||
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
|
||||||
|
|
||||||
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
embChannels = self.settings.modelSlots[self.currentSlot].embChannels
|
||||||
|
|
||||||
audio_out = vc.pipeline(
|
audio_out = vc.pipeline(
|
||||||
self.embedder,
|
self.embedder,
|
||||||
self.inferencer,
|
self.inferencer,
|
||||||
@ -451,7 +344,6 @@ class RVC:
|
|||||||
sid,
|
sid,
|
||||||
audio,
|
audio,
|
||||||
f0_up_key,
|
f0_up_key,
|
||||||
f0_method,
|
|
||||||
self.index,
|
self.index,
|
||||||
self.feature,
|
self.feature,
|
||||||
index_rate,
|
index_rate,
|
||||||
|
@ -15,9 +15,6 @@ class RVCSettings:
|
|||||||
clusterInferRatio: float = 0.1
|
clusterInferRatio: float = 0.1
|
||||||
|
|
||||||
framework: str = "PyTorch" # PyTorch or ONNX
|
framework: str = "PyTorch" # PyTorch or ONNX
|
||||||
pyTorchModelFile: str = ""
|
|
||||||
onnxModelFile: str = ""
|
|
||||||
configFile: str = ""
|
|
||||||
modelSlots: list[ModelSlot] = field(
|
modelSlots: list[ModelSlot] = field(
|
||||||
default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot(), ModelSlot()]
|
default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot(), ModelSlot()]
|
||||||
)
|
)
|
||||||
|
@ -1,13 +0,0 @@
|
|||||||
import torch
|
|
||||||
from transformers import HubertModel
|
|
||||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
|
||||||
|
|
||||||
|
|
||||||
class RinnaHubertBase:
|
|
||||||
def __init__(self):
|
|
||||||
model = HubertModel.from_pretrained("rinna/japanese-hubert-base")
|
|
||||||
model.eval()
|
|
||||||
self.model = model
|
|
||||||
|
|
||||||
def extract(self, audio: AudioInOut):
|
|
||||||
return self.model(audio)
|
|
@ -3,6 +3,7 @@ import numpy as np
|
|||||||
# import parselmouth
|
# import parselmouth
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
|
from Exceptions import HalfPrecisionChangingException
|
||||||
|
|
||||||
from voice_changer.RVC.embedder.Embedder import Embedder
|
from voice_changer.RVC.embedder.Embedder import Embedder
|
||||||
from voice_changer.RVC.inferencer.Inferencer import Inferencer
|
from voice_changer.RVC.inferencer.Inferencer import Inferencer
|
||||||
@ -26,7 +27,6 @@ class VC(object):
|
|||||||
sid,
|
sid,
|
||||||
audio,
|
audio,
|
||||||
f0_up_key,
|
f0_up_key,
|
||||||
f0_method,
|
|
||||||
index,
|
index,
|
||||||
big_npy,
|
big_npy,
|
||||||
index_rate,
|
index_rate,
|
||||||
@ -68,7 +68,13 @@ class VC(object):
|
|||||||
|
|
||||||
# embedding
|
# embedding
|
||||||
padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
|
padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
|
||||||
feats = embedder.extractFeatures(feats, embChannels)
|
try:
|
||||||
|
feats = embedder.extractFeatures(feats, embChannels)
|
||||||
|
except RuntimeError as e:
|
||||||
|
if "HALF" in e.__str__().upper():
|
||||||
|
raise HalfPrecisionChangingException()
|
||||||
|
else:
|
||||||
|
raise e
|
||||||
|
|
||||||
# Index - feature抽出
|
# Index - feature抽出
|
||||||
if (
|
if (
|
||||||
@ -103,34 +109,46 @@ class VC(object):
|
|||||||
|
|
||||||
# 推論実行
|
# 推論実行
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
if pitch is not None:
|
audio1 = (
|
||||||
audio1 = (
|
(inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768)
|
||||||
(
|
.data.cpu()
|
||||||
inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
|
.float()
|
||||||
* 32768
|
.numpy()
|
||||||
)
|
.astype(np.int16)
|
||||||
.data.cpu()
|
)
|
||||||
.float()
|
|
||||||
.numpy()
|
# if pitch is not None:
|
||||||
.astype(np.int16)
|
# print("INFERENCE 1 ")
|
||||||
)
|
# audio1 = (
|
||||||
else:
|
# (
|
||||||
if hasattr(inferencer, "infer_pitchless"):
|
# inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
|
||||||
audio1 = (
|
# * 32768
|
||||||
(inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768)
|
# )
|
||||||
.data.cpu()
|
# .data.cpu()
|
||||||
.float()
|
# .float()
|
||||||
.numpy()
|
# .numpy()
|
||||||
.astype(np.int16)
|
# .astype(np.int16)
|
||||||
)
|
# )
|
||||||
else:
|
# else:
|
||||||
audio1 = (
|
# if hasattr(inferencer, "infer_pitchless"):
|
||||||
(inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768)
|
# print("INFERENCE 2 ")
|
||||||
.data.cpu()
|
|
||||||
.float()
|
# audio1 = (
|
||||||
.numpy()
|
# (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768)
|
||||||
.astype(np.int16)
|
# .data.cpu()
|
||||||
)
|
# .float()
|
||||||
|
# .numpy()
|
||||||
|
# .astype(np.int16)
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# print("INFERENCE 3 ")
|
||||||
|
# audio1 = (
|
||||||
|
# (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768)
|
||||||
|
# .data.cpu()
|
||||||
|
# .float()
|
||||||
|
# .numpy()
|
||||||
|
# .astype(np.int16)
|
||||||
|
# )
|
||||||
|
|
||||||
del feats, p_len, padding_mask
|
del feats, p_len, padding_mask
|
||||||
torch.cuda.empty_cache()
|
torch.cuda.empty_cache()
|
||||||
|
@ -29,6 +29,9 @@ class DeviceManager(object):
|
|||||||
def halfPrecisionAvailable(self, id: int):
|
def halfPrecisionAvailable(self, id: int):
|
||||||
if self.gpu_num == 0:
|
if self.gpu_num == 0:
|
||||||
return False
|
return False
|
||||||
|
if id < 0:
|
||||||
|
return False
|
||||||
|
|
||||||
gpuName = torch.cuda.get_device_name(id).upper()
|
gpuName = torch.cuda.get_device_name(id).upper()
|
||||||
|
|
||||||
# original: https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/config.py
|
# original: https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/config.py
|
||||||
@ -39,3 +42,5 @@ class DeviceManager(object):
|
|||||||
or "1080" in gpuName
|
or "1080" in gpuName
|
||||||
):
|
):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
@ -36,11 +36,14 @@ class Embedder(Protocol):
|
|||||||
self.isHalf = isHalf
|
self.isHalf = isHalf
|
||||||
if self.model is not None and isHalf:
|
if self.model is not None and isHalf:
|
||||||
self.model = self.model.half()
|
self.model = self.model.half()
|
||||||
|
elif self.model is not None and isHalf is False:
|
||||||
|
self.model = self.model.float()
|
||||||
|
|
||||||
def setDevice(self, dev: device):
|
def setDevice(self, dev: device):
|
||||||
self.dev = dev
|
self.dev = dev
|
||||||
if self.model is not None:
|
if self.model is not None:
|
||||||
self.model = self.model.to(self.dev)
|
self.model = self.model.to(self.dev)
|
||||||
|
return self
|
||||||
|
|
||||||
def matchCondition(self, embedderType: EnumEmbedderTypes, file: str) -> bool:
|
def matchCondition(self, embedderType: EnumEmbedderTypes, file: str) -> bool:
|
||||||
# Check Type
|
# Check Type
|
||||||
@ -63,11 +66,3 @@ class Embedder(Protocol):
|
|||||||
|
|
||||||
else:
|
else:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def to(self, dev: torch.device):
|
|
||||||
if self.model is not None:
|
|
||||||
self.model = self.model.to(dev)
|
|
||||||
return self
|
|
||||||
|
|
||||||
def printDevice(self):
|
|
||||||
print("embedder device:", self.model.device)
|
|
||||||
|
@ -23,6 +23,8 @@ class EmbedderManager:
|
|||||||
else:
|
else:
|
||||||
cls.currentEmbedder.setDevice(dev)
|
cls.currentEmbedder.setDevice(dev)
|
||||||
cls.currentEmbedder.setHalf(isHalf)
|
cls.currentEmbedder.setHalf(isHalf)
|
||||||
|
# print("[Voice Changer] generate new embedder. (ANYWAY)", isHalf)
|
||||||
|
# cls.currentEmbedder = cls.loadEmbedder(embederType, file, isHalf, dev)
|
||||||
return cls.currentEmbedder
|
return cls.currentEmbedder
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -4,6 +4,7 @@ import torch
|
|||||||
from torch import device
|
from torch import device
|
||||||
|
|
||||||
from const import EnumInferenceTypes
|
from const import EnumInferenceTypes
|
||||||
|
import onnxruntime
|
||||||
|
|
||||||
|
|
||||||
class Inferencer(Protocol):
|
class Inferencer(Protocol):
|
||||||
@ -12,7 +13,7 @@ class Inferencer(Protocol):
|
|||||||
isHalf: bool = True
|
isHalf: bool = True
|
||||||
dev: device
|
dev: device
|
||||||
|
|
||||||
model: Any | None = None
|
model: onnxruntime.InferenceSession | Any | None = None
|
||||||
|
|
||||||
def loadModel(self, file: str, dev: device, isHalf: bool = True):
|
def loadModel(self, file: str, dev: device, isHalf: bool = True):
|
||||||
...
|
...
|
||||||
@ -43,16 +44,11 @@ class Inferencer(Protocol):
|
|||||||
self.isHalf = isHalf
|
self.isHalf = isHalf
|
||||||
if self.model is not None and isHalf:
|
if self.model is not None and isHalf:
|
||||||
self.model = self.model.half()
|
self.model = self.model.half()
|
||||||
|
elif self.model is not None and isHalf is False:
|
||||||
|
self.model = self.model.float()
|
||||||
|
|
||||||
def setDevice(self, dev: device):
|
def setDevice(self, dev: device):
|
||||||
self.dev = dev
|
self.dev = dev
|
||||||
if self.model is not None:
|
if self.model is not None:
|
||||||
self.model = self.model.to(self.dev)
|
self.model = self.model.to(self.dev)
|
||||||
|
|
||||||
def to(self, dev: torch.device):
|
|
||||||
if self.model is not None:
|
|
||||||
self.model = self.model.to(dev)
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def printDevice(self):
|
|
||||||
print("inferencer device:", self.model.device)
|
|
||||||
|
@ -2,8 +2,8 @@ from torch import device
|
|||||||
|
|
||||||
from const import EnumInferenceTypes
|
from const import EnumInferenceTypes
|
||||||
from voice_changer.RVC.inferencer.Inferencer import Inferencer
|
from voice_changer.RVC.inferencer.Inferencer import Inferencer
|
||||||
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInference
|
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
|
||||||
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferenceNono
|
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
|
||||||
from voice_changer.RVC.inferencer.RVCInferencer import RVCInferencer
|
from voice_changer.RVC.inferencer.RVCInferencer import RVCInferencer
|
||||||
from voice_changer.RVC.inferencer.RVCInferencerNono import RVCInferencerNono
|
from voice_changer.RVC.inferencer.RVCInferencerNono import RVCInferencerNono
|
||||||
from voice_changer.RVC.inferencer.WebUIInferencer import WebUIInferencer
|
from voice_changer.RVC.inferencer.WebUIInferencer import WebUIInferencer
|
||||||
@ -48,11 +48,11 @@ class InferencerManager:
|
|||||||
inferencerType == EnumInferenceTypes.onnxRVC
|
inferencerType == EnumInferenceTypes.onnxRVC
|
||||||
or inferencerType == EnumInferenceTypes.onnxRVC.value
|
or inferencerType == EnumInferenceTypes.onnxRVC.value
|
||||||
):
|
):
|
||||||
return OnnxRVCInference().loadModel(file, dev, isHalf)
|
return OnnxRVCInferencer().loadModel(file, dev, isHalf)
|
||||||
elif (
|
elif (
|
||||||
inferencerType == EnumInferenceTypes.onnxRVCNono
|
inferencerType == EnumInferenceTypes.onnxRVCNono
|
||||||
or inferencerType == EnumInferenceTypes.onnxRVCNono.value
|
or inferencerType == EnumInferenceTypes.onnxRVCNono.value
|
||||||
):
|
):
|
||||||
return OnnxRVCInferenceNono().loadModel(file, dev, isHalf)
|
return OnnxRVCInferencerNono().loadModel(file, dev, isHalf)
|
||||||
else:
|
else:
|
||||||
raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)
|
raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)
|
||||||
|
@ -8,18 +8,16 @@ import numpy as np
|
|||||||
providers = ["CPUExecutionProvider"]
|
providers = ["CPUExecutionProvider"]
|
||||||
|
|
||||||
|
|
||||||
class OnnxRVCInference(Inferencer):
|
class OnnxRVCInferencer(Inferencer):
|
||||||
def loadModel(self, file: str, dev: device, isHalf: bool = True):
|
def loadModel(self, file: str, dev: device, isHalf: bool = True):
|
||||||
super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf)
|
super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf)
|
||||||
# ort_options = onnxruntime.SessionOptions()
|
# ort_options = onnxruntime.SessionOptions()
|
||||||
# ort_options.intra_op_num_threads = 8
|
# ort_options.intra_op_num_threads = 8
|
||||||
|
|
||||||
onnx_session = onnxruntime.InferenceSession(
|
onnx_session = onnxruntime.InferenceSession(file, providers=providers)
|
||||||
self.onnx_model, providers=providers
|
|
||||||
)
|
|
||||||
|
|
||||||
# check half-precision
|
# check half-precision
|
||||||
first_input_type = self.onnx_session.get_inputs()[0].type
|
first_input_type = onnx_session.get_inputs()[0].type
|
||||||
if first_input_type == "tensor(float)":
|
if first_input_type == "tensor(float)":
|
||||||
self.isHalf = False
|
self.isHalf = False
|
||||||
else:
|
else:
|
||||||
@ -32,13 +30,16 @@ class OnnxRVCInference(Inferencer):
|
|||||||
self,
|
self,
|
||||||
feats: torch.Tensor,
|
feats: torch.Tensor,
|
||||||
pitch_length: torch.Tensor,
|
pitch_length: torch.Tensor,
|
||||||
pitch: torch.Tensor | None,
|
pitch: torch.Tensor,
|
||||||
pitchf: torch.Tensor | None,
|
pitchf: torch.Tensor,
|
||||||
sid: torch.Tensor,
|
sid: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
if pitch is None or pitchf is None:
|
if pitch is None or pitchf is None:
|
||||||
raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.")
|
raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.")
|
||||||
|
|
||||||
|
print("INFER1", self.model.get_providers())
|
||||||
|
print("INFER2", self.model.get_provider_options())
|
||||||
|
print("INFER3", self.model.get_session_options())
|
||||||
if self.isHalf:
|
if self.isHalf:
|
||||||
audio1 = self.model.run(
|
audio1 = self.model.run(
|
||||||
["audio"],
|
["audio"],
|
||||||
@ -65,14 +66,22 @@ class OnnxRVCInference(Inferencer):
|
|||||||
return torch.tensor(np.array(audio1))
|
return torch.tensor(np.array(audio1))
|
||||||
|
|
||||||
def setHalf(self, isHalf: bool):
|
def setHalf(self, isHalf: bool):
|
||||||
raise RuntimeError("half-precision is not changable.", self.isHalf)
|
self.isHalf = isHalf
|
||||||
|
pass
|
||||||
|
# raise RuntimeError("half-precision is not changable.", self.isHalf)
|
||||||
|
|
||||||
def setDevice(self, dev: device):
|
def setDevice(self, dev: device):
|
||||||
self.dev = dev
|
index = dev.index
|
||||||
if self.model is not None:
|
type = dev.type
|
||||||
self.model = self.model.to(self.dev)
|
if type == "cpu":
|
||||||
|
self.model.set_providers(providers=["CPUExecutionProvider"])
|
||||||
|
elif type == "cuda":
|
||||||
|
provider_options = [{"device_id": index}]
|
||||||
|
self.model.set_providers(
|
||||||
|
providers=["CUDAExecutionProvider"],
|
||||||
|
provider_options=provider_options,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.model.set_providers(providers=["CPUExecutionProvider"])
|
||||||
|
|
||||||
def to(self, dev: torch.device):
|
|
||||||
if self.model is not None:
|
|
||||||
self.model = self.model.to(dev)
|
|
||||||
return self
|
return self
|
||||||
|
@ -2,13 +2,14 @@ import torch
|
|||||||
from torch import device
|
from torch import device
|
||||||
import onnxruntime
|
import onnxruntime
|
||||||
from const import EnumInferenceTypes
|
from const import EnumInferenceTypes
|
||||||
from voice_changer.RVC.inferencer.Inferencer import Inferencer
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
|
||||||
|
|
||||||
providers = ["CPUExecutionProvider"]
|
providers = ["CPUExecutionProvider"]
|
||||||
|
|
||||||
|
|
||||||
class OnnxRVCInferenceNono(Inferencer):
|
class OnnxRVCInferencerNono(OnnxRVCInferencer):
|
||||||
def loadModel(self, file: str, dev: device, isHalf: bool = True):
|
def loadModel(self, file: str, dev: device, isHalf: bool = True):
|
||||||
super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf)
|
super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf)
|
||||||
# ort_options = onnxruntime.SessionOptions()
|
# ort_options = onnxruntime.SessionOptions()
|
||||||
@ -56,16 +57,3 @@ class OnnxRVCInferenceNono(Inferencer):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return torch.tensor(np.array(audio1))
|
return torch.tensor(np.array(audio1))
|
||||||
|
|
||||||
def setHalf(self, isHalf: bool):
|
|
||||||
raise RuntimeError("half-precision is not changable.", self.isHalf)
|
|
||||||
|
|
||||||
def setDevice(self, dev: device):
|
|
||||||
self.dev = dev
|
|
||||||
if self.model is not None:
|
|
||||||
self.model = self.model.to(self.dev)
|
|
||||||
|
|
||||||
def to(self, dev: torch.device):
|
|
||||||
if self.model is not None:
|
|
||||||
self.model = self.model.to(dev)
|
|
||||||
return self
|
|
||||||
|
@ -16,6 +16,8 @@ class RVCInferencer(Inferencer):
|
|||||||
|
|
||||||
model.eval()
|
model.eval()
|
||||||
model.load_state_dict(cpt["weight"], strict=False)
|
model.load_state_dict(cpt["weight"], strict=False)
|
||||||
|
|
||||||
|
model = model.to(dev)
|
||||||
if isHalf:
|
if isHalf:
|
||||||
model = model.half()
|
model = model.half()
|
||||||
|
|
||||||
@ -26,8 +28,8 @@ class RVCInferencer(Inferencer):
|
|||||||
self,
|
self,
|
||||||
feats: torch.Tensor,
|
feats: torch.Tensor,
|
||||||
pitch_length: torch.Tensor,
|
pitch_length: torch.Tensor,
|
||||||
pitch: torch.Tensor | None,
|
pitch: torch.Tensor,
|
||||||
pitchf: torch.Tensor | None,
|
pitchf: torch.Tensor,
|
||||||
sid: torch.Tensor,
|
sid: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
return self.model.infer(feats, pitch_length, pitch, pitchf, sid)
|
return self.model.infer(feats, pitch_length, pitch, pitchf, sid)
|
||||||
|
@ -16,6 +16,8 @@ class RVCInferencerNono(Inferencer):
|
|||||||
|
|
||||||
model.eval()
|
model.eval()
|
||||||
model.load_state_dict(cpt["weight"], strict=False)
|
model.load_state_dict(cpt["weight"], strict=False)
|
||||||
|
|
||||||
|
model = model.to(dev)
|
||||||
if isHalf:
|
if isHalf:
|
||||||
model = model.half()
|
model = model.half()
|
||||||
|
|
||||||
|
@ -14,6 +14,8 @@ class WebUIInferencer(Inferencer):
|
|||||||
|
|
||||||
model.eval()
|
model.eval()
|
||||||
model.load_state_dict(cpt["weight"], strict=False)
|
model.load_state_dict(cpt["weight"], strict=False)
|
||||||
|
|
||||||
|
model = model.to(dev)
|
||||||
if isHalf:
|
if isHalf:
|
||||||
model = model.half()
|
model = model.half()
|
||||||
|
|
||||||
@ -24,8 +26,8 @@ class WebUIInferencer(Inferencer):
|
|||||||
self,
|
self,
|
||||||
feats: torch.Tensor,
|
feats: torch.Tensor,
|
||||||
pitch_length: torch.Tensor,
|
pitch_length: torch.Tensor,
|
||||||
pitch: torch.Tensor | None,
|
pitch: torch.Tensor,
|
||||||
pitchf: torch.Tensor | None,
|
pitchf: torch.Tensor,
|
||||||
sid: torch.Tensor,
|
sid: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
return self.model.infer(feats, pitch_length, pitch, pitchf, sid)
|
return self.model.infer(feats, pitch_length, pitch, pitchf, sid)
|
||||||
|
@ -14,6 +14,8 @@ class WebUIInferencerNono(Inferencer):
|
|||||||
|
|
||||||
model.eval()
|
model.eval()
|
||||||
model.load_state_dict(cpt["weight"], strict=False)
|
model.load_state_dict(cpt["weight"], strict=False)
|
||||||
|
|
||||||
|
model = model.to(dev)
|
||||||
if isHalf:
|
if isHalf:
|
||||||
model = model.half()
|
model = model.half()
|
||||||
|
|
||||||
|
@ -13,7 +13,11 @@ from voice_changer.utils.LoadModelParams import LoadModelParams
|
|||||||
|
|
||||||
from voice_changer.utils.Timer import Timer
|
from voice_changer.utils.Timer import Timer
|
||||||
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
|
from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut
|
||||||
from Exceptions import NoModeLoadedException, ONNXInputArgumentException
|
from Exceptions import (
|
||||||
|
HalfPrecisionChangingException,
|
||||||
|
NoModeLoadedException,
|
||||||
|
ONNXInputArgumentException,
|
||||||
|
)
|
||||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||||
|
|
||||||
providers = [
|
providers = [
|
||||||
@ -341,6 +345,9 @@ class VoiceChanger:
|
|||||||
except ONNXInputArgumentException as e:
|
except ONNXInputArgumentException as e:
|
||||||
print("[Voice Changer] [Exception]", e)
|
print("[Voice Changer] [Exception]", e)
|
||||||
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||||
|
except HalfPrecisionChangingException as e:
|
||||||
|
print("[Voice Changer] Switching model configuration....", e)
|
||||||
|
return np.zeros(1).astype(np.int16), [0, 0, 0]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("VC PROCESSING!!!! EXCEPTION!!!", e)
|
print("VC PROCESSING!!!! EXCEPTION!!!", e)
|
||||||
print(traceback.format_exc())
|
print(traceback.format_exc())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user