diff --git a/server/Exceptions.py b/server/Exceptions.py index 2fc4a21e..22f61a1a 100644 --- a/server/Exceptions.py +++ b/server/Exceptions.py @@ -8,6 +8,11 @@ class NoModeLoadedException(Exception): ) +class HalfPrecisionChangingException(Exception): + def __str__(self): + return repr("HalfPrecision related exception.") + + class ONNXInputArgumentException(Exception): def __str__(self): return repr("ONNX received invalid argument.") diff --git a/server/voice_changer/RVC/ModelSlotGenerator.py b/server/voice_changer/RVC/ModelSlotGenerator.py index 036aa0f6..58563bc5 100644 --- a/server/voice_changer/RVC/ModelSlotGenerator.py +++ b/server/voice_changer/RVC/ModelSlotGenerator.py @@ -48,6 +48,15 @@ def _setInfoByPytorch(slot: ModelSlot, file: str): if slot.embedder.endswith("768"): slot.embedder = slot.embedder[:-3] + if slot.embedder == EnumEmbedderTypes.hubert.value: + slot.embedder = EnumEmbedderTypes.hubert + elif slot.embedder == EnumEmbedderTypes.contentvec.value: + slot.embedder = EnumEmbedderTypes.contentvec + elif slot.embedder == EnumEmbedderTypes.hubert_jp.value: + slot.embedder = EnumEmbedderTypes.hubert_jp + else: + raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") + slot.samplingRate = cpt["config"][-1] del cpt @@ -63,9 +72,18 @@ def _setInfoByONNX(slot: ModelSlot, file: str): slot.modelType = metadata["modelType"] slot.embChannels = metadata["embChannels"] - slot.embedder = ( - metadata["embedder"] if "embedder" in metadata else EnumEmbedderTypes.hubert - ) + + if "embedder" not in metadata: + slot.embedder = EnumEmbedderTypes.hubert + elif metadata["embedder"] == EnumEmbedderTypes.hubert.value: + slot.embedder = EnumEmbedderTypes.hubert + elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value: + slot.embedder = EnumEmbedderTypes.contentvec + elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value: + slot.embedder = EnumEmbedderTypes.hubert_jp + else: + raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder") + slot.f0 = metadata["f0"] slot.modelType = ( EnumInferenceTypes.onnxRVC if slot.f0 else EnumInferenceTypes.onnxRVCNono @@ -73,7 +91,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str): slot.samplingRate = metadata["samplingRate"] slot.deprecated = False - except: + except Exception as e: slot.modelType = EnumInferenceTypes.onnxRVC slot.embChannels = 256 slot.embedder = EnumEmbedderTypes.hubert @@ -81,6 +99,7 @@ def _setInfoByONNX(slot: ModelSlot, file: str): slot.samplingRate = 48000 slot.deprecated = True + print("[Voice Changer] setInfoByONNX", e) print("[Voice Changer] ############## !!!! CAUTION !!!! ####################") print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.") print("[Voice Changer] ############## !!!! CAUTION !!!! ####################") diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py index 5ee10a99..b0c13c41 100644 --- a/server/voice_changer/RVC/RVC.py +++ b/server/voice_changer/RVC/RVC.py @@ -1,5 +1,6 @@ import sys import os +from voice_changer.RVC.ModelSlot import ModelSlot from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor @@ -22,7 +23,6 @@ import resampy from voice_changer.RVC.MergeModel import merge_model from voice_changer.RVC.MergeModelRequest import MergeModelRequest from voice_changer.RVC.ModelSlotGenerator import generateModelSlot -from Exceptions import NoModeLoadedException from voice_changer.RVC.RVCSettings import RVCSettings from voice_changer.RVC.embedder.Embedder import Embedder from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager @@ -42,7 +42,7 @@ import torch import traceback import faiss -from const import TMP_DIR, UPLOAD_DIR +from const import TMP_DIR, UPLOAD_DIR, EnumEmbedderTypes from voice_changer.RVC.custom_vc_infer_pipeline import VC @@ -56,34 +56,29 @@ providers = [ class RVC: - audio_buffer: AudioInOut | None = None + initialLoad: bool = True + settings: RVCSettings = RVCSettings() + embedder: Embedder | None = None inferencer: Inferencer | None = None + pitchExtractor: PitchExtractor | None = None deviceManager = DeviceManager.get_instance() + audio_buffer: AudioInOut | None = None + prevVol: float = 0 + params: VoiceChangerParams + currentSlot: int = -1 + needSwitch: bool = False + def __init__(self, params: VoiceChangerParams): - self.initialLoad = True - self.settings = RVCSettings() self.pitchExtractor = PitchExtractorManager.getPitchExtractor( self.settings.f0Detector ) - - self.feature_file = None - self.index_file = None - - self.prevVol = 0 self.params = params - - self.currentSlot = -1 print("RVC initialization: ", params) def loadModel(self, props: LoadModelParams): - """ - loadModelはスロットへのエントリ(推論向けにはロードしない)。 - 例外的に、まだ一つも推論向けにロードされていない場合と稼働中スロットの場合は、ロードする。 - """ - self.is_half = props.isHalf target_slot_idx = props.slot params_str = props.params params = json.loads(params_str) @@ -94,167 +89,175 @@ class RVC: f"[Voice Changer] RVC new model is uploaded,{target_slot_idx}", asdict(modelSlot), ) + """ + [Voice Changer] RVC new model is uploaded,0 {'pyTorchModelFile': 'upload_dir/0/kurage.pth', 'onnxModelFile': None, 'featureFile': None, 'indexFile': None, 'defaultTrans': 16, 'isONNX': False, 'modelType': , 'samplingRate': 48000, 'f0': True, 'embChannels': 768, 'deprecated': False, 'embedder': 'hubert-base-japanese'} + """ # 初回のみロード - if self.initialLoad or target_slot_idx == self.currentSlot: + if self.initialLoad: self.prepareModel(target_slot_idx) self.settings.modelSlotIndex = target_slot_idx - # self.currentSlot = self.settings.modelSlotIndex self.switchModel() self.initialLoad = False + elif target_slot_idx == self.currentSlot: + self.prepareModel(target_slot_idx) + self.needSwitch = True return self.get_info() - # def _getDevice(self): - # if self.settings.gpu < 0 or (self.gpu_num == 0 and self.mps_enabled is False): - # dev = torch.device("cpu") - # elif self.mps_enabled: - # dev = torch.device("mps") - # else: - # dev = torch.device("cuda", index=self.settings.gpu) - # return dev + def createPipeline(self, modelSlot: ModelSlot): + dev = self.deviceManager.getDevice(self.settings.gpu) + half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu) + # ファイル名特定(Inferencer) + inferencerFilename = ( + modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile + ) + # ファイル名特定(embedder) + if modelSlot.embedder == EnumEmbedderTypes.hubert: + emmbedderFilename = self.params.hubert_base + elif modelSlot.embedder == EnumEmbedderTypes.contentvec: + emmbedderFilename = self.params.content_vec_500 + elif modelSlot.embedder == EnumEmbedderTypes.hubert_jp: + emmbedderFilename = self.params.hubert_base_jp + else: + raise RuntimeError( + "[Voice Changer] Exception loading embedder failed. unknwon type:", + modelSlot.embedder, + ) + + # Inferencer 生成 + try: + inferencer = InferencerManager.getInferencer( + modelSlot.modelType, + inferencerFilename, + half, + dev, + ) + except Exception as e: + print("[Voice Changer] exception! loading inferencer", e) + traceback.print_exc() + + # Embedder 生成 + try: + print("AFASFDAFDAFDASDFASDFSADFASDFA", half, self.settings.gpu) + embedder = EmbedderManager.getEmbedder( + modelSlot.embedder, + emmbedderFilename, + half, + dev, + ) + except Exception as e: + print("[Voice Changer] exception! loading embedder", e) + traceback.print_exc() + + return inferencer, embedder + + def loadIndex(self, modelSlot: ModelSlot): + # Indexのロード + print("[Voice Changer] Loading index...") + # ファイル指定がない場合はNone + if modelSlot.featureFile is None or modelSlot.indexFile is None: + return None, None + + # ファイル指定があってもファイルがない場合はNone + if ( + os.path.exists(modelSlot.featureFile) is not True + or os.path.exists(modelSlot.indexFile) is not True + ): + return None, None + + try: + index = faiss.read_index(modelSlot.indexFile) + feature = np.load(modelSlot.featureFile) + except: + print("[Voice Changer] load index failed. Use no index.") + traceback.print_exc() + return None, None + + return index, feature def prepareModel(self, slot: int): if slot < 0: return self.get_info() print("[Voice Changer] Prepare Model of slot:", slot) modelSlot = self.settings.modelSlots[slot] - filename = ( - modelSlot.onnxModelFile if modelSlot.isONNX else modelSlot.pyTorchModelFile - ) - dev = self.deviceManager.getDevice(self.settings.gpu) - # Inferencerのロード - inferencer = InferencerManager.getInferencer( - modelSlot.modelType, - filename, - self.settings.isHalf, - dev, - ) + # Inferencer, embedderのロード + inferencer, embedder = self.createPipeline(modelSlot) + self.next_inferencer = inferencer + self.next_embedder = embedder # Indexのロード - print("[Voice Changer] Loading index...") - if modelSlot.featureFile is not None and modelSlot.indexFile is not None: - if ( - os.path.exists(modelSlot.featureFile) is True - and os.path.exists(modelSlot.indexFile) is True - ): - try: - self.next_index = faiss.read_index(modelSlot.indexFile) - self.next_feature = np.load(modelSlot.featureFile) - except: - print("[Voice Changer] load index failed. Use no index.") - traceback.print_exc() - self.next_index = self.next_feature = None - else: - print("[Voice Changer] Index file is not found. Use no index.") - self.next_index = self.next_feature = None - else: - self.next_index = self.next_feature = None + index, feature = self.loadIndex(modelSlot) + self.next_index = index + self.next_feature = feature + # その他の設定 self.next_trans = modelSlot.defaultTrans self.next_samplingRate = modelSlot.samplingRate - self.next_embedder = modelSlot.embedder self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch" + self.needSwitch = True print("[Voice Changer] Prepare done.") return self.get_info() def switchModel(self): print("[Voice Changer] Switching model..") - dev = self.deviceManager.getDevice(self.settings.gpu) - - # embedderはモデルによらず再利用できる可能性が高いので、Switchのタイミングでこちらで取得 - try: - self.embedder = EmbedderManager.getEmbedder( - self.next_embedder, - self.params.hubert_base, - True, - dev, - ) - except Exception as e: - print("[Voice Changer] load hubert error", e) - traceback.print_exc() - + self.embedder = self.next_embedder self.inferencer = self.next_inferencer self.feature = self.next_feature self.index = self.next_index self.settings.tran = self.next_trans - self.settings.framework = self.next_framework self.settings.modelSamplingRate = self.next_samplingRate + self.settings.framework = self.next_framework - self.next_net_g = None - self.next_onnx_session = None print( "[Voice Changer] Switching model..done", ) def update_settings(self, key: str, val: int | float | str): - # if key == "onnxExecutionProvider" and self.onnx_session is not None: - # if val == "CUDAExecutionProvider": - # if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num: - # self.settings.gpu = 0 - # provider_options = [{"device_id": self.settings.gpu}] - # self.onnx_session.set_providers( - # providers=[val], provider_options=provider_options - # ) - # if hasattr(self, "hubert_onnx"): - # self.hubert_onnx.set_providers( - # providers=[val], provider_options=provider_options - # ) - # else: - # self.onnx_session.set_providers(providers=[val]) - # if hasattr(self, "hubert_onnx"): - # self.hubert_onnx.set_providers(providers=[val]) - # elif key == "onnxExecutionProvider" and self.onnx_session is None: - # print("Onnx is not enabled. Please load model.") - # return False if key in self.settings.intData: + # 設定前処理 val = cast(int, val) - # if ( - # key == "gpu" - # and val >= 0 - # and val < self.gpu_num - # and self.onnx_session is not None - # ): - # providers = self.onnx_session.get_providers() - # print("Providers:", providers) - # if "CUDAExecutionProvider" in providers: - # provider_options = [{"device_id": self.settings.gpu}] - # self.onnx_session.set_providers( - # providers=["CUDAExecutionProvider"], - # provider_options=provider_options, - # ) if key == "modelSlotIndex": - if int(val) < 0: + if val < 0: return True - # self.switchModel(int(val)) - val = int(val) % 1000 # Quick hack for same slot is selected + val = val % 1000 # Quick hack for same slot is selected self.prepareModel(val) - self.currentSlot = -1 - setattr(self.settings, key, int(val)) + self.needSwitch = True + + # 設定 + setattr(self.settings, key, val) + + if key == "gpu" and self.embedder is not None: + dev = self.deviceManager.getDevice(val) + half = self.deviceManager.halfPrecisionAvailable(val) + + # half-precisionの使用可否が変わるときは作り直し + if ( + self.inferencer is not None + and self.inferencer.isHalf == half + and self.embedder.isHalf == half + ): + print( + "NOT NEED CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", + half, + ) + self.embedder.setDevice(dev) + self.inferencer.setDevice(dev) + else: + print("CHAGE TO NEW PIPELINE!!!!!!!!!!!!!!!!!!!!!!!!!!!", half) + self.prepareModel(self.settings.modelSlotIndex) elif key in self.settings.floatData: setattr(self.settings, key, float(val)) elif key in self.settings.strData: setattr(self.settings, key, str(val)) else: return False - return True def get_info(self): data = asdict(self.settings) - - # data["onnxExecutionProviders"] = ( - # self.onnx_session.get_providers() if self.onnx_session is not None else [] - # ) - files = ["configFile", "pyTorchModelFile", "onnxModelFile"] - for f in files: - if data[f] is not None and os.path.exists(data[f]): - data[f] = os.path.basename(data[f]) - else: - data[f] = "" - return data def get_processing_sampling_rate(self): @@ -295,118 +298,6 @@ class RVC: return (self.audio_buffer, convertSize, vol) - def _onnx_inference(self, data): - if hasattr(self, "onnx_session") is False or self.onnx_session is None: - print("[Voice Changer] No onnx session.") - raise NoModeLoadedException("ONNX") - - if self.settings.gpu < 0 or self.gpu_num == 0: - dev = torch.device("cpu") - else: - dev = torch.device("cuda", index=self.settings.gpu) - - # self.hubert_model = self.hubert_model.to(dev) - self.embedder = self.embedder.to(dev) - - audio = data[0] - convertSize = data[1] - vol = data[2] - - audio = resampy.resample(audio, self.settings.modelSamplingRate, 16000) - - if vol < self.settings.silentThreshold: - return np.zeros(convertSize).astype(np.int16) - - with torch.no_grad(): - repeat = 3 if self.is_half else 1 - repeat *= self.settings.rvcQuality # 0 or 3 - vc = VC( - self.settings.modelSamplingRate, - torch.device("cuda:0"), - self.is_half, - repeat, - ) - sid = 0 - f0_up_key = self.settings.tran - f0_method = self.settings.f0Detector - index_rate = self.settings.indexRatio - if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0 - - embChannels = self.settings.modelSlots[self.currentSlot].embChannels - audio_out = vc.pipeline( - # self.hubert_model, - self.embedder, - self.onnx_session, - self.pitchExtractor, - sid, - audio, - f0_up_key, - f0_method, - self.index, - self.feature, - index_rate, - if_f0, - silence_front=self.settings.extraConvertSize - / self.settings.modelSamplingRate, - embChannels=embChannels, - ) - result = audio_out * np.sqrt(vol) - - return result - - def _pyTorch_inference(self, data): - # if hasattr(self, "net_g") is False or self.net_g is None: - # print( - # "[Voice Changer] No pyTorch session.", - # hasattr(self, "net_g"), - # self.net_g, - # ) - # raise NoModeLoadedException("pytorch") - - dev = self.deviceManager.getDevice(self.settings.gpu) - self.embedder = self.embedder.to(dev) - self.inferencer = self.inferencer.to(dev) - - audio = data[0] - convertSize = data[1] - vol = data[2] - - audio = resampy.resample(audio, self.settings.modelSamplingRate, 16000) - - if vol < self.settings.silentThreshold: - return np.zeros(convertSize).astype(np.int16) - - repeat = 3 if self.is_half else 1 - repeat *= self.settings.rvcQuality # 0 or 3 - vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat) - sid = 0 - f0_up_key = self.settings.tran - f0_method = self.settings.f0Detector - index_rate = self.settings.indexRatio - if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0 - - embChannels = self.settings.modelSlots[self.currentSlot].embChannels - audio_out = vc.pipeline( - self.embedder, - self.inferencer, - self.pitchExtractor, - sid, - audio, - f0_up_key, - f0_method, - self.index, - self.feature, - index_rate, - if_f0, - silence_front=self.settings.extraConvertSize - / self.settings.modelSamplingRate, - embChannels=embChannels, - ) - - result = audio_out * np.sqrt(vol) - - return result - def inference(self, data): if self.settings.modelSlotIndex < 0: print( @@ -415,15 +306,17 @@ class RVC: self.currentSlot, ) raise NoModeLoadedException("model_common") - - if self.currentSlot != self.settings.modelSlotIndex: + if self.needSwitch: print(f"Switch model {self.currentSlot} -> {self.settings.modelSlotIndex}") self.currentSlot = self.settings.modelSlotIndex self.switchModel() + self.needSwitch = False dev = self.deviceManager.getDevice(self.settings.gpu) - self.embedder = self.embedder.to(dev) - self.inferencer = self.inferencer.to(dev) + half = self.deviceManager.halfPrecisionAvailable(self.settings.gpu) + + # self.embedder = self.embedder.setDevice(dev) + # self.inferencer = self.inferencer.setDevice(dev) audio = data[0] convertSize = data[1] @@ -434,16 +327,16 @@ class RVC: if vol < self.settings.silentThreshold: return np.zeros(convertSize).astype(np.int16) - repeat = 3 if self.is_half else 1 + repeat = 3 if half else 1 repeat *= self.settings.rvcQuality # 0 or 3 - vc = VC(self.settings.modelSamplingRate, dev, self.is_half, repeat) + vc = VC(self.settings.modelSamplingRate, dev, half, repeat) sid = 0 f0_up_key = self.settings.tran - f0_method = self.settings.f0Detector index_rate = self.settings.indexRatio if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0 embChannels = self.settings.modelSlots[self.currentSlot].embChannels + audio_out = vc.pipeline( self.embedder, self.inferencer, @@ -451,7 +344,6 @@ class RVC: sid, audio, f0_up_key, - f0_method, self.index, self.feature, index_rate, diff --git a/server/voice_changer/RVC/RVCSettings.py b/server/voice_changer/RVC/RVCSettings.py index 2bc269c7..b2cc2157 100644 --- a/server/voice_changer/RVC/RVCSettings.py +++ b/server/voice_changer/RVC/RVCSettings.py @@ -15,9 +15,6 @@ class RVCSettings: clusterInferRatio: float = 0.1 framework: str = "PyTorch" # PyTorch or ONNX - pyTorchModelFile: str = "" - onnxModelFile: str = "" - configFile: str = "" modelSlots: list[ModelSlot] = field( default_factory=lambda: [ModelSlot(), ModelSlot(), ModelSlot(), ModelSlot()] ) diff --git a/server/voice_changer/RVC/RinnaHubertBase.py b/server/voice_changer/RVC/RinnaHubertBase.py deleted file mode 100644 index 451d5769..00000000 --- a/server/voice_changer/RVC/RinnaHubertBase.py +++ /dev/null @@ -1,13 +0,0 @@ -import torch -from transformers import HubertModel -from voice_changer.utils.VoiceChangerModel import AudioInOut - - -class RinnaHubertBase: - def __init__(self): - model = HubertModel.from_pretrained("rinna/japanese-hubert-base") - model.eval() - self.model = model - - def extract(self, audio: AudioInOut): - return self.model(audio) diff --git a/server/voice_changer/RVC/custom_vc_infer_pipeline.py b/server/voice_changer/RVC/custom_vc_infer_pipeline.py index 88fdc8e6..70a7da49 100644 --- a/server/voice_changer/RVC/custom_vc_infer_pipeline.py +++ b/server/voice_changer/RVC/custom_vc_infer_pipeline.py @@ -3,6 +3,7 @@ import numpy as np # import parselmouth import torch import torch.nn.functional as F +from Exceptions import HalfPrecisionChangingException from voice_changer.RVC.embedder.Embedder import Embedder from voice_changer.RVC.inferencer.Inferencer import Inferencer @@ -26,7 +27,6 @@ class VC(object): sid, audio, f0_up_key, - f0_method, index, big_npy, index_rate, @@ -68,7 +68,13 @@ class VC(object): # embedding padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) - feats = embedder.extractFeatures(feats, embChannels) + try: + feats = embedder.extractFeatures(feats, embChannels) + except RuntimeError as e: + if "HALF" in e.__str__().upper(): + raise HalfPrecisionChangingException() + else: + raise e # Index - feature抽出 if ( @@ -103,34 +109,46 @@ class VC(object): # 推論実行 with torch.no_grad(): - if pitch is not None: - audio1 = ( - ( - inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] - * 32768 - ) - .data.cpu() - .float() - .numpy() - .astype(np.int16) - ) - else: - if hasattr(inferencer, "infer_pitchless"): - audio1 = ( - (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768) - .data.cpu() - .float() - .numpy() - .astype(np.int16) - ) - else: - audio1 = ( - (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768) - .data.cpu() - .float() - .numpy() - .astype(np.int16) - ) + audio1 = ( + (inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] * 32768) + .data.cpu() + .float() + .numpy() + .astype(np.int16) + ) + + # if pitch is not None: + # print("INFERENCE 1 ") + # audio1 = ( + # ( + # inferencer.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] + # * 32768 + # ) + # .data.cpu() + # .float() + # .numpy() + # .astype(np.int16) + # ) + # else: + # if hasattr(inferencer, "infer_pitchless"): + # print("INFERENCE 2 ") + + # audio1 = ( + # (inferencer.infer_pitchless(feats, p_len, sid)[0][0, 0] * 32768) + # .data.cpu() + # .float() + # .numpy() + # .astype(np.int16) + # ) + # else: + # print("INFERENCE 3 ") + # audio1 = ( + # (inferencer.infer(feats, p_len, sid)[0][0, 0] * 32768) + # .data.cpu() + # .float() + # .numpy() + # .astype(np.int16) + # ) del feats, p_len, padding_mask torch.cuda.empty_cache() diff --git a/server/voice_changer/RVC/deviceManager/DeviceManager.py b/server/voice_changer/RVC/deviceManager/DeviceManager.py index 901c5dff..abfb4386 100644 --- a/server/voice_changer/RVC/deviceManager/DeviceManager.py +++ b/server/voice_changer/RVC/deviceManager/DeviceManager.py @@ -29,6 +29,9 @@ class DeviceManager(object): def halfPrecisionAvailable(self, id: int): if self.gpu_num == 0: return False + if id < 0: + return False + gpuName = torch.cuda.get_device_name(id).upper() # original: https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/config.py @@ -39,3 +42,5 @@ class DeviceManager(object): or "1080" in gpuName ): return False + + return True diff --git a/server/voice_changer/RVC/embedder/Embedder.py b/server/voice_changer/RVC/embedder/Embedder.py index 5fa36ce6..728f1e7a 100644 --- a/server/voice_changer/RVC/embedder/Embedder.py +++ b/server/voice_changer/RVC/embedder/Embedder.py @@ -36,11 +36,14 @@ class Embedder(Protocol): self.isHalf = isHalf if self.model is not None and isHalf: self.model = self.model.half() + elif self.model is not None and isHalf is False: + self.model = self.model.float() def setDevice(self, dev: device): self.dev = dev if self.model is not None: self.model = self.model.to(self.dev) + return self def matchCondition(self, embedderType: EnumEmbedderTypes, file: str) -> bool: # Check Type @@ -63,11 +66,3 @@ class Embedder(Protocol): else: return True - - def to(self, dev: torch.device): - if self.model is not None: - self.model = self.model.to(dev) - return self - - def printDevice(self): - print("embedder device:", self.model.device) diff --git a/server/voice_changer/RVC/embedder/EmbedderManager.py b/server/voice_changer/RVC/embedder/EmbedderManager.py index 729b84cb..b6d6d100 100644 --- a/server/voice_changer/RVC/embedder/EmbedderManager.py +++ b/server/voice_changer/RVC/embedder/EmbedderManager.py @@ -23,6 +23,8 @@ class EmbedderManager: else: cls.currentEmbedder.setDevice(dev) cls.currentEmbedder.setHalf(isHalf) + # print("[Voice Changer] generate new embedder. (ANYWAY)", isHalf) + # cls.currentEmbedder = cls.loadEmbedder(embederType, file, isHalf, dev) return cls.currentEmbedder @classmethod diff --git a/server/voice_changer/RVC/inferencer/Inferencer.py b/server/voice_changer/RVC/inferencer/Inferencer.py index 960f98c2..46738dc5 100644 --- a/server/voice_changer/RVC/inferencer/Inferencer.py +++ b/server/voice_changer/RVC/inferencer/Inferencer.py @@ -4,6 +4,7 @@ import torch from torch import device from const import EnumInferenceTypes +import onnxruntime class Inferencer(Protocol): @@ -12,7 +13,7 @@ class Inferencer(Protocol): isHalf: bool = True dev: device - model: Any | None = None + model: onnxruntime.InferenceSession | Any | None = None def loadModel(self, file: str, dev: device, isHalf: bool = True): ... @@ -43,16 +44,11 @@ class Inferencer(Protocol): self.isHalf = isHalf if self.model is not None and isHalf: self.model = self.model.half() + elif self.model is not None and isHalf is False: + self.model = self.model.float() def setDevice(self, dev: device): self.dev = dev if self.model is not None: self.model = self.model.to(self.dev) - - def to(self, dev: torch.device): - if self.model is not None: - self.model = self.model.to(dev) return self - - def printDevice(self): - print("inferencer device:", self.model.device) diff --git a/server/voice_changer/RVC/inferencer/InferencerManager.py b/server/voice_changer/RVC/inferencer/InferencerManager.py index 12daabe6..f59c69ee 100644 --- a/server/voice_changer/RVC/inferencer/InferencerManager.py +++ b/server/voice_changer/RVC/inferencer/InferencerManager.py @@ -2,8 +2,8 @@ from torch import device from const import EnumInferenceTypes from voice_changer.RVC.inferencer.Inferencer import Inferencer -from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInference -from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferenceNono +from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer +from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono from voice_changer.RVC.inferencer.RVCInferencer import RVCInferencer from voice_changer.RVC.inferencer.RVCInferencerNono import RVCInferencerNono from voice_changer.RVC.inferencer.WebUIInferencer import WebUIInferencer @@ -48,11 +48,11 @@ class InferencerManager: inferencerType == EnumInferenceTypes.onnxRVC or inferencerType == EnumInferenceTypes.onnxRVC.value ): - return OnnxRVCInference().loadModel(file, dev, isHalf) + return OnnxRVCInferencer().loadModel(file, dev, isHalf) elif ( inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value ): - return OnnxRVCInferenceNono().loadModel(file, dev, isHalf) + return OnnxRVCInferencerNono().loadModel(file, dev, isHalf) else: raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType) diff --git a/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py b/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py index eb9df727..91873b89 100644 --- a/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py +++ b/server/voice_changer/RVC/inferencer/OnnxRVCInferencer.py @@ -8,18 +8,16 @@ import numpy as np providers = ["CPUExecutionProvider"] -class OnnxRVCInference(Inferencer): +class OnnxRVCInferencer(Inferencer): def loadModel(self, file: str, dev: device, isHalf: bool = True): super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf) # ort_options = onnxruntime.SessionOptions() # ort_options.intra_op_num_threads = 8 - onnx_session = onnxruntime.InferenceSession( - self.onnx_model, providers=providers - ) + onnx_session = onnxruntime.InferenceSession(file, providers=providers) # check half-precision - first_input_type = self.onnx_session.get_inputs()[0].type + first_input_type = onnx_session.get_inputs()[0].type if first_input_type == "tensor(float)": self.isHalf = False else: @@ -32,13 +30,16 @@ class OnnxRVCInference(Inferencer): self, feats: torch.Tensor, pitch_length: torch.Tensor, - pitch: torch.Tensor | None, - pitchf: torch.Tensor | None, + pitch: torch.Tensor, + pitchf: torch.Tensor, sid: torch.Tensor, ) -> torch.Tensor: if pitch is None or pitchf is None: raise RuntimeError("[Voice Changer] Pitch or Pitchf is not found.") + print("INFER1", self.model.get_providers()) + print("INFER2", self.model.get_provider_options()) + print("INFER3", self.model.get_session_options()) if self.isHalf: audio1 = self.model.run( ["audio"], @@ -65,14 +66,22 @@ class OnnxRVCInference(Inferencer): return torch.tensor(np.array(audio1)) def setHalf(self, isHalf: bool): - raise RuntimeError("half-precision is not changable.", self.isHalf) + self.isHalf = isHalf + pass + # raise RuntimeError("half-precision is not changable.", self.isHalf) def setDevice(self, dev: device): - self.dev = dev - if self.model is not None: - self.model = self.model.to(self.dev) + index = dev.index + type = dev.type + if type == "cpu": + self.model.set_providers(providers=["CPUExecutionProvider"]) + elif type == "cuda": + provider_options = [{"device_id": index}] + self.model.set_providers( + providers=["CUDAExecutionProvider"], + provider_options=provider_options, + ) + else: + self.model.set_providers(providers=["CPUExecutionProvider"]) - def to(self, dev: torch.device): - if self.model is not None: - self.model = self.model.to(dev) return self diff --git a/server/voice_changer/RVC/inferencer/OnnxRVCInferencerNono.py b/server/voice_changer/RVC/inferencer/OnnxRVCInferencerNono.py index 67a673c4..4254b6d2 100644 --- a/server/voice_changer/RVC/inferencer/OnnxRVCInferencerNono.py +++ b/server/voice_changer/RVC/inferencer/OnnxRVCInferencerNono.py @@ -2,13 +2,14 @@ import torch from torch import device import onnxruntime from const import EnumInferenceTypes -from voice_changer.RVC.inferencer.Inferencer import Inferencer import numpy as np +from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer + providers = ["CPUExecutionProvider"] -class OnnxRVCInferenceNono(Inferencer): +class OnnxRVCInferencerNono(OnnxRVCInferencer): def loadModel(self, file: str, dev: device, isHalf: bool = True): super().setProps(EnumInferenceTypes.onnxRVC, file, dev, isHalf) # ort_options = onnxruntime.SessionOptions() @@ -56,16 +57,3 @@ class OnnxRVCInferenceNono(Inferencer): ) return torch.tensor(np.array(audio1)) - - def setHalf(self, isHalf: bool): - raise RuntimeError("half-precision is not changable.", self.isHalf) - - def setDevice(self, dev: device): - self.dev = dev - if self.model is not None: - self.model = self.model.to(self.dev) - - def to(self, dev: torch.device): - if self.model is not None: - self.model = self.model.to(dev) - return self diff --git a/server/voice_changer/RVC/inferencer/RVCInferencer.py b/server/voice_changer/RVC/inferencer/RVCInferencer.py index 7f6f5c9d..91bbf636 100644 --- a/server/voice_changer/RVC/inferencer/RVCInferencer.py +++ b/server/voice_changer/RVC/inferencer/RVCInferencer.py @@ -16,6 +16,8 @@ class RVCInferencer(Inferencer): model.eval() model.load_state_dict(cpt["weight"], strict=False) + + model = model.to(dev) if isHalf: model = model.half() @@ -26,8 +28,8 @@ class RVCInferencer(Inferencer): self, feats: torch.Tensor, pitch_length: torch.Tensor, - pitch: torch.Tensor | None, - pitchf: torch.Tensor | None, + pitch: torch.Tensor, + pitchf: torch.Tensor, sid: torch.Tensor, ) -> torch.Tensor: return self.model.infer(feats, pitch_length, pitch, pitchf, sid) diff --git a/server/voice_changer/RVC/inferencer/RVCInferencerNono.py b/server/voice_changer/RVC/inferencer/RVCInferencerNono.py index 97a9c429..f84d9c3b 100644 --- a/server/voice_changer/RVC/inferencer/RVCInferencerNono.py +++ b/server/voice_changer/RVC/inferencer/RVCInferencerNono.py @@ -16,6 +16,8 @@ class RVCInferencerNono(Inferencer): model.eval() model.load_state_dict(cpt["weight"], strict=False) + + model = model.to(dev) if isHalf: model = model.half() diff --git a/server/voice_changer/RVC/inferencer/WebUIInferencer.py b/server/voice_changer/RVC/inferencer/WebUIInferencer.py index 7945968d..a9d3b0a8 100644 --- a/server/voice_changer/RVC/inferencer/WebUIInferencer.py +++ b/server/voice_changer/RVC/inferencer/WebUIInferencer.py @@ -14,6 +14,8 @@ class WebUIInferencer(Inferencer): model.eval() model.load_state_dict(cpt["weight"], strict=False) + + model = model.to(dev) if isHalf: model = model.half() @@ -24,8 +26,8 @@ class WebUIInferencer(Inferencer): self, feats: torch.Tensor, pitch_length: torch.Tensor, - pitch: torch.Tensor | None, - pitchf: torch.Tensor | None, + pitch: torch.Tensor, + pitchf: torch.Tensor, sid: torch.Tensor, ) -> torch.Tensor: return self.model.infer(feats, pitch_length, pitch, pitchf, sid) diff --git a/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py b/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py index faa4c4c3..c17465cc 100644 --- a/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py +++ b/server/voice_changer/RVC/inferencer/WebUIInferencerNono.py @@ -14,6 +14,8 @@ class WebUIInferencerNono(Inferencer): model.eval() model.load_state_dict(cpt["weight"], strict=False) + + model = model.to(dev) if isHalf: model = model.half() diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index c82fea65..d00d3dc8 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -13,7 +13,11 @@ from voice_changer.utils.LoadModelParams import LoadModelParams from voice_changer.utils.Timer import Timer from voice_changer.utils.VoiceChangerModel import VoiceChangerModel, AudioInOut -from Exceptions import NoModeLoadedException, ONNXInputArgumentException +from Exceptions import ( + HalfPrecisionChangingException, + NoModeLoadedException, + ONNXInputArgumentException, +) from voice_changer.utils.VoiceChangerParams import VoiceChangerParams providers = [ @@ -341,6 +345,9 @@ class VoiceChanger: except ONNXInputArgumentException as e: print("[Voice Changer] [Exception]", e) return np.zeros(1).astype(np.int16), [0, 0, 0] + except HalfPrecisionChangingException as e: + print("[Voice Changer] Switching model configuration....", e) + return np.zeros(1).astype(np.int16), [0, 0, 0] except Exception as e: print("VC PROCESSING!!!! EXCEPTION!!!", e) print(traceback.format_exc())