From 6b9777f3a272f459a1f94ff3eab8cc3912f75e45 Mon Sep 17 00:00:00 2001 From: nadare <1na2da0re3@gmail.com> Date: Wed, 31 May 2023 23:50:43 +0900 Subject: [PATCH] =?UTF-8?q?protect=20+=20crepe=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/voice_changer/RVC/ModelSlot.py | 1 + .../voice_changer/RVC/ModelSlotGenerator.py | 3 +++ server/voice_changer/RVC/RVC.py | 6 ++++++ server/voice_changer/RVC/RVCSettings.py | 3 ++- server/voice_changer/RVC/SampleDownloader.py | 1 + .../RVC/modelMerger/MergeModelRequest.py | 1 + server/voice_changer/RVC/pipeline/Pipeline.py | 20 +++++++++++++++++-- .../RVC/pitchExtractor/CrepePitchExtractor.py | 7 +++++-- 8 files changed, 37 insertions(+), 5 deletions(-) diff --git a/server/voice_changer/RVC/ModelSlot.py b/server/voice_changer/RVC/ModelSlot.py index b008f943..856652f7 100644 --- a/server/voice_changer/RVC/ModelSlot.py +++ b/server/voice_changer/RVC/ModelSlot.py @@ -12,6 +12,7 @@ class ModelSlot: indexFile: str = "" defaultTune: int = 0 defaultIndexRatio: int = 1 + # defaultProtect: float = .5 isONNX: bool = False modelType: EnumInferenceTypes = EnumInferenceTypes.pyTorchRVC samplingRate: int = -1 diff --git a/server/voice_changer/RVC/ModelSlotGenerator.py b/server/voice_changer/RVC/ModelSlotGenerator.py index 03c0c28b..41f1b43a 100644 --- a/server/voice_changer/RVC/ModelSlotGenerator.py +++ b/server/voice_changer/RVC/ModelSlotGenerator.py @@ -36,6 +36,9 @@ def generateModelSlot(slotDir: str): modelSlot.defaultIndexRatio = ( params["defaultIndexRatio"] if "defaultIndexRatio" in params else 0 ) + # modelSlot.defaultProtect = ( + # params["defaultProtect"] if "defaultProtect" in params else 0.5 + # ) modelSlot.name = params["name"] if "name" in params else None modelSlot.description = params["description"] if "description" in params else None modelSlot.credit = params["credit"] if "credit" in params else None diff --git a/server/voice_changer/RVC/RVC.py b/server/voice_changer/RVC/RVC.py index 09003cc5..1871bd28 100644 --- a/server/voice_changer/RVC/RVC.py +++ b/server/voice_changer/RVC/RVC.py @@ -242,6 +242,7 @@ class RVC: # その他の設定 self.next_trans = modelSlot.defaultTune self.next_index_ratio = modelSlot.defaultIndexRatio + # self.next_protect = modelSlot.defaultProtect self.next_samplingRate = modelSlot.samplingRate self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch" # self.needSwitch = True @@ -254,6 +255,7 @@ class RVC: self.pipeline = self.next_pipeline self.settings.tran = self.next_trans self.settings.indexRatio = self.next_index_ratio + # self.settings.protect = self.next_protect self.settings.modelSamplingRate = self.next_samplingRate self.settings.framework = self.next_framework @@ -336,6 +338,7 @@ class RVC: sid = 0 f0_up_key = self.settings.tran index_rate = self.settings.indexRatio + protect = .5# self.settings.protect if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0 embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj @@ -350,6 +353,7 @@ class RVC: embOutputLayer, useFinalProj, repeat, + protect ) result = audio_out.detach().cpu().numpy() * np.sqrt(vol) @@ -411,6 +415,7 @@ class RVC: params = { "defaultTune": req.defaultTune, "defaultIndexRatio": req.defaultIndexRatio, + # "defaultProtect": req.defaultProtect "sampleId": "", "files": {"rvcModel": storeFile}, } @@ -432,6 +437,7 @@ class RVC: ) params["defaultTune"] = self.settings.tran params["defaultIndexRatio"] = self.settings.indexRatio + # params["defaultProtect"] = self.settings.protect json.dump(params, open(os.path.join(slotDir, "params.json"), "w")) self.loadSlots() diff --git a/server/voice_changer/RVC/RVCSettings.py b/server/voice_changer/RVC/RVCSettings.py index 23153558..c74979a9 100644 --- a/server/voice_changer/RVC/RVCSettings.py +++ b/server/voice_changer/RVC/RVCSettings.py @@ -29,6 +29,7 @@ class RVCSettings: sampleModels: list[RVCModelSample] = field(default_factory=lambda: []) indexRatio: float = 0 + # protect: float = 0.5 rvcQuality: int = 0 silenceFront: int = 1 # 0:off, 1:on modelSamplingRate: int = 48000 @@ -50,5 +51,5 @@ class RVCSettings: "isHalf", "enableDirectML", ] - floatData = ["silentThreshold", "indexRatio"] + floatData = ["silentThreshold", "indexRatio"] # , "protect"] strData = ["framework", "f0Detector"] diff --git a/server/voice_changer/RVC/SampleDownloader.py b/server/voice_changer/RVC/SampleDownloader.py index a0e4daf1..cfc35fab 100644 --- a/server/voice_changer/RVC/SampleDownloader.py +++ b/server/voice_changer/RVC/SampleDownloader.py @@ -81,6 +81,7 @@ def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str): sampleParams["sampleId"] = sample.id sampleParams["defaultTune"] = 0 sampleParams["defaultIndexRatio"] = 1 + # sampleParams["defaultProtect"] = 0.5 sampleParams["credit"] = sample.credit sampleParams["description"] = sample.description sampleParams["name"] = sample.name diff --git a/server/voice_changer/RVC/modelMerger/MergeModelRequest.py b/server/voice_changer/RVC/modelMerger/MergeModelRequest.py index 86eb322d..68cd6a71 100644 --- a/server/voice_changer/RVC/modelMerger/MergeModelRequest.py +++ b/server/voice_changer/RVC/modelMerger/MergeModelRequest.py @@ -17,4 +17,5 @@ class MergeModelRequest: slot: int = -1 defaultTune: int = 0 defaultIndexRatio: int = 1 + # defaultProtect: float = .5 files: List[MergeFile] = field(default_factory=lambda: []) diff --git a/server/voice_changer/RVC/pipeline/Pipeline.py b/server/voice_changer/RVC/pipeline/Pipeline.py index e44d0e6c..d5f17d91 100644 --- a/server/voice_changer/RVC/pipeline/Pipeline.py +++ b/server/voice_changer/RVC/pipeline/Pipeline.py @@ -85,7 +85,9 @@ class Pipeline(object): embOutputLayer, useFinalProj, repeat, + protect=0.5, ): + search_index = self.index is not None and self.big_npy is not None and index_rate != 0 self.t_pad = self.sr * repeat self.t_pad_tgt = self.targetSR * repeat @@ -136,10 +138,12 @@ class Pipeline(object): raise DeviceChangingException() else: raise e + if protect < 0.5 and search_index: + feats0 = feats.clone() # Index - feature抽出 # if self.index is not None and self.feature is not None and index_rate != 0: - if self.index is not None and self.big_npy is not None and index_rate != 0: + if search_index: npy = feats[0].cpu().numpy() if self.isHalf is True: npy = npy.astype("float32") @@ -165,7 +169,10 @@ class Pipeline(object): + (1 - index_rate) * feats ) feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) - + if protect < 0.5 and search_index: + feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute( + 0, 2, 1 + ) # ピッチサイズ調整 p_len = audio_pad.shape[0] // self.window if feats.shape[1] < p_len: @@ -173,6 +180,15 @@ class Pipeline(object): if pitch is not None and pitchf is not None: pitch = pitch[:, :p_len] pitchf = pitchf[:, :p_len] + + # pitchの推定が上手くいかない(pitchf=0)場合、検索前の特徴を混ぜる + if protect < 0.5 and search_index: + pitchff = pitchf.clone() + pitchff[pitchf > 0] = 1 + pitchff[pitchf < 1] = protect + pitchff = pitchff.unsqueeze(-1) + feats = feats * pitchff + feats0 * (1 - pitchff) + feats = feats.to(feats0.dtype) p_len = torch.tensor([p_len], device=self.device).long() # 推論実行 diff --git a/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py b/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py index 493ef945..bea0f32e 100644 --- a/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py +++ b/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py @@ -26,7 +26,7 @@ class CrepePitchExtractor(PitchExtractor): f0_mel_min = 1127 * np.log(1 + f0_min / 700) f0_mel_max = 1127 * np.log(1 + f0_max / 700) - f0 = torchcrepe.predict( + f0, pd = torchcrepe.predict( audio.unsqueeze(0), sr, hop_length=window, @@ -37,8 +37,11 @@ class CrepePitchExtractor(PitchExtractor): batch_size=256, decoder=torchcrepe.decode.weighted_argmax, device=self.device, + return_periodicity=True, ) - f0 = torchcrepe.filter.median(f0, 3) + f0 = torchcrepe.filter.median(f0, 3) # 本家だとmeanですが、harvestに合わせmedianフィルタ + pd = torchcrepe.filter.median(pd, 3) + f0[pd < 0.1] = 0 f0 = f0.squeeze() f0 = torch.nn.functional.pad(