From 37468e3cc9f0ea8cad0304134e62369b567fbeb2 Mon Sep 17 00:00:00 2001 From: w-okada Date: Mon, 17 Jul 2023 07:41:32 +0900 Subject: [PATCH] WIP: Diffusion svc refining: auto speaker id detect --- server/data/ModelSlot.py | 2 ++ .../DiffusionSVC/DiffusionSVCModelSlotGenerator.py | 6 ++++-- .../DiffusionSVC/inferencer/DiffusionSVCInferencer.py | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/server/data/ModelSlot.py b/server/data/ModelSlot.py index 609f34ad..5c989b5d 100644 --- a/server/data/ModelSlot.py +++ b/server/data/ModelSlot.py @@ -115,6 +115,8 @@ class DiffusionSVCModelSlot(ModelSlot): defaultKstep: int = 20 defaultSpeedup: int = 10 kStepMax: int = 100 + nLayers: int = 20 + nnLayers: int = 20 speakers: dict = field(default_factory=lambda: {1: "user"}) embedder: EmbedderType = "hubert_base" samplingRate: int = 44100 diff --git a/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py b/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py index 86f1ca22..a6bdfc88 100644 --- a/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py +++ b/server/voice_changer/DiffusionSVC/DiffusionSVCModelSlotGenerator.py @@ -33,8 +33,10 @@ class DiffusionSVCModelSlotGenerator(ModelSlotGenerator): def _setInfoByPytorch(cls, slot: DiffusionSVCModelSlot): diff_model, diff_args, naive_model, naive_args, vocoder = load_model_vocoder_from_combo(slot.modelFile, device="cpu") slot.kStepMax = diff_args.model.k_step_max - slot.n_layers = diff_args.model.n_layers - + slot.nLayers = diff_args.model.n_layers + slot.nnLayers = naive_args.model.n_layers + diff_args.model.n_spk + slot.speakers = {(x+1): f"user{x+1}" for x in range(diff_args.model.n_spk)} return slot @classmethod diff --git a/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py b/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py index eeb3e43b..eea196c4 100644 --- a/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py +++ b/server/voice_changer/DiffusionSVC/inferencer/DiffusionSVCInferencer.py @@ -40,7 +40,7 @@ class DiffusionSVCInferencer(Inferencer): # self.model = model return self - + def getConfig(self) -> tuple[int, int]: model_sampling_rate = int(self.diff_args.data.sampling_rate) model_block_size = int(self.diff_args.data.block_size) @@ -128,5 +128,5 @@ class DiffusionSVCInferencer(Inferencer): out_wav *= mask # print("[ ----Timer::3: ]", t.secs, start_frame, out_mel.shape) - + return out_wav.squeeze()