WIP: Refine Diffusion-SVC: auto-detect speaker IDs

This commit is contained in:
w-okada 2023-07-17 07:41:32 +09:00
parent b9429c7655
commit 37468e3cc9
3 changed files with 8 additions and 4 deletions

View File

@@ -115,6 +115,8 @@ class DiffusionSVCModelSlot(ModelSlot):
defaultKstep: int = 20
defaultSpeedup: int = 10
kStepMax: int = 100
nLayers: int = 20
nnLayers: int = 20
speakers: dict = field(default_factory=lambda: {1: "user"})
embedder: EmbedderType = "hubert_base"
samplingRate: int = 44100

View File

@@ -33,8 +33,10 @@ class DiffusionSVCModelSlotGenerator(ModelSlotGenerator):
def _setInfoByPytorch(cls, slot: DiffusionSVCModelSlot):
    """Fill *slot* with metadata read from its Diffusion-SVC combo model.

    Loads the combo checkpoint referenced by ``slot.modelFile`` on the CPU and
    copies ``k_step_max``, the diffusion and naive models' ``n_layers``, and a
    generated speaker table onto the slot.

    Args:
        slot: Model slot to update in place; ``slot.modelFile`` is passed to
            the loader.

    Returns:
        The same ``slot`` object, after mutation.
    """
    # Only the two argument objects are needed here; the models and vocoder
    # returned by the loader are discarded.
    _, diff_args, _, naive_args, _ = load_model_vocoder_from_combo(slot.modelFile, device="cpu")

    slot.kStepMax = diff_args.model.k_step_max
    slot.nLayers = diff_args.model.n_layers
    slot.nnLayers = naive_args.model.n_layers

    # Speaker ids are 1-based: {1: "user1", ..., n_spk: "user<n_spk>"}.
    n_spk = diff_args.model.n_spk
    slot.speakers = {spk_id: f"user{spk_id}" for spk_id in range(1, n_spk + 1)}
    return slot
@classmethod

View File

@@ -40,7 +40,7 @@ class DiffusionSVCInferencer(Inferencer):
# self.model = model
return self
def getConfig(self) -> tuple[int, int]:
model_sampling_rate = int(self.diff_args.data.sampling_rate)
model_block_size = int(self.diff_args.data.block_size)
@@ -128,5 +128,5 @@ class DiffusionSVCInferencer(Inferencer):
out_wav *= mask
# print("[ ----Timer::3: ]", t.secs, start_frame, out_mel.shape)
return out_wav.squeeze()