WIP: support DDSP_SVC

This commit is contained in:
wataru 2023-03-24 16:19:01 +09:00
parent 807120201f
commit 8a4e76795a
2 changed files with 3 additions and 6 deletions

View File

@@ -24,7 +24,7 @@ from slicer import Slicer
import librosa
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
import resampy
from scipy.io import wavfile
SAMPLING_RATE = 44100
@@ -234,7 +234,7 @@ class DDSP_SVC:
return np.zeros(convertSize).astype(np.int16)
with torch.no_grad():
spk_id = torch.LongTensor(np.array([[int(2)]]))
spk_id = torch.LongTensor(np.array([[int(1)]]))
seg_output, _, (s_h, s_n) = self.model(c, f0, volume, spk_id=spk_id, spk_mix_dict=None)
seg_output *= mask
@@ -245,7 +245,6 @@ class DDSP_SVC:
self.args.data.block_size,
adaptive_key=float(0))
result = seg_output.squeeze().cpu().numpy() * 32768.0
return np.array(result).astype(np.int16)
def inference(self, data):

View File

@@ -244,12 +244,10 @@ class VoiceChanger():
self.ioRecorder.writeOutput(outputData.tobytes())
# if receivedData.shape[0] != outputData.shape[0]:
# print(f"Padding, in:{receivedData.shape[0]} out:{outputData.shape[0]}")
# outputData = pad_array(outputData, receivedData.shape[0])
# # print_convert_processing(
# # f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
# print(
# f" Padded!, Output data size of {result.shape[0]}/{processing_sampling_rate}hz {outputData.shape[0]}/{self.settings.inputSampleRate}hz")
postprocess_time = t.secs
print_convert_processing(f" [fin] Input/Output size:{receivedData.shape[0]},{outputData.shape[0]}")