MMVC minimum input length

This commit is contained in:
wataru 2023-05-26 23:04:56 +09:00
parent 18481feb7e
commit 39cd234268
3 changed files with 24 additions and 20 deletions

View File

@@ -200,8 +200,8 @@ class MMVCv13:
convertSize = inputSize + crossfadeSize + solaSearchFrame
if convertSize < 8192:
convertSize = 8192
# if convertSize < 8192:
# convertSize = 8192
if convertSize % self.hps.data.hop_length != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)

View File

@@ -103,12 +103,14 @@ class MMVCv15:
requires_grad_dec=self.hps.requires_grad.dec,
)
if self.settings.pyTorchModelFile is not None:
self.settings.framework = "PyTorch"
self.net_g.eval()
load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
# ONNXモデル生成
self.onxx_input_length = 8192
if self.settings.onnxModelFile is not None:
self.settings.framework = "ONNX"
providers, options = self.getOnnxExecutionProvider()
self.onnx_session = onnxruntime.InferenceSession(
self.settings.onnxModelFile,

View File

@@ -134,8 +134,7 @@ class VoiceChanger:
vc: VoiceChanger = _vc
currentInputDeviceId = -1
currentInputSampleRate = -1
model_sampling_rate = -1
currentModelSamplingRate = -1
currentOutputDeviceId = -1
currentInputChunkNum = -1
while True:
@@ -176,34 +175,43 @@ class VoiceChanger:
block_frame = currentInputChunkNum * 128
# sample rate precheck(alsa cannot use 40000?)
model_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
try:
currentModelSamplingRate = (
self.voiceChanger.get_processing_sampling_rate()
)
except Exception as e:
print("[Voice Changer] ex: get_processing_sampling_rate", e)
continue
try:
with sd.Stream(
callback=self.audio_callback,
blocksize=block_frame,
samplerate=model_sampling_rate,
samplerate=currentModelSamplingRate,
dtype="float32",
channels=[currentInputChannelNum, currentOutputChannelNum],
):
pass
currentInputSampleRate = model_sampling_rate
vc.settings.serverInputAudioSampleRate = model_sampling_rate
print(f"[Voice Changer] sample rate {model_sampling_rate}")
vc.settings.serverInputAudioSampleRate = currentModelSamplingRate
vc.settings.inputSampleRate = currentModelSamplingRate
print(
f"[Voice Changer] sample rate {vc.settings.serverInputAudioSampleRate}"
)
except Exception as e:
print(
"[Voice Changer] ex: fallback to device default samplerate", e
"[Voice Changer] ex: fallback to device default samplerate",
e,
)
currentInputSampleRate = serverInputAudioDevice.default_samplerate
vc.settings.serverInputAudioSampleRate = (
serverInputAudioDevice.default_samplerate
)
vc.settings.inputSampleRate = vc.settings.serverInputAudioSampleRate
# main loop
try:
with sd.Stream(
callback=self.audio_callback,
blocksize=block_frame,
samplerate=currentInputSampleRate,
samplerate=vc.settings.serverInputAudioSampleRate,
dtype="float32",
channels=[currentInputChannelNum, currentOutputChannelNum],
):
@@ -212,16 +220,10 @@ class VoiceChanger:
and currentInputDeviceId == vc.settings.serverInputDeviceId
and currentOutputDeviceId
== vc.settings.serverOutputDeviceId
and model_sampling_rate
and currentModelSamplingRate
== self.voiceChanger.get_processing_sampling_rate()
and currentInputChunkNum == vc.settings.serverReadChunkSize
):
vc.settings.serverInputAudioSampleRate = (
self.voiceChanger.get_processing_sampling_rate()
)
vc.settings.inputSampleRate = (
vc.settings.serverInputAudioSampleRate
)
time.sleep(2)
print(
"[Voice Changer] server audio",
@@ -232,7 +234,7 @@ class VoiceChanger:
vc.settings.serverAudioStated,
currentInputDeviceId,
currentOutputDeviceId,
currentInputSampleRate,
vc.settings.serverInputAudioSampleRate,
currentInputChunkNum,
)