Add SOLA (synchronous overlap-add) support for MMVCv13

This commit is contained in:
wataru 2023-04-15 04:25:30 +09:00
parent f782ebb320
commit e342d53b76
7 changed files with 1642 additions and 40 deletions

View File

@ -100,7 +100,9 @@
"converterSetting": [
{
"name": "inputChunkNum",
"options": {}
"options": {
"nums": [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64]
}
},
{
"name": "gpu",
@ -143,6 +145,10 @@
{
"name": "trancateNumThreshold",
"options": {}
},
{
"name": "solaEnable",
"options": {}
}
]
},

View File

@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

View File

@ -2,10 +2,17 @@ import React, { useMemo } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
export type InputChunkNumRowProps = {
nums: number[]
}
export const InputChunkNumRow = (_props: InputChunkNumRowProps) => {
export const InputChunkNumRow = (props: InputChunkNumRowProps) => {
const appState = useAppState()
const inputChunkNumRow = useMemo(() => {
let nums: number[]
if (!props.nums) {
nums = [8, 16, 24, 32, 40, 48, 64, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048]
} else {
nums = props.nums
}
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Input Chunk Num(128sample/chunk)</div>
@ -15,7 +22,7 @@ export const InputChunkNumRow = (_props: InputChunkNumRowProps) => {
appState.workletNodeSetting.trancateBuffer()
}}>
{
[8, 16, 24, 32, 40, 48, 64, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048].map(x => {
nums.map(x => {
return <option key={x} value={x}>{x}</option>
})
}

View File

@ -130,7 +130,37 @@ class MMVCv13:
spec = torch.squeeze(spec, 0)
return spec
def generate_input(self, newData: np.ndarray, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
    """Build the collated model input for one conversion step.

    Normalizes the incoming PCM chunk, appends it to the persistent
    ``audio_buffer``, trims the buffer to the conversion window, and
    packs (text, spec, audio, speaker id) via ``TextAudioSpeakerCollate``.

    Args:
        newData: raw audio samples; divided by ``hps.data.max_wav_value``
            to normalize to float32 (presumably integer PCM — confirm with caller).
        inputSize: number of newly received samples to convert.
        crossfadeSize: extra samples retained for crossfading.
        solaEnabled: when True, widen the window by ``solaSearchFrame`` so
            SOLA can search for the best overlap position.
        solaSearchFrame: size of the SOLA search region in samples (used
            only when ``solaEnabled`` is True).

    Returns:
        The batch produced by ``TextAudioSpeakerCollate`` from
        ``(self.text_norm, spec, audio_norm, sid)``.
    """
    # Normalize to float32.
    newData = newData.astype(np.float32) / self.hps.data.max_wav_value

    # Maintain a rolling buffer of past audio so the conversion window
    # can extend beyond the newly received chunk.
    if hasattr(self, "audio_buffer"):
        self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # append to history
    else:
        self.audio_buffer = newData

    # Window handed to the model; SOLA needs extra room to search.
    convertSize = inputSize + crossfadeSize + (solaSearchFrame if solaEnabled else 0)

    # Enforce a minimum window so the model gets enough context.
    if convertSize < 8192:
        convertSize = 8192

    # Round up to a multiple of the model's hop length; otherwise the
    # model output would be truncated at the hop boundary.
    if convertSize % self.hps.data.hop_length != 0:
        convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))

    # Keep only the part of the buffer that will actually be converted.
    self.audio_buffer = self.audio_buffer[-1 * convertSize:]

    audio = torch.FloatTensor(self.audio_buffer)
    audio_norm = audio.unsqueeze(0)  # add batch dimension
    spec = self._get_spec(audio_norm)
    sid = torch.LongTensor([int(self.settings.srcId)])

    data = (self.text_norm, spec, audio_norm, sid)
    data = TextAudioSpeakerCollate()([data])
    return data
def generate_input_old(self, newData: any, inputSize: int, crossfadeSize: int):
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if hasattr(self, "audio_buffer"):

View File

@ -208,13 +208,13 @@ class VoiceChanger():
# receivedData: tuple of short
def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
    """Dispatch one incoming audio chunk to the appropriate pipeline.

    The SOLA path is taken only when SOLA is enabled in the settings AND
    the active model type supports it (RVC or MMVCv13); every other
    combination falls back to the legacy path.
    """
    solaCapable = self.modelType in ("RVC", "MMVCv13")
    if self.settings.solaEnabled and solaCapable:
        return self.on_request_sola(receivedData)
    return self.on_request_legacy(receivedData)
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
# print("processing with sola")
print("processing with sola")
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
# 前処理