Add SOLA (synchronous overlap-add) support for MMVCv13

This commit is contained in:
wataru 2023-04-15 04:25:30 +09:00
parent f782ebb320
commit e342d53b76
7 changed files with 1642 additions and 40 deletions

View File

@ -100,7 +100,9 @@
"converterSetting": [
{
"name": "inputChunkNum",
"options": {}
"options": {
"nums": [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64]
}
},
{
"name": "gpu",
@ -143,6 +145,10 @@
{
"name": "trancateNumThreshold",
"options": {}
},
{
"name": "solaEnable",
"options": {}
}
]
},

View File

@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
<head>
<meta charset="utf-8" />
<title>Voice Changer Client Demo</title>
<script defer src="index.js"></script></head>
<body style="width: 100%; height: 100%; margin: 0px">
<div id="app" style="width: 100%; height: 100%"></div>
</body>
</html>

File diff suppressed because one or more lines are too long

View File

@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

View File

@ -2,10 +2,17 @@ import React, { useMemo } from "react"
import { useAppState } from "../../../001_provider/001_AppStateProvider"
export type InputChunkNumRowProps = {
nums: number[]
}
export const InputChunkNumRow = (_props: InputChunkNumRowProps) => {
export const InputChunkNumRow = (props: InputChunkNumRowProps) => {
const appState = useAppState()
const inputChunkNumRow = useMemo(() => {
let nums: number[]
if (!props.nums) {
nums = [8, 16, 24, 32, 40, 48, 64, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048]
} else {
nums = props.nums
}
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Input Chunk Num(128sample/chunk)</div>
@ -15,7 +22,7 @@ export const InputChunkNumRow = (_props: InputChunkNumRowProps) => {
appState.workletNodeSetting.trancateBuffer()
}}>
{
[8, 16, 24, 32, 40, 48, 64, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048].map(x => {
nums.map(x => {
return <option key={x} value={x}>{x}</option>
})
}

View File

@ -130,7 +130,37 @@ class MMVCv13:
spec = torch.squeeze(spec, 0)
return spec
def generate_input(self, newData: np.ndarray, inputSize: int, crossfadeSize: int, solaEnabled: bool = False, solaSearchFrame: int = 0):
    """Build the collated model input for one conversion step.

    Normalizes the incoming PCM chunk, appends it to the persistent
    ``audio_buffer``, trims the buffer to the conversion window, and
    packs (text, spec, audio, speaker id) via ``TextAudioSpeakerCollate``.

    Args:
        newData: raw audio samples; divided by ``hps.data.max_wav_value``
            to normalize to float32 (presumably integer PCM — confirm with caller).
        inputSize: number of newly received samples to convert.
        crossfadeSize: extra samples retained for crossfading.
        solaEnabled: when True, widen the window by ``solaSearchFrame`` so
            SOLA can search for the best overlap position.
        solaSearchFrame: size of the SOLA search region in samples (used
            only when ``solaEnabled`` is True).

    Returns:
        The batch produced by ``TextAudioSpeakerCollate`` from
        ``(self.text_norm, spec, audio_norm, sid)``.
    """
    # Normalize to float32.
    newData = newData.astype(np.float32) / self.hps.data.max_wav_value

    # Maintain a rolling buffer of past audio so the conversion window
    # can extend beyond the newly received chunk.
    if hasattr(self, "audio_buffer"):
        self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # append to history
    else:
        self.audio_buffer = newData

    # Window handed to the model; SOLA needs extra room to search.
    convertSize = inputSize + crossfadeSize + (solaSearchFrame if solaEnabled else 0)

    # Enforce a minimum window so the model gets enough context.
    if convertSize < 8192:
        convertSize = 8192

    # Round up to a multiple of the model's hop length; otherwise the
    # model output would be truncated at the hop boundary.
    if convertSize % self.hps.data.hop_length != 0:
        convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))

    # Keep only the part of the buffer that will actually be converted.
    self.audio_buffer = self.audio_buffer[-1 * convertSize:]

    audio = torch.FloatTensor(self.audio_buffer)
    audio_norm = audio.unsqueeze(0)  # add batch dimension
    spec = self._get_spec(audio_norm)
    sid = torch.LongTensor([int(self.settings.srcId)])

    data = (self.text_norm, spec, audio_norm, sid)
    data = TextAudioSpeakerCollate()([data])
    return data
def generate_input_old(self, newData: any, inputSize: int, crossfadeSize: int):
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if hasattr(self, "audio_buffer"):

View File

@ -208,13 +208,13 @@ class VoiceChanger():
# receivedData: tuple of short
def on_request(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
    """Dispatch one incoming audio chunk to the appropriate pipeline.

    The SOLA path is taken only when SOLA is enabled in the settings AND
    the active model type supports it (RVC or MMVCv13); every other
    combination falls back to the legacy path.
    """
    solaCapable = self.modelType in ("RVC", "MMVCv13")
    if self.settings.solaEnabled and solaCapable:
        return self.on_request_sola(receivedData)
    return self.on_request_legacy(receivedData)
def on_request_sola(self, receivedData: AudioInOut) -> tuple[AudioInOut, list[Union[int, float]]]:
# print("processing with sola")
print("processing with sola")
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
# 前処理