WIP: so-vits-svc 40v2, alpha (before refactoring)

wataru 2023-03-13 00:54:49 +09:00
parent 01f64cc8ec
commit ca4d454d1a
5 changed files with 76 additions and 17 deletions

View File

@ -70,6 +70,7 @@ export const ServerSettingKey = {
"noiceScale": "noiceScale",
"predictF0": "predictF0",
"silentThreshold": "silentThreshold",
"processingLength": "processingLength",
"inputSampleRate": "inputSampleRate",
} as const
@ -96,6 +97,7 @@ export type VoiceChangerServerSetting = {
noiceScale: number // so-vits-svc
predictF0: number // so-vits-svc
silentThreshold: number // so-vits-svc
processingLength: number // so-vits-svc
inputSampleRate: InputSampleRate
}
@ -127,6 +129,7 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
noiceScale: 0,
predictF0: 0,
silentThreshold: 0,
processingLength: 0,
inputSampleRate: 24000,
@ -157,6 +160,7 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
noiceScale: 0,
predictF0: 0,
silentThreshold: 0,
processingLength: 0,
inputSampleRate: 24000,
@ -183,14 +187,15 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
f0Detector: F0Detector.dio,
recordIO: 0,
tran: 0,
noiceScale: 0,
predictF0: 0,
silentThreshold: 0,
// tran: 10,
// noiceScale: 0.3,
// tran: 0,
// noiceScale: 0,
// predictF0: 0,
// silentThreshold: 0.00001,
// silentThreshold: 0,
tran: 10,
noiceScale: 0.3,
predictF0: 0,
silentThreshold: 0.00001,
processingLength: 1024 * 32,
inputSampleRate: 24000,
@ -254,6 +259,14 @@ export const DefaultWorkletNodeSetting: WorkletNodeSetting = {
downSamplingMode: "average"
}
export const DefaultWorkletNodeSetting_so_vits_svc_40v2: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 24000,
inputChunkNum: 128,
downSamplingMode: "average"
}
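For orientation: the new processingLength default above is 1024 * 32 = 32768. Assuming the value is a sample count at the 24000 Hz inputSampleRate set alongside it (an assumption; the unit is not stated in this diff), that is roughly 1.37 seconds of retained audio:

// Rough sanity check (assumes processingLength counts samples at the
// 24000 Hz inputSampleRate configured above)
const processingLength = 1024 * 32         // 32768 samples
const seconds = processingLength / 24000   // ≈ 1.37 s of audio history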
///////////////////////
// Client settings
///////////////////////

View File

@ -1,6 +1,6 @@
import { useState, useMemo, useEffect } from "react"
import { ClientType, DefaultWorkletNodeSetting, INDEXEDDB_KEY_WORKLETNODE, WorkletNodeSetting } from "../const"
import { ClientType, DefaultWorkletNodeSetting, DefaultWorkletNodeSetting_so_vits_svc_40v2, INDEXEDDB_KEY_WORKLETNODE, WorkletNodeSetting } from "../const"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
@ -19,7 +19,19 @@ export type WorkletNodeSettingState = {
}
export const useWorkletNodeSetting = (props: UseWorkletNodeSettingProps): WorkletNodeSettingState => {
const [workletNodeSetting, _setWorkletNodeSetting] = useState<WorkletNodeSetting>(DefaultWorkletNodeSetting)
const defaultWorkletNodeSetting = useMemo(() => {
if (props.clientType == "MMVCv13") {
return DefaultWorkletNodeSetting
} else if (props.clientType == "MMVCv15") {
return DefaultWorkletNodeSetting
} else if (props.clientType == "so_vits_svc_40v2c") {
return DefaultWorkletNodeSetting_so_vits_svc_40v2
} else {
return DefaultWorkletNodeSetting
}
}, [props.clientType])
const [workletNodeSetting, _setWorkletNodeSetting] = useState<WorkletNodeSetting>(defaultWorkletNodeSetting)
const { setItem, getItem, removeItem } = useIndexedDB({ clientType: props.clientType })
// Initialization, part 1: fetch from the DB
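The chain of string comparisons above could also be written as a lookup table; a minimal sketch, assuming ClientType is the string union used elsewhere in this commit ("MMVCv13", "MMVCv15", "so_vits_svc_40v2", ...):

// Equivalent default selection as a table; any client type without an
// entry falls back to the generic default (sketch, not the committed code)
const DEFAULTS: Partial<Record<ClientType, WorkletNodeSetting>> = {
    so_vits_svc_40v2: DefaultWorkletNodeSetting_so_vits_svc_40v2,
}
const defaultWorkletNodeSetting = DEFAULTS[props.clientType] ?? DefaultWorkletNodeSetting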

File diff suppressed because one or more lines are too long

View File

@ -25,20 +25,48 @@ export const useConvertSetting = (): ConvertSettingState => {
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Input Chunk Num(128sample/chunk)</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={appState.workletNodeSetting.workletNodeSetting.inputChunkNum} onChange={(e) => {
<select className="body-select" value={appState.workletNodeSetting.workletNodeSetting.inputChunkNum} onChange={(e) => {
appState.workletNodeSetting.updateWorkletNodeSetting({ ...appState.workletNodeSetting.workletNodeSetting, inputChunkNum: Number(e.target.value) })
appState.workletNodeSetting.trancateBuffer()
}} />
}}>
{
[32, 64, 96, 128, 160, 192, 256, 384, 512].map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
<div className="body-item-text">
<div>buff: {(appState.workletNodeSetting.workletNodeSetting.inputChunkNum * 128 * 1000 / 48000).toFixed(1)}ms</div>
</div>
<div className="body-item-text"></div>
</div>
)
}, [appState.workletNodeSetting.workletNodeSetting.inputChunkNum, appState.workletNodeSetting.updateWorkletNodeSetting])
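The buff readout above is the audio buffered per send: inputChunkNum chunks of 128 samples each, at the 48000 Hz worklet rate. A quick worked example at the default of 128 chunks:

// Worked example of the buff label above
const inputChunkNum = 128
const bufferMs = inputChunkNum * 128 * 1000 / 48000   // 16384 samples ≈ 341.3 ms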
const processingLengthRow = useMemo(() => {
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Processing Length</div>
<div className="body-input-container">
<select className="body-select" value={appState.serverSetting.serverSetting.processingLength} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, processingLength: Number(e.target.value) })
appState.workletNodeSetting.trancateBuffer()
}}>
{
[1024 * 4, 1024 * 8, 1024 * 16, 1024 * 32, 1024 * 64, 1024 * 128].map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
<div className="body-item-text">
</div>
<div className="body-item-text"></div>
</div>
)
}, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])
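The Processing Length options above are power-of-two sample counts; written out:

// The option values rendered above, as multiples of 1024
const processingLengthOptions = [4, 8, 16, 32, 64, 128].map(k => 1024 * k)
// -> [4096, 8192, 16384, 32768, 65536, 131072]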
const gpuRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
@ -69,12 +97,13 @@ export const useConvertSetting = (): ConvertSettingState => {
<div className="partition-content">
{inputChunkNumRow}
{processingLengthRow}
{gpuRow}
</div>
</div>
</>
)
}, [inputChunkNumRow, gpuRow])
}, [inputChunkNumRow, processingLengthRow, gpuRow])
return {
convertSetting,

View File

@ -36,6 +36,7 @@ class SoVitsSvc40v2Settings():
noiceScale: float = 0.3
predictF0: int = 0 # 0:False, 1:True
silentThreshold: float = 0.00001
processingLength: int = 1024 * 32
framework: str = "PyTorch" # PyTorch or ONNX
pyTorchModelFile: str = ""
@ -43,7 +44,7 @@ class SoVitsSvc40v2Settings():
configFile: str = ""
# ↓ only the mutable fields are listed here
intData = ["gpu", "dstId", "tran", "predictF0"]
intData = ["gpu", "dstId", "tran", "predictF0", "processingLength"]
floatData = ["noiceScale", "silentThreshold"]
strData = ["framework", "f0Detector"]
@ -170,7 +171,11 @@ class SoVitsSvc40v2:
else:
self.audio_buffer = newData
self.audio_buffer = self.audio_buffer[-(convertSize):] # extract only the portion to be converted
# self.audio_buffer = self.audio_buffer[-(convertSize):] # extract only the portion to be converted
# self.audio_buffer = self.audio_buffer[-1024 * 32:] # extract only the portion to be converted
# self.audio_buffer = self.audio_buffer[-1024 * 128:] # extract only the portion to be converted
# self.audio_buffer = self.audio_buffer[(-1 * 1024 * 32) + (-1 * convertSize):] # extract only the portion to be converted
self.audio_buffer = self.audio_buffer[-1 * self.settings.processingLength + (-1 * convertSize):] # keep processingLength samples of history plus the portion to be converted
crop = self.audio_buffer[cropRange[0]:cropRange[1]]
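The new slice keeps the trailing self.settings.processingLength samples of history plus the convertSize samples being converted (in Python, a[-p + (-c):] is a[-(p + c):], i.e. the last p + c elements). A minimal TypeScript sketch of the same trim, with a hypothetical function name:

// Keep only the trailing (processingLength + convertSize) samples,
// mirroring audio_buffer[-1 * processingLength + (-1 * convertSize):]
function trimBuffer(buf: Float32Array, processingLength: number, convertSize: number): Float32Array {
    const keep = processingLength + convertSize
    return buf.slice(Math.max(0, buf.length - keep))
}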