WIP:DDSP-SVC
This commit is contained in:
parent
84ca7974e9
commit
eb37febce8
186
client/demo/dist/assets/gui_settings/DDSP-SVC.json
vendored
Normal file
186
client/demo/dist/assets/gui_settings/DDSP-SVC.json
vendored
Normal file
@ -0,0 +1,186 @@
|
||||
{
|
||||
"type": "demo",
|
||||
"id": "DDSP-SVC",
|
||||
"front": {
|
||||
"title": [
|
||||
{
|
||||
"name": "title",
|
||||
"options": {
|
||||
"mainTitle": "Realtime Voice Changer Client",
|
||||
"subTitle": "for DDSP-SVC",
|
||||
"lineNum": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "clearSetting",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"serverControl": [
|
||||
{
|
||||
"name": "startButton",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "performance",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "serverInfo",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"modelSetting": [
|
||||
{
|
||||
"name": "modelUploader",
|
||||
"options": {
|
||||
"showConfig": true,
|
||||
"showOnnx": false,
|
||||
"showPyTorch": true,
|
||||
"showCorrespondence": false,
|
||||
"showPyTorchCluster": false,
|
||||
|
||||
"showFeature": false,
|
||||
"showIndex": false,
|
||||
"showHalfPrecision": false,
|
||||
"defaultEnablePyTorch": true,
|
||||
|
||||
"showOnnxExportButton": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "framework",
|
||||
"options": {
|
||||
"showFramework": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "modelSamplingRate",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"deviceSetting": [
|
||||
{
|
||||
"name": "audioInput",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "audioOutput",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"qualityControl": [
|
||||
{
|
||||
"name": "noiseControl",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "gainControl",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "f0Detector",
|
||||
"options": {
|
||||
"detectors": ["dio", "parselmouth", "harvest", "crepe"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "divider",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "analyzer",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"speakerSetting": [
|
||||
{
|
||||
"name": "dstId",
|
||||
"options": {
|
||||
"showF0": true,
|
||||
"useServerInfo": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "tune",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "indexRatio",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "silentThreshold",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "enableEnhancer",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"converterSetting": [
|
||||
{
|
||||
"name": "inputChunkNum",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "extraDataLength",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "gpu",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"advancedSetting": [
|
||||
{
|
||||
"name": "serverURL",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "protocol",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "sampleRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "sendingSampleRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "crossFadeOverlapSize",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "crossFadeOffsetRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "crossFadeEndRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "downSamplingMode",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "trancateNumThreshold",
|
||||
"options": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"dialogs": {
|
||||
"license": [
|
||||
{
|
||||
"title": "Retrieval-based-Voice-Conversion-WebUI",
|
||||
"auther": "liujing04",
|
||||
"contact": "",
|
||||
"url": "https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI",
|
||||
"license": "MIT"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -80,7 +80,9 @@
|
||||
},
|
||||
{
|
||||
"name": "f0Detector",
|
||||
"options": {}
|
||||
"options": {
|
||||
"detectors": ["pm", "harvest"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "divider",
|
||||
|
21
client/demo/dist/index.js
vendored
21
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
186
client/demo/public/assets/gui_settings/DDSP-SVC.json
Normal file
186
client/demo/public/assets/gui_settings/DDSP-SVC.json
Normal file
@ -0,0 +1,186 @@
|
||||
{
|
||||
"type": "demo",
|
||||
"id": "DDSP-SVC",
|
||||
"front": {
|
||||
"title": [
|
||||
{
|
||||
"name": "title",
|
||||
"options": {
|
||||
"mainTitle": "Realtime Voice Changer Client",
|
||||
"subTitle": "for DDSP-SVC",
|
||||
"lineNum": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "clearSetting",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"serverControl": [
|
||||
{
|
||||
"name": "startButton",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "performance",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "serverInfo",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"modelSetting": [
|
||||
{
|
||||
"name": "modelUploader",
|
||||
"options": {
|
||||
"showConfig": true,
|
||||
"showOnnx": false,
|
||||
"showPyTorch": true,
|
||||
"showCorrespondence": false,
|
||||
"showPyTorchCluster": false,
|
||||
|
||||
"showFeature": false,
|
||||
"showIndex": false,
|
||||
"showHalfPrecision": false,
|
||||
"defaultEnablePyTorch": true,
|
||||
|
||||
"showOnnxExportButton": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "framework",
|
||||
"options": {
|
||||
"showFramework": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "modelSamplingRate",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"deviceSetting": [
|
||||
{
|
||||
"name": "audioInput",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "audioOutput",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"qualityControl": [
|
||||
{
|
||||
"name": "noiseControl",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "gainControl",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "f0Detector",
|
||||
"options": {
|
||||
"detectors": ["dio", "parselmouth", "harvest", "crepe"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "divider",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "analyzer",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"speakerSetting": [
|
||||
{
|
||||
"name": "dstId",
|
||||
"options": {
|
||||
"showF0": true,
|
||||
"useServerInfo": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "tune",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "indexRatio",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "silentThreshold",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "enableEnhancer",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"converterSetting": [
|
||||
{
|
||||
"name": "inputChunkNum",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "extraDataLength",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "gpu",
|
||||
"options": {}
|
||||
}
|
||||
],
|
||||
"advancedSetting": [
|
||||
{
|
||||
"name": "serverURL",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "protocol",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "sampleRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "sendingSampleRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "crossFadeOverlapSize",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "crossFadeOffsetRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "crossFadeEndRate",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "downSamplingMode",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"name": "trancateNumThreshold",
|
||||
"options": {}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"dialogs": {
|
||||
"license": [
|
||||
{
|
||||
"title": "Retrieval-based-Voice-Conversion-WebUI",
|
||||
"auther": "liujing04",
|
||||
"contact": "",
|
||||
"url": "https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI",
|
||||
"license": "MIT"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
@ -80,7 +80,9 @@
|
||||
},
|
||||
{
|
||||
"name": "f0Detector",
|
||||
"options": {}
|
||||
"options": {
|
||||
"detectors": ["pm", "harvest"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "divider",
|
||||
|
@ -32,6 +32,7 @@ export const ClientSelector = () => {
|
||||
<div></div>
|
||||
<div className="body-button-container">
|
||||
<div className="body-button w40 bold" onClick={() => { setClientType("RVC"); setItem(INDEXEDDB_KEY_DEFAULT_MODEL_TYPE, "RVC") }}>RVC</div>
|
||||
<div className="body-button w40 bold" onClick={() => { setClientType("DDSP-SVC"); setItem(INDEXEDDB_KEY_DEFAULT_MODEL_TYPE, "DDSP-SVC") }}>DDSP-SVC(N/A)</div>
|
||||
</div>
|
||||
<div></div>
|
||||
</div>
|
||||
|
@ -38,6 +38,7 @@ import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQualityRo
|
||||
import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"
|
||||
// import { OnnxExportRow, OnnxExportRowProps } from "./components/304_OnnxExportRow"
|
||||
import { SolaEnableRow, SolaEnableRowProps } from "./components/811_SolaEnableRow"
|
||||
import { EnableEnhancerRow, EnableEnhancerRowProps } from "./components/610_EnableEnhancerRow"
|
||||
|
||||
export const catalog: { [key: string]: (props: any) => JSX.Element } = {}
|
||||
|
||||
@ -85,6 +86,7 @@ const initialize = () => {
|
||||
addToCatalog("noiseScale", (props: NoiseScaleRowProps) => { return <NoiseScaleRow {...props} /> })
|
||||
addToCatalog("silentThreshold", (props: SilentThresholdRowProps) => { return <SilentThresholdRow {...props} /> })
|
||||
addToCatalog("indexRatio", (props: IndexRatioRowProps) => { return <IndexRatioRow {...props} /> })
|
||||
addToCatalog("enableEnhancer", (props: EnableEnhancerRowProps) => { return <EnableEnhancerRow {...props} /> })
|
||||
|
||||
|
||||
|
||||
|
@ -9,7 +9,7 @@ export const ConfigSelectRow = () => {
|
||||
const configFilenameText = appState.serverSetting.fileUploadSetting.configFile?.filename || appState.serverSetting.fileUploadSetting.configFile?.file?.name || ""
|
||||
const onConfigFileLoadClicked = async () => {
|
||||
const file = await fileSelector("")
|
||||
if (file.name.endsWith(".json") == false) {
|
||||
if (file.name.endsWith(".json") == false && file.name.endsWith(".yaml") == false) {
|
||||
alert("モデルファイルの拡張子はjsonである必要があります。")
|
||||
return
|
||||
}
|
||||
|
@ -4,12 +4,14 @@ import { F0Detector } from "@dannadori/voice-changer-client-js";
|
||||
|
||||
|
||||
export type F0DetectorRowProps = {
|
||||
detectors: string[]
|
||||
}
|
||||
export const F0DetectorRow = (_props: F0DetectorRowProps) => {
|
||||
export const F0DetectorRow = (props: F0DetectorRowProps) => {
|
||||
const appState = useAppState()
|
||||
|
||||
const f0DetectorRow = useMemo(() => {
|
||||
const desc = { "harvest": "harvest(High Quality)", "dio": "dio/pm(Light Weight)" }
|
||||
const detectors = props.detectors || ["dio", "harvest"]
|
||||
|
||||
return (
|
||||
<div className="body-row split-3-7 left-padding-1 guided">
|
||||
@ -19,9 +21,9 @@ export const F0DetectorRow = (_props: F0DetectorRowProps) => {
|
||||
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Detector: e.target.value as F0Detector })
|
||||
}}>
|
||||
{
|
||||
Object.values(F0Detector).map(x => {
|
||||
Object.values(detectors).map(x => {
|
||||
//@ts-ignore
|
||||
return <option key={x} value={x}>{desc[x]}</option>
|
||||
return <option key={x} value={x}>{x}</option>
|
||||
})
|
||||
}
|
||||
</select>
|
||||
|
@ -0,0 +1,45 @@
|
||||
import React, { useMemo } from "react"
|
||||
import { useAppState } from "../../../001_provider/001_AppStateProvider"
|
||||
|
||||
/** Props for {@link EnableEnhancerRow}; the row currently takes no options. */
export type EnableEnhancerRowProps = {
}

/**
 * Settings rows for the enhancer: an enable/disable selector followed by an
 * "Enhancer Tune" slider (0-10, step 1).
 *
 * Both controls read their value from `appState.serverSetting.serverSetting`
 * and write changes back through `updateServerSettings`.
 */
export const EnableEnhancerRow = (_props: EnableEnhancerRowProps) => {
    const appState = useAppState()

    const enableEnhancerRow = useMemo(() => {
        const serverSetting = appState.serverSetting.serverSetting
        // Fall back to 0 when the stored setting lacks these fields, so the
        // controls always receive a defined value and the numeric label
        // always matches the slider position.
        const enableEnhancer = serverSetting.enableEnhancer || 0
        const enhancerTune = serverSetting.enhancerTune || 0

        return (
            <>
                <div className="body-row split-3-7 left-padding-1 guided">
                    <div className="body-item-title left-padding-1 ">Enhancer</div>
                    <div className="body-input-container">
                        <select value={enableEnhancer} onChange={(e) => {
                            appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, enableEnhancer: Number(e.target.value) })
                        }}>
                            <option value="0" >disable</option>
                            <option value="1" >enable</option>
                        </select>
                    </div>
                </div>
                <div className="body-row split-3-3-4 left-padding-1 guided">
                    <div className="body-item-title left-padding-1 ">Enhancer Tune</div>
                    <div>
                        <input type="range" className="body-item-input-slider" min="0" max="10" step="1" value={enhancerTune} onChange={(e) => {
                            appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, enhancerTune: Number(e.target.value) })
                        }}></input>
                        <span className="body-item-input-slider-val">{enhancerTune}</span>
                    </div>
                    <div className="body-button-container">
                    </div>
                </div>
            </>
        )
    }, [
        appState.serverSetting.serverSetting,
        appState.serverSetting.updateServerSettings
    ])

    return enableEnhancerRow
}
|
||||
|
||||
|
@ -10,6 +10,7 @@ export const ClientType = {
|
||||
"so-vits-svc-40": "so-vits-svc-40",
|
||||
"so-vits-svc-40_c": "so-vits-svc-40_c",
|
||||
"so-vits-svc-40v2": "so-vits-svc-40v2",
|
||||
"DDSP-SVC": "DDSP-SVC",
|
||||
"RVC": "RVC"
|
||||
|
||||
} as const
|
||||
@ -58,6 +59,8 @@ export type Framework = typeof Framework[keyof typeof Framework]
|
||||
export const F0Detector = {
|
||||
"dio": "dio",
|
||||
"harvest": "harvest",
|
||||
"parselmouth": "parselmouth",
|
||||
"crepe": "crepe",
|
||||
} as const
|
||||
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
|
||||
|
||||
@ -91,6 +94,8 @@ export const ServerSettingKey = {
|
||||
"rvcQuality": "rvcQuality",
|
||||
"modelSamplingRate": "modelSamplingRate",
|
||||
|
||||
"enableEnhancer": "enableEnhancer",
|
||||
"enhancerTune": "enhancerTune",
|
||||
|
||||
"inputSampleRate": "inputSampleRate",
|
||||
} as const
|
||||
@ -125,6 +130,9 @@ export type VoiceChangerServerSetting = {
|
||||
rvcQuality: number // 0:low, 1:high
|
||||
modelSamplingRate: ModelSamplingRate // 32000,40000,48000
|
||||
|
||||
enableEnhancer: number // DDSP-SVC
|
||||
enhancerTune: number // DDSP-SVC
|
||||
|
||||
inputSampleRate: InputSampleRate
|
||||
}
|
||||
|
||||
@ -166,6 +174,9 @@ export const DefaultServerSetting_MMVCv15: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
@ -203,6 +214,8 @@ export const DefaultServerSetting_MMVCv13: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
@ -245,6 +258,8 @@ export const DefaultServerSetting_so_vits_svc_40: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
@ -287,6 +302,8 @@ export const DefaultServerSetting_so_vits_svc_40_c: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
@ -328,6 +345,52 @@ export const DefaultServerSetting_so_vits_svc_40v2: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
//
|
||||
status: "ok",
|
||||
configFile: "",
|
||||
pyTorchModelFile: "",
|
||||
onnxModelFile: "",
|
||||
onnxExecutionProviders: []
|
||||
}
|
||||
|
||||
export const DefaultServerSetting_DDSP_SVC: ServerInfo = {
|
||||
srcId: 0,
|
||||
dstId: 0,
|
||||
gpu: 0,
|
||||
|
||||
crossFadeOffsetRate: 0.0,
|
||||
crossFadeEndRate: 1.0,
|
||||
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
|
||||
solaEnabled: 0,
|
||||
|
||||
framework: Framework.PyTorch,
|
||||
f0Factor: 1.0,
|
||||
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
|
||||
f0Detector: F0Detector.dio,
|
||||
recordIO: 0,
|
||||
|
||||
// tran: 0,
|
||||
// noiceScale: 0,
|
||||
// predictF0: 0,
|
||||
// silentThreshold: 0,
|
||||
tran: 10,
|
||||
noiceScale: 0.3,
|
||||
predictF0: 0,
|
||||
silentThreshold: 0.00001,
|
||||
extraConvertSize: 1024 * 32,
|
||||
clusterInferRatio: 0.1,
|
||||
|
||||
indexRatio: 0,
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 24000,
|
||||
|
||||
@ -371,6 +434,9 @@ export const DefaultServerSetting_RVC: ServerInfo = {
|
||||
rvcQuality: 0,
|
||||
modelSamplingRate: 48000,
|
||||
|
||||
enableEnhancer: 0,
|
||||
enhancerTune: 0,
|
||||
|
||||
inputSampleRate: 48000,
|
||||
|
||||
//
|
||||
@ -449,6 +515,13 @@ export const DefaultWorkletNodeSetting_so_vits_svc_40v2: WorkletNodeSetting = {
|
||||
downSamplingMode: "average"
|
||||
}
|
||||
|
||||
export const DefaultWorkletNodeSetting_DDSP_SVC: WorkletNodeSetting = {
|
||||
serverUrl: "",
|
||||
protocol: "sio",
|
||||
sendingSampleRate: 48000,
|
||||
inputChunkNum: 256,
|
||||
downSamplingMode: "average"
|
||||
}
|
||||
|
||||
export const DefaultWorkletNodeSetting_RVC: WorkletNodeSetting = {
|
||||
serverUrl: "",
|
||||
|
@ -1,5 +1,5 @@
|
||||
import { useState, useMemo, useEffect } from "react"
|
||||
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo } from "../const"
|
||||
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ClientType, DefaultServerSetting_MMVCv13, DefaultServerSetting_MMVCv15, DefaultServerSetting_so_vits_svc_40v2, DefaultServerSetting_so_vits_svc_40, DefaultServerSetting_so_vits_svc_40_c, DefaultServerSetting_RVC, OnnxExporterInfo, DefaultServerSetting_DDSP_SVC } from "../const"
|
||||
import { VoiceChangerClient } from "../VoiceChangerClient"
|
||||
import { useIndexedDB } from "./useIndexedDB"
|
||||
|
||||
@ -72,6 +72,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
return DefaultServerSetting_so_vits_svc_40_c
|
||||
} else if (props.clientType == "so-vits-svc-40v2") {
|
||||
return DefaultServerSetting_so_vits_svc_40v2
|
||||
} else if (props.clientType == "DDSP-SVC") {
|
||||
return DefaultServerSetting_DDSP_SVC
|
||||
} else if (props.clientType == "RVC") {
|
||||
return DefaultServerSetting_RVC
|
||||
} else {
|
||||
|
@ -1,6 +1,6 @@
|
||||
import { useState, useMemo, useEffect } from "react"
|
||||
|
||||
import { ClientType, DefaultWorkletNodeSetting, DefaultWorkletNodeSetting_RVC, DefaultWorkletNodeSetting_so_vits_svc_40, DefaultWorkletNodeSetting_so_vits_svc_40v2, INDEXEDDB_KEY_WORKLETNODE, WorkletNodeSetting } from "../const"
|
||||
import { ClientType, DefaultWorkletNodeSetting, DefaultWorkletNodeSetting_DDSP_SVC, DefaultWorkletNodeSetting_RVC, DefaultWorkletNodeSetting_so_vits_svc_40, DefaultWorkletNodeSetting_so_vits_svc_40v2, INDEXEDDB_KEY_WORKLETNODE, WorkletNodeSetting } from "../const"
|
||||
import { VoiceChangerClient } from "../VoiceChangerClient"
|
||||
import { useIndexedDB } from "./useIndexedDB"
|
||||
|
||||
@ -28,6 +28,8 @@ export const useWorkletNodeSetting = (props: UseWorkletNodeSettingProps): Workle
|
||||
return DefaultWorkletNodeSetting_so_vits_svc_40
|
||||
} else if (props.clientType == "so-vits-svc-40v2") {
|
||||
return DefaultWorkletNodeSetting_so_vits_svc_40v2
|
||||
} else if (props.clientType == "DDSP-SVC") {
|
||||
return DefaultWorkletNodeSetting_DDSP_SVC
|
||||
} else if (props.clientType == "RVC") {
|
||||
return DefaultWorkletNodeSetting_RVC
|
||||
} else {
|
||||
|
@ -40,17 +40,4 @@ def setModelType(_modelType: ModelType):
|
||||
|
||||
def getFrontendPath():
    """Return the directory that holds the built frontend assets.

    When ``sys._MEIPASS`` exists (the attribute PyInstaller sets in a bundled
    executable), the ``dist`` directory inside that bundle is returned.
    Otherwise the relative development path ``../client/demo/dist`` is used.
    The same frontend is served for every model type.
    """
    if hasattr(sys, "_MEIPASS"):
        return os.path.join(sys._MEIPASS, "dist")
    return "../client/demo/dist"
|
||||
|
@ -34,13 +34,14 @@ class DDSP_SVCSettings():
|
||||
gpu: int = 0
|
||||
dstId: int = 0
|
||||
|
||||
f0Detector: str = "dio" # dio or harvest
|
||||
f0Detector: str = "dio" # dio or harvest # parselmouth
|
||||
tran: int = 20
|
||||
noiceScale: float = 0.3
|
||||
predictF0: int = 0 # 0:False, 1:True
|
||||
silentThreshold: float = 0.00001
|
||||
extraConvertSize: int = 1024 * 32
|
||||
clusterInferRatio: float = 0.1
|
||||
|
||||
enableEnhancer: int = 0
|
||||
enhancerTune: int = 0
|
||||
|
||||
framework: str = "PyTorch" # PyTorch or ONNX
|
||||
pyTorchModelFile: str = ""
|
||||
@ -52,7 +53,7 @@ class DDSP_SVCSettings():
|
||||
)
|
||||
|
||||
# ↓ List only the mutable settings
|
||||
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize"]
|
||||
intData = ["gpu", "dstId", "tran", "predictF0", "extraConvertSize", "enableEnhancer", "enhancerTune"]
|
||||
floatData = ["noiceScale", "silentThreshold", "clusterInferRatio"]
|
||||
strData = ["framework", "f0Detector"]
|
||||
|
||||
@ -63,23 +64,24 @@ class DDSP_SVC:
|
||||
self.net_g = None
|
||||
self.onnx_session = None
|
||||
|
||||
self.raw_path = io.BytesIO()
|
||||
self.gpu_num = torch.cuda.device_count()
|
||||
self.prevVol = 0
|
||||
self.params = params
|
||||
print("DDSP-SVC initialization:", params)
|
||||
|
||||
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None, clusterTorchModel: str = None):
|
||||
|
||||
self.settings.configFile = config
|
||||
def loadModel(self, props):
|
||||
self.settings.configFile = props["files"]["configFilename"]
|
||||
self.settings.pyTorchModelFile = props["files"]["pyTorchModelFilename"]
|
||||
# model
|
||||
model, args = vo.load_model(pyTorch_model_file)
|
||||
model, args = vo.load_model(self.settings.pyTorchModelFile)
|
||||
self.model = model
|
||||
self.args = args
|
||||
self.hop_size = int(self.args.data.block_size * SAMPLING_RATE / self.args.data.sampling_rate)
|
||||
# self.sampling_rate = args.data.sampling_rate
|
||||
|
||||
# hubert
|
||||
vec_path = self.params["hubert"]
|
||||
# vec_path = self.params["hubert"]
|
||||
vec_path = "./model_DDSP-SVC/hubert-soft-0d54a1f4.pt"
|
||||
self.encoder = vo.Units_Encoder(
|
||||
args.data.encoder,
|
||||
vec_path,
|
||||
@ -134,6 +136,16 @@ class DDSP_SVC:
|
||||
setattr(self.settings, key, float(val))
|
||||
elif key in self.settings.strData:
|
||||
setattr(self.settings, key, str(val))
|
||||
if key == "f0Detector":
|
||||
print("f0Detector update", val)
|
||||
if val == "dio":
|
||||
val = "parselmouth"
|
||||
self.f0_detector = vo.F0_Extractor(
|
||||
val,
|
||||
SAMPLING_RATE,
|
||||
self.hop_size,
|
||||
float(50),
|
||||
float(1100))
|
||||
else:
|
||||
return False
|
||||
|
||||
@ -155,7 +167,7 @@ class DDSP_SVC:
|
||||
def get_processing_sampling_rate(self):
|
||||
return SAMPLING_RATE
|
||||
|
||||
def generate_input(self, newData: any, inputSize: int, crossfadeSize: int):
|
||||
def generate_input(self, newData: any, inputSize: int, crossfadeSize: int, solaSearchFrame: int = 0):
|
||||
newData = newData.astype(np.float32) / 32768.0
|
||||
|
||||
if hasattr(self, "audio_buffer"):
|
||||
@ -163,7 +175,8 @@ class DDSP_SVC:
|
||||
else:
|
||||
self.audio_buffer = newData
|
||||
|
||||
convertSize = inputSize + crossfadeSize + self.settings.extraConvertSize
|
||||
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
|
||||
if convertSize % self.hop_size != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||
convertSize = convertSize + (self.hop_size - (convertSize % self.hop_size))
|
||||
|
||||
@ -228,8 +241,6 @@ class DDSP_SVC:
|
||||
|
||||
return result
|
||||
|
||||
pass
|
||||
|
||||
def _pyTorch_inference(self, data):
|
||||
|
||||
if hasattr(self, "model") == False or self.model == None:
|
||||
@ -244,6 +255,8 @@ class DDSP_SVC:
|
||||
convertSize = data[4]
|
||||
vol = data[5]
|
||||
|
||||
print(volume.device)
|
||||
|
||||
# if vol < self.settings.silentThreshold:
|
||||
# print("threshold")
|
||||
# return np.zeros(convertSize).astype(np.int16)
|
||||
@ -253,12 +266,14 @@ class DDSP_SVC:
|
||||
seg_output, _, (s_h, s_n) = self.model(c, f0, volume, spk_id=spk_id, spk_mix_dict=None)
|
||||
seg_output *= mask
|
||||
|
||||
if self.settings.enableEnhancer:
|
||||
seg_output, output_sample_rate = self.enhancer.enhance(
|
||||
seg_output,
|
||||
self.args.data.sampling_rate,
|
||||
f0,
|
||||
self.args.data.block_size,
|
||||
adaptive_key=float(3))
|
||||
adaptive_key=float(self.settings.enhancerTune))
|
||||
|
||||
result = seg_output.squeeze().cpu().numpy() * 32768.0
|
||||
return np.array(result).astype(np.int16)
|
||||
|
||||
@ -277,12 +292,15 @@ class DDSP_SVC:
|
||||
del self.net_g
|
||||
del self.onnx_session
|
||||
|
||||
remove_path = os.path.join("DDSP-SVC")
|
||||
sys.path = [x for x in sys.path if x.endswith(remove_path) == False]
|
||||
|
||||
def cross_fade(a: np.ndarray, b: np.ndarray, idx: int):
|
||||
result = np.zeros(idx + b.shape[0])
|
||||
fade_len = a.shape[0] - idx
|
||||
np.copyto(dst=result[:idx], src=a[:idx])
|
||||
k = np.linspace(0, 1.0, num=fade_len, endpoint=True)
|
||||
result[idx: a.shape[0]] = (1 - k) * a[idx:] + k * b[: fade_len]
|
||||
np.copyto(dst=result[a.shape[0]:], src=b[fade_len:])
|
||||
return result
|
||||
for key in list(sys.modules):
|
||||
val = sys.modules.get(key)
|
||||
try:
|
||||
file_path = val.__file__
|
||||
if file_path.find("DDSP-SVC" + os.path.sep) >= 0:
|
||||
print("remove", key, file_path)
|
||||
sys.modules.pop(key)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
Loading…
x
Reference in New Issue
Block a user