From 98ad3b4ff0f0f8a5738183fbd924754c7cf937ea Mon Sep 17 00:00:00 2001 From: wataru Date: Sun, 19 Feb 2023 10:12:25 +0900 Subject: [PATCH] WIP: refactoring, passthru server config --- client/demo/src/101_server_control.tsx | 8 +- client/demo/src/102_model_setting.tsx | 14 +- client/demo/src/103_device_setting.tsx | 1 - client/demo/src/104_qulity_control.tsx | 14 +- client/demo/src/105_speaker_setting.tsx | 58 +++-- client/demo/src/106_convert_setting.tsx | 6 +- client/demo/src/107_advanced_setting.tsx | 70 +---- client/lib/src/const.ts | 213 ++++++++-------- client/lib/src/hooks/useServerSetting.ts | 310 ++++------------------- server/voice_changer/VoiceChanger.py | 51 +--- 10 files changed, 233 insertions(+), 512 deletions(-) diff --git a/client/demo/src/101_server_control.tsx b/client/demo/src/101_server_control.tsx index 305a7737..c0e4b12c 100644 --- a/client/demo/src/101_server_control.tsx +++ b/client/demo/src/101_server_control.tsx @@ -94,9 +94,9 @@ export const useServerControl = () => {
Model Info:
- {appState.serverSetting.serverInfo?.configFile || ""} - {appState.serverSetting.serverInfo?.pyTorchModelFile || ""} - {appState.serverSetting.serverInfo?.onnxModelFile || ""} + {appState.serverSetting.serverSetting.configFile || ""} + {appState.serverSetting.serverSetting.pyTorchModelFile || ""} + {appState.serverSetting.serverSetting.onnxModelFile || ""}
@@ -106,7 +106,7 @@ export const useServerControl = () => {
) - }, [appState.getInfo, appState.serverSetting.serverInfo]) + }, [appState.getInfo, appState.serverSetting.serverSetting]) const serverControl = useMemo(() => { return ( diff --git a/client/demo/src/102_model_setting.tsx b/client/demo/src/102_model_setting.tsx index 12d5d886..a9e4f87e 100644 --- a/client/demo/src/102_model_setting.tsx +++ b/client/demo/src/102_model_setting.tsx @@ -190,13 +190,13 @@ export const useModelSettingArea = (): ServerSettingState => { const frameworkRow = useMemo(() => { const onFrameworkChanged = async (val: Framework) => { - appState.serverSetting.setFramework(val) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, framework: val }) } return (
Framework
- { onFrameworkChanged(e.target.value as Framework) }}> @@ -209,20 +209,20 @@ export const useModelSettingArea = (): ServerSettingState => {
) - }, [appState.serverSetting.setting.framework, appState.serverSetting.setFramework]) + }, [appState.serverSetting.serverSetting.framework, appState.serverSetting.updateServerSettings]) const onnxExecutionProviderRow = useMemo(() => { - if (appState.serverSetting.setting.framework != "ONNX") { + if (appState.serverSetting.serverSetting.framework != "ONNX") { return } const onOnnxExecutionProviderChanged = async (val: OnnxExecutionProvider) => { - appState.serverSetting.setOnnxExecutionProvider(val) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, onnxExecutionProvider: val }) } return (
OnnxExecutionProvider
- { onOnnxExecutionProviderChanged(e.target.value as OnnxExecutionProvider) }}> @@ -235,7 +235,7 @@ export const useModelSettingArea = (): ServerSettingState => {
) - }, [appState.serverSetting.setting.framework, appState.serverSetting.setting.onnxExecutionProvider, appState.serverSetting.setOnnxExecutionProvider]) + }, [appState.serverSetting.serverSetting.framework, appState.serverSetting.serverSetting.onnxExecutionProvider, appState.serverSetting.updateServerSettings]) const modelSetting = useMemo(() => { return ( diff --git a/client/demo/src/103_device_setting.tsx b/client/demo/src/103_device_setting.tsx index f47c6c1c..9c322c95 100644 --- a/client/demo/src/103_device_setting.tsx +++ b/client/demo/src/103_device_setting.tsx @@ -157,7 +157,6 @@ export const useDeviceSetting = (): DeviceSettingState => { } else { console.log("server mic") appState.clientSetting.setAudioInput(null) - appState.serverSetting.setServerMicrophone(Number(audioInputForGUI)) } } }, [appState.audioContext, audioInputForGUI, appState.clientSetting.setAudioInput]) diff --git a/client/demo/src/104_qulity_control.tsx b/client/demo/src/104_qulity_control.tsx index 5fa899a8..70ef2358 100644 --- a/client/demo/src/104_qulity_control.tsx +++ b/client/demo/src/104_qulity_control.tsx @@ -110,8 +110,8 @@ export const useQualityControl = (): QualityControlState => {
F0 Detector
- { + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Detector: e.target.value as F0Detector }) }}> { Object.values(F0Detector).map(x => { @@ -123,17 +123,17 @@ export const useQualityControl = (): QualityControlState => {
) - }, [appState.serverSetting.setting.f0Detector, appState.serverSetting.setF0Detector]) + }, [appState.serverSetting.serverSetting.f0Detector, appState.serverSetting.updateServerSettings]) const recordIORow = useMemo(() => { const onRecordStartClicked = async () => { setRecording(true) - await appState.serverSetting.setRecordIO(1) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, recordIO: 1 }) } const onRecordStopClicked = async () => { setRecording(false) - await appState.serverSetting.setRecordIO(0) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, recordIO: 0 }) } const onRecordAnalizeClicked = async () => { if (appState.frontendManagerState.isConverting) { @@ -141,7 +141,7 @@ export const useQualityControl = (): QualityControlState => { return } appState.frontendManagerState.setIsAnalyzing(true) - await appState.serverSetting.setRecordIO(2) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, recordIO: 2 }) // set spectrogram (dio) const imageDio = document.getElementById("body-image-container-img-dio") as HTMLImageElement imageDio.src = "/tmp/analyze-dio.png?" + new Date().getTime() @@ -252,7 +252,7 @@ export const useQualityControl = (): QualityControlState => { ) - }, [appState.serverSetting.setting.recordIO, appState.serverSetting.setRecordIO, outputAudioDeviceInfo, audioOutputForGUI, appState.frontendManagerState.isAnalyzing, appState.frontendManagerState.isConverting]) + }, [appState.serverSetting.serverSetting.recordIO, appState.serverSetting.updateServerSettings, outputAudioDeviceInfo, audioOutputForGUI, appState.frontendManagerState.isAnalyzing, appState.frontendManagerState.isConverting]) const QualityControlContent = useMemo(() => { return ( diff --git a/client/demo/src/105_speaker_setting.tsx b/client/demo/src/105_speaker_setting.tsx index 8740107c..00412374 100644 --- a/client/demo/src/105_speaker_setting.tsx +++ b/client/demo/src/105_speaker_setting.tsx @@ -19,33 +19,45 @@ export const useSpeakerSetting = () => { const [editSpeakerTargetId, setEditSpeakerTargetId] = useState(0) const [editSpeakerTargetName, setEditSpeakerTargetName] = useState("") - useEffect(() => { + // useEffect(() => { + // const src = appState.clientSetting.setting.correspondences?.find(x => { + // return x.sid == appState.serverSetting.serverSetting.srcId + // }) + // const dst = appState.clientSetting.setting.correspondences?.find(x => { + // return x.sid == appState.serverSetting.serverSetting.dstId + // }) + // const recommendedF0Factor = dst && src ? dst.correspondence / src.correspondence : 0 + // appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Factor: recommendedF0Factor }) + + // }, [appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.serverSetting.updateServerSettings]) + + + const calcDefaultF0Factor = (srcId: number, dstId: number) => { const src = appState.clientSetting.setting.correspondences?.find(x => { - return x.sid == appState.serverSetting.setting.srcId + return x.sid == srcId }) const dst = appState.clientSetting.setting.correspondences?.find(x => { - return x.sid == appState.serverSetting.setting.dstId + return x.sid == dstId }) const recommendedF0Factor = dst && src ? dst.correspondence / src.correspondence : 0 - appState.serverSetting.setF0Factor(recommendedF0Factor) + return recommendedF0Factor + } - }, [appState.serverSetting.setting.srcId, appState.serverSetting.setting.dstId]) + console.log() const srcIdRow = useMemo(() => { const selected = appState.clientSetting.setting.correspondences?.find(x => { - return x.sid == appState.serverSetting.setting.srcId + return x.sid == appState.serverSetting.serverSetting.srcId }) return (
Source Speaker Id
- { + const recF0 = calcDefaultF0Factor(Number(e.target.value), appState.serverSetting.serverSetting.dstId) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, srcId: Number(e.target.value), f0Factor: recF0 }) }}> { - // appState.clientSetting.setting.speakers.map(x => { - // return - // }) appState.clientSetting.setting.correspondences?.map(x => { return }) @@ -59,18 +71,20 @@ export const useSpeakerSetting = () => {
) - }, [appState.clientSetting.setting.speakers, appState.serverSetting.setting.srcId, appState.clientSetting.setting.correspondences, appState.serverSetting.setSrcId]) + }, [appState.clientSetting.setting.speakers, appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.clientSetting.setting.correspondences, appState.serverSetting.updateServerSettings]) const dstIdRow = useMemo(() => { const selected = appState.clientSetting.setting.correspondences?.find(x => { - return x.sid == appState.serverSetting.setting.dstId + return x.sid == appState.serverSetting.serverSetting.dstId }) return (
Destination Speaker Id
- { + const recF0 = calcDefaultF0Factor(appState.serverSetting.serverSetting.srcId, Number(e.target.value)) + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, dstId: Number(e.target.value), f0Factor: recF0 }) + }}> { // appState.clientSetting.setting.speakers.map(x => { @@ -88,7 +102,7 @@ export const useSpeakerSetting = () => {
) - }, [appState.clientSetting.setting.speakers, appState.serverSetting.setting.dstId, appState.clientSetting.setting.correspondences, appState.serverSetting.setDstId]) + }, [appState.clientSetting.setting.speakers, appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.clientSetting.setting.correspondences, appState.serverSetting.updateServerSettings]) const editSpeakerIdMappingRow = useMemo(() => { const onSetSpeakerMappingClicked = async () => { @@ -139,10 +153,10 @@ export const useSpeakerSetting = () => { const f0FactorRow = useMemo(() => { const src = appState.clientSetting.setting.correspondences?.find(x => { - return x.sid == appState.serverSetting.setting.srcId + return x.sid == appState.serverSetting.serverSetting.srcId }) const dst = appState.clientSetting.setting.correspondences?.find(x => { - return x.sid == appState.serverSetting.setting.dstId + return x.sid == appState.serverSetting.serverSetting.dstId }) const recommendedF0Factor = dst && src ? dst.correspondence / src.correspondence : 0 @@ -151,16 +165,16 @@ export const useSpeakerSetting = () => {
F0 Factor
- { - appState.serverSetting.setF0Factor(Number(e.target.value)) + { + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Factor: Number(e.target.value) }) }}> - {appState.serverSetting.setting.f0Factor.toFixed(1)} + {appState.serverSetting.serverSetting.f0Factor.toFixed(1)}
recommend: {recommendedF0Factor.toFixed(1)}
) - }, [appState.serverSetting.setting.f0Factor, appState.serverSetting.setting.srcId, appState.serverSetting.setting.dstId, appState.clientSetting.setting.correspondences, appState.serverSetting.setF0Factor]) + }, [appState.serverSetting.serverSetting.f0Factor, appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.clientSetting.setting.correspondences, appState.serverSetting.updateServerSettings]) const speakerSetting = useMemo(() => { return ( diff --git a/client/demo/src/106_convert_setting.tsx b/client/demo/src/106_convert_setting.tsx index e3bc1878..b7ca1aff 100644 --- a/client/demo/src/106_convert_setting.tsx +++ b/client/demo/src/106_convert_setting.tsx @@ -43,13 +43,13 @@ export const useConvertSetting = (): ConvertSettingState => {
GPU
- { - appState.serverSetting.setGpu(Number(e.target.value)) + { + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, gpu: Number(e.target.value) }) }} />
) - }, [appState.serverSetting.setting.gpu, appState.serverSetting.setGpu]) + }, [appState.serverSetting.serverSetting.gpu, appState.serverSetting.updateServerSettings]) const convertSetting = useMemo(() => { diff --git a/client/demo/src/107_advanced_setting.tsx b/client/demo/src/107_advanced_setting.tsx index e42b3f2a..bd1a93fd 100644 --- a/client/demo/src/107_advanced_setting.tsx +++ b/client/demo/src/107_advanced_setting.tsx @@ -89,8 +89,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
{ - appState.serverSetting.setConvertChunkNum(Number(e.target.value)) - }} /> -
-
- ) - }, [appState.serverSetting.setting.convertChunkNum, appState.serverSetting.setConvertChunkNum]) - - const minConvertSizeRow = useMemo(() => { - return ( - -
-
Min Convert Size(byte)
-
- { - appState.serverSetting.setMinConvertSize(Number(e.target.value)) - }} /> -
-
- ) - }, [appState.serverSetting.setting.minConvertSize, appState.serverSetting.setMinConvertSize]) - - const crossFadeOverlapRateRow = useMemo(() => { - return ( -
-
Cross Fade Overlap Rate
-
- { - appState.serverSetting.setCrossFadeOverlapRate(Number(e.target.value)) - }} /> -
-
- ) - }, [appState.serverSetting.setting.crossFadeOverlapRate, appState.serverSetting.setCrossFadeOverlapRate]) - const crossFadeOverlapSizeRow = useMemo(() => { return (
Cross Fade Overlap Size
- { + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, crossFadeOverlapSize: Number(e.target.value) as CrossFadeOverlapSize }) }}> { Object.values(CrossFadeOverlapSize).map(x => { @@ -182,33 +140,33 @@ export const useAdvancedSetting = (): AdvancedSettingState => {
) - }, [appState.serverSetting.setting.crossFadeOverlapSize, appState.serverSetting.setCrossFadeOverlapSize]) + }, [appState.serverSetting.serverSetting.crossFadeOverlapSize, appState.serverSetting.updateServerSettings]) const crossFadeOffsetRateRow = useMemo(() => { return (
Cross Fade Offset Rate
- { - appState.serverSetting.setCrossFadeOffsetRate(Number(e.target.value)) + { + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, crossFadeOffsetRate: Number(e.target.value) }) }} />
) - }, [appState.serverSetting.setting.crossFadeOffsetRate, appState.serverSetting.setCrossFadeOffsetRate]) + }, [appState.serverSetting.serverSetting.crossFadeOffsetRate, appState.serverSetting.updateServerSettings]) const crossFadeEndRateRow = useMemo(() => { return (
Cross Fade End Rate
- { - appState.serverSetting.setCrossFadeEndRate(Number(e.target.value)) + { + appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, crossFadeEndRate: Number(e.target.value) }) }} />
) - }, [appState.serverSetting.setting.crossFadeEndRate, appState.serverSetting.setCrossFadeEndRate]) + }, [appState.serverSetting.serverSetting.crossFadeEndRate, appState.serverSetting.updateServerSettings]) const voiceChangeModeRow = useMemo(() => { @@ -306,10 +264,6 @@ export const useAdvancedSetting = (): AdvancedSettingState => { {sendingSampleRateRow} {bufferSizeRow}
- - {convertChunkNumRow} - {minConvertSizeRow} - {crossFadeOverlapRateRow} {crossFadeOverlapSizeRow} {crossFadeOffsetRateRow} {crossFadeEndRateRow} @@ -322,7 +276,7 @@ export const useAdvancedSetting = (): AdvancedSettingState => { ) - }, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOverlapSizeRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow]) + }, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, bufferSizeRow, crossFadeOverlapSizeRow, crossFadeOffsetRateRow, crossFadeEndRateRow, voiceChangeModeRow, workletSettingRow, downSamplingModeRow]) const advancedSetting = useMemo(() => { diff --git a/client/lib/src/const.ts b/client/lib/src/const.ts index ab2069ca..92cb1da2 100644 --- a/client/lib/src/const.ts +++ b/client/lib/src/const.ts @@ -4,35 +4,128 @@ // 24000sample -> 1sec, 128sample(1chunk) -> 5.333msec // 187.5chunk -> 1sec +/////////////////////// +// サーバセッティング +/////////////////////// +export const InputSampleRate = { + "48000": 48000, + "24000": 24000 +} as const +export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate] + +export const CrossFadeOverlapSize = { + "1024": 1024, + "2048": 2048, + "4096": 4096, +} as const +export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize] + + +export const OnnxExecutionProvider = { + "CPUExecutionProvider": "CPUExecutionProvider", + "CUDAExecutionProvider": "CUDAExecutionProvider", + "DmlExecutionProvider": "DmlExecutionProvider", + "OpenVINOExecutionProvider": "OpenVINOExecutionProvider", +} as const +export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof OnnxExecutionProvider] + +export const Framework = { + "PyTorch": "PyTorch", + "ONNX": "ONNX", +} as const +export type Framework = typeof Framework[keyof typeof Framework] + +export const F0Detector = { + "dio": "dio", + "harvest": "harvest", +} as const +export type F0Detector = typeof F0Detector[keyof typeof F0Detector] + + + +export const ServerSettingKey = { + "srcId": "srcId", + "dstId": "dstId", + "gpu": "gpu", + + "crossFadeOffsetRate": "crossFadeOffsetRate", + "crossFadeEndRate": "crossFadeEndRate", + "crossFadeOverlapSize": "crossFadeOverlapSize", + + "framework": "framework", + "onnxExecutionProvider": "onnxExecutionProvider", + + "f0Factor": "f0Factor", + "f0Detector": "f0Detector", + "recordIO": "recordIO", + + "inputSampleRate": "inputSampleRate", +} as const +export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey] + -// types export type VoiceChangerServerSetting = { - - // VITSに入力する変換サイズ。(入力データの2倍以上の大きさで指定。それより小さいものが指定された場合は、 - // サーバ側で自動的に入力の2倍のサイズが設定される。) - convertChunkNum: number, - minConvertSize: number, // この値より小さい場合にこの値に揃える。 - srcId: number, dstId: number, gpu: number, - crossFadeLowerValue: number, crossFadeOffsetRate: number, crossFadeEndRate: number, - crossFadeOverlapRate: number, - crossFadeOverlapSize: number, + crossFadeOverlapSize: CrossFadeOverlapSize, framework: Framework onnxExecutionProvider: OnnxExecutionProvider, f0Factor: number - f0Detector: string // dio or harvest + f0Detector: F0Detector // dio or harvest recordIO: number // 0:off, 1:on - serverMicProps: string + inputSampleRate: InputSampleRate } +export type ServerInfo = VoiceChangerServerSetting & { + status: string + configFile: string, + pyTorchModelFile: string, + onnxModelFile: string, + onnxExecutionProviders: OnnxExecutionProvider[] +} + +export const DefaultServerSetting: ServerInfo = { + srcId: 0, + dstId: 101, + gpu: 0, + + crossFadeOffsetRate: 0.1, + crossFadeEndRate: 0.9, + crossFadeOverlapSize: CrossFadeOverlapSize[4096], + + framework: Framework.PyTorch, + f0Factor: 1.0, + onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider, + f0Detector: F0Detector.dio, + recordIO: 0, + + inputSampleRate: 48000, + + // + status: "ok", + configFile: "", + pyTorchModelFile: "", + onnxModelFile: "", + onnxExecutionProviders: [] +} + + +/////////////////////// +// Workletセッティング +/////////////////////// + +/////////////////////// +// Clientセッティング +/////////////////////// + + export type VoiceChangerClientSetting = { audioInput: string | MediaStream | null, mmvcServerUrl: string, @@ -70,29 +163,6 @@ export type Correspondence = { } -export type ServerInfo = { - status: string - configFile: string, - pyTorchModelFile: string, - onnxModelFile: string, - convertChunkNum: number, - minConvertSize: number, - crossFadeOffsetRate: number, - crossFadeEndRate: number, - crossFadeOverlapRate: number, - crossFadeOverlapSize: number, - gpu: number, - srcId: number, - dstId: number, - framework: Framework, - onnxExecutionProvider: string[] - f0Factor: number - f0Detector: string - recordIO: number - serverMicProps: string - inputSampleRate: InputSampleRate -} - export type ServerAudioDevice = { kind: string, index: number, @@ -137,12 +207,6 @@ export const SendingSampleRate = { } as const export type SendingSampleRate = typeof SendingSampleRate[keyof typeof SendingSampleRate] -export const InputSampleRate = { - "48000": 48000, - "24000": 24000 -} as const -export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate] - export const BufferSize = { "256": 256, "512": 512, @@ -154,73 +218,8 @@ export const BufferSize = { } as const export type BufferSize = typeof BufferSize[keyof typeof BufferSize] -export const OnnxExecutionProvider = { - "CPUExecutionProvider": "CPUExecutionProvider", - "CUDAExecutionProvider": "CUDAExecutionProvider", - "DmlExecutionProvider": "DmlExecutionProvider", - "OpenVINOExecutionProvider": "OpenVINOExecutionProvider", -} as const -export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof OnnxExecutionProvider] - -export const Framework = { - "PyTorch": "PyTorch", - "ONNX": "ONNX", -} as const -export type Framework = typeof Framework[keyof typeof Framework] - -export const F0Detector = { - "dio": "dio", - "harvest": "harvest", -} as const -export type F0Detector = typeof F0Detector[keyof typeof F0Detector] - -export const CrossFadeOverlapSize = { - "1024": 1024, - "2048": 2048, - "4096": 4096, -} as const -export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize] - -export const ServerSettingKey = { - "srcId": "srcId", - "dstId": "dstId", - "convertChunkNum": "convertChunkNum", - "minConvertSize": "minConvertSize", - "gpu": "gpu", - "crossFadeOffsetRate": "crossFadeOffsetRate", - "crossFadeEndRate": "crossFadeEndRate", - "crossFadeOverlapRate": "crossFadeOverlapRate", - "crossFadeOverlapSize": "crossFadeOverlapSize", - "framework": "framework", - "onnxExecutionProvider": "onnxExecutionProvider", - "f0Factor": "f0Factor", - "f0Detector": "f0Detector", - "recordIO": "recordIO", - "serverMicProps": "serverMicProps", - "inputSampleRate": "inputSampleRate", -} as const -export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey] - // Defaults -export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = { - convertChunkNum: 32, //(★1) - minConvertSize: 0, - srcId: 0, - dstId: 101, - gpu: 0, - crossFadeLowerValue: 0.1, - crossFadeOffsetRate: 0.1, - crossFadeEndRate: 0.9, - crossFadeOverlapRate: 0.5, - crossFadeOverlapSize: CrossFadeOverlapSize[4096], - framework: "PyTorch", - f0Factor: 1.0, - onnxExecutionProvider: "CPUExecutionProvider", - f0Detector: "dio", - recordIO: 0, - serverMicProps: "", - inputSampleRate: 48000 -} + export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = { audioInput: null, diff --git a/client/lib/src/hooks/useServerSetting.ts b/client/lib/src/hooks/useServerSetting.ts index d68b5753..57fa963b 100644 --- a/client/lib/src/hooks/useServerSetting.ts +++ b/client/lib/src/hooks/useServerSetting.ts @@ -1,15 +1,9 @@ -import { useState, useMemo, useRef, useEffect } from "react" -import { VoiceChangerServerSetting, ServerInfo, Framework, OnnxExecutionProvider, DefaultVoiceChangerServerSetting, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ServerAudioDevices, InputSampleRate } from "../const" +import { useState, useMemo, useEffect } from "react" +import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, ServerAudioDevices, DefaultServerSetting } from "../const" import { VoiceChangerClient } from "../VoiceChangerClient" import { useIndexedDB } from "./useIndexedDB" -// export type FileUploadSetting = { -// pyTorchModel: File | null -// configFile: File | null -// onnxModel: File | null -// } - type ModelData = { file?: File data?: ArrayBuffer @@ -22,7 +16,6 @@ export type FileUploadSetting = { configFile: ModelData | null } - const InitialFileUploadSetting: FileUploadSetting = { pyTorchModel: null, configFile: null, @@ -34,38 +27,24 @@ export type UseServerSettingProps = { } export type ServerSettingState = { - setting: VoiceChangerServerSetting; + serverSetting: ServerInfo + updateServerSettings: (setting: ServerInfo) => Promise clearSetting: () => Promise - serverInfo: ServerInfo | undefined; - fileUploadSetting: FileUploadSetting - setFramework: (framework: Framework) => Promise; - setOnnxExecutionProvider: (provider: OnnxExecutionProvider) => Promise; - setSrcId: (num: number) => Promise; - setDstId: (num: number) => Promise; - setConvertChunkNum: (num: number) => Promise; - setMinConvertSize: (num: number) => Promise - setGpu: (num: number) => Promise; - setCrossFadeOffsetRate: (num: number) => Promise; - setCrossFadeEndRate: (num: number) => Promise; - setCrossFadeOverlapRate: (num: number) => Promise; - setCrossFadeOverlapSize: (num: number) => Promise; - setF0Factor: (num: number) => Promise; - setF0Detector: (val: string) => Promise; - setRecordIO: (num: number) => Promise; - setServerMicrophone: (index: number) => Promise - setInputSampleRate: (num: InputSampleRate) => Promise reloadServerInfo: () => Promise; + + fileUploadSetting: FileUploadSetting setFileUploadSetting: (val: FileUploadSetting) => void loadModel: () => Promise - getServerDevices: () => Promise uploadProgress: number isUploading: boolean + + getServerDevices: () => Promise + } export const useServerSetting = (props: UseServerSettingProps): ServerSettingState => { - const settingRef = useRef(DefaultVoiceChangerServerSetting) - const [setting, _setSetting] = useState(settingRef.current) - const [serverInfo, _setServerInfo] = useState() + // const settingRef = useRef(DefaultVoiceChangerServerSetting) + const [serverSetting, setServerSetting] = useState(DefaultServerSetting) const [fileUploadSetting, setFileUploadSetting] = useState(InitialFileUploadSetting) const { setItem, getItem, removeItem } = useIndexedDB() @@ -76,9 +55,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta const setting = await getItem(INDEXEDDB_KEY_SERVER) if (!setting) { } else { - settingRef.current = setting as VoiceChangerServerSetting + setServerSetting(setting as ServerInfo) } - _setSetting({ ...settingRef.current }) const fileuploadSetting = await getItem(INDEXEDDB_KEY_MODEL_DATA) if (!fileuploadSetting) { @@ -90,210 +68,42 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta loadCache() }, []) - // クライアントへ設定反映 初期化, 設定変更 + // クライアントへ設定反映 (キャッシュ反映) useEffect(() => { if (!props.voiceChangerClient) return - props.voiceChangerClient.updateServerSettings(ServerSettingKey.framework, setting.framework) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.onnxExecutionProvider, setting.onnxExecutionProvider) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.srcId, "" + setting.srcId) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.dstId, "" + setting.dstId) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.convertChunkNum, "" + setting.convertChunkNum) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.minConvertSize, "" + setting.minConvertSize) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.gpu, "" + setting.gpu) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeOffsetRate, "" + setting.crossFadeOffsetRate) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeEndRate, "" + setting.crossFadeEndRate) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.crossFadeOverlapRate, "" + setting.crossFadeOverlapRate) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Factor, "" + setting.f0Factor) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.f0Detector, "" + setting.f0Detector) - props.voiceChangerClient.updateServerSettings(ServerSettingKey.recordIO, "" + setting.recordIO) - - - // setting["convertChunkNum"] = 1 - // const a = "convertChunkNum" - // setting[a] = "" - - }, [props.voiceChangerClient, setting]) + for (let i = 0; i < Object.values(ServerSettingKey).length; i++) { + const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting + const v = serverSetting[k] + if (v) { + props.voiceChangerClient.updateServerSettings(k, "" + v) + } + } + reloadServerInfo() + }, [props.voiceChangerClient]) ////////////// // 設定 ///////////// - //// サーバに設定後、反映された情報と照合して値が一致していることを確認。一致していない場合はalert - const _set_and_store = async (key: ServerSettingKey, newVal: string) => { - if (!props.voiceChangerClient) return false - - const res = await props.voiceChangerClient.updateServerSettings(key, "" + newVal) - - _setServerInfo(res) - if (newVal == res[key]) { - const newSetting: VoiceChangerServerSetting = { - ...settingRef.current, - convertChunkNum: res.convertChunkNum, - minConvertSize: res.minConvertSize, - srcId: res.srcId, - dstId: res.dstId, - gpu: res.gpu, - crossFadeOffsetRate: res.crossFadeOffsetRate, - crossFadeEndRate: res.crossFadeEndRate, - crossFadeOverlapRate: res.crossFadeOverlapRate, - crossFadeOverlapSize: res.crossFadeOverlapSize, - framework: res.framework, - onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider, - f0Factor: res.f0Factor, - f0Detector: res.f0Detector, - recordIO: res.recordIO + const updateServerSettings = useMemo(() => { + return async (setting: ServerInfo) => { + if (!props.voiceChangerClient) return + for (let i = 0; i < Object.values(ServerSettingKey).length; i++) { + const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting + const cur_v = serverSetting[k] + const new_v = setting[k] + if (cur_v != new_v) { + // console.log("update server setting!!!4", k, cur_v, new_v) + const res = await props.voiceChangerClient.updateServerSettings(k, "" + new_v) + // console.log("update server setting!!!5", res) + setServerSetting(res) + setItem(INDEXEDDB_KEY_SERVER, res) + } } - _setSetting(newSetting) - setItem(INDEXEDDB_KEY_SERVER, newSetting) - return true - } else { - alert(`[ServerSetting] 設定が反映されていません([key:${key}, new:${newVal}, res:${res[key]}])。モデルの切り替えの場合、処理が非同期で行われるため反映されていないように見える場合があります。サーバコントロールのリロードボタンを押すとGUIに反映されるます。`) - return false } - - } - - // // New Trial - // // 設定 _setSettingがトリガでuseEffectが呼ばれて、workletに設定が飛ぶ - // const setSetting = useMemo(() => { - // return (setting: ) => { + }, [props.voiceChangerClient, serverSetting]) - // if (!props.voiceChangerClient) return false - - // const res = await props.voiceChangerClient.updateServerSettings(key, "" + newVal) - - // _setServerInfo(res) - // if (newVal == res[key]) { - // const newSetting: VoiceChangerServerSetting = { - // ...settingRef.current, - // convertChunkNum: res.convertChunkNum, - // minConvertSize: res.minConvertSize, - // srcId: res.srcId, - // dstId: res.dstId, - // gpu: res.gpu, - // crossFadeOffsetRate: res.crossFadeOffsetRate, - // crossFadeEndRate: res.crossFadeEndRate, - // crossFadeOverlapRate: res.crossFadeOverlapRate, - // crossFadeOverlapSize: res.crossFadeOverlapSize, - // framework: res.framework, - // onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider, - // f0Factor: res.f0Factor, - // f0Detector: res.f0Detector, - // recordIO: res.recordIO - - // } - // _setSetting(newSetting) - // setItem(INDEXEDDB_KEY_SERVER, newSetting) - // return true - // } else { - // alert(`[ServerSetting] 設定が反映されていません([key:${key}, new:${newVal}, res:${res[key]}])。モデルの切り替えの場合、処理が非同期で行われるため反映されていないように見える場合があります。サーバコントロールのリロードボタンを押すとGUIに反映されるます。`) - // return false - // } - - // } - // }, [props.voiceChangerClient]) - - const setFramework = useMemo(() => { - return async (framework: Framework) => { - return await _set_and_store(ServerSettingKey.framework, "" + framework) - } - }, [props.voiceChangerClient]) - - const setOnnxExecutionProvider = useMemo(() => { - return async (provider: OnnxExecutionProvider) => { - return await _set_and_store(ServerSettingKey.onnxExecutionProvider, "" + provider) - } - }, [props.voiceChangerClient]) - - const setSrcId = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.srcId, "" + num) - } - }, [props.voiceChangerClient]) - - const setDstId = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.dstId, "" + num) - } - }, [props.voiceChangerClient]) - - const setConvertChunkNum = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.convertChunkNum, "" + num) - } - }, [props.voiceChangerClient]) - - const setMinConvertSize = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.minConvertSize, "" + num) - } - }, [props.voiceChangerClient]) - - - const setGpu = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.gpu, "" + num) - } - }, [props.voiceChangerClient]) - - const setCrossFadeOffsetRate = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.crossFadeOffsetRate, "" + num) - } - }, [props.voiceChangerClient]) - const setCrossFadeEndRate = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.crossFadeEndRate, "" + num) - } - }, [props.voiceChangerClient]) - const setCrossFadeOverlapRate = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.crossFadeOverlapRate, "" + num) - } - }, [props.voiceChangerClient]) - const setCrossFadeOverlapSize = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.crossFadeOverlapSize, "" + num) - } - }, [props.voiceChangerClient]) - - - const setF0Factor = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.f0Factor, "" + num) - } - }, [props.voiceChangerClient]) - - const setF0Detector = useMemo(() => { - return async (val: string) => { - return await _set_and_store(ServerSettingKey.f0Detector, "" + val) - } - }, [props.voiceChangerClient]) - const setRecordIO = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.recordIO, "" + num) - } - }, [props.voiceChangerClient]) - const setServerMicrophone = useMemo(() => { - return async (index: number) => { - if (!props.voiceChangerClient) { - return - } - const sid = props.voiceChangerClient.getSocketId() - const serverMicProps = { - sid: sid, - deviceIndex: index - } - return await _set_and_store(ServerSettingKey.serverMicProps, JSON.stringify(serverMicProps)) - } - }, [props.voiceChangerClient]) - - const setInputSampleRate = useMemo(() => { - return async (num: number) => { - return await _set_and_store(ServerSettingKey.inputSampleRate, "" + num) - } - }, [props.voiceChangerClient]) ////////////// // 操作 ///////////// @@ -372,24 +182,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta const reloadServerInfo = useMemo(() => { return async () => { + console.log("reload server info") + if (!props.voiceChangerClient) return const res = await props.voiceChangerClient.getServerSettings() - _setServerInfo(res) - _setSetting({ - ...settingRef.current, - convertChunkNum: res.convertChunkNum, - srcId: res.srcId, - dstId: res.dstId, - gpu: res.gpu, - crossFadeOffsetRate: res.crossFadeOffsetRate, - crossFadeEndRate: res.crossFadeEndRate, - crossFadeOverlapRate: res.crossFadeOverlapRate, - framework: res.framework, - onnxExecutionProvider: (!!res.onnxExecutionProvider && res.onnxExecutionProvider.length > 0) ? res.onnxExecutionProvider[0] as OnnxExecutionProvider : DefaultVoiceChangerServerSetting.onnxExecutionProvider, - f0Factor: res.f0Factor, - f0Detector: res.f0Detector, - recordIO: res.recordIO - }) + setServerSetting(res) + setItem(INDEXEDDB_KEY_SERVER, res) } }, [props.voiceChangerClient]) @@ -410,31 +208,17 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta } return { - setting, + serverSetting, + updateServerSettings, clearSetting, - serverInfo, - fileUploadSetting, - setFramework, - setOnnxExecutionProvider, - setSrcId, - setDstId, - setConvertChunkNum, - setMinConvertSize, - setGpu, - setCrossFadeOffsetRate, - setCrossFadeEndRate, - setCrossFadeOverlapRate, - setCrossFadeOverlapSize, - setF0Factor, - setF0Detector, - setRecordIO, - setServerMicrophone, - setInputSampleRate, reloadServerInfo, + + fileUploadSetting, setFileUploadSetting, loadModel, - getServerDevices, uploadProgress, isUploading, + + getServerDevices, } } \ No newline at end of file diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index 36bef690..2ef093f7 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -89,26 +89,26 @@ class VocieChangerSettings(): gpu: int = 0 srcId: int = 107 dstId: int = 100 + + inputSampleRate: int = 48000 # 48000 or 24000 + crossFadeOffsetRate: float = 0.1 crossFadeEndRate: float = 0.9 crossFadeOverlapSize: int = 4096 - convertChunkNum: int = 32 - minConvertSize: int = 0 - framework: str = "PyTorch" # PyTorch or ONNX + f0Factor: float = 1.0 f0Detector: str = "dio" # dio or harvest recordIO: int = 1 # 0:off, 1:on - serverMicProps: str = "" - inputSampleRate: int = 48000 # 48000 or 24000 + framework: str = "PyTorch" # PyTorch or ONNX pyTorchModelFile: str = "" onnxModelFile: str = "" configFile: str = "" # ↓mutableな物だけ列挙 - intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize", "recordIO", "inputSampleRate", "crossFadeOverlapSize"] + intData = ["gpu", "srcId", "dstId", "inputSampleRate", "crossFadeOverlapSize", "recordIO"] floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "f0Factor"] - strData = ["framework", "f0Detector", "serverMicProps"] + strData = ["framework", "f0Detector"] def readMicrophone(queue, sid, deviceIndex): @@ -288,35 +288,6 @@ class VoiceChanger(): setattr(self.settings, key, float(val)) elif key in self.settings.strData: setattr(self.settings, key, str(val)) - if key == "serverMicProps": - if hasattr(self, "serverMicrophoneReaderProcess"): - self.serverMicrophoneReaderProcess.terminate() - - if len(val) == 0: - print("server mic close") - - pass - else: - props = json.loads(val) - print(props) - sid = props["sid"] - deviceIndex = props["deviceIndex"] - self.serverMicrophoneReaderProcessQueue = Queue() - self.serverMicrophoneReaderProcess = Process(target=readMicrophone, args=( - self.serverMicrophoneReaderProcessQueue, sid, deviceIndex,)) - self.serverMicrophoneReaderProcess.start() - - try: - print(sid, deviceIndex) - except Exception as e: - print(e) - # audio = pyaudio.PyAudio() - # audio_input_stream = audio.open(format=pyaudio.paInt16, - # channels=1, - # rate=SAMPLING_RATE, - # frames_per_buffer=4096, - # input_device_index=val, - # input=True) else: print(f"{key} is not mutalbe variable!") @@ -505,8 +476,8 @@ class VoiceChanger(): powered_cur = cur_overlap * self.cur_strength powered_result = powered_prev + powered_cur - print(overlapSize, prev_overlap.shape, cur_overlap.shape, self.prev_strength.shape, self.cur_strength.shape) - print(self.prev_audio1.shape, audio1.shape, inputSize, overlapSize) + # print(overlapSize, prev_overlap.shape, cur_overlap.shape, self.prev_strength.shape, self.cur_strength.shape) + # print(self.prev_audio1.shape, audio1.shape, inputSize, overlapSize) cur = audio1[-1 * inputSize:-1 * overlapSize] # 今回のインプットの生部分。(インプット - 次回のCrossfade部分)。 result = torch.cat([powered_result, cur], axis=0) # Crossfadeと今回のインプットの生部分を結合 @@ -521,11 +492,11 @@ class VoiceChanger(): def on_request(self, unpackedData: any): if self.settings.inputSampleRate != 24000: - print("convert sampling rate!", self.settings.inputSampleRate) + # print("convert sampling rate!", self.settings.inputSampleRate) unpackedData = resampy.resample(unpackedData, 48000, 24000) convertSize = unpackedData.shape[0] + min(self.settings.crossFadeOverlapSize, unpackedData.shape[0]) - print(convertSize, unpackedData.shape[0]) + # print(convertSize, unpackedData.shape[0]) if convertSize < 8192: convertSize = 8192