server device mode fallback

2023-05-26 16:26:17 +09:00 · 2023-05-26 16:26:17 +09:00 · 3d8dc7a9bd
commit 3d8dc7a9bd
parent 7746f3168f
10 changed files with 1793 additions and 2807 deletions
--- a/client/demo/dist/index.html
+++ b/client/demo/dist/index.html
@ -1 +1,10 @@
-<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
+<!DOCTYPE html>
+<html style="width: 100%; height: 100%; overflow: hidden">
+    <head>
+        <meta charset="utf-8" />
+        <title>Voice Changer Client Demo</title>
+    <script defer src="index.js"></script></head>
+    <body style="width: 100%; height: 100%; margin: 0px">
+        <div id="app" style="width: 100%; height: 100%"></div>
+    </body>
+</html>
--- a/client/demo/dist/index.js
+++ b/client/demo/dist/index.js
--- a/client/demo/dist/index.js.LICENSE.txt
+++ b/client/demo/dist/index.js.LICENSE.txt
--- a/client/demo/src/components/demo/components/202_PerformanceRow.tsx
+++ b/client/demo/src/components/demo/components/202_PerformanceRow.tsx
@ -9,14 +9,14 @@ export const PerformanceRow = (_props: PerformanceRowProps) => {
    const [showPerformanceDetail, setShowPerformanceDetail] = useState<boolean>(false)

    const performanceRow = useMemo(() => {
-        if (appState.serverSetting.serverSetting.enableServerAudio) {
-            return (
-                <div className="body-row split-3-7 left-padding-1 guided">
-                    <div className="body-item-title left-padding-1">monitor:</div>
-                    <div className="body-item-text">server device mode. refer console.</div>
-                </div>
-            )
-        }
+        // if (appState.serverSetting.serverSetting.enableServerAudio) {
+        //     return (
+        //         <div className="body-row split-3-7 left-padding-1 guided">
+        //             <div className="body-item-title left-padding-1">monitor:</div>
+        //             <div className="body-item-text">server device mode. refer console.</div>
+        //         </div>
+        //     )
+        // }
        const performanceDetailLabel = showPerformanceDetail ? "[pre, main, post] <<" : "more >>"
        const performanceData = showPerformanceDetail ? `[${appState.performance.preprocessTime}, ${appState.performance.mainprocessTime},${appState.performance.postprocessTime}]` : ""
        return (
--- a/client/demo/src/components/demo/components/207_ServerOpertationRow.tsx
+++ b/client/demo/src/components/demo/components/207_ServerOpertationRow.tsx
@ -15,7 +15,6 @@ export const ServerOpertationRow = (props: ServerOpertationRowProps) => {
    const guiState = useGuiState()

    const serverOpertationRow = useMemo(() => {
-        console.log("UPDATE OPERATION ROW:::::::::::::::::::::::")

        const onnxExportButtonAction = async () => {

--- a/client/demo/src/components/demo/components/301-k_SampleDownloadControl.tsx
+++ b/client/demo/src/components/demo/components/301-k_SampleDownloadControl.tsx
@ -18,7 +18,7 @@ export const SampleDownloadControlRow = (_props: SampleDownloadControlRowProps)
        }
        return (
            <div className="body-row split-3-3-4 left-padding-1 guided">
-                <div className="body-item-title left-padding-1 ">Advanced Configuration</div>
+                <div className="body-item-title left-padding-2 ">Advanced Configuration</div>
                <div>
                    <input type="checkbox" checked={fileUploadSetting.rvcIndexDownload} onChange={(e) => {
                        appState.serverSetting.setFileUploadSetting(slot, { ...fileUploadSetting, rvcIndexDownload: e.target.checked })
--- a/client/lib/src/VoiceChangerWorkletNode.ts
+++ b/client/lib/src/VoiceChangerWorkletNode.ts
@ -74,15 +74,32 @@ export class VoiceChangerWorkletNode extends AudioWorkletNode {
                this.listener.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_CONNECT_FAILED, `[SIO] rconnection failed ${err}`)
            })
            this.socket.on('connect', () => {
-                console.log(`[SIO] sonnect to ${this.setting.serverUrl}`)
+                console.log(`[SIO] connect to ${this.setting.serverUrl}`)
                console.log(`[SIO] ${this.socket?.id}`)
            });
+            this.socket.on('close', function (socket) {
+                console.log(`[SIO] close ${socket.id}`)
+            });
+
+
+            this.socket.on('message', (response: any[]) => {
+                console.log("message:", response)
+            });
+
            this.socket.on('response', (response: any[]) => {
-                // console.log("response:", response)
+
                const cur = Date.now()
                const responseTime = cur - response[0]
                const result = response[1] as ArrayBuffer
                const perf = response[2]
+
+                // Quick hack for server device mode
+                if (response[0] == 0) {
+                    this.listener.notifyResponseTime(Math.round(perf[0] * 1000), perf.slice(1, 4))
+                    return
+                }
+
+
                if (result.byteLength < 128 * 2) {
                    this.listener.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_INVALID_RESPONSE, `[SIO] recevied data is too short ${result.byteLength}`)
                } else {
--- a/server/sio/MMVC_Namespace.py
+++ b/server/sio/MMVC_Namespace.py
@ -4,13 +4,27 @@ import numpy as np
 import socketio
 from voice_changer.VoiceChangerManager import VoiceChangerManager

+import asyncio
+

 class MMVC_Namespace(socketio.AsyncNamespace):
    sid: int = 0

+    async def emitTo(self, data):
+        """Emit a "response" event that carries only performance data.
+
+        The payload is [timestamp, audio bytes, perf]: the timestamp is fixed
+        at 0, the audio is a single zero-valued int16 sample packed
+        little-endian, and `data` is forwarded unchanged as the perf entry.
+        The event is addressed to the sid currently stored on this namespace.
+        The client-side "response" handler changed in this same commit treats
+        a zero timestamp as a performance-only message.
+        """
+        # Timestamp is always 0 for this message.
+        timestamp = 0
+        # One zero int16 sample used as placeholder audio.
+        audio1 = np.zeros(1).astype(np.int16)
+        bin = struct.pack("<%sh" % len(audio1), *audio1)
+        perf = data
+
+        await self.emit("response", [timestamp, bin, perf], to=self.sid)
+
+    def emit_coroutine(self, data):
+        """Synchronous wrapper that runs emitTo(data) to completion.
+
+        asyncio.run() creates a new event loop for each call and closes it
+        afterwards; it raises RuntimeError when called from a thread that
+        already has a running event loop.
+        NOTE(review): presumably this is invoked from a non-asyncio thread
+        (it is installed as voiceChanger.emitTo in __init__) -- confirm, and
+        confirm that emitting from a loop other than the server's is safe.
+        """
+        asyncio.run(self.emitTo(data))
+
    def __init__(self, namespace: str, voiceChangerManager: VoiceChangerManager):
+        """Create the namespace and register the performance emitter.
+
+        Args:
+            namespace: Namespace name, passed through to the base class.
+            voiceChangerManager: Manager kept on the instance; its
+                `voiceChanger.emitTo` attribute is set to `emit_coroutine`.
+        """
        super().__init__(namespace)
        self.voiceChangerManager = voiceChangerManager
+        # VoiceChanger.audio_callback calls emitTo(performance) when it is set.
+        self.voiceChangerManager.voiceChanger.emitTo = self.emit_coroutine

    @classmethod
    def get_instance(cls, voiceChangerManager: VoiceChangerManager):
@ -29,6 +43,7 @@ class MMVC_Namespace(socketio.AsyncNamespace):

    async def on_request_message(self, sid, msg):
        self.sid = sid
+        await self.asynctest("on req")
        timestamp = int(msg[0])
        data = msg[1]
        if isinstance(data, str):
--- a/server/voice_changer/Local/AudioDeviceList.py
+++ b/server/voice_changer/Local/AudioDeviceList.py
@ -11,13 +11,14 @@ class ServerAudioDevice:
    hostAPI: str = ""
    maxInputChannels: int = 0
    maxOutputChannels: int = 0
+    default_samplerate: int = 0


 def list_audio_device():
    audioDeviceList = sd.query_devices()

    inputAudioDeviceList = [d for d in audioDeviceList if d["max_input_channels"] > 0]
-    outputDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]
+    outputAudioDeviceList = [d for d in audioDeviceList if d["max_output_channels"] > 0]
    hostapis = sd.query_hostapis()

    # print("input:", inputAudioDeviceList)
@ -34,9 +35,10 @@ def list_audio_device():
            hostAPI=hostapis[d["hostapi"]]["name"],
            maxInputChannels=d["max_input_channels"],
            maxOutputChannels=d["max_output_channels"],
+            default_samplerate=d["default_samplerate"],
        )
        serverAudioInputDevices.append(serverInputAudioDevice)
-    for d in outputDeviceList:
+    for d in outputAudioDeviceList:
        serverOutputAudioDevice: ServerAudioDevice = ServerAudioDevice(
            kind=ServerAudioDeviceTypes.audiooutput,
            index=d["index"],
@ -44,6 +46,7 @@ def list_audio_device():
            hostAPI=hostapis[d["hostapi"]]["name"],
            maxInputChannels=d["max_input_channels"],
            maxOutputChannels=d["max_output_channels"],
+            default_samplerate=d["default_samplerate"],
        )
        serverAudioOutputDevices.append(serverOutputAudioDevice)

--- a/server/voice_changer/VoiceChanger.py
+++ b/server/voice_changer/VoiceChanger.py
@ -99,6 +99,8 @@ class VoiceChanger:

    localPerformanceShowTime = 0.0

+    emitTo = None
+
    def audio_callback(
        self, indata: np.ndarray, outdata: np.ndarray, frames, times, status
    ):
@ -113,6 +115,8 @@ class VoiceChanger:
                )
            all_inference_time = t.secs
            performance = [all_inference_time] + times
+            if self.emitTo is not None:
+                self.emitTo(performance)
            self.settings.performance = [round(x * 1000) for x in performance]
        except Exception as e:
            print("[Voice Changer] ex:", e)
@ -131,6 +135,7 @@ class VoiceChanger:

        currentInputDeviceId = -1
        currentInputSampleRate = -1
+        model_sampling_rate = -1
        currentOutputDeviceId = -1
        currentInputChunkNum = -1
        while True:
@ -166,13 +171,33 @@ class VoiceChanger:
                currentInputChannelNum = serverInputAudioDevice.maxInputChannels
                currentOutputChannelNum = serverOutputAudioDevice.maxOutputChannels

-                vc.settings.serverInputAudioSampleRate = (
-                    self.voiceChanger.get_processing_sampling_rate()
-                )
-                currentInputSampleRate = vc.settings.serverInputAudioSampleRate
                currentInputChunkNum = vc.settings.serverReadChunkSize
                block_frame = currentInputChunkNum * 128

+                # sample rate precheck(alsa cannot use 40000?)
+                model_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
+                try:
+                    with sd.Stream(
+                        callback=self.audio_callback,
+                        blocksize=block_frame,
+                        samplerate=model_sampling_rate,
+                        dtype="float32",
+                        channels=[currentInputChannelNum, currentOutputChannelNum],
+                    ):
+                        pass
+                    currentInputSampleRate = model_sampling_rate
+                    vc.settings.serverInputAudioSampleRate = model_sampling_rate
+                    print(f"[Voice Changer] sample rate {model_sampling_rate}")
+                except Exception as e:
+                    print(
+                        "[Voice Changer] ex: fallback to device default samplerate", e
+                    )
+                    currentInputSampleRate = serverInputAudioDevice.default_samplerate
+                    vc.settings.serverInputAudioSampleRate = (
+                        serverInputAudioDevice.default_samplerate
+                    )
+
+                # main loop
                try:
                    with sd.Stream(
                        callback=self.audio_callback,
@ -181,14 +206,13 @@ class VoiceChanger:
                        dtype="float32",
                        channels=[currentInputChannelNum, currentOutputChannelNum],
                    ):
-                        print()
                        while (
                            vc.settings.serverAudioStated == 1
                            and currentInputDeviceId == vc.settings.serverInputDeviceId
                            and currentOutputDeviceId
                            == vc.settings.serverOutputDeviceId
-                            and currentInputSampleRate
-                            == vc.settings.serverInputAudioSampleRate
+                            and model_sampling_rate
+                            == self.voiceChanger.get_processing_sampling_rate()
                            and currentInputChunkNum == vc.settings.serverReadChunkSize
                        ):
                            vc.settings.serverInputAudioSampleRate = (