WIP: local audio

2023-02-18 09:30:50 +09:00 · 2023-02-18 09:30:50 +09:00 · fc5c4aa2fb
commit fc5c4aa2fb
parent b97dc18654
3 changed files with 185 additions and 122 deletions
--- a/client/native/cli_client.py
+++ b/client/native/cli_client.py
@ -1,122 +0,0 @@
 import argparse
 import pyaudio
 import wave
 import struct
 import socketio
 import ssl
 from datetime import datetime
 import time
 # TLS 1.2 context with certificate verification switched off (CERT_NONE).
 # NOTE(review): `context` is not referenced anywhere else in the visible
 # code (the Socket.IO client is configured with ssl_verify=False instead) —
 # confirm whether it is still needed.
 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
 context.verify_mode = ssl.CERT_NONE
 def setupArgParser():
    """Build the command-line parser for the native CLI client.

    Options:
        -p  port (int, default 18888)
        -d  device index (int, no default)
        -s  sid (str, default "")

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    # Table of (flag, add_argument keyword arguments), registered in order.
    option_specs = (
        ("-p", {"type": int, "default": 18888, "help": "port"}),
        ("-d", {"type": int, "help": "device index"}),
        ("-s", {"type": str, "default": "", "help": "sid"}),
    )
    arg_parser = argparse.ArgumentParser()
    for flag, spec in option_specs:
        arg_parser.add_argument(flag, **spec)
    return arg_parser
 class MockStream:
    """
    Mock that lets audio-stream input/output be swapped for WAV-file
    input/output without changing the calling code.

    It exposes the stream-style methods used by the client (``read``,
    ``write``, ``stop_stream``, ``close``) on top of the ``wave`` module.

    Attributes:
        sampling_rate: frame rate written to the output WAV header.
        start_count: number of leading all-zero buffers still to be returned.
        end_count: number of trailing all-zero buffers still allowed after
            the input file is exhausted.
        end_flag: set to True once more than the allowed trailing buffers
            have been requested.
        fr / fw: the open ``wave`` reader / writer, or None.
    """

    def __init__(self, sampling_rate):
        self.sampling_rate = sampling_rate
        self.start_count = 2
        self.end_count = 2
        self.end_flag = False
        self.fr = None
        self.fw = None

    def open_inputfile(self, input_filename):
        """Open ``input_filename`` as the WAV source used by ``read``."""
        self.fr = wave.open(input_filename, 'rb')

    def open_outputfile(self, output_filename):
        """Open ``output_filename`` for writing as mono, 2-byte-sample WAV."""
        self.fw = wave.open(output_filename, 'wb')
        self.fw.setnchannels(1)
        self.fw.setsampwidth(2)
        self.fw.setframerate(self.sampling_rate)

    def read(self, length, exception_on_overflow=False):
        """Return up to ``length`` frames from the input file as bytes.

        The first two calls return a dummy all-zero buffer of
        ``length * 2`` bytes. Once the file has no more data, all-zero
        buffers are returned as well; from the third such call on,
        ``end_flag`` is True.

        ``exception_on_overflow`` is accepted for call compatibility with
        the audio stream's ``read`` and is ignored.
        """
        if self.start_count > 0:
            wav = bytes(length * 2)
            self.start_count -= 1  # the first two reads send dummy empty data
        else:
            wav = self.fr.readframes(length)
        if len(wav) <= 0:  # after the data runs out, send dummy empty data for the last reads
            wav = bytes(length * 2)
            self.end_count -= 1
            if self.end_count < 0:
                # The original assigned to an undefined global
                # (Hyperparameters.VC_END_FLAG) and raised NameError here;
                # the end-of-input signal is kept on the instance instead.
                self.end_flag = True
        return wav

    def write(self, wav):
        """Append the raw frame bytes ``wav`` to the output file."""
        self.fw.writeframes(wav)

    def stop_stream(self):
        """No-op; present so the mock can stand in for an audio stream."""
        pass

    def close(self):
        """Close whichever of the input/output files are open."""
        if self.fr is not None:
            self.fr.close()
            self.fr = None
        if self.fw is not None:
            self.fw.close()
            self.fw = None
 # Module-level file sink: every audio payload received in
 # MyCustomNamespace.on_response is written to "test.wav" (24000 Hz).
 # Note that this opens/creates the file as soon as the module is executed.
 mock_stream_out = MockStream(24000)
 mock_stream_out.open_outputfile("test.wav")
 class MyCustomNamespace(socketio.ClientNamespace):
    """Socket.IO client namespace that logs events and records responses.

    Connect/disconnect/response events are printed with a timestamp, and
    the audio payload of each response is written to the module-level
    ``mock_stream_out``.
    """

    def on_connect(self):
        """Log that the connection was established."""
        print('[{}] connect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

    def on_disconnect(self):
        """Log that the connection was closed."""
        print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))

    def on_response(self, msg):
        """Log a 'response' message and write its audio payload to file.

        Args:
            msg: indexable message whose element 1 is the raw audio bytes
                (element 0 is a timestamp, which is not used here).
        """
        print('[{}] response : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), msg))
        # The payload is written as-is. The original also ran an unused
        # struct.unpack over it, which raised struct.error for odd-length
        # payloads before anything was written.
        data = msg[1]
        mock_stream_out.write(data)
 def my_background_task(sio):
    """Placeholder background task: broadcast a fixed message once a second.

    Loops forever, emitting the string "aaa" as a 'broadcast_message'
    event on the "/test" namespace and then sleeping one second through
    the client's own ``sleep``.

    Args:
        sio: Socket.IO client object providing ``emit`` and ``sleep``.
    """
    payload = "aaa"
    target_namespace = "/test"
    while True:
        sio.emit('broadcast_message', payload, namespace=target_namespace)
        sio.sleep(1)
 if __name__ == '__main__':
    # Capture audio from the selected input device and stream it to the
    # server over Socket.IO; responses are handled by MyCustomNamespace.
    parser = setupArgParser()
    args, unknown = parser.parse_known_args()
    port = args.p
    deviceIndex = args.d
    sid = args.s
    audio = pyaudio.PyAudio()
    audio_input_stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=24000,
        frames_per_buffer=4096,
        input_device_index=deviceIndex,
        input=True)
    sio = socketio.Client(ssl_verify=False)
    sio.register_namespace(MyCustomNamespace("/test"))
    # Honor the -p option (default 18888) instead of a hard-coded port.
    sio.connect("https://192.168.0.3:{}".format(port))
    try:
        while True:
            in_wav = audio_input_stream.read(4096, exception_on_overflow=False)
            # in_wav is already packed int16 PCM bytes, so send it unchanged.
            # The original re-packed each *byte* as its own 16-bit sample
            # (struct.pack('<Nh', *in_wav)), doubling and corrupting the audio.
            sio.emit('request_message', [1000, in_wav], namespace="/test")
            # sio.start_background_task(my_background_task, sio)
    except KeyboardInterrupt:
        pass
    finally:
        # Disconnect first so no response handler writes during teardown.
        sio.disconnect()
        audio_input_stream.stop_stream()
        audio_input_stream.close()
        audio.terminate()
        mock_stream_out.close()
--- a/client/python/audio_device_list.py
+++ b/client/python/audio_device_list.py
@ -0,0 +1,32 @@
 import pyaudio
 if __name__ == '__main__':
    # List the audio input and output devices known to PyAudio and print them
    # as {"audio_input_devices": [...], "audio_output_devices": [...]}.
    audio = pyaudio.PyAudio()
    try:
        audio_input_devices = []
        audio_output_devices = []
        audio_devices = {}
        # Host API names, indexed by the 'hostApi' field of each device.
        host_apis = [
            audio.get_host_api_info_by_index(api_index)['name']
            for api_index in range(audio.get_host_api_count())
        ]
        for x in range(audio.get_device_count()):
            device = audio.get_device_info_by_index(x)
            try:
                # Re-interpret names that were decoded as Shift-JIS but are
                # actually UTF-8; keep the name as reported if that fails.
                deviceName = device['name'].encode('shift-jis').decode('utf-8')
            except (UnicodeDecodeError, UnicodeEncodeError):
                deviceName = device['name']
            deviceIndex = device['index']
            hostAPI = host_apis[device['hostApi']]
            # A device with both input and output channels is listed twice.
            if device['maxInputChannels'] > 0:
                audio_input_devices.append({"kind": "audioinput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
            if device['maxOutputChannels'] > 0:
                audio_output_devices.append({"kind": "audiooutput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
        audio_devices["audio_input_devices"] = audio_input_devices
        audio_devices["audio_output_devices"] = audio_output_devices
        # The original passed this through jsonable_encoder, which is never
        # imported in this file (NameError). The structure holds only dicts,
        # lists and the plain values reported by PyAudio, so it is printed
        # directly.
        print(audio_devices)
    finally:
        # Release PortAudio resources.
        audio.terminate()
--- a/client/python/vc_client.py
+++ b/client/python/vc_client.py
@ -0,0 +1,153 @@
 import argparse
 import pyaudio
 import wave
 import struct
 import socketio
 import ssl
 from datetime import datetime
 import time
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 import signal
 import sys
 import numpy as np
 # Audio buffer size in frames: passed as frames_per_buffer when the streams
 # are opened and used as the read size in the capture loop.
 BUFFER_SIZE = 2048
 def setupArgParser():
    """Build the command-line parser for the voice-changer client.

    Options:
        --url     server URL (default "http://localhost:18888")
        --input   input device index (int, required)
        --output  output device index (int, default -1; the script only
                  opens an output stream when the value is >= 0)
        --to      sid (str, default "")

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--url", type=str, default="http://localhost:18888", help="url")
    parser.add_argument("--input", type=int, required=True, help="input device index")
    # Help text fixed: it was a copy-paste of the --input description.
    parser.add_argument("--output", type=int, default=-1, help="output device index")
    parser.add_argument("--to", type=str, default="", help="sid")
    return parser
 class MockStream:
    """File-backed stand-in for an audio stream.

    Offers the stream-style methods used by the client (``read``,
    ``write``, ``stop_stream``, ``close``) but reads from / writes to WAV
    files through the ``wave`` module.

    Attributes:
        sampling_rate: frame rate written to the output WAV header.
        start_count: number of leading all-zero buffers still to be returned.
        end_count: number of trailing all-zero buffers still allowed after
            the input file is exhausted.
        end_flag: set to True once more than the allowed trailing buffers
            have been requested.
        fr / fw: the open ``wave`` reader / writer, or None.
    """

    def __init__(self, sampling_rate):
        self.sampling_rate = sampling_rate
        self.start_count = 2
        self.end_count = 2
        self.end_flag = False
        self.fr = None
        self.fw = None

    def open_inputfile(self, input_filename):
        """Open ``input_filename`` as the WAV source used by ``read``."""
        self.fr = wave.open(input_filename, 'rb')

    def open_outputfile(self, output_filename):
        """Open ``output_filename`` for writing as mono, 2-byte-sample WAV."""
        self.fw = wave.open(output_filename, 'wb')
        self.fw.setnchannels(1)
        self.fw.setsampwidth(2)
        self.fw.setframerate(self.sampling_rate)

    def read(self, length, exception_on_overflow=False):
        """Return up to ``length`` frames from the input file as bytes.

        The first two calls return an all-zero buffer of ``length * 2``
        bytes. When the file has no more data, all-zero buffers are
        returned as well; from the third such call on, ``end_flag`` is True.

        ``exception_on_overflow`` is accepted for call compatibility with
        the audio stream's ``read`` and is ignored.
        """
        if self.start_count > 0:
            # Lead-in: dummy silent buffer.
            wav = bytes(length * 2)
            self.start_count -= 1
        else:
            wav = self.fr.readframes(length)
        if len(wav) <= 0:
            # Input exhausted: keep returning silent buffers.
            wav = bytes(length * 2)
            self.end_count -= 1
            if self.end_count < 0:
                # The original assigned to an undefined global
                # (Hyperparameters.VC_END_FLAG) and raised NameError here;
                # the end-of-input signal is kept on the instance instead.
                self.end_flag = True
        return wav

    def write(self, wav):
        """Append the raw frame bytes ``wav`` to the output file."""
        self.fw.writeframes(wav)

    def stop_stream(self):
        """No-op; present so the mock can stand in for an audio stream."""
        pass

    def close(self):
        """Close whichever of the input/output files are open."""
        if self.fr is not None:
            self.fr.close()
            self.fr = None
        if self.fw is not None:
            self.fw.close()
            self.fw = None
 class MyCustomNamespace(socketio.ClientNamespace):
    """Socket.IO client namespace that plays and/or records responses.

    Args:
        namespace: namespace path passed to ``socketio.ClientNamespace``.
        audio_output_stream: object with ``write(bytes)`` that receives each
            response payload, or None to skip it.
        file_output_stream: object with ``write(bytes)`` that receives each
            response payload, or None to skip it.
    """

    def __init__(self, namespace: str, audio_output_stream, file_output_stream):
        super().__init__(namespace)
        self.audio_output_stream = audio_output_stream
        self.file_output_stream = file_output_stream

    def on_connect(self):
        """Report that the connection was established."""
        print('connected')

    def on_disconnect(self):
        """Report that the connection was closed."""
        print('disconnected')

    def on_response(self, msg):
        """Handle a 'response' message: print latency, forward the audio.

        Args:
            msg: indexable message; element 0 is a timestamp in
                milliseconds on the ``time.time() * 1000`` scale and
                element 1 is the raw audio bytes.
        """
        timestamp = msg[0]
        # Elapsed time between the timestamp carried in the message and now.
        responseTime = time.time() * 1000 - timestamp
        data = msg[1]
        print(f"RT:{responseTime}msec")
        # The payload is forwarded as-is. The original also ran an unused
        # struct.unpack over it, which raised struct.error for odd-length
        # payloads before anything was written.
        if self.file_output_stream is not None:
            self.file_output_stream.write(data)
        if self.audio_output_stream is not None:
            self.audio_output_stream.write(data)
 if __name__ == '__main__':
    # Capture audio from the input device, stream it to the server, and let
    # MyCustomNamespace play/record whatever the server sends back.
    parser = setupArgParser()
    args, unknown = parser.parse_known_args()
    url = args.url
    inputDevice = args.input
    outputDevice = args.output
    to = args.to
    audio = pyaudio.PyAudio()
    audio_input_stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=24000,
        frames_per_buffer=BUFFER_SIZE,
        input_device_index=inputDevice,
        input=True)
    if outputDevice >= 0:
        # Select the playback device with output_device_index; the original
        # passed input_device_index here, so --output had no effect on
        # which device was used for playback.
        audio_output_stream = audio.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=24000,
            frames_per_buffer=BUFFER_SIZE,
            output_device_index=outputDevice,
            output=True)
    else:
        # A negative --output value (the default) disables playback.
        audio_output_stream = None
    # mock_stream_out = MockStream(24000)
    # mock_stream_out.open_outputfile("test.wav")
    mock_stream_out = None
    # mock_stream_in = MockStream(24000)
    # mock_stream_in.open_outputfile("test_in.wav")
    my_namespace = MyCustomNamespace("/test", audio_output_stream, mock_stream_out)
    sio = socketio.Client(ssl_verify=False)
    sio.register_namespace(my_namespace)
    sio.connect(url)
    try:
        while True:
            in_wav = audio_input_stream.read(BUFFER_SIZE, exception_on_overflow=False)
            # Send the send time in milliseconds along with the raw audio.
            sio.emit('request_message', [time.time() * 1000, in_wav], namespace="/test")
    except KeyboardInterrupt:
        pass
    finally:
        # Disconnect first so no response handler writes to a closed stream.
        sio.disconnect()
        # The output stream and the file mock are optional; the original
        # cleanup dereferenced them unconditionally and raised
        # AttributeError on None.
        for stream in (audio_input_stream, audio_output_stream):
            if stream is not None:
                stream.stop_stream()
                stream.close()
        audio.terminate()
        if mock_stream_out is not None:
            mock_stream_out.close()