Fix dockerfile for pyaudio (#623)

* Readmes, deps, api workers
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Fix
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Fix speed loss after compiling
* revert log
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* add dockerfile dep: gcc
* Move READMES in subfolder
* Fix dockerfile
* Fix dockerfile
* restore docker setup
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Leng Yue <lengyue@lengyue.me>
2024-10-18 14:38:18 +08:00 · 2024-10-18 14:38:18 +08:00 · 23fa4d7e38
commit 23fa4d7e38
parent 6f260179ad
4 changed files with 31 additions and 6 deletions
--- a/.github/workflows/build-docker-image.yml
+++ b/.github/workflows/build-docker-image.yml
@@ -5,7 +5,7 @@ on:
    branches:
      - main
    tags:
-      - 'v*'
+      - "v*"

 jobs:
  build:
--- a/dockerfile
+++ b/dockerfile
@@ -18,6 +18,10 @@ ARG DEPENDENCIES="  \
    libsox-dev \
    build-essential \
    cmake \
+    libasound-dev \
+    portaudio19-dev \
+    libportaudio2 \
+    libportaudiocpp0 \
    ffmpeg"

 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--- a/dockerfile.dev
+++ b/dockerfile.dev
@@ -17,6 +17,10 @@ ARG TOOLS="               \
        openssh-server    \
        sudo              \
        protobuf-compiler \
+        libasound-dev     \
+        portaudio19-dev   \
+        libportaudio2     \
+        libportaudiocpp0  \
        cmake"

 RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--- a/tools/msgpack_api.py
+++ b/tools/msgpack_api.py
@@ -1,8 +1,14 @@
+import os
+from argparse import ArgumentParser
+from pathlib import Path
+
 import httpx
 import ormsgpack

 from tools.commons import ServeReferenceAudio, ServeTTSRequest

+api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
+

 def audio_request():
    # priority: ref_id > references
@@ -18,6 +24,8 @@ def audio_request():
        streaming=True,
    )

+    api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
+
    with (
        httpx.Client() as client,
        open("hello.wav", "wb") as f,
@@ -27,7 +35,7 @@ def audio_request():
            "http://127.0.0.1:8080/v1/tts",
            content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
            headers={
-                "authorization": "Bearer YOUR_API_KEY",
+                "authorization": f"Bearer {api_key}",
                "content-type": "application/msgpack",
            },
            timeout=None,
@@ -36,11 +44,11 @@ def audio_request():
                f.write(chunk)


-def asr_request():
+def asr_request(audio_path: Path):

    # Read the audio file
    with open(
-        r"D:\PythonProject\fish-speech\.cache\test_audios\prompts\2648200402409733590.wav",
+        str(audio_path),
        "rb",
    ) as audio_file:
        audio_data = audio_file.read()
@@ -57,7 +65,7 @@ def asr_request():
        response = client.post(
            "https://api.fish.audio/v1/asr",
            headers={
-                "Authorization": "Bearer 8eda4aeed2bc4aec9489b3efad003799",
+                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/msgpack",
            },
            content=ormsgpack.packb(request_data),
@@ -74,5 +82,14 @@ def asr_request():
        print(f"Start time: {segment['start']}, End time: {segment['end']}")


+def parse_args():
+    parser = ArgumentParser()
+    parser.add_argument("--audio_path", type=Path, default="audio/ref/trump.mp3")
+
+    return parser.parse_args()
+
+
 if __name__ == "__main__":
-    asr_request()
+    args = parse_args()
+
+    asr_request(args.audio_path)