Fix Dockerfile for PyAudio (#623)

* Readmes, deps, api workers

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix speed loss after compiling

* revert log

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add dockerfile dep: gcc

* Move READMEs into a subfolder

* Fix dockerfile

* Fix dockerfile

* restore docker setup

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Leng Yue <lengyue@lengyue.me>
This commit is contained in:
spicysama 2024-10-18 14:38:18 +08:00 committed by GitHub
parent 6f260179ad
commit 23fa4d7e38
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 6 deletions

View File

@ -5,7 +5,7 @@ on:
branches:
- main
tags:
- 'v*'
- "v*"
jobs:
build:

View File

@ -18,6 +18,10 @@ ARG DEPENDENCIES=" \
libsox-dev \
build-essential \
cmake \
libasound-dev \
portaudio19-dev \
libportaudio2 \
libportaudiocpp0 \
ffmpeg"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \

View File

@ -17,6 +17,10 @@ ARG TOOLS=" \
openssh-server \
sudo \
protobuf-compiler \
libasound-dev \
portaudio19-dev \
libportaudio2 \
libportaudiocpp0 \
cmake"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \

View File

@ -1,8 +1,14 @@
import os
from argparse import ArgumentParser
from pathlib import Path
import httpx
import ormsgpack
from tools.commons import ServeReferenceAudio, ServeTTSRequest
api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
def audio_request():
# priority: ref_id > references
@ -18,6 +24,8 @@ def audio_request():
streaming=True,
)
api_key = os.environ.get("FISH_API_KEY", "YOUR_API_KEY")
with (
httpx.Client() as client,
open("hello.wav", "wb") as f,
@ -27,7 +35,7 @@ def audio_request():
"http://127.0.0.1:8080/v1/tts",
content=ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC),
headers={
"authorization": "Bearer YOUR_API_KEY",
"authorization": f"Bearer {api_key}",
"content-type": "application/msgpack",
},
timeout=None,
@ -36,11 +44,11 @@ def audio_request():
f.write(chunk)
def asr_request():
def asr_request(audio_path: Path):
# Read the audio file
with open(
r"D:\PythonProject\fish-speech\.cache\test_audios\prompts\2648200402409733590.wav",
str(audio_path),
"rb",
) as audio_file:
audio_data = audio_file.read()
@ -57,7 +65,7 @@ def asr_request():
response = client.post(
"https://api.fish.audio/v1/asr",
headers={
"Authorization": "Bearer 8eda4aeed2bc4aec9489b3efad003799",
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/msgpack",
},
content=ormsgpack.packb(request_data),
@ -74,5 +82,14 @@ def asr_request():
print(f"Start time: {segment['start']}, End time: {segment['end']}")
def parse_args():
parser = ArgumentParser()
parser.add_argument("--audio_path", type=Path, default="audio/ref/trump.mp3")
return parser.parse_args()
if __name__ == "__main__":
asr_request()
args = parse_args()
asr_request(args.audio_path)