fish-speech/tools/msgpack_api.py
PoTaTo 834b07257c
Agent inference (#650)
* support basic TTS inference

* Agent (#648)

* agent

* rm fastapi

* routes

* dry run: tts

* api_invoke_cahta

* .gradio ignore

* small fix

* Fix llama generate

* add lots

* add agent

* fix agent

* fix agent

* fix route

* fix compile

* Add fixed timbre

* Fix duplicated audio

* Fix

* remove unused

* Improve ui

* okok

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

* Update Agent Webui and doc

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Lengyue <lengyue@lengyue.me>
Co-authored-by: spicysama <a2983352531@outlook.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-11-01 09:09:53 -07:00

96 lines
2.6 KiB
Python

import os
from argparse import ArgumentParser
from pathlib import Path
import httpx
import ormsgpack
from tools.schema import ServeReferenceAudio, ServeTTSRequest
# Bearer token sent with every request; falls back to a placeholder when the
# FISH_API_KEY environment variable is not set.
api_key = os.getenv("FISH_API_KEY", "YOUR_API_KEY")
def audio_request():
    """Send a streaming TTS request to the local server and save the audio.

    Builds a ``ServeTTSRequest`` carrying one reference sample read from
    ``lengyue.wav`` / ``lengyue.lab``, POSTs it msgpack-encoded to
    ``http://127.0.0.1:8080/v1/tts`` and writes the streamed response bytes
    to ``hello.wav``. The bearer token is the module-level ``api_key``.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    # Path.read_bytes / read_text close the files right away instead of
    # leaving the open handles to the garbage collector.
    reference = ServeReferenceAudio(
        audio=Path("lengyue.wav").read_bytes(),
        text=Path("lengyue.lab").read_text(encoding="utf-8"),
    )

    # priority: ref_id > references
    request = ServeTTSRequest(
        text="你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.",
        # reference_id="114514",
        references=[reference],
        streaming=True,
    )

    payload = ormsgpack.packb(request, option=ormsgpack.OPT_SERIALIZE_PYDANTIC)
    headers = {
        "authorization": f"Bearer {api_key}",
        "content-type": "application/msgpack",
    }

    with httpx.Client() as client:
        with client.stream(
            "POST",
            "http://127.0.0.1:8080/v1/tts",
            content=payload,
            headers=headers,
            timeout=None,
        ) as response:
            # Check the status before touching the output file so an error
            # body is never saved as if it were audio.
            response.raise_for_status()
            with open("hello.wav", "wb") as f:
                for chunk in response.iter_bytes():
                    f.write(chunk)
def asr_request(audio_path: Path):
    """Upload an audio file to the fish.audio ASR endpoint and print the result.

    The file content is sent msgpack-encoded to
    ``https://api.fish.audio/v1/asr`` using the module-level ``api_key`` as
    bearer token. The transcribed text, the duration and every segment with
    its start/end time are printed to stdout.

    Args:
        audio_path: Location of the audio file to transcribe.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
    """
    # Read the audio file
    audio_data = Path(audio_path).read_bytes()

    # Prepare the request data
    request_data = {
        "audio": audio_data,
        "language": "en",  # Optional: specify the language
        "ignore_timestamps": False,  # Optional: set to True to ignore precise timestamps
    }

    # Send the request
    with httpx.Client() as client:
        response = client.post(
            "https://api.fish.audio/v1/asr",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/msgpack",
            },
            content=ormsgpack.packb(request_data),
        )

    # Surface HTTP errors (e.g. a rejected API key) directly instead of
    # failing later with a KeyError or JSON decode error.
    response.raise_for_status()

    # Parse the response
    result = response.json()

    print(f"Transcribed text: {result['text']}")
    print(f"Audio duration: {result['duration']} seconds")

    for segment in result["segments"]:
        print(f"Segment: {segment['text']}")
        print(f"Start time: {segment['start']}, End time: {segment['end']}")
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only option is ``--audio_path`` (converted with ``Path``), which
    defaults to ``audio/ref/trump.mp3``.
    """
    cli = ArgumentParser()
    cli.add_argument(
        "--audio_path",
        type=Path,
        default="audio/ref/trump.mp3",
    )
    namespace = cli.parse_args()
    return namespace
# Script entry point: transcribe the file given by --audio_path.
if __name__ == "__main__":
    asr_request(parse_args().audio_path)