Fix timbre problem (#1009)
* [feature]add dataset class * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [dev]combine agent and tts infer * [feature]:update inference * [feature]:update uv.lock * [Merge]:merge upstream/main * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [fix]:remove unused files * [fix]:remove unused files * [fix]:remove unused files * [fix]:fix infer bugs * [docs]:update introduction and optimize front appearance * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [docs]:update README for OpenAudio-S1 * [docs]:update docs * [docs]:Update video * [docs]:fix video * [docs]:fix video * [fix]:fix timbre problem * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
89ea53dedc
commit
9021a57dce
130
.gitignore
vendored
130
.gitignore
vendored
@ -1,33 +1,113 @@
|
||||
# =============================================================================
|
||||
# Fish Speech - .gitignore
|
||||
# =============================================================================
|
||||
|
||||
# Operating System Files
|
||||
# -----------------------
|
||||
.DS_Store
|
||||
.pgx.*
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# IDEs and Editors
|
||||
# ----------------
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Python
|
||||
# ------
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# Virtual Environments
|
||||
# --------------------
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
/fishenv/
|
||||
|
||||
# Project Dependencies
|
||||
# --------------------
|
||||
.pdm-python
|
||||
/fish_speech.egg-info
|
||||
__pycache__
|
||||
/results
|
||||
/data
|
||||
/*.test.sh
|
||||
|
||||
# Data and Model Files
|
||||
# --------------------
|
||||
/data/
|
||||
/results/
|
||||
/checkpoints/
|
||||
/references/
|
||||
/demo-audios/
|
||||
/example/
|
||||
filelists/
|
||||
*.filelist
|
||||
filelists
|
||||
|
||||
# Audio Files
|
||||
# -----------
|
||||
*.wav
|
||||
*.mp3
|
||||
*.flac
|
||||
*.ogg
|
||||
*.m4a
|
||||
|
||||
# Data Files
|
||||
# ----------
|
||||
*.npy
|
||||
*.npz
|
||||
*.pkl
|
||||
*.pickle
|
||||
*.lab
|
||||
/fish_speech/text/cmudict_cache.pickle
|
||||
/checkpoints
|
||||
/.vscode
|
||||
/data_server/target
|
||||
/*.npy
|
||||
/*.wav
|
||||
/*.mp3
|
||||
/*.lab
|
||||
/results
|
||||
/data
|
||||
/.idea
|
||||
|
||||
# Cache and Temporary Files
|
||||
# --------------------------
|
||||
/.cache/
|
||||
/.gradio/
|
||||
/.locale/
|
||||
.pgx.*
|
||||
*log
|
||||
*.log
|
||||
|
||||
# External Tools
|
||||
# --------------
|
||||
ffmpeg.exe
|
||||
ffprobe.exe
|
||||
/faster_whisper/
|
||||
|
||||
# Server Related
|
||||
# --------------
|
||||
/data_server/target/
|
||||
|
||||
# Test Files
|
||||
# ----------
|
||||
/*.test.sh
|
||||
asr-label*
|
||||
/.cache
|
||||
/fishenv
|
||||
/.locale
|
||||
/demo-audios
|
||||
/references
|
||||
/example
|
||||
/faster_whisper
|
||||
/.gradio
|
||||
*log
|
||||
|
@ -339,7 +339,7 @@ def generate_long(
|
||||
temperature: float = 0.8,
|
||||
compile: bool = False,
|
||||
iterative_prompt: bool = True,
|
||||
chunk_length: int = 150,
|
||||
chunk_length: int = 512,
|
||||
prompt_text: Optional[str | list[str]] = None,
|
||||
prompt_tokens: Optional[torch.Tensor | list[torch.Tensor]] = None,
|
||||
):
|
||||
@ -365,6 +365,24 @@ def generate_long(
|
||||
texts = split_text(text, chunk_length) if iterative_prompt else [text]
|
||||
max_length = model.config.max_seq_len
|
||||
|
||||
# if use_prompt:
|
||||
# base_content_sequence.append(
|
||||
# [
|
||||
# TextPart(text=prompt_text[0]),
|
||||
# VQPart(codes=prompt_tokens[0]),
|
||||
# ],
|
||||
# add_end=True,
|
||||
# )
|
||||
|
||||
# for text in texts:
|
||||
# content_sequence = ContentSequence(modality=None)
|
||||
# base_content_sequence.append(
|
||||
# [
|
||||
# TextPart(text=text),
|
||||
# ],
|
||||
# add_end=True,
|
||||
# )
|
||||
|
||||
if use_prompt:
|
||||
for t, c in zip(prompt_text, prompt_tokens):
|
||||
base_content_sequence.append(
|
||||
@ -385,7 +403,7 @@ def generate_long(
|
||||
|
||||
encoded = []
|
||||
for text in texts:
|
||||
content_sequence = ContentSequence(modality=None)
|
||||
content_sequence = ContentSequence(modality="text")
|
||||
content_sequence.append(TextPart(text=text))
|
||||
encoded.append(
|
||||
content_sequence.encode_for_inference(
|
||||
|
2
uv.lock
generated
2
uv.lock
generated
@ -942,6 +942,7 @@ dependencies = [
|
||||
{ name = "cachetools" },
|
||||
{ name = "datasets" },
|
||||
{ name = "descript-audio-codec" },
|
||||
{ name = "descript-audiotools" },
|
||||
{ name = "einops" },
|
||||
{ name = "einx", extra = ["torch"] },
|
||||
{ name = "faster-whisper" },
|
||||
@ -985,6 +986,7 @@ requires-dist = [
|
||||
{ name = "cachetools" },
|
||||
{ name = "datasets", specifier = "==2.18.0" },
|
||||
{ name = "descript-audio-codec" },
|
||||
{ name = "descript-audiotools" },
|
||||
{ name = "einops", specifier = ">=0.7.0" },
|
||||
{ name = "einx", extras = ["torch"], specifier = "==0.2.2" },
|
||||
{ name = "faster-whisper" },
|
||||
|
Loading…
x
Reference in New Issue
Block a user