Compare commits

...

99 Commits

Author SHA1 Message Date
github-actions[bot]
d9d890975a
@mallocfree009 has signed the CLA from Pull Request #1504 2025-05-17 10:32:01 +00:00
wok
8043fce1ce Updated the README files to add the new features and bug fixes for v.2.0.78-beta. 2025-05-16 01:33:01 +09:00
wok
3677f6e268 Added new features for the RTX 5090 and updated the README files for each language. 2025-05-03 04:06:57 +09:00
wok
0318700981 update 2025-02-16 01:26:08 +09:00
wok
66cbbeed1a update 2024-11-15 04:10:35 +09:00
wok
b262d28c10 update 2024-11-13 02:01:48 +09:00
wok
38a9164e5c update 2024-11-08 23:26:14 +09:00
wok
e472934bb4 update 2024-11-08 12:34:18 +09:00
wok
6129780229 fix typo 2024-10-08 20:10:41 +09:00
wok
e821960c59 Merge branch 'master' of github.com:w-okada/voice-changer 2024-10-08 14:54:33 +09:00
wok
fa77d69bed update 2024-10-08 14:54:30 +09:00
w-okada
7ab6a63a67
Merge pull request #1347 from QweRezOn/master
Add Russian Readme File
2024-09-15 08:01:23 +09:00
github-actions[bot]
763a6a0763
@QweRezOn has signed the CLA from Pull Request #1347 2024-09-13 17:04:56 +00:00
QweRez
dfbc95bd61
Update README_ru.md 2024-09-13 20:03:50 +03:00
QweRez
33387bd351
Update README.md 2024-09-13 20:02:44 +03:00
QweRez
b02c4f48c3
Create README_dev_ru.md 2024-09-13 20:02:19 +03:00
QweRez
006b9d575c
Update README_dev_en.md 2024-09-13 19:59:08 +03:00
QweRez
4ebcd670e7
Update README_en.md 2024-09-13 19:57:30 +03:00
QweRez
0b5daf162b
Create README_ru.md
add ru
2024-09-13 19:56:56 +03:00
wok
11b5deecb8 update 2024-08-27 09:29:07 +09:00
wok
fd849db239 update 2024-08-21 10:29:31 +09:00
wok
6d9e735883 update 2024-08-18 23:13:17 +09:00
wok
b5d3e5f066 update 2024-08-07 19:51:20 +09:00
wok
a75f87e433 update 2024-08-06 23:47:11 +09:00
wok
285615d67c update 2024-08-01 11:01:20 +09:00
wok
eef8395205 update 2024-07-27 18:14:50 +09:00
wok
465ab1ff23 update 2024-07-21 02:29:03 +09:00
wok
1f51581ae3 update 2024-07-20 05:37:14 +09:00
wok
87b547e724 update 2024-07-20 02:32:21 +09:00
wok
3b83221cec update 2024-07-20 02:30:06 +09:00
wok
f79855f8b2 update 2024-07-10 23:54:40 +09:00
wok
1952c76533 update 2024-06-30 17:07:52 +09:00
wok
92f0b1aaf5 update 2024-06-30 16:17:10 +09:00
wok
ebea9d2692 update 2024-06-29 07:07:58 +09:00
wok
a91ef76b64 update 2024-06-29 07:06:55 +09:00
wok
0cd7f69931 update 2024-06-29 07:05:57 +09:00
wok
b350812083 update 2024-06-29 07:05:30 +09:00
wok
80ccc0b1d7 update 2024-06-29 07:03:40 +09:00
wok
cc60c7adfb update 2024-06-29 07:03:11 +09:00
wok
d61f6b8e99 update 2024-06-29 07:02:35 +09:00
wok
7adc1f1cf5 update 2024-06-29 07:02:04 +09:00
wok
7e177ee84c update 2024-06-29 07:01:26 +09:00
wok
51046638d6 update 2024-06-29 07:00:57 +09:00
wok
2522d44f13 update 2024-06-29 07:00:30 +09:00
wok
018cab3ded update 2024-06-29 07:00:01 +09:00
wok
a1714878a7 update 2024-06-29 06:59:34 +09:00
wok
23b69ba121 update 2024-06-29 06:56:19 +09:00
wok
9f6903e4e9 update 2024-06-29 06:48:05 +09:00
wok
4c59ab5431 update 2024-06-24 03:49:37 +09:00
wok
33d74e8e73 Merge branch 'master' of github.com:w-okada/voice-changer 2024-06-24 03:47:52 +09:00
wok
5f1ca7af51 update 2024-06-24 03:47:25 +09:00
github-actions[bot]
56a5094881
@Nick088Official has signed the CLA from Pull Request #1241 2024-06-15 16:27:47 +00:00
wok
cde810a9d0 add cuda question 2024-06-12 05:01:52 +09:00
wok
73bb47f745 update 2024-06-10 20:09:30 +09:00
wok
349d268189 update 2024-06-05 18:39:35 +09:00
wok
3a8cbb07de update 2024-06-03 20:57:28 +09:00
github-actions[bot]
800285f2cd
@vitaliylag has signed the CLA from Pull Request #1224 2024-06-01 03:14:09 +00:00
github-actions[bot]
d3add2561d
@mrs1669 has signed the CLA from Pull Request #1171 2024-04-04 10:53:26 +00:00
w-okada
621ad25a8a
Merge pull request #1153 from deiteris/harden-security
Harden web server security
2024-04-02 16:04:02 +09:00
Yury
8dd8d7127d Refactor and add origin check to SIO 2024-03-18 22:52:46 +02:00
Yury
ce9b599501 Improve allowed origins input and use set 2024-03-17 16:26:55 +02:00
github-actions[bot]
28fc541891
@deiteris has signed the CLA from Pull Request #1153 2024-03-16 22:24:48 +00:00
Yury
cf2b693334 Harden web server security 2024-03-17 00:11:16 +02:00
w-okada
11672e9653 Merge branch 'master' of github.com:w-okada/voice-changer 2024-03-05 23:47:48 +09:00
w-okada
a42051bb40 update 2024-03-05 23:45:46 +09:00
w-okada
aa620e1cf0
Merge pull request #1141 from richardhbtz/patch-1
Misspelling "trouble"
2024-03-04 10:35:17 +09:00
w-okada
22bd9e3d7c Merge branch 'master' of github.com:w-okada/voice-changer 2024-03-04 10:33:56 +09:00
w-okada
6e774a1458 v.1.5.3.18 2024-03-04 10:33:16 +09:00
Richard Habitzreuter
0e2078a268
Misspelling "trouble" 2024-02-29 16:58:06 -03:00
github-actions[bot]
51233e0cbe
@brandonkovacs has signed the CLA from Pull Request #1137 2024-02-29 02:05:13 +00:00
w-okada
2ac5ec9feb update 2024-02-28 23:23:22 +09:00
w-okada
bc6e8a9c08 update 2024-02-28 23:08:49 +09:00
w-okada
39e0d0cfd6 update 2024-02-21 08:54:39 +09:00
w-okada
a1a3def686 Merge branch 'master' into v.1.5.3 2024-02-21 08:25:58 +09:00
w-okada
67804cad3c
Merge pull request #1092 from tg-develop/master
Bugfix FCPE
2024-02-21 08:25:25 +09:00
w-okada
ce8f843746 Merge branch 'master' into v.1.5.3 2024-02-21 08:22:36 +09:00
Tobias
0b954131b4 Bugfix FCPE 2024-01-21 14:02:35 +01:00
w-okada
927bba6467
Merge pull request #1077 from icecoins/master
Implement FCPE in RVC
2024-01-18 06:32:12 +09:00
icecoins
8f230e5c45
Update FcpePitchExtractor.py 2024-01-12 02:28:17 +08:00
github-actions[bot]
41238258ba
@icecoins has signed the CLA from Pull Request #1077 2024-01-11 14:05:09 +00:00
icecoins
1cf9be54c7
undo modification 2024-01-11 22:02:36 +08:00
icecoins
303a15fef3
implement fcpe 2024-01-11 21:10:44 +08:00
icecoins
04f93b193f
implement fcpe 2024-01-11 21:09:57 +08:00
icecoins
fbf69cda19
implement fcpe 2024-01-11 21:08:47 +08:00
icecoins
8e42927880
implement fcpe 2024-01-11 21:07:38 +08:00
icecoins
4e254e42f7
implement fcpe 2024-01-11 21:07:03 +08:00
icecoins
cc72b93198
implement fcpe 2024-01-11 21:05:57 +08:00
icecoins
cc4783b85c
implement fcpe 2024-01-11 21:04:54 +08:00
icecoins
5fd31999e7
implement fcpe 2024-01-11 21:04:15 +08:00
icecoins
9f9e7016e2
Update GUI.json 2024-01-11 21:03:41 +08:00
icecoins
b96ba86be3
Update README.md 2024-01-11 21:00:50 +08:00
icecoins
98ee26e353
Update README.md 2024-01-11 20:58:49 +08:00
icecoins
e8244d61b7
Update README.md 2024-01-11 20:57:50 +08:00
github-actions[bot]
87d2382828
@sonphantrung has signed the CLA from Pull Request #1063 2024-01-04 08:20:51 +00:00
github-actions[bot]
03caf942b2
@Poleyn has signed the CLA from Pull Request #1057 2024-01-01 17:42:14 +00:00
w-okada
b215f3ba84 Modification:
- Timer update
  - Diffusion SVC Performance monitor
2023-12-21 04:11:25 +09:00
w-okada
0f0225cfcd update 2023-12-03 03:31:27 +09:00
w-okada
afb13bf976 bugfix: macos model_stati_dir 2023-12-03 02:50:51 +09:00
w-okada
06b8cf78d1 bugfix:
- clear setting
improve:
  - file sanitizer
change:
  - default input chunk size: 192
    - decided by this chart (https://rentry.co/VoiceChangerGuide#gpu-chart-for-known-working-chunkextra)
2023-12-03 02:02:28 +09:00
77 changed files with 6443 additions and 2451 deletions


@ -1,4 +1,4 @@
name: Issue or Bug Report
name: Issue or Bug Report for v.1.x.x.x
description: Please provide as much detail as possible to convey the history of your problem.
title: "[ISSUE]: "
body:


@ -0,0 +1,82 @@
name: Issue or Bug Report for v.2.x.x
description: Please provide as much detail as possible to convey the history of your problem.
title: "[ISSUE for v2]: "
body:
  - type: markdown
    attributes:
      value: Please read our [FAQ](https://github.com/w-okada/voice-changer/blob/master/.github/FAQ.md) before making a bug report!
  - type: input
    id: vc-client-version
    attributes:
      label: Voice Changer Version
      description: Downloaded File Name (.zip)
      placeholder: vcclient_win_std_x.y.x.zip, vcclient_win_cuda_torch_cuda_x.y.x.zip, or similar
    validations:
      required: true
  - type: input
    id: OS
    attributes:
      label: Operating System
      description: e.g. Windows 10, Ubuntu 20.04, macOS Ventura, macOS Monterey, etc...
      placeholder: Windows 10
    validations:
      required: true
  - type: input
    id: GPU
    attributes:
      label: GPU
      description: If you have no GPU, please input "none".
    validations:
      required: true
  - type: input
    id: CUDA
    attributes:
      label: CUDA Version
      description: If you have an Nvidia GPU, please input your CUDA version. Otherwise, please input "none".
    validations:
      required: true
  - type: checkboxes
    id: checks
    attributes:
      label: Read carefully and check the options
      options:
        - label: If you use the win_cuda_torch_cuda edition, have you set up CUDA? [see here](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements)
        - label: If you use the win_cuda edition, have you set up CUDA and cuDNN? [see here](https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements)
        - label: If you use the mac edition, the client is not launched automatically. Have you opened the application with Chrome?
        - label: I've tried to change the Chunk Size
        - label: I've tried to set the Index to zero
        - label: I've read the [tutorial](https://github.com/w-okada/voice-changer/blob/master/tutorials/tutorial_rvc_en_latest.md)
        - label: I've tried to extract to another folder (or re-extract) the .zip file
  - type: dropdown
    id: sample-model-work
    attributes:
      label: Does the pre-installed model work?
      options:
        - "No"
        - "YES"
      default: 0
  - type: input
    id: vc-type
    attributes:
      label: Model Type
      description: MMVC, so-vits-svc, RVC, DDSP-SVC
      placeholder: RVC
    validations:
      required: true
  - type: textarea
    id: issue
    attributes:
      label: Issue Description
      description: Please provide as much reproducible information and logs as possible
  - type: textarea
    id: capture
    attributes:
      label: Application Screenshot
      description: Please provide a screenshot of your application so we can see your settings (you can paste or drag-n-drop)
  - type: textarea
    id: logs-on-terminal
    attributes:
      label: Logs on console
      description: Copy and paste the log from your console here
    validations:
      required: true

README.md

@ -1,154 +1,110 @@
## VC Client
[Japanese](/README.md) /
[English](/docs_i18n/README_en.md) /
[Korean](/docs_i18n/README_ko.md)/
[Chinese](/docs_i18n/README_zh.md)/
[German](/docs_i18n/README_de.md)/
[Arabic](/docs_i18n/README_ar.md)/
[Greek](/docs_i18n/README_el.md)/
[Spanish](/docs_i18n/README_es.md)/
[French](/docs_i18n/README_fr.md)/
[Italian](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malay](/docs_i18n/README_ms.md)/
[Russian](/docs_i18n/README_ru.md)
*Languages other than Japanese are machine translated.
[English](/README_en.md) [Korean](/README_ko.md)
## VCClient
VCClient is software that performs real-time voice conversion using AI.
## What's New!
- v.1.5.3.17a
- Bug Fixes:
- Server mode error
- RVC Model merger
- Misc
- Add RVC Sample Chihaya-Jinja (https://chihaya369.booth.pm/items/4701666)
* v.2.0.78-beta
* bugfix: worked around the RVC model upload error
* Can now be launched alongside ver.1.x.
* Increased the number of selectable chunk sizes.
- v.1.5.3.17
- New Features:
- Added similarity graph for Beatrice speaker selection
- Bug Fixes:
- Fixed crossfade issue with Beatrice speaker
* v.2.0.77-beta (only for RTX 5090, experimental)
* Updated related modules for RTX 5090 support (untested: the developer does not own an RTX 5090)
* v.2.0.76-beta
* new feature:
* Beatrice: implemented speaker merging
* Beatrice: auto pitch shift
* bugfix:
* Fixed an issue when selecting a device in server mode
* v.2.0.73-beta
* new feature:
* Download of edited beatrice models
* bugfix:
* Fixed a bug where beatrice v2 pitch and formant were not applied
* Fixed a bug where models using the Applio embedder could not be converted to ONNX
- v.1.5.3.16a
- Bug fix:
- Lazy load Beatrice.
## Download and related links
- v.1.5.3.16 (Only for Windows, CPU dependent)
- New Feature:
- Beatrice is supported(experimental)
The Windows and M1 Mac versions can be downloaded from the hugging face repository.
- v.1.5.3.15
- Improve:
- new rmvpe checkpoint for rvc (torch, onnx)
- Mac: upgrade torch version 2.1.0
* [VCClient repository](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 repository](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 For Linux, please clone the repository to use it.
### Related links
* [Beatrice V2 training code repository](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 training code, Colab version](https://github.com/w-okada/beatrice-trainer-colab)
# What is VC Client
### Related software
1. Client software for performing real-time voice conversion using various voice conversion (VC) AI models. The supported voice conversion AIs are listed below.
* [Real-time voice changer VCClient](https://github.com/w-okada/voice-changer)
* [Text-to-speech software TTSClient](https://github.com/w-okada/ttsclient)
* [Real-time speech recognition software ASRClient](https://github.com/w-okada/asrclient)
- Supported voice conversion AI (supported VC)
- [MMVC](https://github.com/isletennos/MMVC_Trainer)
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)) * Only for Windows, CPU dependent
1. This software can also be used over a network; when used alongside high-load applications such as games, the voice conversion workload can be offloaded to an external machine.
## Features of VC Client
## Supports a variety of AI models
| AI model | v.2 | v.1 | License |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | See the repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [Custom](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [Custom](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | See the repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | See the repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | See the repository. |
## Supports both standalone and over-the-network configurations
Voice conversion completed entirely on the local PC and voice conversion over a network are both supported.
Using it over a network lets you offload the voice conversion workload to an external machine when running it alongside high-load applications such as games.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
3. Multiple platforms are supported.
## Multi-platform support
- Windows, Mac(M1), Linux, Google Colab (MMVC only)
Windows, Mac(M1), Linux, Google Colab
# Usage
*1 For Linux, please clone the repository to use it.
It can be used in two main ways, in order of difficulty:
## Provides a REST API
- Using the pre-built binaries
- Using it after setting up an environment such as Docker or Anaconda
You can write clients in any programming language.
Those less familiar with this software or MMVC may want to start from the top and get used to it gradually.
It can also be operated with HTTP clients built into the OS, such as curl.
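As an illustration only, a client session could look like the sketch below. The `/api/hello` route and the response handling are placeholders, not endpoints confirmed by this document; the port 18888 matches the server launch example in the developer README further down.
```
// Hypothetical TypeScript client sketch: query a locally running VCClient
// server over HTTPS. The route is an illustrative assumption, not a
// documented API; port 18888 matches the dev-README server example.
async function ping(): Promise<void> {
    const res = await fetch("https://localhost:18888/api/hello");
    console.log(res.status, await res.text()); // inspect whatever the server returns
}
ping();
```
The same request can be issued with `curl` or any other HTTP client; nothing about the interface is tied to a particular language.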
## (1) Using the pre-built binaries
## Troubleshooting
- You can download and run the executable binaries.
[Communication](tutorials/trouble_shoot_communication_ja.md)
- See [here](tutorials/tutorial_rvc_ja_latest.md) for the tutorial. ([Network troubleshooting](https://github.com/w-okada/voice-changer/blob/master/tutorials/trouble_shoot_communication_ja.md))
- You can now easily try it out on [Google Colaboratory](https://github.com/w-okada/voice-changer/blob/master/Realtime_Voice_Changer_on_Colab.ipynb). Launch it from the Open in Colab button at the top left.
<img src="https://github.com/w-okada/voice-changer/assets/48346627/3f092e2d-6834-42f6-bbfd-7d389111604e" width="400" height="150">
- Windows and Mac versions are available.
- If you use Windows with an Nvidia GPU, download ONNX(cpu,cuda), PyTorch(cpu,cuda).
- If you use Windows with an AMD/Intel GPU, download ONNX(cpu,DirectML), PyTorch(cpu,cuda). AMD/Intel GPUs are only enabled when using onnx models.
- For any GPU, support is only enabled when PyTorch and Onnxruntime support it.
- If you do not use a GPU on Windows, download ONNX(cpu,cuda), PyTorch(cpu,cuda).
- For the Windows version, extract the downloaded zip file and run `start_http.bat`.
- For the Mac version, extract the downloaded file and run `startHttp.command`. If a message appears saying the developer cannot be verified, hold the Control key and click to run it again (or run it from the right-click menu).
- On first launch, various data files are downloaded, which may take some time. Once the download completes, a browser window opens.
- When connecting remotely, use the variant of the `.bat` file (win) or `.command` file (mac) where http is replaced with https.
- The DDPS-SVC encoder only supports hubert-soft.
- Download from here.
| Version | OS | Framework | link | Supported VC | Size |
| ----------- | --- | ------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | ------ |
| v.1.5.3.17a | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.17 | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.16a | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.16 | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.15 | mac | ONNX(cpu), PyTorch(cpu,mps) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC | 3125MB |
(\*1) If you cannot download from Google Drive, try downloading from [hugging_face](https://huggingface.co/wok000/vcclient000/tree/main)
(\*2) The developer does not have an AMD graphics card, so operation has not been verified; onnxruntime-directml is simply bundled.
(\*3) If extraction or startup is slow, your antivirus software may be scanning the files; try running with the file or folder excluded from scanning (at your own risk).
## (2) Usage after setting up an environment such as Docker or Anaconda
Clone this repository to use it. On Windows, setting up WSL2 is required, along with a virtual environment such as Docker or Anaconda on top of WSL2. On Mac, a Python virtual environment such as Anaconda is required. Although preparation is needed, this method runs fastest in many environments. **<font color="red"> Even without a GPU, it may run well enough on a reasonably recent CPU </font>(see the real-time performance section below)**.
[Video guide to installing WSL2 and Docker](https://youtu.be/POo_Cg0eFMU)
[Video guide to installing WSL2 and Anaconda](https://youtu.be/fba9Zhsukqw)
To run on Docker, start the server as described in [Using Docker](docker_vcclient/README.md).
To run in an Anaconda virtual environment, start the server as described in the [server developer's page](README_dev_ja.md).
# Troubleshooting
- [Communication](tutorials/trouble_shoot_communication_ja.md)
# Real-time performance (MMVC)
With a GPU, conversion is possible with almost no lag.
https://twitter.com/DannadoriYellow/status/1613483372579545088?s=20&t=7CLD79h1F3dfKiTb7M8RUQ
Even a CPU, if reasonably recent, can convert at a decent speed.
https://twitter.com/DannadoriYellow/status/1613553862773997569?s=20&t=7CLD79h1F3dfKiTb7M8RUQ
With an old CPU (i7-4770), it takes about 1000 msec.
# About developer signing
## About developer signing
This software is not signed by the developer. A warning like the one below appears, but you can run it by clicking the icon while holding the Control key. This is due to Apple's security policy. Running it is at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
# Acknowledgments
## Acknowledgments
- [Tachi-Zundamon materials](https://seiga.nicovideo.jp/seiga/im10792934)
- [Irasutoya](https://www.irasutoya.com/)
- [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
* [Tachi-Zundamon materials](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
The speech synthesis in this software uses voice data released free of charge by the free-to-use character "Tsukuyomi-chan".
@ -157,12 +113,12 @@ https://twitter.com/DannadoriYellow/status/1613553862773997569?s=20&t=7CLD79h1F3
© Rei Yumesaki
```
- [Amitaro's voice material workshop](https://amitaro.net/)
- [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
* [Amitaro's voice material workshop](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
# Terms of use
## Terms of use
- For the real-time voice changer Tsukuyomi-chan, in accordance with the terms of use of the Tsukuyomi-chan corpus, use of the converted voice for the following purposes is prohibited.
* For the real-time voice changer Tsukuyomi-chan, in accordance with the terms of use of the Tsukuyomi-chan corpus, use of the converted voice for the following purposes is prohibited.
```
@ -176,7 +132,7 @@ https://twitter.com/DannadoriYellow/status/1613553862773997569?s=20&t=7CLD79h1F3
※ Distributing or selling it as a work for appreciation is not a problem.
```
- For the real-time voice changer Amitaro, the following terms of use of Amitaro's voice material workshop apply. Details are [here](https://amitaro.net/voice/faq/#index_id6).
* For the real-time voice changer Amitaro, the following terms of use of Amitaro's voice material workshop apply. Details are [here](https://amitaro.net/voice/faq/#index_id6)
```
It is OK to create voice models using Amitaro's voice materials or corpus reading audio, and to convert your own voice into Amitaro's voice using a voice changer or voice conversion.
@ -185,31 +141,8 @@ https://twitter.com/DannadoriYellow/status/1613553862773997569?s=20&t=7CLD79h1F3
Also, keep what you say in Amitaro's voice within the scope of the voice material's terms of use, and do not make sensitive remarks.
```
- For the real-time voice changer Kikoto Mahiro, Replica Doll's terms of use apply. Details are [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service).
* For the real-time voice changer Kikoto Mahiro, Replica Doll's terms of use apply. Details are [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
# Disclaimer
## Disclaimer
We accept no liability whatsoever for any direct, indirect, consequential, incidental, or special damages arising from the use or inability to use this software.
# (1) Recorder (training voice recording app)
An app for easily recording voice for MMVC training.
It runs on Github Pages, so it can be used from various platforms with only a browser.
Recorded data is stored in the browser and never leaves it.
[Recording app on Github Pages](https://w-okada.github.io/voice-changer/)
[Explainer video](https://youtu.be/s_GirFEGvaA)
# Past versions
| Version | OS | Framework | link | Supported VC | Size |
| ---------- | --- | --------------------------------- | ---------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ------ |
| v.1.5.2.9e | mac | ONNX(cpu), PyTorch(cpu,mps) | [normal](https://drive.google.com/uc?id=1W0d7I7619PcO7kjb1SPXp6MmH5Unvd78&export=download) \*1 | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 796MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [normal](https://drive.google.com/uc?id=1tmTMJRRggS2Sb4goU-eHlRvUBR88RZDl&export=download) \*1 | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC, DDSP-SVC | 2872MB |
| v.1.5.3.1 | mac | ONNX(cpu), PyTorch(cpu,mps) | [normal](https://drive.google.com/uc?id=1oswF72q_cQQeXhIn6W275qLnoBAmcrR_&export=download) \*1 | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 796MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [normal](https://drive.google.com/uc?id=1AWjDhW4w2Uljp1-9P8YUJBZsIlnhkJX2&export=download) \*1 | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, so-vits-svc 4.0v2, RVC, DDSP-SVC | 2872MB |
# For Contributor
This repository has a [CLA](https://raw.githubusercontent.com/w-okada/voice-changer/master/LICENSE-CLA) in place.


@ -1,6 +1,6 @@
## For Developer
[Japanese](/README_dev_ja.md)
[Japanese](/README_dev_ja.md) [Russian](/README_dev_ru.md)
## Prerequisite

README_dev_ru.md (new file)

@ -0,0 +1,124 @@
This is a translation of the `README_dev_en.md` file into Russian:
## For developers
[Japanese](/README_dev_ja.md) [English](/README_dev_en.md)
## Requirements
- Linux (Ubuntu, Debian) or WSL2 (other Linux distributions and Mac have not been tested)
- Anaconda
## Preparation
1. Create an Anaconda virtual environment:
```
$ conda create -n vcclient-dev python=3.10
$ conda activate vcclient-dev
```
2. Clone the repository:
```
$ git clone https://github.com/w-okada/voice-changer.git
```
## For server developers
1. Install the required dependencies:
```
$ cd voice-changer/server
$ pip install -r requirements.txt
```
2. Start the server
Start the server with the following command. You can pass your own paths to the model weights.
```
$ python3 MMVCServerSIO.py -p 18888 --https true \
--content_vec_500 pretrain/checkpoint_best_legacy_500.pt \
--content_vec_500_onnx pretrain/content_vec_500.onnx \
--content_vec_500_onnx_on true \
--hubert_base pretrain/hubert_base.pt \
--hubert_base_jp pretrain/rinna_hubert_base_jp.pt \
--hubert_soft pretrain/hubert/hubert-soft-0d54a1f4.pt \
--nsf_hifigan pretrain/nsf_hifigan/model \
--crepe_onnx_full pretrain/crepe_onnx_full.onnx \
--crepe_onnx_tiny pretrain/crepe_onnx_tiny.onnx \
--rmvpe pretrain/rmvpe.pt \
--model_dir model_dir \
--samples samples.json
```
Open a browser (currently only Chrome is supported) and you will see the GUI.
2-1. Troubleshooting
(1) OSError: PortAudio library not found
If you get the message below, you need to install an additional library:
```
OSError: PortAudio library not found
```
You can install the library with:
```
$ sudo apt-get install libportaudio2
$ sudo apt-get install libasound-dev
```
(2) It won't start! Damn program!
The client does not start automatically. Please open a browser and go to the URL shown in the console. And watch your language.
(3) Could not load library libcudnn_cnn_infer.so.8
When using WSL, the error `Could not load library libcudnn_cnn_infer.so.8. Error: libcuda.so: cannot open shared object file: No such file or directory` may occur. This is often because the library path is not set. Set the path with the command below; you can add it to a startup script such as .bashrc.
```
export LD_LIBRARY_PATH=/usr/lib/wsl/lib:$LD_LIBRARY_PATH
```
- references:
- https://qiita.com/cacaoMath/items/811146342946cdde5b83
- https://github.com/microsoft/WSL/issues/8587
3. Enjoy developing.
### Appendix
1. Windows + Anaconda (not supported)
Use conda to install PyTorch:
```
conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
```
Also run these commands:
```
pip install chardet
pip install numpy==1.24.0
```
## For client developers
1. Import modules and initial build:
```
cd client
cd lib
npm install
npm run build:dev
cd ../demo
npm install
npm run build:dev
```
2. Enjoy.


@ -1,45 +1,41 @@
## VC Client
[Japanese](/README_ja.md) [Korean](/README_ko.md)
[Japanese](/README_ja.md) [Korean](/README_ko.md) [Russian](/README_ru.md)
## What's New!
- v.1.5.3.17a
- Bug Fixes:
- Server mode error
- RVC Model merger
- Misc
- Add RVC Sample Chihaya-Jinja (https://chihaya369.booth.pm/items/4701666)
- v.1.5.3.17
- New Features:
- Added similarity graph for Beatrice speaker selection
- Bug Fixes:
- Fixed crossfade issue with Beatrice speaker
- v.1.5.3.16a
- Bug fix:
- Lazy load Beatrice.
- v.1.5.3.16 (Only for Windows, CPU dependent)
- New Feature:
- Beatrice is supported(experimental)
- v.1.5.3.15
- Improve:
- new rmvpe checkpoint for rvc (torch, onnx)
- Mac: upgrade torch version 2.1.0
- We have released a sister product, the Text To Speech client.
- You can enjoy voice generation with a simple interface.
- For more details, click [here](https://github.com/w-okada/ttsclient).
- Beatrice V2 Training Code Released!!!
- [Training Code Repository](https://huggingface.co/fierce-cats/beatrice-trainer)
- [Colab Version](https://github.com/w-okada/beatrice-trainer-colab)
- v.2.0.70-beta (only for m1 mac)
- [HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- new feature:
- The M1 Mac version of VCClient now supports Beatrice v2 beta.1.
- v.2.0.69-beta (only for win)
- [HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- bugfix:
- Fixed a bug where the start button would not be displayed in case of some exceptions
- Adjusted the output buffer for server device mode
- Fixed a bug where the sampling rate would change when settings were modified while using server device mode
- Fixed a bug when using Japanese hubert
- misc:
- Added host API filter (highlighted) for server device mode
- v.2.0.65-beta
- [HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- new feature: We have supported Beatrice v2 beta.1, enabling even higher quality voice conversion.
# What is VC Client
1. This is a client software for performing real-time voice conversion using various Voice Conversion (VC) AI. The supported AI for voice conversion are as follows.
- [MMVC](https://github.com/isletennos/MMVC_Trainer)
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [MMVC](https://github.com/isletennos/MMVC_Trainer) (only v1)
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) (only v1)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT Licnsence*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)) * Only for Windows, CPU dependent
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) (only v1)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)) * Only for Windows, CPU dependent (only v1)
- [Beatrice v2](https://prj-beatrice.com/) (only for v2)
1. Distribute the load by running Voice Changer on a different PC
The real-time voice changer of this application works on a server-client configuration. By running the MMVC server on a separate PC, you can run it while minimizing the impact on other resource-intensive processes such as gaming commentary.
@ -48,7 +44,10 @@
3. Cross-platform compatibility
Supports Windows, Mac (including Apple Silicon M1), Linux, and Google Colaboratory.
## Related Software
- [Real-time Voice Changer VCClient](https://github.com/w-okada/voice-changer)
- [Text-to-Speech Software TTSClient](https://github.com/w-okada/ttsclient)
- [Real-Time Speech Recognition Software ASRClient](https://github.com/w-okada/asrclient)
# usage
This is an app for performing voice changes with MMVC and so-vits-svc.
@ -62,14 +61,19 @@ It can be used in two main ways, in order of difficulty:
- You can download and run executable binaries.
- Please see [here](tutorials/tutorial_rvc_en_latest.md) for the tutorial. ([troubule shoot](https://github.com/w-okada/voice-changer/blob/master/tutorials/trouble_shoot_communication_ja.md))
- Please see [here](tutorials/tutorial_rvc_en_latest.md) for the tutorial. ([trouble shoot](https://github.com/w-okada/voice-changer/blob/master/tutorials/trouble_shoot_communication_ja.md))
- It's now easy to try it out on [Google Colaboratory](https://github.com/w-okada/voice-changer/blob/master/Realtime_Voice_Changer_on_Colab.ipynb) (requires a ngrok account). You can launch it from the 'Open in Colab' button in the top left corner.
- It's now easy to try it out on [Google Colaboratory](https://github.com/w-okada/voice-changer/tree/v.2/w_okada's_Voice_Changer_version_2_x.ipynb) (requires a ngrok account). You can launch it from the 'Open in Colab' button in the top left corner.
<img src="https://github.com/w-okada/voice-changer/assets/48346627/3f092e2d-6834-42f6-bbfd-7d389111604e" width="400" height="150">
- We offer Windows and Mac versions.
- We offer Windows and Mac versions on [hugging face](https://huggingface.co/wok000/vcclient000/tree/main)
- v2 for Windows
- Please download and use `vcclient_win_std_xxx.zip`. You can perform voice conversion using a reasonably high-performance CPU without a GPU, or by utilizing DirectML to leverage GPUs (AMD, Nvidia). v2 supports both torch and onnx.
- If you have an Nvidia GPU, you can achieve faster voice conversion by using `vcclient_win_cuda_xxx.zip`.
- v2 for Mac (Apple Silicon)
- Please download and use `vcclient_mac_xxx.zip`.
- v1
- If you are using a Windows and Nvidia GPU, please download ONNX (cpu, cuda), PyTorch (cpu, cuda).
- If you are using a Windows and AMD/Intel GPU, please download ONNX (cpu, DirectML) and PyTorch (cpu, cuda). AMD/Intel GPUs are only enabled for ONNX models.
- In either case, for GPU support, PyTorch and Onnxruntime are only enabled if supported.
@ -83,29 +87,7 @@ It can be used in two main ways, in order of difficulty:
- The encoder of DDPS-SVC only supports hubert-soft.
- Download (When you cannot download from google drive, try [hugging_face](https://huggingface.co/wok000/vcclient000/tree/main))
| Version | OS | Framework | link | Supported VC | Size |
| ----------- | --- | ------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | ------ |
| v.1.5.3.17a | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.17 | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.16a | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.16 | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.15 | mac | ONNX(cpu), PyTorch(cpu,mps) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC | 3125MB |
(\*1) You can also download from [hugging_face](https://huggingface.co/wok000/vcclient000/tree/main)
(\*2) The developer does not have an AMD graphics card, so it has not been tested. This package only includes onnxruntime-directml.
(\*3) If unpacking or starting is slow, there is a possibility that virus checking is running on your antivirus software. Please try running it with the file or folder excluded from the target. (At your own risk)
- [Download from hugging face](https://huggingface.co/wok000/vcclient000/tree/main)
## (2) Usage after setting up the environment such as Docker or Anaconda
@ -121,17 +103,6 @@ To run on Anaconda venv, see [server developer's guide](README_dev_en.md)
To run on Linux using an AMD GPU, see [setup guide linux](tutorials/tutorial_anaconda_amd_rocm.md)
# Real-time performance
Conversion is almost instantaneous when using GPU.
https://twitter.com/DannadoriYellow/status/1613483372579545088?s=20&t=7CLD79h1F3dfKiTb7M8RUQ
Even with CPU, recent ones can perform conversions at a reasonable speed.
https://twitter.com/DannadoriYellow/status/1613553862773997569?s=20&t=7CLD79h1F3dfKiTb7M8RUQ
With an old CPU (i7-4770), it takes about 1000 msec for conversion.
# Software Signing


@ -3,46 +3,40 @@
[English](/README_en.md) [Korean](/README_ko.md)
## What's New!
- v.1.5.3.17a
- Bug Fixes:
- Server mode error
- RVC Model merger
- Misc
- Add RVC Sample Chihaya-Jinja (https://chihaya369.booth.pm/items/4701666)
- v.1.5.3.17
- New Features:
- Added similarity graph for Beatrice speaker selection
- Bug Fixes:
- Fixed crossfade issue with Beatrice speaker
- v.1.5.3.16a
- Bug fix:
- Lazy load Beatrice.
- v.1.5.3.16 (Only for Windows, CPU dependent)
- New Feature:
- Beatrice is supported(experimental)
- v.1.5.3.15
- Improve:
- new rmvpe checkpoint for rvc (torch, onnx)
- Mac: upgrade torch version 2.1.0
- We have released a text-to-speech client as a sister product.
- You can enjoy voice generation with a simple interface.
- For details, see [here](https://github.com/w-okada/ttsclient).
- Beatrice V2 training code released!!!
- [Training code repository](https://huggingface.co/fierce-cats/beatrice-trainer)
- [Colab version](https://github.com/w-okada/beatrice-trainer-colab)
- v.2.0.70-beta (only for m1 mac)
- [See HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- new feature:
- The M1 Mac version of VCClient also supports Beatrice v2 beta.1.
- v.2.0.69-beta (only for win)
- [See HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- Bug fixes:
- Fixed a bug where the start button would not be displayed in case of some exceptions
- Adjusted the output buffer for server device mode
- Fixed a bug where the sampling rate would change when settings were modified while using server device mode
- Fixed a bug when using Japanese hubert
- Misc:
- Added a host API filter (highlighted) for server device mode
- v.2.0.65-beta
- [See HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- new feature: Beatrice v2 beta.1 is now supported, enabling even higher quality voice conversion
# What is VC Client
1. Client software for performing real-time voice conversion using various voice conversion (VC) AI models. The supported voice conversion AIs are as follows.
- Supported voice conversion AI (supported VC)
- [MMVC](https://github.com/isletennos/MMVC_Trainer)
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc)
- [MMVC](https://github.com/isletennos/MMVC_Trainer) (only v1)
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) (only v1)
- [RVC(Retrieval-based-Voice-Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT Licnsence*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)) * Only for Windows, CPU dependent
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) (only v1)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental, (***NOT MIT License*** see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)) * Only for Windows, CPU dependent (only v1)
- [Beatrice v2](https://prj-beatrice.com/) (only for v2)
-
1. This software can also be used over a network; when used together with high-load applications such as games, the voice conversion workload can be offloaded externally.
@ -51,7 +45,10 @@
3. Multiple platforms are supported.
- Windows, Mac(M1), Linux, Google Colab (MMVC only)
## Related software
- [Real-time voice changer VCClient](https://github.com/w-okada/voice-changer)
- [Text-to-speech software TTSClient](https://github.com/w-okada/ttsclient)
- [Real-time speech recognition software ASRClient](https://github.com/w-okada/asrclient)
# Usage
It can be used in two main ways, in order of difficulty:
@ -67,12 +64,17 @@
- See the tutorial [here](tutorials/tutorial_rvc_ko_latest.md). ([Network troubleshooting](https://github.com/w-okada/voice-changer/blob/master/tutorials/trouble_shoot_communication_ko.md))
- You can now easily try it out on [Google Colaboratory](https://github.com/w-okada/voice-changer/blob/master/Realtime_Voice_Changer_on_Colab.ipynb). Launch it from the Open in Colab button at the top left.
- You can now easily try it out on [Google Colaboratory](https://github.com/w-okada/voice-changer/tree/v.2/w_okada's_Voice_Changer_version_2_x.ipynb). Launch it from the Open in Colab button at the top left.
<img src="https://github.com/w-okada/voice-changer/assets/48346627/3f092e2d-6834-42f6-bbfd-7d389111604e" width="400" height="150">
- Windows and Mac versions are available.
- Windows and Mac versions are available. They can be downloaded from [Hugging Face](https://huggingface.co/wok000/vcclient000/tree/main).
- v2 for Windows
- Please download and use `vcclient_win_std_xxx.zip`. Voice conversion is possible using a (reasonably high-performance) CPU without a GPU, or using DirectML to leverage a GPU (AMD, Nvidia). v2 supports both torch and onnx.
- If you have an Nvidia GPU, using `vcclient_win_cuda_xxx.zip` enables faster voice conversion.
- v2 for Mac (Apple Silicon)
- Please download and use `vcclient_mac_xxx.zip`.
- v1
- If you use Windows with an NVIDIA GPU, download ONNX(cpu, cuda), PyTorch(cpu, cuda).
- If you use Windows with an AMD/Intel GPU, download ONNX(cpu, DirectML) and PyTorch(cpu, cuda). AMD/Intel GPUs only apply when using ONNX models.
- Other GPUs also apply only when PyTorch and Onnxruntime support them.
@ -88,26 +90,6 @@
- The DDPS-SVC encoder only supports hubert-soft.
- Download below.
| Version | OS | Framework | Link | Supported VC | File size |
| ----------- | --- | ------------------------------------- | ------------------------------------------------------------------- | ----------------------------------------------------------------------------------- | --------- |
| v.1.5.3.17a | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.17 | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.16a | mac | ONNX(cpu), PyTorch(cpu,mps) | N/A | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC, Beatrice | 3125MB |
| v.1.5.3.15 | mac | ONNX(cpu), PyTorch(cpu,mps) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC | 797MB |
| | win | ONNX(cpu,cuda), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC | 3240MB |
| | win | ONNX(cpu,DirectML), PyTorch(cpu,cuda) | [hugging face](https://huggingface.co/wok000/vcclient000/tree/main) | MMVC v.1.5.x, MMVC v.1.3.x, so-vits-svc 4.0, RVC, DDSP-SVC, Diffusion-SVC | 3125MB |
(\*1) If you cannot download from Google Drive, try [hugging_face](https://huggingface.co/wok000/vcclient000/tree/main)
(\*2) The developer does not have an AMD graphics card, so operation could not be verified; onnxruntime-directml is merely bundled.
(\*3) If extraction or startup is slow, a virus scan may be running; try again with the file or folder excluded from scanning (the developer takes no responsibility for this).
## (2) Using it in a development environment built with Docker or Anaconda
@ -125,17 +107,6 @@ Running in an Anaconda virtual environment: see the [server developer docs](README_dev_k
- [Communication](tutorials/trouble_shoot_communication_ko.md)
# Real-time performance (MMVC)
With a GPU, conversion is possible with almost no lag.
https://twitter.com/DannadoriYellow/status/1613483372579545088?s=20&t=7CLD79h1F3dfKiTb7M8RUQ
Even a recent CPU can convert reasonably fast.
https://twitter.com/DannadoriYellow/status/1613553862773997569?s=20&t=7CLD79h1F3dfKiTb7M8RUQ
With an old CPU (i7-4770), it takes about 1000 msec.
# About developer signing

README_ru.md (new file)

@ -0,0 +1,119 @@
[Japanese](/README_ja.md) [Korean](/README_ko.md) [English](/README_en.md)
## What's New!
- We have released a sister product, the Text To Speech client.
- You can enjoy voice generation with a simple interface.
- For more details, see [here](https://github.com/w-okada/ttsclient).
- Beatrice V2 training code is now available!
- [Training code repository](https://huggingface.co/fierce-cats/beatrice-trainer)
- [Colab version](https://github.com/w-okada/beatrice-trainer-colab)
- v.2.0.70-beta (only for m1 mac)
- [HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- new feature:
- The M1 Mac version of VCClient now supports Beatrice v2 beta.1.
- v.2.0.69-beta (only for win)
- [HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- Bug fixes:
- Fixed a bug where the start button was not displayed in case of some exceptions
- Adjusted the output buffer for server device mode
- Fixed a bug where the sampling rate changed when settings were modified in server device mode
- Fixed a bug when using Japanese hubert
- Misc:
- Added a host API filter (highlighted) for server device mode
- v.2.0.65-beta
- [HERE](https://github.com/w-okada/voice-changer/tree/v.2)
- new feature: We have supported Beatrice v2 beta.1, enabling even higher quality voice conversion.
# What is VC Client
1. This is client software for performing real-time voice conversion using various voice conversion AIs. Supported AIs:
- [MMVC](https://github.com/isletennos/MMVC_Trainer) (v1 only)
- [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) (v1 only)
- [RVC (Retrieval-based Voice Conversion)](https://github.com/liujing04/Retrieval-based-Voice-Conversion-WebUI)
- [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) (v1 only)
- [Beatrice JVS Corpus Edition](https://prj-beatrice.com/) * experimental * (not under the MIT License, see [readme](https://github.com/w-okada/voice-changer/blob/master/server/voice_changer/Beatrice/)), Windows only, CPU dependent (v1 only)
- [Beatrice v2](https://prj-beatrice.com/) (v2 only)
2. Distributing the load across different PCs
The voice conversion implementation works on a server-client configuration. You can run the MMVC server on a separate PC to minimize the impact on other resource-intensive processes such as streaming.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
3. Cross-platform compatibility
Supports Windows, Mac (including Apple Silicon M1), Linux, and Google Colaboratory.
# How to use
This is an app for performing voice changes with MMVC and so-vits-svc.
There are two main ways to use it, in order of difficulty:
- Using a pre-built executable
- Setting up an environment with Docker or Anaconda
## (1) Using pre-built executables
- You can download and run the executables.
- See [here](tutorials/tutorial_rvc_en_latest.md) for the tutorial. ([troubleshooting](https://github.com/w-okada/voice-changer/blob/master/tutorials/trouble_shoot_communication_ja.md))
- You can now easily try it on [Google Colaboratory](https://github.com/w-okada/voice-changer/tree/v.2/w_okada's_Voice_Changer_version_2_x.ipynb) (an ngrok account is required). You can launch it from the "Open in Colab" button in the top left corner.
<img src="https://github.com/w-okada/voice-changer/assets/48346627/3f092e2d-6834-42f6-bbfd-7d389111604e" width="400" height="150">
- We offer Windows and Mac versions on [hugging face](https://huggingface.co/wok000/vcclient000/tree/main)
- v2 for Windows
- Please download and use `vcclient_win_std_xxx.zip`. Voice conversion can be performed using a powerful CPU without a GPU, or by using DirectML for GPUs (AMD, Nvidia). v2 supports both torch and onnx.
- If you have an Nvidia GPU, download `vcclient_win_cuda_xxx.zip` for faster conversion.
- v2 for Mac (Apple Silicon)
- Please download and use `vcclient_mac_xxx.zip`.
- v1
- For Windows with an Nvidia GPU, download ONNX (cpu, cuda), PyTorch (cpu, cuda).
- For Windows with an AMD/Intel GPU, download ONNX (cpu, DirectML) and PyTorch (cpu, cuda). AMD/Intel GPUs are supported only for ONNX models.
- For Windows users: after unpacking the zip file, run the corresponding `start_http.bat` file.
- For Mac: after unpacking the zip file, double-click `startHttp.command`. If a message appears saying the developer cannot be verified, hold Ctrl and run it again.
- When connecting remotely, use the `.command` (Mac) or `.bat` (Windows) file with https instead of http.
- The DDPS-SVC encoder only supports hubert-soft.
- [Download from hugging face](https://huggingface.co/wok000/vcclient000/tree/main)
## (2) Usage after setting up an environment such as Docker or Anaconda
Clone this repository and use it. Windows requires a WSL2 setup. Mac requires a Python virtual environment such as Anaconda. This method provides the highest speed in most cases. **<font color="red"> Even without a GPU, you can get sufficient performance on a modern CPU </font>(see the real-time performance section below)**.
[Video guide to installing WSL2 and Docker](https://youtu.be/POo_Cg0eFMU)
[Video guide to installing WSL2 and Anaconda](https://youtu.be/fba9Zhsukqw)
To run with Docker, see [start docker](docker_vcclient/README_en.md).
To run on an Anaconda venv, see the [developer's guide](README_dev_ru.md).
To run on Linux with an AMD GPU, see the [setup guide](tutorials/tutorial_anaconda_amd_rocm.md).
# Software signing
This software is not signed by the developer. A warning will appear, but you can run it by clicking the icon while holding the Ctrl key. This is due to Apple's security policy. Use at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
https://user-images.githubusercontent.com/48346627/212569645-e30b7f4e-079d-4504-8cf8-7816c5f40b00.mp4
# Acknowledgments
- [Tachizunda-mon materials](https://seiga.nicovideo.jp/seiga/im10792934)
- [Irasutoya](https://www.irasutoya.com/)
- [Tsukuyomi-chan](https://tyc.rei-yumesaki.net)
> This software uses voice data of the free-to-use character "Tsukuyomi-chan", provided free of charge by CV. Yumesaki Rei.
>
> - Tsukuyomi-chan corpus (CV. Yumesaki Rei)
>
> https://tyc.rei-yumesaki.net/material/corpus/
>
> Copyright Yumesaki Rei. All rights reserved.

client/.vscode/settings.json (new file)

@ -0,0 +1,11 @@
{
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024,
"prettier.tabWidth": 4,
"files.associations": {
"*.css": "postcss"
}
}


@ -1,8 +1,11 @@
{
"files.associations": {
"*.css": "postcss"
},
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024,
"prettier.tabWidth": 4,
"files.associations": {
"*.css": "postcss"
}
}


@ -21,7 +21,7 @@
{
"name": "configArea",
"options": {
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx"],
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe"],
"inputChunkNums": [1, 2, 4, 6, 8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048, 4096, 8192, 16384]
}
}
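This hunk adds `"fcpe"` to the detector list delivered to the client. As a purely illustrative sketch of how a client could consume such a configArea block, the helper below narrows a requested pitch detector to the configured list; the type and function names are hypothetical and not taken from this repository.
```
// Hypothetical TypeScript helper (names are not from this repo): keep a
// requested pitch detector only if the configArea "detectors" list
// includes it, falling back to the first configured entry otherwise.
type ConfigArea = { detectors: string[]; inputChunkNums: number[] };

function pickDetector(config: ConfigArea, requested: string): string {
    return config.detectors.includes(requested) ? requested : config.detectors[0];
}

// e.g. pickDetector(config, "fcpe") returns "fcpe" only on configs that
// ship this change; on older configs it falls back to "dio".
```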

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large


@ -28,51 +28,51 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@babel/core": "^7.23.3",
"@babel/plugin-transform-runtime": "^7.23.4",
"@babel/preset-env": "^7.23.3",
"@babel/core": "^7.24.0",
"@babel/plugin-transform-runtime": "^7.24.0",
"@babel/preset-env": "^7.24.0",
"@babel/preset-react": "^7.23.3",
"@babel/preset-typescript": "^7.23.3",
"@types/node": "^20.10.0",
"@types/react": "^18.2.39",
"@types/react-dom": "^18.2.17",
"autoprefixer": "^10.4.16",
"@types/node": "^20.11.21",
"@types/react": "^18.2.60",
"@types/react-dom": "^18.2.19",
"autoprefixer": "^10.4.17",
"babel-loader": "^9.1.3",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"eslint": "^8.54.0",
"eslint-config-prettier": "^9.0.0",
"eslint-plugin-prettier": "^5.0.1",
"copy-webpack-plugin": "^12.0.2",
"css-loader": "^6.10.0",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"html-loader": "^4.2.0",
"html-webpack-plugin": "^5.5.3",
"html-loader": "^5.0.0",
"html-webpack-plugin": "^5.6.0",
"npm-run-all": "^4.1.5",
"postcss-loader": "^7.3.3",
"postcss-loader": "^8.1.1",
"postcss-nested": "^6.0.1",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"rimraf": "^5.0.5",
"style-loader": "^3.3.3",
"style-loader": "^3.3.4",
"ts-loader": "^9.5.1",
"tsconfig-paths": "^4.2.0",
"typescript": "^5.3.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
},
"dependencies": {
"@alexanderolsen/libsamplerate-js": "^2.1.0",
"@dannadori/voice-changer-client-js": "^1.0.178",
"@alexanderolsen/libsamplerate-js": "^2.1.1",
"@dannadori/voice-changer-client-js": "^1.0.182",
"@dannadori/voice-changer-js": "^1.0.2",
"@dannadori/worker-manager": "^1.0.20",
"@fortawesome/fontawesome-svg-core": "^6.5.0",
"@fortawesome/free-brands-svg-icons": "^6.5.0",
"@fortawesome/free-regular-svg-icons": "^6.5.0",
"@fortawesome/free-solid-svg-icons": "^6.5.0",
"@fortawesome/fontawesome-svg-core": "^6.5.1",
"@fortawesome/free-brands-svg-icons": "^6.5.1",
"@fortawesome/free-regular-svg-icons": "^6.5.1",
"@fortawesome/free-solid-svg-icons": "^6.5.1",
"@fortawesome/react-fontawesome": "^0.2.0",
"@tensorflow/tfjs": "^4.13.0",
"onnxruntime-web": "^1.16.3",
"protobufjs": "^7.2.5",
"@tensorflow/tfjs": "^4.17.0",
"onnxruntime-web": "^1.17.1",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0"
}


@ -21,7 +21,7 @@
{
"name": "configArea",
"options": {
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx"],
"detectors": ["dio", "harvest", "crepe", "crepe_full", "crepe_tiny", "rmvpe", "rmvpe_onnx", "fcpe"],
"inputChunkNums": [1, 2, 4, 6, 8, 16, 24, 32, 40, 48, 64, 80, 96, 112, 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 2048, 4096, 8192, 16384]
}
}


@ -113,23 +113,29 @@ const f0ModelUrl: { [modelType in VoiceChangerType]: { [inputLength in InputLeng
rvcv2: {
"24000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_24000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_24000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_24000.bin",
// "32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_24000.bin",
"32k": "https://192.168.0.247:8080/models/rvcv2_exp_v2_32k_f0_24000.bin",
// "16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_24000.bin",
// "16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/vctk/rvcv2_vctk_v2_16k_f0_24000.bin",
"16k": "https://192.168.0.247:8080/models/rvcv2_vctk_v2_16k_f0_24000.bin",
},
"16000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_16000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_16000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_16000.bin",
// "16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_16000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/vctk/rvcv2_vctk_v2_16k_f0_16000.bin",
},
"12000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_12000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_12000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_12000.bin",
// "16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_12000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/vctk/rvcv2_vctk_v2_16k_f0_16000.bin",
},
"8000": {
"40k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_40k_f0_8000.bin",
"32k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_32k_f0_8000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_8000.bin",
// "16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/amitaro/rvcv2_amitaro_v2_16k_f0_8000.bin",
"16k": "https://huggingface.co/wok000/vcclient_model/resolve/main/web_model/v_01_alpha/vctk/rvcv2_vctk_v2_16k_f0_8000.bin",
},
},
};
@ -137,7 +143,7 @@ const f0ModelUrl: { [modelType in VoiceChangerType]: { [inputLength in InputLeng
export const useWebInfo = (props: UseWebInfoProps): WebInfoStateAndMethod => {
const initVoiceChangerType: VoiceChangerType = "rvcv2";
const initInputLength: InputLengthKey = "24000";
const initUseF0 = false;
const initUseF0 = true;
const initSampleRate: ModelSampleRateStr = "32k";
const progressCallback = (data: ProgreeeUpdateCallbcckInfo) => {


@ -1,4 +1,4 @@
import React from "react"
import React from "react";
import { GuiStateProvider } from "./001_GuiStateProvider";
import { Dialogs } from "./900_Dialogs";
import { ModelSlotControl } from "./b00_ModelSlotControl";
@ -13,5 +13,5 @@ export const Demo = () => {
<ModelSlotControl></ModelSlotControl>
</div>
</GuiStateProvider>
)
}
);
};


@ -125,6 +125,11 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
return x.kind == "llvcConfig";
});
return enough;
} else if (setting.voiceChangerType == "EasyVC") {
const enough = !!setting.files.find((x) => {
return x.kind == "easyVCModel";
});
return enough;
}
return false;
};
@ -189,6 +194,8 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
} else if (vcType == "LLVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "llvcModel", ["pth"]));
rows.push(generateFileRow(uploadSetting!, "Config", "llvcConfig", ["json"]));
} else if (vcType == "EasyVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "easyVCModel", ["onnx"]));
}
return rows;
};
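For context, this is roughly what an EasyVC upload looks like on the client side; a sketch assuming the `ModelUploadSetting` and `ModelFileKind` shapes from useServerSetting.ts later in this diff, with `onnxFile` as a placeholder `File`:

```typescript
declare const onnxFile: File; // placeholder; in practice this comes from the file row above

// Hypothetical EasyVC upload mirroring the row generated above (.onnx model only).
const easyVCUpload: ModelUploadSetting = {
    voiceChangerType: "EasyVC",
    slot: 0, // placeholder target slot
    isSampleMode: false,
    sampleId: null,
    files: [{ file: onnxFile, kind: "easyVCModel", dir: "" }],
    params: {},
};
```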


@ -4,7 +4,6 @@ import { useAppRoot } from "../../../001_provider/001_AppRootProvider";
import { useAppState } from "../../../001_provider/001_AppStateProvider";
import { useIndexedDB } from "@dannadori/voice-changer-client-js";
import { useMessageBuilder } from "../../../hooks/useMessageBuilder";
import { removeDB as webDBRemove } from "@dannadori/voice-changer-js";
export type HeaderAreaProps = {
mainTitle: string;
@ -16,7 +15,7 @@ export const HeaderArea = (props: HeaderAreaProps) => {
const messageBuilderState = useMessageBuilder();
const { clearSetting, webInfoState } = useAppState();
const { removeItem } = useIndexedDB({ clientType: null });
const { removeItem, removeDB } = useIndexedDB({ clientType: null });
useMemo(() => {
messageBuilderState.setMessage(__filename, "github", { ja: "github", en: "github" });
@ -120,7 +119,7 @@ export const HeaderArea = (props: HeaderAreaProps) => {
const onClearSettingClicked = async () => {
await clearSetting();
await removeItem(INDEXEDDB_KEY_AUDIO_OUTPUT);
await webDBRemove();
await removeDB();
location.reload();
};
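The point of this hunk: `removeDB` now comes from the same `useIndexedDB` hook that already supplies `removeItem`, instead of being imported from the separate `@dannadori/voice-changer-js` package, so both cleanup steps go through one IndexedDB client. The resulting flow, consolidated from the old/new lines above:

```typescript
const { removeItem, removeDB } = useIndexedDB({ clientType: null });

const onClearSettingClicked = async () => {
    await clearSetting();                         // reset in-memory settings
    await removeItem(INDEXEDDB_KEY_AUDIO_OUTPUT); // drop the persisted audio-output key
    await removeDB();                             // then drop the whole database
    location.reload();                            // restart with a clean state
};
```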


@ -98,12 +98,12 @@ module.exports = {
// new CopyPlugin({
// patterns: [{ from: "public/models/rvcv1_amitaro_v1_32k_nof0_24000.bin", to: "models/rvcv1_amitaro_v1_32k_nof0_24000.bin" }],
// }),
// new CopyPlugin({
// patterns: [{ from: "public/models/rvcv1_amitaro_v1_40k_f0_24000.bin", to: "models/rvcv1_amitaro_v1_40k_f0_24000.bin" }],
// }),
// new CopyPlugin({
// patterns: [{ from: "public/models/rvcv1_amitaro_v1_40k_nof0_24000.bin", to: "models/rvcv1_amitaro_v1_40k_nof0_24000.bin" }],
// }),
new CopyPlugin({
patterns: [{ from: "public/models/rvcv2_exp_v2_32k_f0_24000.bin", to: "models/rvcv2_exp_v2_32k_f0_24000.bin" }],
}),
new CopyPlugin({
patterns: [{ from: "public/models/rvcv2_vctk_v2_16k_f0_24000.bin", to: "models/rvcv2_vctk_v2_16k_f0_24000.bin" }],
}),
// new CopyPlugin({
// patterns: [{ from: "public/models/amitaro.png", to: "models/amitaro.png" }],
// }),


@ -1,8 +1,11 @@
{
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024,
"prettier.tabWidth": 4
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
},
"editor.defaultFormatter": "esbenp.prettier-vscode",
"prettier.printWidth": 1024,
"prettier.tabWidth": 4,
"files.associations": {
"*.css": "postcss"
}
}

File diff suppressed because it is too large


@ -1,6 +1,6 @@
{
"name": "@dannadori/voice-changer-client-js",
"version": "1.0.178",
"version": "1.0.182",
"description": "",
"main": "dist/index.js",
"directories": {
@ -26,35 +26,35 @@
"author": "wataru.okada@flect.co.jp",
"license": "ISC",
"devDependencies": {
"@types/audioworklet": "^0.0.50",
"@types/jest": "^29.5.8",
"@types/node": "^20.9.2",
"@types/react": "18.2.37",
"@types/react-dom": "18.2.15",
"eslint": "^8.54.0",
"eslint-config-prettier": "^9.0.0",
"eslint-plugin-prettier": "^5.0.1",
"@types/audioworklet": "^0.0.54",
"@types/jest": "^29.5.12",
"@types/node": "^20.11.21",
"@types/react": "18.2.60",
"@types/react-dom": "18.2.19",
"eslint": "^8.57.0",
"eslint-config-prettier": "^9.1.0",
"eslint-plugin-prettier": "^5.1.3",
"eslint-plugin-react": "^7.33.2",
"eslint-webpack-plugin": "^4.0.1",
"jest": "^29.7.0",
"npm-run-all": "^4.1.5",
"prettier": "^3.1.0",
"prettier": "^3.2.5",
"raw-loader": "^4.0.2",
"rimraf": "^5.0.5",
"ts-loader": "^9.5.1",
"typescript": "^5.2.2",
"webpack": "^5.89.0",
"typescript": "^5.3.3",
"webpack": "^5.90.3",
"webpack-cli": "^5.1.4",
"webpack-dev-server": "^4.15.1"
"webpack-dev-server": "^5.0.2"
},
"dependencies": {
"@types/readable-stream": "^4.0.6",
"amazon-chime-sdk-js": "^3.18.2",
"@types/readable-stream": "^4.0.10",
"amazon-chime-sdk-js": "^3.20.0",
"buffer": "^6.0.3",
"localforage": "^1.10.0",
"protobufjs": "^7.2.5",
"protobufjs": "^7.2.6",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"socket.io-client": "^4.7.2"
"socket.io-client": "^4.7.4"
}
}


@ -13,6 +13,7 @@ export const VoiceChangerType = {
Beatrice: "Beatrice",
LLVC: "LLVC",
WebModel: "WebModel",
EasyVC: "EasyVC",
} as const;
export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType];
@ -56,6 +57,7 @@ export const F0Detector = {
crepe_tiny: "crepe_tiny",
rmvpe: "rmvpe",
rmvpe_onnx: "rmvpe_onnx",
fcpe: "fcpe",
} as const;
export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector];
@ -527,7 +529,7 @@ export const DefaultClientSettng: ClientSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 48000,
inputChunkNum: 48,
inputChunkNum: 192,
downSamplingMode: "average",
},
voiceChangerClientSetting: {
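Both additions use the idiom that const.ts applies throughout: an object literal frozen with `as const`, plus an indexed-access type over its values, yields a string-literal union. A minimal sketch of what the `F0Detector` change buys (abbreviated member list):

```typescript
export const F0Detector = {
    dio: "dio",
    rmvpe: "rmvpe",
    fcpe: "fcpe", // newly added detector
} as const;
export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector]; // "dio" | "rmvpe" | "fcpe"

const ok: F0Detector = "fcpe";      // accepted after this change
// const bad: F0Detector = "swipe"; // compile-time error: not in the union
```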


@ -127,7 +127,7 @@ export const useClient = (props: UseClientProps): ClientState => {
};
// Configuration data management
const { setItem, getItem } = useIndexedDB({ clientType: null });
const { setItem, getItem, removeItem } = useIndexedDB({ clientType: null });
// Update and persist configuration data
const _setSetting = (_setting: ClientSetting) => {
const storeData = { ..._setting };
@ -231,7 +231,7 @@ export const useClient = (props: UseClientProps): ClientState => {
}, [voiceChangerClientSetting.reloadClientSetting, serverSetting.reloadServerInfo]);
const clearSetting = async () => {
// TBD
await removeItem("clientSetting");
};
// Change settings
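With this change `clearSetting` stops being a stub and actually drops the persisted blob. Combined with the `setItem`/`getItem` calls already in this file, the settings lifecycle is roughly as follows (a sketch; "clientSetting" is the key visible in the hunk):

```typescript
const { setItem, getItem, removeItem } = useIndexedDB({ clientType: null });
await setItem("clientSetting", storeData);    // persist on every settings change
const saved = await getItem("clientSetting"); // restore on startup, if present
await removeItem("clientSetting");            // clearSetting: drop the saved blob
```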


@ -1,231 +1,279 @@
import { useState, useMemo } from "react";
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, OnnxExporterInfo, MergeModelRequest, VoiceChangerType, DefaultServerSetting } from "../const";
import {
VoiceChangerServerSetting,
ServerInfo,
ServerSettingKey,
OnnxExporterInfo,
MergeModelRequest,
VoiceChangerType,
DefaultServerSetting,
} from "../const";
import { VoiceChangerClient } from "../VoiceChangerClient";
export const ModelAssetName = {
iconFile: "iconFile",
iconFile: "iconFile",
} as const;
export type ModelAssetName = (typeof ModelAssetName)[keyof typeof ModelAssetName];
export type ModelAssetName =
(typeof ModelAssetName)[keyof typeof ModelAssetName];
export const ModelFileKind = {
mmvcv13Config: "mmvcv13Config",
mmvcv13Model: "mmvcv13Model",
mmvcv15Config: "mmvcv15Config",
mmvcv15Model: "mmvcv15Model",
mmvcv15Correspondence: "mmvcv15Correspondence",
mmvcv13Config: "mmvcv13Config",
mmvcv13Model: "mmvcv13Model",
mmvcv15Config: "mmvcv15Config",
mmvcv15Model: "mmvcv15Model",
mmvcv15Correspondence: "mmvcv15Correspondence",
soVitsSvc40Config: "soVitsSvc40Config",
soVitsSvc40Model: "soVitsSvc40Model",
soVitsSvc40Cluster: "soVitsSvc40Cluster",
soVitsSvc40Config: "soVitsSvc40Config",
soVitsSvc40Model: "soVitsSvc40Model",
soVitsSvc40Cluster: "soVitsSvc40Cluster",
rvcModel: "rvcModel",
rvcIndex: "rvcIndex",
rvcModel: "rvcModel",
rvcIndex: "rvcIndex",
ddspSvcModel: "ddspSvcModel",
ddspSvcModelConfig: "ddspSvcModelConfig",
ddspSvcDiffusion: "ddspSvcDiffusion",
ddspSvcDiffusionConfig: "ddspSvcDiffusionConfig",
ddspSvcModel: "ddspSvcModel",
ddspSvcModelConfig: "ddspSvcModelConfig",
ddspSvcDiffusion: "ddspSvcDiffusion",
ddspSvcDiffusionConfig: "ddspSvcDiffusionConfig",
diffusionSVCModel: "diffusionSVCModel",
diffusionSVCModel: "diffusionSVCModel",
beatriceModel: "beatriceModel",
beatriceModel: "beatriceModel",
llvcModel: "llvcModel",
llvcConfig: "llvcConfig",
llvcModel: "llvcModel",
llvcConfig: "llvcConfig",
easyVCModel: "easyVCModel",
} as const;
export type ModelFileKind = (typeof ModelFileKind)[keyof typeof ModelFileKind];
export type ModelFile = {
file: File;
kind: ModelFileKind;
dir: string;
file: File;
kind: ModelFileKind;
dir: string;
};
export type ModelUploadSetting = {
voiceChangerType: VoiceChangerType;
slot: number;
isSampleMode: boolean;
sampleId: string | null;
voiceChangerType: VoiceChangerType;
slot: number;
isSampleMode: boolean;
sampleId: string | null;
files: ModelFile[];
params: any;
files: ModelFile[];
params: any;
};
export type ModelFileForServer = Omit<ModelFile, "file"> & {
name: string;
kind: ModelFileKind;
name: string;
kind: ModelFileKind;
};
export type ModelUploadSettingForServer = Omit<ModelUploadSetting, "files"> & {
files: ModelFileForServer[];
files: ModelFileForServer[];
};
type AssetUploadSetting = {
slot: number;
name: ModelAssetName;
file: string;
slot: number;
name: ModelAssetName;
file: string;
};
export type UseServerSettingProps = {
voiceChangerClient: VoiceChangerClient | null;
voiceChangerClient: VoiceChangerClient | null;
};
export type ServerSettingState = {
serverSetting: ServerInfo;
updateServerSettings: (setting: ServerInfo) => Promise<void>;
reloadServerInfo: () => Promise<void>;
serverSetting: ServerInfo;
updateServerSettings: (setting: ServerInfo) => Promise<void>;
reloadServerInfo: () => Promise<void>;
uploadModel: (setting: ModelUploadSetting) => Promise<void>;
uploadProgress: number;
isUploading: boolean;
uploadModel: (setting: ModelUploadSetting) => Promise<void>;
uploadProgress: number;
isUploading: boolean;
getOnnx: () => Promise<OnnxExporterInfo>;
mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>;
updateModelDefault: () => Promise<ServerInfo>;
updateModelInfo: (slot: number, key: string, val: string) => Promise<ServerInfo>;
uploadAssets: (slot: number, name: ModelAssetName, file: File) => Promise<void>;
getOnnx: () => Promise<OnnxExporterInfo>;
mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>;
updateModelDefault: () => Promise<ServerInfo>;
updateModelInfo: (
slot: number,
key: string,
val: string
) => Promise<ServerInfo>;
uploadAssets: (
slot: number,
name: ModelAssetName,
file: File
) => Promise<void>;
};
export const useServerSetting = (props: UseServerSettingProps): ServerSettingState => {
const [serverSetting, _setServerSetting] = useState<ServerInfo>(DefaultServerSetting);
const setServerSetting = (info: ServerInfo) => {
if (!info.modelSlots) {
// When the server returns empty info (workaround for the web version)
return;
export const useServerSetting = (
props: UseServerSettingProps
): ServerSettingState => {
const [serverSetting, _setServerSetting] =
useState<ServerInfo>(DefaultServerSetting);
const setServerSetting = (info: ServerInfo) => {
if (!info.modelSlots) {
// When the server returns empty info (workaround for the web version)
return;
}
_setServerSetting(info);
};
//////////////
// Settings
/////////////
const updateServerSettings = useMemo(() => {
return async (setting: ServerInfo) => {
if (!props.voiceChangerClient) return;
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
const k = Object.values(ServerSettingKey)[
i
] as keyof VoiceChangerServerSetting;
const cur_v = serverSetting[k];
const new_v = setting[k];
if (cur_v != new_v) {
const res = await props.voiceChangerClient.updateServerSettings(
k,
"" + new_v
);
setServerSetting(res);
}
_setServerSetting(info);
}
};
}, [props.voiceChangerClient, serverSetting]);
//////////////
// Settings
/////////////
const updateServerSettings = useMemo(() => {
return async (setting: ServerInfo) => {
if (!props.voiceChangerClient) return;
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting;
const cur_v = serverSetting[k];
const new_v = setting[k];
//////////////
// Operations
/////////////
const [uploadProgress, setUploadProgress] = useState<number>(0);
const [isUploading, setIsUploading] = useState<boolean>(false);
if (cur_v != new_v) {
const res = await props.voiceChangerClient.updateServerSettings(k, "" + new_v);
setServerSetting(res);
}
}
};
}, [props.voiceChangerClient, serverSetting]);
//////////////
// Operations
/////////////
const [uploadProgress, setUploadProgress] = useState<number>(0);
const [isUploading, setIsUploading] = useState<boolean>(false);
// (e) Model upload
const _uploadFile2 = useMemo(() => {
return async (file: File, onprogress: (progress: number, end: boolean) => void, dir: string = "") => {
if (!props.voiceChangerClient) return;
const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress);
const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num);
console.log("uploaded", num, res);
};
}, [props.voiceChangerClient]);
// New uploader
const uploadModel = useMemo(() => {
return async (setting: ModelUploadSetting) => {
if (!props.voiceChangerClient) {
return;
}
setUploadProgress(0);
setIsUploading(true);
if (setting.isSampleMode == false) {
const progRate = 1 / setting.files.length;
for (let i = 0; i < setting.files.length; i++) {
const progOffset = 100 * i * progRate;
await _uploadFile2(
setting.files[i].file,
(progress: number, _end: boolean) => {
setUploadProgress(progress * progRate + progOffset);
},
setting.files[i].dir
);
}
}
const params: ModelUploadSettingForServer = {
...setting,
files: setting.files.map((f) => {
return { name: f.file.name, kind: f.kind, dir: f.dir };
}),
};
const loadPromise = props.voiceChangerClient.loadModel(0, false, JSON.stringify(params));
await loadPromise;
setUploadProgress(0);
setIsUploading(false);
reloadServerInfo();
};
}, [props.voiceChangerClient]);
const uploadAssets = useMemo(() => {
return async (slot: number, name: ModelAssetName, file: File) => {
if (!props.voiceChangerClient) return;
await _uploadFile2(file, (progress: number, _end: boolean) => {
console.log(progress, _end);
});
const assetUploadSetting: AssetUploadSetting = {
slot,
name,
file: file.name,
};
await props.voiceChangerClient.uploadAssets(JSON.stringify(assetUploadSetting));
reloadServerInfo();
};
}, [props.voiceChangerClient]);
const reloadServerInfo = useMemo(() => {
return async () => {
if (!props.voiceChangerClient) return;
const res = await props.voiceChangerClient.getServerSettings();
setServerSetting(res);
};
}, [props.voiceChangerClient]);
const getOnnx = async () => {
return props.voiceChangerClient!.getOnnx();
// (e) Model upload
const _uploadFile2 = useMemo(() => {
return async (
file: File,
onprogress: (progress: number, end: boolean) => void,
dir: string = ""
) => {
if (!props.voiceChangerClient) return;
const num = await props.voiceChangerClient.uploadFile2(
dir,
file,
onprogress
);
const res = await props.voiceChangerClient.concatUploadedFile(
dir + file.name,
num
);
console.log("uploaded", num, res);
};
}, [props.voiceChangerClient]);
const mergeModel = async (request: MergeModelRequest) => {
const serverInfo = await props.voiceChangerClient!.mergeModel(request);
setServerSetting(serverInfo);
return serverInfo;
};
// New uploader
const uploadModel = useMemo(() => {
return async (setting: ModelUploadSetting) => {
if (!props.voiceChangerClient) {
return;
}
const updateModelDefault = async () => {
const serverInfo = await props.voiceChangerClient!.updateModelDefault();
setServerSetting(serverInfo);
return serverInfo;
};
const updateModelInfo = async (slot: number, key: string, val: string) => {
const serverInfo = await props.voiceChangerClient!.updateModelInfo(slot, key, val);
setServerSetting(serverInfo);
return serverInfo;
};
setUploadProgress(0);
setIsUploading(true);
return {
serverSetting,
updateServerSettings,
reloadServerInfo,
if (setting.isSampleMode == false) {
const progRate = 1 / setting.files.length;
for (let i = 0; i < setting.files.length; i++) {
const progOffset = 100 * i * progRate;
await _uploadFile2(
setting.files[i].file,
(progress: number, _end: boolean) => {
setUploadProgress(progress * progRate + progOffset);
},
setting.files[i].dir
);
}
}
const params: ModelUploadSettingForServer = {
...setting,
files: setting.files.map((f) => {
return { name: f.file.name, kind: f.kind, dir: f.dir };
}),
};
uploadModel,
uploadProgress,
isUploading,
getOnnx,
mergeModel,
updateModelDefault,
updateModelInfo,
uploadAssets,
const loadPromise = props.voiceChangerClient.loadModel(
0,
false,
JSON.stringify(params)
);
await loadPromise;
setUploadProgress(0);
setIsUploading(false);
reloadServerInfo();
};
}, [props.voiceChangerClient]);
const uploadAssets = useMemo(() => {
return async (slot: number, name: ModelAssetName, file: File) => {
if (!props.voiceChangerClient) return;
await _uploadFile2(file, (progress: number, _end: boolean) => {
console.log(progress, _end);
});
const assetUploadSetting: AssetUploadSetting = {
slot,
name,
file: file.name,
};
await props.voiceChangerClient.uploadAssets(
JSON.stringify(assetUploadSetting)
);
reloadServerInfo();
};
}, [props.voiceChangerClient]);
const reloadServerInfo = useMemo(() => {
return async () => {
if (!props.voiceChangerClient) return;
const res = await props.voiceChangerClient.getServerSettings();
setServerSetting(res);
};
}, [props.voiceChangerClient]);
const getOnnx = async () => {
return props.voiceChangerClient!.getOnnx();
};
const mergeModel = async (request: MergeModelRequest) => {
const serverInfo = await props.voiceChangerClient!.mergeModel(request);
setServerSetting(serverInfo);
return serverInfo;
};
const updateModelDefault = async () => {
const serverInfo = await props.voiceChangerClient!.updateModelDefault();
setServerSetting(serverInfo);
return serverInfo;
};
const updateModelInfo = async (slot: number, key: string, val: string) => {
const serverInfo = await props.voiceChangerClient!.updateModelInfo(
slot,
key,
val
);
setServerSetting(serverInfo);
return serverInfo;
};
return {
serverSetting,
updateServerSettings,
reloadServerInfo,
uploadModel,
uploadProgress,
isUploading,
getOnnx,
mergeModel,
updateModelDefault,
updateModelInfo,
uploadAssets,
};
};
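Most of this hunk is a mechanical re-wrap, but the upload-progress arithmetic inside `uploadModel` is easy to misread after the reflow. A worked example, assuming three files and a per-file `progress` reported on a 0-100 scale:

```typescript
const fileCount = 3;
const progRate = 1 / fileCount;              // each file owns a third of the bar
// While the second file (i = 1) is half uploaded (progress = 50):
const progOffset = 100 * 1 * progRate;       // about 33.3: credit for completed files
const overall = 50 * progRate + progOffset;  // 16.7 + 33.3 = 50, so the bar shows 50%
```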

docs_i18n/README_ar.md (new file, 148 lines)

@ -0,0 +1,148 @@
[Japanese](/README.md) /
[English](/docs_i18n/README_en.md) /
[Korean](/docs_i18n/README_ko.md)/
[Chinese](/docs_i18n/README_zh.md)/
[German](/docs_i18n/README_de.md)/
[Arabic](/docs_i18n/README_ar.md)/
[Greek](/docs_i18n/README_el.md)/
[Spanish](/docs_i18n/README_es.md)/
[French](/docs_i18n/README_fr.md)/
[Italian](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malay](/docs_i18n/README_ms.md)/
[Russian](/docs_i18n/README_ru.md)
*Languages other than Japanese are machine translated.
## VCClient
VCClient is software that performs real-time voice conversion using AI.
## What's New!
* v.2.0.78-beta
* bugfix: Avoided upload error for RVC model
* Now possible to run simultaneously with ver.1.x
* Increased selectable chunk sizes
* v.2.0.77-beta (only for RTX 5090, experimental)
* Support for RTX 5090 related modules (not verified, as the developer does not own an RTX 5090)
* v.2.0.76-beta
* new feature:
* Beatrice: Implementation of speaker merge
* Beatrice: Auto pitch shift
* bugfix:
* Fixed issue with device selection in server mode
* v.2.0.73-beta
* new feature:
* Download edited Beatrice model
* bugfix:
* Fixed a bug where pitch and formant of Beatrice v2 were not reflected
* Fixed a bug where ONNX could not be created for models using Applio's embedder
## Download and Related Links
Windows and M1 Mac versions can be downloaded from the Hugging Face repository.
* [VCClient Repository](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 Repository](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Please clone the repository for Linux use.
### Related Links
* [Beatrice V2 Training Code Repository](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 Training Code Colab Version](https://github.com/w-okada/beatrice-trainer-colab)
### Related Software
* [Real-time Voice Changer VCClient](https://github.com/w-okada/voice-changer)
* [Text-to-Speech Software TTSClient](https://github.com/w-okada/ttsclient)
* [Real-time Speech Recognition Software ASRClient](https://github.com/w-okada/asrclient)
## Features of VC Client
## Supports various AI models
| AI Model | v.2 | v.1 | License |
| --- | --- | --- | --- |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | Please refer to the repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [Proprietary](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [Proprietary](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | Please refer to the repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | Please refer to the repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | Please refer to the repository. |
## Supports both standalone and network configurations
Supports voice conversion completed on a local PC as well as voice conversion via network.
By using it over a network, you can offload the voice conversion load externally when using it simultaneously with high-load applications such as games.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatible with multiple platforms
Windows, Mac(M1), Linux, Google Colab
*1 Please clone the repository for Linux use.
## Provides REST API
Clients can be created in various programming languages.
You can also operate it using HTTP clients built into the OS, such as curl.
## Troubleshooting
[Communication issues](tutorials/trouble_shoot_communication_ja.md)
## About Developer Signature
This software is not signed by the developer. A warning will appear as shown below, but you can run it by clicking the icon while holding down the control key. This is due to Apple's security policy. Execution is at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [Tachizundamon Materials](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Amitaro's Voice Material Workshop](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Terms of Use
* Regarding the real-time voice changer Tsukuyomi-chan, it is prohibited to use the converted voice for the following purposes in accordance with the terms of use of the Tsukuyomi-chan corpus.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Regarding the real-time voice changer Amitaro, it complies with the following terms of use of Amitaro's Voice Material Workshop. For details, see [here](https://amitaro.net/voice/faq/#index_id6).
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Regarding the real-time voice changer Koto Mahiro, it complies with the terms of use of Replica Doll. For details, see [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service).
## Disclaimer
We are not responsible for any direct, indirect, consequential, or special damages arising from the use or inability to use this software.

docs_i18n/README_de.md (new file, 148 lines)

@ -0,0 +1,148 @@
[Japanese](/README.md) /
[English](/docs_i18n/README_en.md) /
[Korean](/docs_i18n/README_ko.md)/
[Chinese](/docs_i18n/README_zh.md)/
[German](/docs_i18n/README_de.md)/
[Arabic](/docs_i18n/README_ar.md)/
[Greek](/docs_i18n/README_el.md)/
[Spanish](/docs_i18n/README_es.md)/
[French](/docs_i18n/README_fr.md)/
[Italian](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malay](/docs_i18n/README_ms.md)/
[Russian](/docs_i18n/README_ru.md)
*Languages other than Japanese are machine translated.
## VCClient
VCClient is software that performs real-time voice conversion using AI.
## What's New!
* v.2.0.78-beta
* bugfix: Avoided upload error for RVC model
* Now possible to run simultaneously with ver.1.x
* Increased selectable chunk sizes
* v.2.0.77-beta (only for RTX 5090, experimental)
* Support for RTX 5090 related modules (not verified, as the developer does not own an RTX 5090)
* v.2.0.76-beta
* new feature:
* Beatrice: Implementation of speaker merge
* Beatrice: Auto pitch shift
* bugfix:
* Fixed issue with device selection in server mode
* v.2.0.73-beta
* new feature:
* Download edited Beatrice model
* bugfix:
* Fixed a bug where pitch and formant of Beatrice v2 were not reflected
* Fixed a bug where ONNX could not be created for models using Applio's embedder
## Download and Related Links
Windows and M1 Mac versions can be downloaded from the Hugging Face repository.
* [VCClient Repository](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 Repository](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Please clone the repository for Linux use.
### Related Links
* [Beatrice V2 Training Code Repository](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 Training Code Colab Version](https://github.com/w-okada/beatrice-trainer-colab)
### Related Software
* [Real-time Voice Changer VCClient](https://github.com/w-okada/voice-changer)
* [Text-to-Speech Software TTSClient](https://github.com/w-okada/ttsclient)
* [Real-time Speech Recognition Software ASRClient](https://github.com/w-okada/asrclient)
## Features of VC Client
## Supports various AI models
| AI Model | v.2 | v.1 | License |
| --- | --- | --- | --- |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | Please refer to the repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [Proprietary](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [Proprietary](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | Please refer to the repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | Please refer to the repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | Please refer to the repository. |
## Supports both standalone and network configurations
Supports voice conversion completed on a local PC as well as voice conversion via network.
By using it over a network, you can offload the voice conversion load externally when using it simultaneously with high-load applications such as games.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatible with multiple platforms
Windows, Mac(M1), Linux, Google Colab
*1 Please clone the repository for Linux use.
## Provides REST API
Clients can be created in various programming languages.
You can also operate it using HTTP clients built into the OS, such as curl.
## Troubleshooting
[Communication issues](tutorials/trouble_shoot_communication_ja.md)
## About Developer Signature
This software is not signed by the developer. A warning will appear as shown below, but you can run it by clicking the icon while holding down the control key. This is due to Apple's security policy. Execution is at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [Tachizundamon Materials](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Amitaro's Voice Material Workshop](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Terms of Use
* Regarding the real-time voice changer Tsukuyomi-chan, it is prohibited to use the converted voice for the following purposes in accordance with the terms of use of the Tsukuyomi-chan corpus.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Regarding the real-time voice changer Amitaro, it complies with the following terms of use of Amitaro's Voice Material Workshop. For details, see [here](https://amitaro.net/voice/faq/#index_id6).
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Regarding the real-time voice changer Koto Mahiro, it complies with the terms of use of Replica Doll. For details, see [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service).
## Disclaimer
We are not responsible for any direct, indirect, consequential, or special damages arising from the use or inability to use this software.

docs_i18n/README_el.md (new file, 148 lines)

@ -0,0 +1,148 @@
[Japanese](/README.md) /
[English](/docs_i18n/README_en.md) /
[Korean](/docs_i18n/README_ko.md)/
[Chinese](/docs_i18n/README_zh.md)/
[German](/docs_i18n/README_de.md)/
[Arabic](/docs_i18n/README_ar.md)/
[Greek](/docs_i18n/README_el.md)/
[Spanish](/docs_i18n/README_es.md)/
[French](/docs_i18n/README_fr.md)/
[Italian](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malay](/docs_i18n/README_ms.md)/
[Russian](/docs_i18n/README_ru.md)
*Languages other than Japanese are machine translated.
## VCClient
VCClient is software that performs real-time voice conversion using AI.
## What's New!
* v.2.0.78-beta
* bugfix: Avoided upload error for RVC model
* Now possible to run simultaneously with ver.1.x
* Increased selectable chunk sizes
* v.2.0.77-beta (only for RTX 5090, experimental)
* Support for RTX 5090 related modules (not verified, as the developer does not own an RTX 5090)
* v.2.0.76-beta
* new feature:
* Beatrice: Implementation of speaker merge
* Beatrice: Auto pitch shift
* bugfix:
* Fixed issue with device selection in server mode
* v.2.0.73-beta
* new feature:
* Download edited Beatrice model
* bugfix:
* Fixed a bug where pitch and formant of Beatrice v2 were not reflected
* Fixed a bug where ONNX could not be created for models using Applio's embedder
## Download and Related Links
Windows and M1 Mac versions can be downloaded from the Hugging Face repository.
* [VCClient Repository](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 Repository](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Please clone the repository for Linux use.
### Related Links
* [Beatrice V2 Training Code Repository](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 Training Code Colab Version](https://github.com/w-okada/beatrice-trainer-colab)
### Related Software
* [Real-time Voice Changer VCClient](https://github.com/w-okada/voice-changer)
* [Text-to-Speech Software TTSClient](https://github.com/w-okada/ttsclient)
* [Real-time Speech Recognition Software ASRClient](https://github.com/w-okada/asrclient)
## Features of VC Client
## Supports various AI models
| AI Model | v.2 | v.1 | License |
| --- | --- | --- | --- |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | Please refer to the repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [Proprietary](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [Proprietary](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | Please refer to the repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | Please refer to the repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | Please refer to the repository. |
## Supports both standalone and network configurations
Supports voice conversion completed on a local PC as well as voice conversion via network.
By using it over a network, you can offload the voice conversion load externally when using it simultaneously with high-load applications such as games.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatible with multiple platforms
Windows, Mac(M1), Linux, Google Colab
*1 Please clone the repository for Linux use.
## Provides REST API
Clients can be created in various programming languages.
You can also operate it using HTTP clients built into the OS, such as curl.
## Troubleshooting
[Communication issues](tutorials/trouble_shoot_communication_ja.md)
## About Developer Signature
This software is not signed by the developer. A warning will appear as shown below, but you can run it by clicking the icon while holding down the control key. This is due to Apple's security policy. Execution is at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [Tachizundamon Materials](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Amitaro's Voice Material Workshop](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Terms of Use
* Regarding the real-time voice changer Tsukuyomi-chan, it is prohibited to use the converted voice for the following purposes in accordance with the terms of use of the Tsukuyomi-chan corpus.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Regarding the real-time voice changer Amitaro, it complies with the following terms of use of Amitaro's Voice Material Workshop. For details, see [here](https://amitaro.net/voice/faq/#index_id6).
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Regarding the real-time voice changer Koto Mahiro, it complies with the terms of use of Replica Doll. For details, see [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service).
## Disclaimer
We are not responsible for any direct, indirect, consequential, or special damages arising from the use or inability to use this software.

docs_i18n/README_en.md (new file, 148 lines)

@ -0,0 +1,148 @@
[Japanese](/README.md) /
[English](/docs_i18n/README_en.md) /
[Korean](/docs_i18n/README_ko.md)/
[Chinese](/docs_i18n/README_zh.md)/
[German](/docs_i18n/README_de.md)/
[Arabic](/docs_i18n/README_ar.md)/
[Greek](/docs_i18n/README_el.md)/
[Spanish](/docs_i18n/README_es.md)/
[French](/docs_i18n/README_fr.md)/
[Italian](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malay](/docs_i18n/README_ms.md)/
[Russian](/docs_i18n/README_ru.md)
*Languages other than Japanese are machine translated.
## VCClient
VCClient is software that performs real-time voice conversion using AI.
## What's New!
* v.2.0.78-beta
* bugfix: Avoided upload error for RVC model
* Now possible to run simultaneously with ver.1.x
* Increased selectable chunk sizes
* v.2.0.77-beta (only for RTX 5090, experimental)
* Support for RTX 5090 related modules (not verified, as the developer does not own an RTX 5090)
* v.2.0.76-beta
* new feature:
* Beatrice: Implementation of speaker merge
* Beatrice: Auto pitch shift
* bugfix:
* Fixed issue with device selection in server mode
* v.2.0.73-beta
* new feature:
* Download edited Beatrice model
* bugfix:
* Fixed a bug where pitch and formant of Beatrice v2 were not reflected
* Fixed a bug where ONNX could not be created for models using Applio's embedder
## Download and Related Links
Windows and M1 Mac versions can be downloaded from the Hugging Face repository.
* [VCClient Repository](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 Repository](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Please clone the repository for Linux use.
### Related Links
* [Beatrice V2 Training Code Repository](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 Training Code Colab Version](https://github.com/w-okada/beatrice-trainer-colab)
### Related Software
* [Real-time Voice Changer VCClient](https://github.com/w-okada/voice-changer)
* [Text-to-Speech Software TTSClient](https://github.com/w-okada/ttsclient)
* [Real-time Speech Recognition Software ASRClient](https://github.com/w-okada/asrclient)
## Features of VC Client
## Supports various AI models
| AI Model | v.2 | v.1 | License |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | Please refer to the repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [Proprietary](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [Proprietary](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | Please refer to the repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | Please refer to the repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | Please refer to the repository. |
## Supports both standalone and network configurations
Supports voice conversion completed on a local PC as well as voice conversion via network.
By using it over a network, you can offload the voice conversion load externally when using it simultaneously with high-load applications such as games.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatible with multiple platforms
Windows, Mac(M1), Linux, Google Colab
*1 Please clone the repository for Linux use.
## Provides REST API
Clients can be created in various programming languages.
You can also operate it using HTTP clients built into the OS, such as curl.
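For instance, a minimal client needs nothing beyond a single HTTP request (a TypeScript sketch; the port and path below are placeholders, not documented routes, so check your running server for the actual API):
```typescript
// Hypothetical endpoint and port, for illustration only.
const res = await fetch("http://localhost:18888/api/hello");
console.log(await res.json());
```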
## Troubleshooting
[Communication issues](tutorials/trouble_shoot_communication_ja.md)
## About Developer Signature
This software is not signed by the developer. A warning will appear as shown below, but you can run it by clicking the icon while holding down the control key. This is due to Apple's security policy. Execution is at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [Tachizundamon Materials](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Amitaro's Voice Material Workshop](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Terms of Use
* Regarding the real-time voice changer Tsukuyomi-chan, it is prohibited to use the converted voice for the following purposes in accordance with the terms of use of the Tsukuyomi-chan corpus.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Regarding the real-time voice changer Amitaro, it complies with the following terms of use of Amitaro's Voice Material Workshop. For details, see [here](https://amitaro.net/voice/faq/#index_id6).
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Regarding the real-time voice changer Koto Mahiro, it complies with the terms of use of Replica Doll. For details, see [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service).
## Disclaimer
We are not responsible for any direct, indirect, consequential, or special damages arising from the use or inability to use this software.

docs_i18n/README_es.md (new file, 148 lines)

@ -0,0 +1,148 @@
[Japanese](/README.md) /
[English](/docs_i18n/README_en.md) /
[Korean](/docs_i18n/README_ko.md)/
[Chinese](/docs_i18n/README_zh.md)/
[German](/docs_i18n/README_de.md)/
[Arabic](/docs_i18n/README_ar.md)/
[Greek](/docs_i18n/README_el.md)/
[Spanish](/docs_i18n/README_es.md)/
[French](/docs_i18n/README_fr.md)/
[Italian](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malay](/docs_i18n/README_ms.md)/
[Russian](/docs_i18n/README_ru.md)
*Languages other than Japanese are machine translated.
## VCClient
VCClient is software that performs real-time voice conversion using AI.
## What's New!
* v.2.0.78-beta
* bugfix: Avoided upload error for RVC model
* Now possible to run simultaneously with ver.1.x
* Increased selectable chunk sizes
* v.2.0.77-beta (only for RTX 5090, experimental)
* Support for RTX 5090 related modules (not verified, as the developer does not own an RTX 5090)
* v.2.0.76-beta
* new feature:
* Beatrice: Implementation of speaker merge
* Beatrice: Auto pitch shift
* bugfix:
* Fixed issue with device selection in server mode
* v.2.0.73-beta
* new feature:
* Download edited Beatrice model
* bugfix:
* Fixed a bug where pitch and formant of Beatrice v2 were not reflected
* Fixed a bug where ONNX could not be created for models using Applio's embedder
## Download and Related Links
Windows and M1 Mac versions can be downloaded from the Hugging Face repository.
* [VCClient Repository](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 Repository](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Please clone the repository for Linux use.
### Related Links
* [Beatrice V2 Training Code Repository](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 Training Code Colab Version](https://github.com/w-okada/beatrice-trainer-colab)
### Related Software
* [Real-time Voice Changer VCClient](https://github.com/w-okada/voice-changer)
* [Text-to-Speech Software TTSClient](https://github.com/w-okada/ttsclient)
* [Real-time Speech Recognition Software ASRClient](https://github.com/w-okada/asrclient)
## Features of VC Client
## Supports various AI models
| AI Model | v.2 | v.1 | License |
| --- | --- | --- | --- |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | Please refer to the repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [Proprietary](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [Proprietary](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | Please refer to the repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | Please refer to the repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | Please refer to the repository. |
## Supports both standalone and network configurations
Supports voice conversion completed on a local PC as well as voice conversion via network.
By using it over a network, you can offload the voice conversion load externally when using it simultaneously with high-load applications such as games.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatible with multiple platforms
Windows, Mac(M1), Linux, Google Colab
*1 Please clone the repository for Linux use.
## Provides REST API
Clients can be created in various programming languages.
You can also operate it using HTTP clients built into the OS, such as curl.
## Troubleshooting
[Communication issues](tutorials/trouble_shoot_communication_ja.md)
## About Developer Signature
This software is not signed by the developer. A warning will appear as shown below, but you can run it by clicking the icon while holding down the control key. This is due to Apple's security policy. Execution is at your own risk.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [Tachizundamon Materials](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Amitaro's Voice Material Workshop](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Terms of Use
* Regarding the real-time voice changer Tsukuyomi-chan, it is prohibited to use the converted voice for the following purposes in accordance with the terms of use of the Tsukuyomi-chan corpus.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Regarding the real-time voice changer Amitaro, it complies with the following terms of use of Amitaro's Voice Material Workshop. For details, see [here](https://amitaro.net/voice/faq/#index_id6).
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Regarding the real-time voice changer Koto Mahiro, it complies with the terms of use of Replica Doll. For details, see [here](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service).
## Disclaimer
We are not responsible for any direct, indirect, consequential, or special damages arising from the use or inability to use this software.

docs_i18n/README_fr.md (new file, 148 lines)

@ -0,0 +1,148 @@
[Japonais](/README.md) /
[Anglais](/docs_i18n/README_en.md) /
[Coréen](/docs_i18n/README_ko.md)/
[Chinois](/docs_i18n/README_zh.md)/
[Allemand](/docs_i18n/README_de.md)/
[Arabe](/docs_i18n/README_ar.md)/
[Grec](/docs_i18n/README_el.md)/
[Espagnol](/docs_i18n/README_es.md)/
[Français](/docs_i18n/README_fr.md)/
[Italien](/docs_i18n/README_it.md)/
[Latin](/docs_i18n/README_la.md)/
[Malais](/docs_i18n/README_ms.md)/
[Russe](/docs_i18n/README_ru.md)
*Les langues autres que le japonais sont traduites automatiquement.
## VCClient
VCClient est un logiciel qui utilise l'IA pour effectuer une conversion vocale en temps réel.
## What's New!
* v.2.0.78-beta
* correction de bug : évitement de l'erreur de téléchargement du modèle RVC
* Il est désormais possible de lancer simultanément avec la version 1.x
* Augmentation des tailles de chunk sélectionnables
* v.2.0.77-beta (uniquement pour RTX 5090, expérimental)
* Support des modules liés à RTX 5090 (non vérifié car le développeur ne possède pas de RTX 5090)
* v.2.0.76-beta
* nouvelle fonctionnalité :
* Beatrice : Implémentation de la fusion des locuteurs
* Beatrice : Pitch shift automatique
* correction de bug :
* Correction d'un problème lors de la sélection de l'appareil en mode serveur
* v.2.0.73-beta
* nouvelle fonctionnalité :
* Téléchargement du modèle Beatrice modifié
* correction de bug :
* Correction du bug où le pitch et le formant de Beatrice v2 n'étaient pas appliqués
* Correction du bug empêchant la création de l'ONNX pour les modèles utilisant l'embedder d'Applio
## Téléchargement et liens associés
Les versions Windows et Mac M1 peuvent être téléchargées depuis le référentiel hugging face.
* [Référentiel de VCClient](https://huggingface.co/wok000/vcclient000/tree/main)
* [Référentiel de Light VCClient pour Beatrice v2](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Pour Linux, veuillez cloner le référentiel pour l'utiliser.
### Liens associés
* [Référentiel de code d'entraînement Beatrice V2](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Version Colab du code d'entraînement Beatrice V2](https://github.com/w-okada/beatrice-trainer-colab)
### Logiciels associés
* [Changeur de voix en temps réel VCClient](https://github.com/w-okada/voice-changer)
* [Logiciel de synthèse vocale TTSClient](https://github.com/w-okada/ttsclient)
* [Logiciel de reconnaissance vocale en temps réel ASRClient](https://github.com/w-okada/asrclient)
## Caractéristiques de VC Client
## Prend en charge divers modèles d'IA
| Modèle d'IA | v.2 | v.1 | Licence |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | pris en charge | pris en charge | Veuillez consulter le référentiel. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | pris en charge (uniquement Windows) | [Propriétaire](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | pris en charge | n/a | [Propriétaire](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | pris en charge | Veuillez consulter le référentiel. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | pris en charge | Veuillez consulter le référentiel. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | pris en charge | Veuillez consulter le référentiel. |
## Prend en charge les configurations autonomes et via réseau
Prend en charge la conversion vocale entièrement sur PC local ainsi que via réseau.
En utilisant via réseau, la charge de conversion vocale peut être déportée à l'extérieur lors de l'utilisation simultanée avec des applications à forte charge comme les jeux.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatible avec plusieurs plateformes
Windows, Mac(M1), Linux, Google Colab
*1 Pour Linux, veuillez cloner le référentiel pour l'utiliser.
## Fournit une API REST
Vous pouvez créer des clients dans divers langages de programmation.
Vous pouvez également utiliser des clients HTTP intégrés au système d'exploitation comme curl pour les opérations.
## Dépannage
[Communication](tutorials/trouble_shoot_communication_ja.md)
## À propos de la signature du développeur
Ce logiciel n'est pas signé par le développeur. Un avertissement s'affiche comme ci-dessous, mais vous pouvez l'exécuter en cliquant sur l'icône tout en maintenant la touche Contrôle. Ceci est dû à la politique de sécurité d'Apple. L'exécution est à vos propres risques.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Remerciements
* [Matériel de Tachi Zundamon](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Atelier de voix d'Amitaro](https://amitaro.net/)
* [Replika Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Conditions d'utilisation
* En ce qui concerne le changeur de voix en temps réel Tsukuyomi-chan, l'utilisation de la voix convertie est interdite aux fins suivantes, conformément aux conditions d'utilisation du corpus Tsukuyomi-chan.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* En ce qui concerne le changeur de voix en temps réel Amitaro, il est conforme aux conditions d'utilisation de l'atelier de voix d'Amitaro. Pour plus de détails, [ici](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* En ce qui concerne le changeur de voix en temps réel Koto Mahiro, il est conforme aux conditions d'utilisation de Replika Doll. Pour plus de détails, [ici](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## Clause de non-responsabilité
Nous déclinons toute responsabilité pour tout dommage direct, indirect, consécutif, résultant ou spécial causé par l'utilisation ou l'incapacité d'utiliser ce logiciel.

docs_i18n/README_it.md Normal file

@ -0,0 +1,148 @@
[Giapponese](/README.md) /
[Inglese](/docs_i18n/README_en.md) /
[Coreano](/docs_i18n/README_ko.md)/
[Cinese](/docs_i18n/README_zh.md)/
[Tedesco](/docs_i18n/README_de.md)/
[Arabo](/docs_i18n/README_ar.md)/
[Greco](/docs_i18n/README_el.md)/
[Spagnolo](/docs_i18n/README_es.md)/
[Francese](/docs_i18n/README_fr.md)/
[Italiano](/docs_i18n/README_it.md)/
[Latino](/docs_i18n/README_la.md)/
[Malese](/docs_i18n/README_ms.md)/
[Russo](/docs_i18n/README_ru.md)
*Le lingue diverse dal giapponese sono tradotte automaticamente.
## VCClient
VCClient è un software che utilizza l'IA per la conversione vocale in tempo reale.
## What's New!
* v.2.0.78-beta
* correzione bug: evitato errore di upload del modello RVC
* Ora è possibile l'avvio simultaneo con la versione 1.x
* Aumentate le dimensioni dei chunk selezionabili
* v.2.0.77-beta (solo per RTX 5090, sperimentale)
* Supporto per moduli relativi a RTX 5090 (non verificato poiché lo sviluppatore non possiede RTX 5090)
* v.2.0.76-beta
* nuova funzionalità:
* Beatrice: Implementazione della fusione degli speaker
* Beatrice: Auto pitch shift
* correzione bug:
* Risolto il problema nella selezione del dispositivo in modalità server
* v.2.0.73-beta
* nuova funzionalità:
* Download del modello beatrice modificato
* correzione bug:
* Corretto un bug per cui pitch e formant di beatrice v2 non venivano applicati
* Corretto un bug per cui non era possibile creare ONNX per i modelli che utilizzano l'embedder di Applio
## Download e link correlati
Le versioni per Windows e Mac M1 possono essere scaricate dal repository di hugging face.
* [Repository di VCClient](https://huggingface.co/wok000/vcclient000/tree/main)
* [Repository di Light VCClient per Beatrice v2](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Per Linux, clona il repository per l'uso.
### Link correlati
* [Repository del codice di allenamento Beatrice V2](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Versione Colab del codice di allenamento Beatrice V2](https://github.com/w-okada/beatrice-trainer-colab)
### Software correlato
* [Cambiavoce in tempo reale VCClient](https://github.com/w-okada/voice-changer)
* [Software di sintesi vocale TTSClient](https://github.com/w-okada/ttsclient)
* [Software di riconoscimento vocale in tempo reale ASRClient](https://github.com/w-okada/asrclient)
## Caratteristiche di VC Client
## Supporta vari modelli di IA
| Modello di IA | v.2 | v.1 | Licenza |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supportato | supportato | Si prega di consultare il repository. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supportato (solo win) | [Proprietario](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supportato | n/a | [Proprietario](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supportato | Si prega di consultare il repository. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supportato | Si prega di consultare il repository. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supportato | Si prega di consultare il repository. |
## Supporta sia la configurazione standalone che tramite rete
Supporta sia la conversione vocale completata su PC locale che tramite rete.
Utilizzando tramite rete, è possibile scaricare il carico della conversione vocale su un dispositivo esterno quando si utilizzano applicazioni ad alto carico come i giochi.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Compatibile con più piattaforme
Windows, Mac(M1), Linux, Google Colab
*1 Per Linux, clona il repository per l'uso.
## Fornisce un'API REST
È possibile creare client in vari linguaggi di programmazione.
È inoltre possibile operare utilizzando client HTTP incorporati nel sistema operativo come curl.
## Risoluzione dei problemi
[Sezione comunicazione](tutorials/trouble_shoot_communication_ja.md)
## Informazioni sulla firma dello sviluppatore
Questo software non è firmato dallo sviluppatore. Anche se viene visualizzato un avviso come di seguito, è possibile eseguirlo facendo clic sull'icona tenendo premuto il tasto di controllo. Questo è dovuto alla politica di sicurezza di Apple. L'esecuzione è a proprio rischio.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Ringraziamenti
* [Materiale di Tachi Zundamon](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Atelier di materiali vocali di Amitaro](https://amitaro.net/)
* [Replica Doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Termini di utilizzo
* Per quanto riguarda il cambiavoce in tempo reale Tsukuyomi-chan, è vietato utilizzare la voce convertita per i seguenti scopi in conformità con i termini di utilizzo del corpus di Tsukuyomi-chan.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Per quanto riguarda il cambiavoce in tempo reale Amitaro, si applicano i seguenti termini di utilizzo dell'Atelier di materiali vocali di Amitaro. Per dettagli, [qui](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Per quanto riguarda il cambiavoce in tempo reale Koto Mahiro, si applicano i termini di utilizzo di Replica Doll. Per dettagli, [qui](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## Clausola di esclusione della responsabilità
Non ci assumiamo alcuna responsabilità per eventuali danni diretti, indiretti, consequenziali, risultanti o speciali derivanti dall'uso o dall'impossibilità di utilizzare questo software.

docs_i18n/README_ja.md Normal file

@ -0,0 +1,148 @@
[日本語](/README.md) /
[英語](/docs_i18n/README_en.md) /
[韓国語](/docs_i18n/README_ko.md)/
[中国語](/docs_i18n/README_zh.md)/
[ドイツ語](/docs_i18n/README_de.md)/
[アラビア語](/docs_i18n/README_ar.md)/
[ギリシャ語](/docs_i18n/README_el.md)/
[スペイン語](/docs_i18n/README_es.md)/
[フランス語](/docs_i18n/README_fr.md)/
[イタリア語](/docs_i18n/README_it.md)/
[ラテン語](/docs_i18n/README_la.md)/
[マレー語](/docs_i18n/README_ms.md)/
[ロシア語](/docs_i18n/README_ru.md)
*日本語以外は機械翻訳です。
## VCClient
VCClientは、AIを用いてリアルタイム音声変換を行うソフトウェアです。
## What's New!
* v.2.0.78-beta
* bugfix: RVCモデルのアップロードエラーを回避
* ver.1.x との同時起動ができるようになりました。
* 選択できるchunk sizeを増やしました。
* v.2.0.77-beta (only for RTX 5090, experimental)
* 関連モジュールを5090対応 (開発者がRTX5090未所持のため、動作未検証)
* v.2.0.76-beta
* new feature:
* Beatrice: 話者マージの実装
* Beatrice: オートピッチシフト
* bugfix:
* サーバモードのデバイス選択時の不具合対応
* v.2.0.73-beta
* new feature:
* 編集したbeatrice modelのダウンロード
* bugfix:
* beatrice v2 のpitch, formantが反映されないバグを修正
* Applio のembedderを使用しているモデルのONNXができないバグを修正
## ダウンロードと関連リンク
Windows版、 M1 Mac版はhugging faceのリポジトリからダウンロードできます。
* [VCClient のリポジトリ](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 のリポジトリ](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Linuxはリポジトリをcloneしてお使いください。
### 関連リンク
* [Beatrice V2 トレーニングコードのリポジトリ](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 トレーニングコード Colab版](https://github.com/w-okada/beatrice-trainer-colab)
### 関連ソフトウェア
* [リアルタイムボイスチェンジャ VCClient](https://github.com/w-okada/voice-changer)
* [読み上げソフトウェア TTSClient](https://github.com/w-okada/ttsclient)
* [リアルタイム音声認識ソフトウェア ASRClient](https://github.com/w-okada/asrclient)
## VC Clientの特徴
## 多様なAIモデルをサポート
| AIモデル | v.2 | v.1 | ライセンス |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | リポジトリを参照してください。 |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [独自](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [独自](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | リポジトリを参照してください。 |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | リポジトリを参照してください。 |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | リポジトリを参照してください。 |
## スタンドアロン、ネットワーク経由の両構成をサポート
ローカルPCで完結した音声変換も、ネットワークを介した音声変換もサポートしています。
ネットワークを介した利用を行うことで、ゲームなどの高負荷なアプリケーションと同時に使用する場合に音声変換の負荷を外部にオフロードすることができます。
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## 複数プラットフォームに対応
Windows, Mac(M1), Linux, Google Colab
*1 Linuxはリポジトリをcloneしてお使いください。
## REST APIを提供
各種プログラミング言語でクライアントを作成することができます。
また、curlなどのOSに組み込まれているHTTPクライアントを使って操作ができます。
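上記のREST APIの最小限の動作確認をPythonで示すスケッチです。ポート番号 18888 は仮定であり、起動時に `-p` で指定した値に置き換えてください。エンドポイントのパスはこの差分には記載がないため、ルートへのアクセスのみを示します:
```python
import urllib.request

# 最小限のスケッチ: ローカルで起動中のサーバがHTTPで応答するかを確認する。
# ポート18888は仮定。起動時の -p の値に合わせて置き換えること。
with urllib.request.urlopen("http://localhost:18888/") as resp:
    print(resp.status)  # サーバ起動中なら 200 を期待
```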
## トラブルシュート
[通信編](tutorials/trouble_shoot_communication_ja.md)
## 開発者の署名について
本ソフトウェアは開発元による署名がされておりません。下記のように警告が出ますが、コントロールキーを押しながらアイコンをクリックすると実行できるようになります。これは Apple のセキュリティポリシーによるものです。実行は自己責任となります。
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [立ちずんだもん素材](https://seiga.nicovideo.jp/seiga/im10792934)
* [いらすとや](https://www.irasutoya.com/)
* [つくよみちゃん](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [あみたろの声素材工房](https://amitaro.net/)
* [れぷりかどーる](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## 利用規約
* リアルタイムボイスチェンジャーつくよみちゃんについては、つくよみちゃんコーパスの利用規約に準じ、次の目的で変換後の音声を使用することを禁止します。
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* リアルタイムボイスチェンジャーあみたろについては、あみたろの声素材工房様の次の利用規約に準じます。詳細は[こちら](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* リアルタイムボイスチェンジャー黄琴まひろについては、れぷりかどーるの利用規約に準じます。詳細は[こちら](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## 免責事項
本ソフトウェアの使用または使用不能により生じたいかなる直接損害・間接損害・波及的損害・結果的損害 または特別損害についても、一切責任を負いません。

docs_i18n/README_ko.md Normal file

@ -0,0 +1,148 @@
[일본어](/README.md) /
[영어](/docs_i18n/README_en.md) /
[한국어](/docs_i18n/README_ko.md)/
[중국어](/docs_i18n/README_zh.md)/
[독일어](/docs_i18n/README_de.md)/
[아랍어](/docs_i18n/README_ar.md)/
[그리스어](/docs_i18n/README_el.md)/
[스페인어](/docs_i18n/README_es.md)/
[프랑스어](/docs_i18n/README_fr.md)/
[이탈리아어](/docs_i18n/README_it.md)/
[라틴어](/docs_i18n/README_la.md)/
[말레이어](/docs_i18n/README_ms.md)/
[러시아어](/docs_i18n/README_ru.md)
*일본어 외에는 기계 번역입니다.
## VCClient
VCClient는 AI를 사용하여 실시간 음성 변환을 수행하는 소프트웨어입니다.
## What's New!
* v.2.0.78-beta
* 버그 수정: RVC 모델 업로드 오류 회피
* ver.1.x와 동시에 실행 가능해졌습니다.
* 선택 가능한 chunk size를 늘렸습니다.
* v.2.0.77-beta (RTX 5090 전용, 실험적)
* RTX 5090 관련 모듈 지원 (개발자가 RTX 5090을 보유하지 않아 검증되지 않음)
* v.2.0.76-beta
* new feature:
* Beatrice: 화자 병합 구현
* Beatrice: 자동 피치 시프트
* bugfix:
* 서버 모드에서 장치 선택 시의 문제 해결
* v.2.0.73-beta
* new feature:
* 편집한 beatrice 모델 다운로드
* bugfix:
* beatrice v2의 pitch, formant가 반영되지 않는 버그를 수정
* Applio의 embedder를 사용하고 있는 모델의 ONNX가 생성되지 않는 버그를 수정
## 다운로드 및 관련 링크
Windows 버전, M1 Mac 버전은 hugging face의 리포지토리에서 다운로드할 수 있습니다.
* [VCClient의 리포지토리](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2의 리포지토리](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Linux는 리포지토리를 클론하여 사용하세요.
### 관련 링크
* [Beatrice V2 트레이닝 코드의 리포지토리](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 트레이닝 코드 Colab 버전](https://github.com/w-okada/beatrice-trainer-colab)
### 관련 소프트웨어
* [실시간 보이스 체인저 VCClient](https://github.com/w-okada/voice-changer)
* [읽기 소프트웨어 TTSClient](https://github.com/w-okada/ttsclient)
* [실시간 음성 인식 소프트웨어 ASRClient](https://github.com/w-okada/asrclient)
## VC Client의 특징
## 다양한 AI 모델을 지원
| AI 모델 | v.2 | v.1 | 라이선스 |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | 리포지토리를 참조하세요. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [독자](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [독자](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | 리포지토리를 참조하세요. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | 리포지토리를 참조하세요. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | 리포지토리를 참조하세요. |
## 독립형, 네트워크 경유의 두 가지 구성을 지원
로컬 PC에서 완료된 음성 변환과 네트워크를 통한 음성 변환을 지원합니다.
네트워크를 통해 사용하면 게임 등 고부하 애플리케이션과 동시에 사용할 때 음성 변환의 부하를 외부로 오프로드할 수 있습니다.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## 다중 플랫폼에 대응
Windows, Mac(M1), Linux, Google Colab
*1 Linux는 리포지토리를 클론하여 사용하세요.
## REST API를 제공
각종 프로그래밍 언어로 클라이언트를 만들 수 있습니다.
또한, curl 등 OS에 내장된 HTTP 클라이언트를 사용하여 조작할 수 있습니다.
## 문제 해결
[통신 편](tutorials/trouble_shoot_communication_ja.md)
## 개발자의 서명에 대해
이 소프트웨어는 개발자의 서명이 되어 있지 않습니다. 아래와 같은 경고가 나오지만, 컨트롤 키를 누른 상태에서 아이콘을 클릭하면 실행할 수 있습니다. 이는 Apple의 보안 정책에 따른 것입니다. 실행은 본인의 책임입니다.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Acknowledgments
* [타치준다몬 소재](https://seiga.nicovideo.jp/seiga/im10792934)
* [일러스트야](https://www.irasutoya.com/)
* [츠쿠요미짱](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [아미타로의 목소리 소재 공방](https://amitaro.net/)
* [레플리카돌](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## 이용 약관
* 실시간 보이스 체인저 츠쿠요미짱에 대해서는 츠쿠요미짱 코퍼스의 이용 약관에 따라 다음 목적에서 변환 후 음성을 사용하는 것을 금지합니다.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* 실시간 보이스 체인저 아미타로에 대해서는 아미타로의 목소리 소재 공방의 다음 이용 약관에 따릅니다. 자세한 내용은 [여기](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* 실시간 보이스 체인저 황금 마히로에 대해서는 레플리카돌의 이용 약관에 따릅니다. 자세한 내용은 [여기](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## 면책 조항
이 소프트웨어의 사용 또는 사용 불가로 인해 발생한 어떠한 직접 손해, 간접 손해, 파급적 손해, 결과적 손해 또는 특별 손해에 대해서도 일체 책임을 지지 않습니다.

docs_i18n/README_la.md Normal file

@ -0,0 +1,148 @@
[Lingua Iaponica](/README.md) /
[Lingua Anglica](/docs_i18n/README_en.md) /
[Lingua Coreana](/docs_i18n/README_ko.md)/
[Lingua Sinica](/docs_i18n/README_zh.md)/
[Lingua Theodisca](/docs_i18n/README_de.md)/
[Lingua Arabica](/docs_i18n/README_ar.md)/
[Lingua Graeca](/docs_i18n/README_el.md)/
[Lingua Hispanica](/docs_i18n/README_es.md)/
[Lingua Francogallica](/docs_i18n/README_fr.md)/
[Lingua Italica](/docs_i18n/README_it.md)/
[Lingua Latina](/docs_i18n/README_la.md)/
[Lingua Malaica](/docs_i18n/README_ms.md)/
[Lingua Russica](/docs_i18n/README_ru.md)
*Praeter linguam Iaponicam, omnes linguae sunt a machina translatae.
## VCClient
VCClient est software quod conversionem vocis in tempore reali per AI facit.
## What's New!
* v.2.0.78-beta
* bugfix: error sublationis RVC exemplaris vitata est
* Nunc simul cum versione 1.x incipere potes
* Auctae sunt chunk magnitudines eligibiles
* v.2.0.77-beta (solum pro RTX 5090, experimentale)
* Auxilium pro modulis RTX 5090 relatis (non verificatum quia auctor non habet RTX 5090)
* v.2.0.76-beta
* nova functio:
* Beatrice: Implementatio coniunctionis loquentium
* Beatrice: Automatica mutatio toni
* bugfix:
* Solutio problematum in delectu machinae in modo servientis
* v.2.0.73-beta
* nova functio:
* Download model Beatrice editum
* bugfix:
* Correctus error ubi pitch et formant Beatrice v2 non reflectuntur
* Correctus error ubi ONNX non potest fieri pro modelis utentibus embedder Applio
## Download et nexus pertinentes
Versiones pro Windows et M1 Mac possunt ex repositorio hugging face depromi.
* [Repositorium VCClient](https://huggingface.co/wok000/vcclient000/tree/main)
* [Repositorium Light VCClient pro Beatrice v2](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Linux utatur repositorio clone.
### Nexus pertinentes
* [Repositorium codicis disciplinae Beatrice V2](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Codex disciplinae Beatrice V2 versio Colab](https://github.com/w-okada/beatrice-trainer-colab)
### Software pertinens
* [Mutator vocis in tempore reali VCClient](https://github.com/w-okada/voice-changer)
* [Software lectionis TTSClient](https://github.com/w-okada/ttsclient)
* [Software recognitionis vocis in tempore reali ASRClient](https://github.com/w-okada/asrclient)
## Proprietates VC Client
## Multa AI exempla sustinet
| Exempla AI | v.2 | v.1 | Licentia |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | sustinetur | sustinetur | Vide repositorium. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | sustinetur (solum win) | [Proprium](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | sustinetur | n/a | [Proprium](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | sustinetur | Vide repositorium. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | sustinetur | Vide repositorium. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | sustinetur | Vide repositorium. |
## Sustinetur tam structura stand-alone quam per rete
Sustinetur conversio vocis in PC locali et per rete.
Per usum per rete, onus conversionis vocis potest externari cum simul cum applicationibus altis oneribus ut ludis adhibetur.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Pluribus suggestis compatitur
Windows, Mac(M1), Linux, Google Colab
*1 Linux utatur repositorio clone.
## REST API praebet
Clientem creare potes in variis linguis programmandi.
Etiam per HTTP clientem in OS incorporatum ut curl operari potes.
## Solutio problematum
[De communicatione](tutorials/trouble_shoot_communication_ja.md)
## De signature auctoris
Hoc software non signatur auctore. Monitio ut infra apparebit, sed si iconem cum claviatura control premes, poteris exsequi. Hoc est secundum securitatem Apple. Exsecutio est tuae responsabilitatis.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Gratias
* [Materia Tachi Zundamon](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasuto ya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Amitaro vox materiae officina](https://amitaro.net/)
* [Reprica doll](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Termini usus
* De mutatore vocis in tempore reali Tsukuyomi-chan, secundum Tsukuyomi-chan corpus usus, prohibetur usus vocis post conversionem ad sequentes fines.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* De mutatore vocis in tempore reali Amitaro, secundum Amitaro vox materiae officinae usus. Singula vide [hic](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* De mutatore vocis in tempore reali Kogane Mahiro, secundum Reprica doll usus. Singula vide [hic](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## Disclaimer
Non tenemur pro ullis damnis directis, indirectis, consequentibus, vel specialibus ex usu vel incapacitate usus huius software.

docs_i18n/README_ms.md Normal file

@ -0,0 +1,148 @@
[Bahasa Jepun](/README.md) /
[Bahasa Inggeris](/docs_i18n/README_en.md) /
[Bahasa Korea](/docs_i18n/README_ko.md)/
[Bahasa Cina](/docs_i18n/README_zh.md)/
[Bahasa Jerman](/docs_i18n/README_de.md)/
[Bahasa Arab](/docs_i18n/README_ar.md)/
[Bahasa Greek](/docs_i18n/README_el.md)/
[Bahasa Sepanyol](/docs_i18n/README_es.md)/
[Bahasa Perancis](/docs_i18n/README_fr.md)/
[Bahasa Itali](/docs_i18n/README_it.md)/
[Bahasa Latin](/docs_i18n/README_la.md)/
[Bahasa Melayu](/docs_i18n/README_ms.md)/
[Bahasa Rusia](/docs_i18n/README_ru.md)
*Selain bahasa Jepun, semua terjemahan adalah terjemahan mesin.
## VCClient
VCClient adalah perisian yang menggunakan AI untuk menukar suara secara masa nyata.
## What's New!
* v.2.0.78-beta
* pembaikan pepijat: Elakkan ralat muat naik model RVC
* Kini boleh dijalankan serentak dengan ver.1.x
* Saiz chunk yang boleh dipilih telah ditambah
* v.2.0.77-beta (hanya untuk RTX 5090, eksperimen)
* Sokongan untuk modul berkaitan RTX 5090 (tidak disahkan kerana pembangun tidak memiliki RTX 5090)
* v.2.0.76-beta
* ciri baru:
* Beatrice: Pelaksanaan penggabungan pembicara
* Beatrice: Auto pitch shift
* pembaikan pepijat:
* Menangani masalah pemilihan peranti dalam mod pelayan
* v.2.0.73-beta
* ciri baru:
* Muat turun model beatrice yang telah diedit
* pembaikan pepijat:
* Memperbaiki pepijat di mana pitch dan formant beatrice v2 tidak diterapkan
* Memperbaiki pepijat di mana ONNX tidak dapat dibuat untuk model yang menggunakan embedder Applio
## Muat Turun dan Pautan Berkaitan
Versi Windows dan M1 Mac boleh dimuat turun dari repositori hugging face.
* [Repositori VCClient](https://huggingface.co/wok000/vcclient000/tree/main)
* [Repositori Light VCClient untuk Beatrice v2](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Sila klon repositori untuk Linux.
### Pautan Berkaitan
* [Repositori Kod Latihan Beatrice V2](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Versi Colab Kod Latihan Beatrice V2](https://github.com/w-okada/beatrice-trainer-colab)
### Perisian Berkaitan
* [Penukar Suara Masa Nyata VCClient](https://github.com/w-okada/voice-changer)
* [Perisian Pembacaan TTSClient](https://github.com/w-okada/ttsclient)
* [Perisian Pengecaman Suara Masa Nyata ASRClient](https://github.com/w-okada/asrclient)
## Ciri-ciri VC Client
## Menyokong pelbagai model AI
| Model AI | v.2 | v.1 | Lesen |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | disokong | disokong | Sila rujuk repositori. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | disokong (hanya win) | [Khas](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | disokong | n/a | [Khas](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | disokong | Sila rujuk repositori. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | disokong | Sila rujuk repositori. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | disokong | Sila rujuk repositori. |
## Menyokong kedua-dua konfigurasi berdiri sendiri dan melalui rangkaian
Menyokong penukaran suara yang lengkap di PC tempatan dan juga melalui rangkaian.
Dengan menggunakan melalui rangkaian, beban penukaran suara boleh dialihkan ke luar apabila digunakan serentak dengan aplikasi yang memerlukan beban tinggi seperti permainan.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Menyokong pelbagai platform
Windows, Mac(M1), Linux, Google Colab
*1 Sila klon repositori untuk Linux.
## Menyediakan REST API
Pelanggan boleh dibina dalam pelbagai bahasa pengaturcaraan.
Juga boleh dikendalikan menggunakan klien HTTP yang dibina dalam OS seperti curl.
## Penyelesaian Masalah
[Bahagian Komunikasi](tutorials/trouble_shoot_communication_ja.md)
## Mengenai Tandatangan Pembangun
Perisian ini tidak ditandatangani oleh pembangun. Amaran seperti di bawah akan muncul, tetapi anda boleh menjalankannya dengan menekan kekunci kawalan sambil mengklik ikon. Ini adalah disebabkan oleh dasar keselamatan Apple. Pelaksanaan adalah atas tanggungjawab sendiri.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Penghargaan
* [Bahan Tachizundamon](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Studio Bahan Suara Amitaro](https://amitaro.net/)
* [Replikadol](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Syarat Penggunaan
* Mengenai penukar suara masa nyata Tsukuyomi-chan, penggunaan suara yang ditukar untuk tujuan berikut adalah dilarang mengikut syarat penggunaan korpus Tsukuyomi-chan.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Mengenai penukar suara masa nyata Amitaro, ia mematuhi syarat penggunaan Studio Bahan Suara Amitaro. Untuk maklumat lanjut, sila lihat[di sini](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Mengenai penukar suara masa nyata Kogane Mahiro, ia mematuhi syarat penggunaan Replikadol. Untuk maklumat lanjut, sila lihat[di sini](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## Penafian
Kami tidak bertanggungjawab ke atas sebarang kerosakan langsung, tidak langsung, berbangkit, akibat atau khas yang timbul daripada penggunaan atau ketidakupayaan untuk menggunakan perisian ini.

docs_i18n/README_ru.md Normal file

@ -0,0 +1,148 @@
[японский](/README.md) /
[английский](/docs_i18n/README_en.md) /
[корейский](/docs_i18n/README_ko.md)/
[китайский](/docs_i18n/README_zh.md)/
[немецкий](/docs_i18n/README_de.md)/
[арабский](/docs_i18n/README_ar.md)/
[греческий](/docs_i18n/README_el.md)/
[испанский](/docs_i18n/README_es.md)/
[французский](/docs_i18n/README_fr.md)/
[итальянский](/docs_i18n/README_it.md)/
[латинский](/docs_i18n/README_la.md)/
[малайский](/docs_i18n/README_ms.md)/
[русский](/docs_i18n/README_ru.md)
*Кроме японского, все переводы выполнены машинным переводом.
## VCClient
VCClient — это программное обеспечение, использующее ИИ для преобразования голоса в реальном времени.
## Что нового!
* v.2.0.78-beta
* Исправление ошибки: предотвращена ошибка загрузки модели RVC
* Теперь возможно одновременное использование с версией 1.x
* Увеличено количество доступных размеров chunk
* v.2.0.77-beta (только для RTX 5090, экспериментальная)
* Поддержка модулей, связанных с RTX 5090 (не проверено, так как разработчик не имеет RTX 5090)
* v.2.0.76-beta
* новая функция:
* Beatrice: реализация слияния говорящих
* Beatrice: автоматический сдвиг тона
* исправление ошибок:
* Исправление ошибки при выборе устройства в серверном режиме
* v.2.0.73-beta
* новая функция:
* Загрузка отредактированной модели beatrice
* исправление ошибок:
* Исправлена ошибка, из-за которой pitch и formant в beatrice v2 не применялись
* Исправлена ошибка, из-за которой ONNX не создавался для моделей, использующих embedder Applio
## Загрузки и связанные ссылки
Версии для Windows и M1 Mac можно скачать из репозитория hugging face.
* [Репозиторий VCClient](https://huggingface.co/wok000/vcclient000/tree/main)
* [Репозиторий Light VCClient для Beatrice v2](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Для Linux клонируйте репозиторий.
### Связанные ссылки
* [Репозиторий кода обучения Beatrice V2](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Код обучения Beatrice V2 для Colab](https://github.com/w-okada/beatrice-trainer-colab)
### Связанное программное обеспечение
* [Реалтайм голосовой преобразователь VCClient](https://github.com/w-okada/voice-changer)
* [Программное обеспечение для чтения текста TTSClient](https://github.com/w-okada/ttsclient)
* [Программное обеспечение для распознавания речи в реальном времени ASRClient](https://github.com/w-okada/asrclient)
## Особенности VC Client
## Поддержка различных моделей ИИ
| Модель ИИ | v.2 | v.1 | Лицензия |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | поддерживается | поддерживается | См. репозиторий. |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | поддерживается (только win) | [собственная](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | поддерживается | n/a | [собственная](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | поддерживается | См. репозиторий. |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | поддерживается | См. репозиторий. |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | поддерживается | См. репозиторий. |
## Поддержка как автономной, так и сетевой конфигурации
Поддерживается как локальное преобразование голоса на ПК, так и преобразование через сеть.
Использование через сеть позволяет разгрузить преобразование голоса на внешние ресурсы при одновременном использовании с ресурсоемкими приложениями, такими как игры.
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## Поддержка нескольких платформ
Windows, Mac(M1), Linux, Google Colab
*1 Для Linux клонируйте репозиторий.
## Предоставление REST API
Можно создавать клиентов на различных языках программирования.
Также можно управлять с помощью встроенных в ОС HTTP-клиентов, таких как curl.
## Устранение неполадок
[Связь](tutorials/trouble_shoot_communication_ja.md)
## О подписи разработчика
Это программное обеспечение не подписано разработчиком. Появится предупреждение, как показано ниже, но вы можете запустить его, нажав на иконку, удерживая клавишу Control. Это связано с политикой безопасности Apple. Запуск осуществляется на ваш страх и риск.
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## Благодарности
* [Материалы от Tachi Zundamon](https://seiga.nicovideo.jp/seiga/im10792934)
* [Irasutoya](https://www.irasutoya.com/)
* [Tsukuyomi-chan](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [Голосовые материалы от Amitaro](https://amitaro.net/)
* [Replikador](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## Условия использования
* Что касается реалтайм голосового преобразователя Tsukuyomi-chan, использование преобразованного голоса запрещено для следующих целей в соответствии с условиями использования корпуса Tsukuyomi-chan.
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* Что касается реалтайм голосового преобразователя Amitaro, он подчиняется следующим условиям использования от Amitaro's Voice Material Workshop. Подробности [здесь](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* Что касается реалтайм голосового преобразователя Kogane Mahiro, он подчиняется условиям использования Replikador. Подробности [здесь](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## Отказ от ответственности
Мы не несем ответственности за любые прямые, косвенные, побочные, последующие или особые убытки, возникшие в результате использования или невозможности использования этого программного обеспечения.

docs_i18n/README_zh.md Normal file

@ -0,0 +1,148 @@
[日语](/README.md) /
[英语](/docs_i18n/README_en.md) /
[韩语](/docs_i18n/README_ko.md)/
[中文](/docs_i18n/README_zh.md)/
[德语](/docs_i18n/README_de.md)/
[阿拉伯语](/docs_i18n/README_ar.md)/
[希腊语](/docs_i18n/README_el.md)/
[西班牙语](/docs_i18n/README_es.md)/
[法语](/docs_i18n/README_fr.md)/
[意大利语](/docs_i18n/README_it.md)/
[拉丁语](/docs_i18n/README_la.md)/
[马来语](/docs_i18n/README_ms.md)/
[俄语](/docs_i18n/README_ru.md)
*除日语外,其他语言均为机器翻译。
## VCClient
VCClient是一款利用AI进行实时语音转换的软件。
## What's New!
* v.2.0.78-beta
* bug修复:避免RVC模型上传错误
* 现在可以与ver.1.x同时启动
* 增加了可选择的chunk size
* v.2.0.77-beta (仅适用于 RTX 5090实验性)
* 相关模块支持 RTX 5090由于开发者未拥有 RTX 5090未经验证
* v.2.0.76-beta
* 新功能:
* Beatrice: 实现说话者合并
* Beatrice: 自动音高转换
* 错误修复:
* 修复服务器模式下设备选择的问题
* v.2.0.73-beta
* 新功能:
* 下载编辑后的beatrice模型
* 错误修复:
* 修复了beatrice v2的音高和共振峰未反映的错误
* 修复了使用Applio的embedder的模型无法生成ONNX的错误
## 下载和相关链接
Windows版、M1 Mac版可以从hugging face的仓库下载。
* [VCClient 的仓库](https://huggingface.co/wok000/vcclient000/tree/main)
* [Light VCClient for Beatrice v2 的仓库](https://huggingface.co/wok000/light_vcclient_beatrice/tree/main)
*1 Linux请克隆仓库使用。
### 相关链接
* [Beatrice V2 训练代码的仓库](https://huggingface.co/fierce-cats/beatrice-trainer)
* [Beatrice V2 训练代码 Colab版](https://github.com/w-okada/beatrice-trainer-colab)
### 相关软件
* [实时变声器 VCClient](https://github.com/w-okada/voice-changer)
* [语音合成软件 TTSClient](https://github.com/w-okada/ttsclient)
* [实时语音识别软件 ASRClient](https://github.com/w-okada/asrclient)
## VC Client的特点
## 支持多种AI模型
| AI模型 | v.2 | v.1 | 许可证 |
| ------------------------------------------------------------------------------------------------------------ | --------- | -------------------- | ------------------------------------------------------------------------------------------ |
| [RVC ](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/jp/README.ja.md) | supported | supported | 请参阅仓库。 |
| [Beatrice v1](https://prj-beatrice.com/) | n/a | supported (only win) | [独立](https://github.com/w-okada/voice-changer/tree/master/server/voice_changer/Beatrice) |
| [Beatrice v2](https://prj-beatrice.com/) | supported | n/a | [独立](https://huggingface.co/wok000/vcclient_model/blob/main/beatrice_v2_beta/readme.md) |
| [MMVC](https://github.com/isletennos/MMVC_Trainer) | n/a | supported | 请参阅仓库。 |
| [so-vits-svc](https://github.com/svc-develop-team/so-vits-svc) | n/a | supported | 请参阅仓库。 |
| [DDSP-SVC](https://github.com/yxlllc/DDSP-SVC) | n/a | supported | 请参阅仓库。 |
## 支持独立和通过网络的两种配置
支持在本地PC上完成的语音转换和通过网络的语音转换。
通过网络使用时,可以在与游戏等高负荷应用程序同时使用时将语音转换的负荷转移到外部。
![image](https://user-images.githubusercontent.com/48346627/206640768-53f6052d-0a96-403b-a06c-6714a0b7471d.png)
## 支持多平台
Windows, Mac(M1), Linux, Google Colab
*1 Linux请克隆仓库使用。
## 提供REST API
可以用各种编程语言创建客户端。
还可以使用curl等操作系统内置的HTTP客户端进行操作。
## 故障排除
[通信篇](tutorials/trouble_shoot_communication_ja.md)
## 关于开发者的签名
本软件未由开发者签名。虽然会出现如下警告但按住Control键并点击图标即可运行。这是由于Apple的安全策略所致。运行需自行承担风险。
![image](https://user-images.githubusercontent.com/48346627/212567711-c4a8d599-e24c-4fa3-8145-a5df7211f023.png)
## 致谢
* [立ちずんだもん素材](https://seiga.nicovideo.jp/seiga/im10792934)
* [いらすとや](https://www.irasutoya.com/)
* [つくよみちゃん](https://tyc.rei-yumesaki.net/)
```
本ソフトウェアの音声合成には、フリー素材キャラクター「つくよみちゃん」が無料公開している音声データを使用しています。
■つくよみちゃんコーパス(CV.夢前黎)
https://tyc.rei-yumesaki.net/material/corpus/
© Rei Yumesaki
```
* [あみたろの声素材工房](https://amitaro.net/)
* [れぷりかどーる](https://kikyohiroto1227.wixsite.com/kikoto-utau)
## 使用条款
* 关于实时变声器つくよみちゃん,禁止将转换后的语音用于以下目的,遵循つくよみちゃん语料库的使用条款。
```
■人を批判・攻撃すること。(「批判・攻撃」の定義は、つくよみちゃんキャラクターライセンスに準じます)
■特定の政治的立場・宗教・思想への賛同または反対を呼びかけること。
■刺激の強い表現をゾーニングなしで公開すること。
■他者に対して二次利用(素材としての利用)を許可する形で公開すること。
※鑑賞用の作品として配布・販売していただくことは問題ございません。
```
* 关于实时变声器あみたろ,遵循あみたろの声素材工房的以下使用条款。详情请见[这里](https://amitaro.net/voice/faq/#index_id6)
```
あみたろの声素材やコーパス読み上げ音声を使って音声モデルを作ったり、ボイスチェンジャーや声質変換などを使用して、自分の声をあみたろの声に変換して使うのもOKです。
ただしその場合は絶対に、あみたろ(もしくは小春音アミ)の声に声質変換していることを明記し、あみたろ(および小春音アミ)が話しているわけではないことが誰でもわかるようにしてください。
また、あみたろの声で話す内容は声素材の利用規約の範囲内のみとし、センシティブな発言などはしないでください。
```
* 关于实时变声器黄琴まひろ,遵循れぷりかどーる的使用条款。详情请见[这里](https://kikyohiroto1227.wixsite.com/kikoto-utau/ter%EF%BD%8Ds-of-service)
## 免责声明
对于因使用或无法使用本软件而导致的任何直接、间接、衍生、结果性或特殊损害,本软件概不负责。


@ -58,12 +58,16 @@ def setupArgParser():
parser.add_argument("--hubert_base", type=str, default="pretrain/hubert_base.pt", help="path to hubert_base model(pytorch)")
parser.add_argument("--hubert_base_jp", type=str, default="pretrain/rinna_hubert_base_jp.pt", help="path to hubert_base_jp model(pytorch)")
parser.add_argument("--hubert_soft", type=str, default="pretrain/hubert/hubert-soft-0d54a1f4.pt", help="path to hubert_soft model(pytorch)")
parser.add_argument("--whisper_tiny", type=str, default="pretrain/whisper_tiny.pt", help="path to whisper_tiny model(pytorch)")
parser.add_argument("--nsf_hifigan", type=str, default="pretrain/nsf_hifigan/model", help="path to nsf_hifigan model(pytorch)")
parser.add_argument("--crepe_onnx_full", type=str, default="pretrain/crepe_onnx_full.onnx", help="path to crepe_onnx_full")
parser.add_argument("--crepe_onnx_tiny", type=str, default="pretrain/crepe_onnx_tiny.onnx", help="path to crepe_onnx_tiny")
parser.add_argument("--rmvpe", type=str, default="pretrain/rmvpe.pt", help="path to rmvpe")
parser.add_argument("--rmvpe_onnx", type=str, default="pretrain/rmvpe.onnx", help="path to rmvpe onnx")
parser.add_argument("--host", type=str, default='127.0.0.1', help="IP address of the network interface to listen for HTTP connections. Specify 0.0.0.0 to listen on all interfaces.")
parser.add_argument("--allowed-origins", action='append', default=[], help="List of URLs to allow connection from, i.e. https://example.com. Allows http(s)://127.0.0.1:{port} and http(s)://localhost:{port} by default.")
return parser
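A standalone sketch of how the two new flags behave, using only the argparse calls shown above (the real parser defines many more options, which are omitted here):
```python
import argparse

# Standalone sketch mirroring the --host / --allowed-origins additions above.
parser = argparse.ArgumentParser()
parser.add_argument("--host", type=str, default="127.0.0.1")
parser.add_argument("--allowed-origins", action="append", default=[])

# action='append' collects every repetition of the flag into one list;
# argparse maps --allowed-origins to the attribute allowed_origins.
args = parser.parse_args([
    "--host", "0.0.0.0",
    "--allowed-origins", "https://example.com",
    "--allowed-origins", "https://app.example.com",
])
print(args.host)             # 0.0.0.0
print(args.allowed_origins)  # ['https://example.com', 'https://app.example.com']
```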
@ -106,22 +110,26 @@ voiceChangerParams = VoiceChangerParams(
rmvpe=args.rmvpe,
rmvpe_onnx=args.rmvpe_onnx,
sample_mode=args.sample_mode,
whisper_tiny=args.whisper_tiny,
)
vcparams = VoiceChangerParamsManager.get_instance()
vcparams.setParams(voiceChangerParams)
printMessage(f"Booting PHASE :{__name__}", level=2)
HOST = args.host
PORT = args.p
def localServer(logLevel: str = "critical"):
def localServer(logLevel: str = "critical", key_path: str | None = None, cert_path: str | None = None):
try:
uvicorn.run(
f"{os.path.basename(__file__)[:-3]}:app_socketio",
host="0.0.0.0",
host=HOST,
port=int(PORT),
reload=False if hasattr(sys, "_MEIPASS") else True,
ssl_keyfile=key_path,
ssl_certfile=cert_path,
log_level=logLevel,
)
except Exception as e:
@ -132,8 +140,8 @@ if __name__ == "MMVCServerSIO":
mp.freeze_support()
voiceChangerManager = VoiceChangerManager.get_instance(voiceChangerParams)
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams)
app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager)
app_fastapi = MMVC_Rest.get_instance(voiceChangerManager, voiceChangerParams, args.allowed_origins, PORT)
app_socketio = MMVC_SocketIOApp.get_instance(app_fastapi, voiceChangerManager, args.allowed_origins, PORT)
if __name__ == "__mp_main__":
@ -218,34 +226,26 @@ if __name__ == "__main__":
printMessage("In many cases, it will launch when you access any of the following URLs.", level=2)
if "EX_PORT" in locals() and "EX_IP" in locals(): # シェルスクリプト経由起動(docker)
if args.https == 1:
printMessage(f"https://127.0.0.1:{EX_PORT}/", level=1)
printMessage(f"https://localhost:{EX_PORT}/", level=1)
for ip in EX_IP.strip().split(" "):
printMessage(f"https://{ip}:{EX_PORT}/", level=1)
else:
printMessage(f"http://127.0.0.1:{EX_PORT}/", level=1)
printMessage(f"http://localhost:{EX_PORT}/", level=1)
else: # 直接python起動
if args.https == 1:
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect((args.test_connect, 80))
hostname = s.getsockname()[0]
printMessage(f"https://127.0.0.1:{PORT}/", level=1)
printMessage(f"https://localhost:{PORT}/", level=1)
printMessage(f"https://{hostname}:{PORT}/", level=1)
else:
printMessage(f"http://127.0.0.1:{PORT}/", level=1)
printMessage(f"http://localhost:{PORT}/", level=1)
# サーバ起動
if args.https:
# HTTPS サーバ起動
try:
uvicorn.run(
f"{os.path.basename(__file__)[:-3]}:app_socketio",
host="0.0.0.0",
port=int(PORT),
reload=False if hasattr(sys, "_MEIPASS") else True,
ssl_keyfile=key_path,
ssl_certfile=cert_path,
log_level=args.logLevel,
)
localServer(args.logLevel, key_path, cert_path)
except Exception as e:
logger.error(f"[Voice Changer] Web Server(https) Launch Exception, {e}")
@ -254,12 +254,12 @@ if __name__ == "__main__":
p.start()
try:
if sys.platform.startswith("win"):
process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "--disable-gpu", "-u", f"http://127.0.0.1:{PORT}/"])
process = subprocess.Popen([NATIVE_CLIENT_FILE_WIN, "--disable-gpu", "-u", f"http://localhost:{PORT}/"])
return_code = process.wait()
logger.info("client closed.")
p.terminate()
elif sys.platform.startswith("darwin"):
process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "--disable-gpu", "-u", f"http://127.0.0.1:{PORT}/"])
process = subprocess.Popen([NATIVE_CLIENT_FILE_MAC, "--disable-gpu", "-u", f"http://localhost:{PORT}/"])
return_code = process.wait()
logger.info("client closed.")
p.terminate()


@ -14,6 +14,7 @@ VoiceChangerType: TypeAlias = Literal[
"Diffusion-SVC",
"Beatrice",
"LLVC",
"EasyVC",
]
StaticSlot: TypeAlias = Literal["Beatrice-JVS",]
@ -56,7 +57,12 @@ def getFrontendPath():
return frontend_path
EmbedderType: TypeAlias = Literal["hubert_base", "contentvec", "hubert-base-japanese"]
EmbedderType: TypeAlias = Literal[
"hubert_base",
"contentvec",
"hubert-base-japanese",
"whisper",
]
class EnumInferenceTypes(Enum):
@ -70,6 +76,8 @@ class EnumInferenceTypes(Enum):
onnxRVC = "onnxRVC"
onnxRVCNono = "onnxRVCNono"
easyVC = "easyVC"
DiffusionSVCInferenceType: TypeAlias = Literal["combo",]
@ -82,6 +90,7 @@ PitchExtractorType: TypeAlias = Literal[
"crepe_tiny",
"rmvpe",
"rmvpe_onnx",
"fcpe",
]
ServerAudioDeviceType: TypeAlias = Literal["audioinput", "audiooutput"]
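Because these aliases are `Literal` types, their members can be recovered at runtime with `typing.get_args`, which gives a cheap validation helper. A self-contained sketch under that assumption (the alias below is an illustrative subset, not the full one from const.py):
```python
from typing import Literal, TypeAlias, get_args

# Illustrative subset only; the real alias lives in const.py above.
ExamplePitchExtractorType: TypeAlias = Literal["rmvpe", "rmvpe_onnx", "fcpe"]

def is_valid(name: str) -> bool:
    # get_args() on a Literal alias returns its member strings.
    return name in get_args(ExamplePitchExtractorType)

print(is_valid("fcpe"))     # True
print(is_valid("unknown"))  # False
```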


@ -141,6 +141,14 @@ class LLVCModelSlot(ModelSlot):
configFile: str = ""
@dataclass
class EasyVCModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "EasyVC"
modelFile: str = ""
version: str = ""
samplingRate: int = -1
ModelSlots: TypeAlias = Union[
ModelSlot,
RVCModelSlot,
@ -151,6 +159,7 @@ ModelSlots: TypeAlias = Union[
DiffusionSVCModelSlot,
BeatriceModelSlot,
LLVCModelSlot,
EasyVCModelSlot,
]
@ -188,6 +197,9 @@ def loadSlotInfo(model_dir: str, slotIndex: int | StaticSlot) -> ModelSlots:
elif slotInfo.voiceChangerType == "LLVC":
slotInfoKey.extend(list(LLVCModelSlot.__annotations__.keys()))
return LLVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
elif slotInfo.voiceChangerType == "EasyVC":
slotInfoKey.extend(list(EasyVCModelSlot.__annotations__.keys()))
return EasyVCModelSlot(**{k: v for k, v in jsonDict.items() if k in slotInfoKey})
else:
return ModelSlot()
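The slot loaders above all rely on the same idiom: take a dataclass's `__annotations__` keys and keep only the matching entries from the JSON dict, so unknown or stale keys are silently dropped. A self-contained sketch of that idiom (the names here are illustrative, not from the repo):
```python
from dataclasses import dataclass

@dataclass
class ExampleSlot:
    # Illustrative stand-in for EasyVCModelSlot and friends.
    modelFile: str = ""
    samplingRate: int = -1

jsonDict = {"modelFile": "a.onnx", "samplingRate": 40000, "staleKey": 123}
keys = set(ExampleSlot.__annotations__.keys())
# Only keys declared on the dataclass survive; "staleKey" is dropped.
slot = ExampleSlot(**{k: v for k, v in jsonDict.items() if k in keys})
print(slot)  # ExampleSlot(modelFile='a.onnx', samplingRate=40000)
```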


@ -19,9 +19,19 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
crepe_onnx_tiny = voiceChangerParams.crepe_onnx_tiny
rmvpe = voiceChangerParams.rmvpe
rmvpe_onnx = voiceChangerParams.rmvpe_onnx
whisper_tiny = voiceChangerParams.whisper_tiny
weight_files = [content_vec_500_onnx, hubert_base, hubert_base_jp, hubert_soft,
nsf_hifigan, crepe_onnx_full, crepe_onnx_tiny, rmvpe]
weight_files = [
content_vec_500_onnx,
hubert_base,
hubert_base_jp,
hubert_soft,
nsf_hifigan,
crepe_onnx_full,
crepe_onnx_tiny,
rmvpe,
whisper_tiny,
]
# file exists check (currently only for rvc)
downloadParams = []
@ -119,6 +129,15 @@ def downloadWeight(voiceChangerParams: VoiceChangerParams):
}
)
if os.path.exists(whisper_tiny) is False:
downloadParams.append(
{
"url": "https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt",
"saveTo": whisper_tiny,
"position": 10,
}
)
with ThreadPoolExecutor() as pool:
pool.map(download, downloadParams)
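The `download` worker mapped over the thread pool is defined elsewhere in the repo and is not shown in this diff. A minimal sketch of a compatible worker, under the assumption that each params dict carries `url` and `saveTo` as above (the real helper may add progress reporting or retries):
```python
import os
import urllib.request

def download(params: dict) -> None:
    # Hypothetical worker compatible with the downloadParams dicts above;
    # the repo's actual `download` helper may differ.
    save_to = params["saveTo"]
    os.makedirs(os.path.dirname(save_to) or ".", exist_ok=True)
    urllib.request.urlretrieve(params["url"], save_to)
```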

server/mods/origins.py Normal file

@ -0,0 +1,24 @@
from typing import Optional, Sequence
from urllib.parse import urlparse
ENFORCE_URL_ORIGIN_FORMAT = "Input origins must be well-formed URLs, i.e. https://google.com or https://www.google.com."
SCHEMAS = ('http', 'https')
LOCAL_ORIGINS = ('127.0.0.1', 'localhost')
def compute_local_origins(port: Optional[int] = None) -> list[str]:
local_origins = [f'{schema}://{origin}' for schema in SCHEMAS for origin in LOCAL_ORIGINS]
if port is not None:
local_origins = [f'{origin}:{port}' for origin in local_origins]
return local_origins
def normalize_origins(origins: Sequence[str]) -> set[str]:
allowed_origins = set()
for origin in origins:
url = urlparse(origin)
assert url.scheme, ENFORCE_URL_ORIGIN_FORMAT
valid_origin = f'{url.scheme}://{url.hostname}'
if url.port:
valid_origin += f':{url.port}'
allowed_origins.add(valid_origin)
return allowed_origins
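For reference, the expected behavior of the two helpers, with outputs shown as comments (assuming the code above):
```python
# Assuming compute_local_origins and normalize_origins as defined above:
print(compute_local_origins(18888))
# ['http://127.0.0.1:18888', 'http://localhost:18888',
#  'https://127.0.0.1:18888', 'https://localhost:18888']

print(normalize_origins(["https://example.com:8080/some/path"]))
# {'https://example.com:8080'}  -- path dropped; scheme, host and port kept
```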


@ -27,3 +27,4 @@ websockets==11.0.2
sounddevice==0.4.6
dataclasses_json==0.5.7
onnxsim==0.4.28
torchfcpe==0.0.3


@ -1,12 +1,12 @@
import os
import sys
from restapi.mods.trustedorigin import TrustedOriginMiddleware
from fastapi import FastAPI, Request, Response, HTTPException
from fastapi.routing import APIRoute
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.exceptions import RequestValidationError
from typing import Callable
from typing import Callable, Optional, Sequence, Literal
from mods.log_control import VoiceChangaerLogger
from voice_changer.VoiceChangerManager import VoiceChangerManager
@ -43,17 +43,17 @@ class MMVC_Rest:
cls,
voiceChangerManager: VoiceChangerManager,
voiceChangerParams: VoiceChangerParams,
allowedOrigins: Optional[Sequence[str]] = None,
port: Optional[int] = None,
):
if cls._instance is None:
logger.info("[Voice Changer] MMVC_Rest initializing...")
app_fastapi = FastAPI()
app_fastapi.router.route_class = ValidationErrorLoggingRoute
app_fastapi.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
TrustedOriginMiddleware,
allowed_origins=allowedOrigins,
port=port
)
app_fastapi.mount(
@ -75,7 +75,10 @@ class MMVC_Rest:
)
app_fastapi.mount("/tmp", StaticFiles(directory=f"{TMP_DIR}"), name="static")
app_fastapi.mount("/upload_dir", StaticFiles(directory=f"{UPLOAD_DIR}"), name="static")
app_fastapi.mount("/model_dir_static", StaticFiles(directory=f"{MODEL_DIR_STATIC}"), name="static")
try:
app_fastapi.mount("/model_dir_static", StaticFiles(directory=f"{MODEL_DIR_STATIC}"), name="static")
except Exception as e:
print("Locating model_dir_static failed", e)
if sys.platform.startswith("darwin"):
p1 = os.path.dirname(sys._MEIPASS)


@ -2,12 +2,22 @@ import os
import shutil
from fastapi import UploadFile
# UPLOAD_DIR = "model_upload_dir"
def sanitize_filename(filename: str) -> str:
safe_filename = os.path.basename(filename)
max_length = 255
if len(safe_filename) > max_length:
file_root, file_ext = os.path.splitext(safe_filename)
safe_filename = file_root[: max_length - len(file_ext)] + file_ext
return safe_filename
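Two properties of `sanitize_filename` worth noting, shown as a usage sketch: directory components are stripped, which neutralizes path traversal, and overlong names are truncated while preserving the extension:
```python
# Directory components are stripped, neutralizing "../" traversal:
print(sanitize_filename("../../etc/passwd"))      # passwd

# Overlong names are truncated to 255 chars, keeping the extension:
name = sanitize_filename("a" * 300 + ".onnx")
print(len(name), name.endswith(".onnx"))          # 255 True
```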
def upload_file(upload_dirname: str, file: UploadFile, filename: str):
if file and filename:
fileobj = file.file
filename = sanitize_filename(filename)
target_path = os.path.join(upload_dirname, filename)
target_dir = os.path.dirname(target_path)
os.makedirs(target_dir, exist_ok=True)
@ -19,9 +29,8 @@ def upload_file(upload_dirname: str, file: UploadFile, filename: str):
return {"status": "ERROR", "msg": "uploaded file is not found."}
def concat_file_chunks(
upload_dirname: str, filename: str, chunkNum: int, dest_dirname: str
):
def concat_file_chunks(upload_dirname: str, filename: str, chunkNum: int, dest_dirname: str):
filename = sanitize_filename(filename)
target_path = os.path.join(upload_dirname, filename)
target_dir = os.path.dirname(target_path)
os.makedirs(target_dir, exist_ok=True)


@ -0,0 +1,43 @@
from typing import Optional, Sequence, Literal
from mods.origins import compute_local_origins, normalize_origins
from starlette.datastructures import Headers
from starlette.responses import PlainTextResponse
from starlette.types import ASGIApp, Receive, Scope, Send
class TrustedOriginMiddleware:
def __init__(
self,
app: ASGIApp,
allowed_origins: Optional[Sequence[str]] = None,
port: Optional[int] = None,
) -> None:
self.allowed_origins: set[str] = set()
local_origins = compute_local_origins(port)
self.allowed_origins.update(local_origins)
if allowed_origins is not None:
normalized_origins = normalize_origins(allowed_origins)
self.allowed_origins.update(normalized_origins)
self.app = app
async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
if scope["type"] not in (
"http",
"websocket",
): # pragma: no cover
await self.app(scope, receive, send)
return
headers = Headers(scope=scope)
origin = headers.get("origin", "")
# Origin header is not present for same origin
if not origin or origin in self.allowed_origins:
await self.app(scope, receive, send)
return
response = PlainTextResponse("Invalid origin header", status_code=400)
await response(scope, receive, send)
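A minimal sketch of wiring this middleware into a FastAPI app, mirroring the MMVC_Rest change elsewhere in this diff; the origin list and port are illustrative, and the class is assumed importable from the module above:
```python
from fastapi import FastAPI

# Sketch only: mirrors how MMVC_Rest registers the middleware above.
app = FastAPI()
app.add_middleware(
    TrustedOriginMiddleware,
    allowed_origins=["https://example.com"],  # illustrative
    port=18888,                               # illustrative
)
# Same-origin requests carry no Origin header and pass through; requests
# from any other origin receive "400 Invalid origin header".
```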


@ -1,6 +1,8 @@
import socketio
from mods.log_control import VoiceChangaerLogger
from mods.origins import compute_local_origins, normalize_origins
from typing import Sequence, Optional
from sio.MMVC_SocketIOServer import MMVC_SocketIOServer
from voice_changer.VoiceChangerManager import VoiceChangerManager
from const import getFrontendPath
@ -12,10 +14,24 @@ class MMVC_SocketIOApp:
_instance: socketio.ASGIApp | None = None
@classmethod
def get_instance(cls, app_fastapi, voiceChangerManager: VoiceChangerManager):
def get_instance(
cls,
app_fastapi,
voiceChangerManager: VoiceChangerManager,
allowedOrigins: Optional[Sequence[str]] = None,
port: Optional[int] = None,
):
if cls._instance is None:
logger.info("[Voice Changer] MMVC_SocketIOApp initializing...")
sio = MMVC_SocketIOServer.get_instance(voiceChangerManager)
allowed_origins: set[str] = set()
local_origins = compute_local_origins(port)
allowed_origins.update(local_origins)
if allowedOrigins is not None:
normalized_origins = normalize_origins(allowedOrigins)
allowed_origins.update(normalized_origins)
sio = MMVC_SocketIOServer.get_instance(voiceChangerManager, list(allowed_origins))
app_socketio = socketio.ASGIApp(
sio,
other_asgi_app=app_fastapi,


@ -8,9 +8,13 @@ class MMVC_SocketIOServer:
_instance: socketio.AsyncServer | None = None
@classmethod
def get_instance(cls, voiceChangerManager: VoiceChangerManager):
def get_instance(
cls,
voiceChangerManager: VoiceChangerManager,
allowedOrigins: list[str],
):
if cls._instance is None:
sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins="*")
sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins=allowedOrigins)
namespace = MMVC_Namespace.get_instance(voiceChangerManager)
sio.register_namespace(namespace)
cls._instance = sio


@ -107,18 +107,19 @@ class DiffusionSVCInferencer(Inferencer):
silence_front: float,
skip_diffusion: bool = True,
) -> torch.Tensor:
with Timer2("pre-process", False) as t:
use_timer = False
with Timer2(" Naive", use_timer) as t:
gt_spec = self.naive_model_call(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, spk_emb=None)
# print("[ ----Timer::1: ]", t.secs)
with Timer2("pre-process", False) as t:
with Timer2(" Diffuser", use_timer) as t:
if skip_diffusion == 0:
out_mel = self.__call__(feats, pitch, volume, spk_id=sid, spk_mix_dict=None, aug_shift=0, gt_spec=gt_spec, infer_speedup=infer_speedup, method="dpm-solver", k_step=k_step, use_tqdm=False, spk_emb=None)
gt_spec = out_mel
# print("[ ----Timer::2: ]", t.secs)
with Timer2("pre-process", False) as t: # NOQA
with Timer2(" Vocoder", use_timer) as t: # NOQA
if self.vocoder_onnx is None:
start_frame = int(silence_front * self.vocoder.vocoder_sample_rate / self.vocoder.vocoder_hop_size)
out_wav = self.mel2wav(gt_spec, pitch, start_frame=start_frame)

View File

@ -102,8 +102,9 @@ class Pipeline(object):
protect=0.5,
skip_diffusion=True,
):
use_timer = False
# print("---------- pipe line --------------------")
with Timer2("pre-process", False) as t:
with Timer2("pre-process", use_timer) as t:
audio_t = torch.from_numpy(audio).float().unsqueeze(0).to(self.device)
audio16k = self.resamplerIn(audio_t)
volume, mask = self.extract_volume_and_mask(audio16k, threshold=-60.0)
@ -111,7 +112,7 @@ class Pipeline(object):
n_frames = int(audio16k.size(-1) // self.hop_size + 1)
# print("[Timer::1: ]", t.secs)
with Timer2("pre-process", False) as t:
with Timer2("extract pitch", use_timer) as t:
# pitch detection
try:
# pitch = self.pitchExtractor.extract(
@ -141,7 +142,7 @@ class Pipeline(object):
feats = feats.view(1, -1)
# print("[Timer::2: ]", t.secs)
with Timer2("pre-process", False) as t:
with Timer2("extract feature", use_timer) as t:
# embedding
with autocast(enabled=self.isHalf):
try:
@ -158,7 +159,7 @@ class Pipeline(object):
feats = F.interpolate(feats.permute(0, 2, 1), size=int(n_frames), mode="nearest").permute(0, 2, 1)
# print("[Timer::3: ]", t.secs)
with Timer2("pre-process", False) as t:
with Timer2("infer", use_timer) as t:
# run inference
try:
with torch.no_grad():
@ -179,7 +180,7 @@ class Pipeline(object):
raise e
# print("[Timer::4: ]", t.secs)
with Timer2("pre-process", False) as t: # NOQA
with Timer2("post-process", use_timer) as t: # NOQA
feats_buffer = feats.squeeze(0).detach().cpu()
if pitch is not None:
pitch_buffer = pitch.squeeze(0).detach().cpu()

View File

@ -0,0 +1,326 @@
"""
For VoiceChangerV2
"""
from dataclasses import asdict
import numpy as np
import torch
from data.ModelSlot import RVCModelSlot
from mods.log_control import VoiceChangaerLogger
from voice_changer.EasyVC.EasyVCSettings import EasyVCSettings
from voice_changer.EasyVC.pipeline.Pipeline import Pipeline
from voice_changer.EasyVC.pipeline.PipelineGenerator import createPipeline
from voice_changer.RVC.RVCSettings import RVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.utils.Timer import Timer2
from voice_changer.utils.VoiceChangerModel import (
AudioInOut,
PitchfInOut,
FeatureInOut,
VoiceChangerModel,
)
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.onnxExporter.export2onnx import export2onnx
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
PipelineCreateException,
PipelineNotInitializedException,
)
import resampy
from typing import cast
logger = VoiceChangaerLogger.get_instance().getLogger()
class EasyVC(VoiceChangerModel):
def __init__(self, params: VoiceChangerParams, slotInfo: RVCModelSlot):
logger.info("[Voice Changer] [EasyVC] Creating instance ")
self.voiceChangerType = "RVC"
self.deviceManager = DeviceManager.get_instance()
EmbedderManager.initialize(params)
PitchExtractorManager.initialize(params)
self.settings = EasyVCSettings()
self.params = params
# self.pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline: Pipeline | None = None
self.audio_buffer: AudioInOut | None = None
self.pitchf_buffer: PitchfInOut | None = None
self.feature_buffer: FeatureInOut | None = None
self.prevVol = 0.0
self.slotInfo = slotInfo
# self.initialize()
def initialize(self):
logger.info("[Voice Changer][EasyVC] Initializing... ")
# create the pipeline
try:
self.pipeline = createPipeline(self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector)
except PipelineCreateException as e: # NOQA
logger.error("[Voice Changer] pipeline create failed. check your model is valid.")
return
# other settings
logger.info("[Voice Changer] [EasyVC] Initializing... done")
def setSamplingRate(self, inputSampleRate, outputSampleRate):
self.inputSampleRate = inputSampleRate
self.outputSampleRate = outputSampleRate
# self.initialize()
def update_settings(self, key: str, val: int | float | str):
logger.info(f"[Voice Changer][RVC]: update_settings {key}:{val}")
if key in self.settings.intData:
setattr(self.settings, key, int(val))
if key == "gpu":
self.deviceManager.setForceTensor(False)
self.initialize()
elif key in self.settings.floatData:
setattr(self.settings, key, float(val))
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
return True
def get_info(self):
data = asdict(self.settings)
if self.pipeline is not None:
pipelineInfo = self.pipeline.getPipelineInfo()
data["pipelineInfo"] = pipelineInfo
else:
data["pipelineInfo"] = "None"
return data
def get_processing_sampling_rate(self):
return self.slotInfo.samplingRate
def generate_input(
self,
newData: AudioInOut,
crossfadeSize: int,
solaSearchFrame: int,
extra_frame: int,
):
# Data comes in at 16k.
inputSize = newData.shape[0]
newData = newData.astype(np.float32) / 32768.0
newFeatureLength = inputSize // 160 # hopsize:=160
if self.audio_buffer is not None:
# concatenate onto past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
# if self.slotInfo.f0:
# self.pitchf_buffer = np.concatenate([self.pitchf_buffer, np.zeros(newFeatureLength)], 0)
self.feature_buffer = np.concatenate(
[
self.feature_buffer,
# np.zeros([newFeatureLength, self.slotInfo.embChannels]),
np.zeros([newFeatureLength, 768]),
],
0,
)
else:
self.audio_buffer = newData
# if self.slotInfo.f0:
# self.pitchf_buffer = np.zeros(newFeatureLength)
self.feature_buffer = np.zeros([newFeatureLength, 768])
convertSize = inputSize + crossfadeSize + solaSearchFrame + extra_frame
if convertSize % 160 != 0: # pad, since the model's output hop size would otherwise cause truncation.
convertSize = convertSize + (160 - (convertSize % 160))
outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)
# pad with zeros when the buffer has not accumulated enough
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
# if self.slotInfo.f0:
# self.pitchf_buffer = np.concatenate([np.zeros([convertSize // 160]), self.pitchf_buffer])
self.feature_buffer = np.concatenate(
[
np.zeros([convertSize // 160, 768]),
self.feature_buffer,
]
)
# trim off the unneeded part
convertOffset = -1 * convertSize
featureOffset = convertOffset // 160
self.audio_buffer = self.audio_buffer[convertOffset:] # extract only the part to be converted
# if self.slotInfo.f0:
# self.pitchf_buffer = self.pitchf_buffer[featureOffset:]
self.feature_buffer = self.feature_buffer[featureOffset:]
# crop just the output part and check its volume. (TODO: make the muting gradual)
cropOffset = -1 * (inputSize + crossfadeSize)
cropEnd = -1 * (crossfadeSize)
crop = self.audio_buffer[cropOffset:cropEnd]
vol = np.sqrt(np.square(crop).mean())
vol = max(vol, self.prevVol * 0.0)
self.prevVol = vol
return (
self.audio_buffer,
self.pitchf_buffer,
self.feature_buffer,
convertSize,
vol,
outSize,
)
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
if self.pipeline is None:
logger.info("[Voice Changer] Pipeline is not initialized.")
raise PipelineNotInitializedException()
enableTimer = False
with Timer2("infer-easyvc", enableTimer) as t:
# processing runs at 16K (pitch, embed, (infer))
receivedData = cast(
AudioInOut,
resampy.resample(
receivedData,
self.inputSampleRate,
16000,
filter="kaiser_fast",
),
)
crossfade_frame = int((crossfade_frame / self.inputSampleRate) * 16000)
sola_search_frame = int((sola_search_frame / self.inputSampleRate) * 16000)
extra_frame = int((self.settings.extraConvertSize / self.inputSampleRate) * 16000)
# generate input data
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)
t.record("generate-input")
audio = data[0]
pitchf = data[1]
feature = data[2]
convertSize = data[3]
vol = data[4]
outSize = data[5]
if vol < self.settings.silentThreshold:
return np.zeros(convertSize).astype(np.int16) * np.sqrt(vol)
device = self.pipeline.device
audio = torch.from_numpy(audio).to(device=device, dtype=torch.float32)
repeat = 0
sid = self.settings.dstId
f0_up_key = self.settings.tran
index_rate = self.settings.indexRatio
protect = self.settings.protect
# if_f0 = 1 if self.slotInfo.f0 else 0
if_f0 = 0
# embOutputLayer = self.slotInfo.embOutputLayer
# useFinalProj = self.slotInfo.useFinalProj
t.record("pre-process")
try:
audio_out, self.pitchf_buffer, self.feature_buffer = self.pipeline.exec(
sid,
audio,
pitchf,
feature,
f0_up_key,
index_rate,
if_f0,
# 0,
self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.0, # extraDataSize in seconds, computed at the input sampling rate
repeat,
outSize,
)
t.record("pipeline-exec")
# result = audio_out.detach().cpu().numpy() * np.sqrt(vol)
result = audio_out[-outSize:].detach().cpu().numpy() * np.sqrt(vol)
result = cast(
AudioInOut,
resampy.resample(
result,
16000,
self.outputSampleRate,
filter="kaiser_fast",
),
)
t.record("resample")
return result
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
logger.warn("[Device Manager] Device cannot support half precision. Fallback to float....")
self.deviceManager.setForceTensor(True)
self.initialize()
# raise e
return
def __del__(self):
del self.pipeline
# print("---------- REMOVING ---------------")
# remove_path = os.path.join("RVC")
# sys.path = [x for x in sys.path if x.endswith(remove_path) is False]
# for key in list(sys.modules):
# val = sys.modules.get(key)
# try:
# file_path = val.__file__
# if file_path.find("RVC" + os.path.sep) >= 0:
# # print("remove", key, file_path)
# sys.modules.pop(key)
# except Exception: # type:ignore
# # print(e)
# pass
def export2onnx(self):
modelSlot = self.slotInfo
if modelSlot.isONNX:
logger.warn("[Voice Changer] export2onnx, No pyTorch filepath.")
return {"status": "ng", "path": ""}
if self.pipeline is not None:
del self.pipeline
self.pipeline = None
torch.cuda.empty_cache()
self.initialize()
output_file_simple = export2onnx(self.settings.gpu, modelSlot)
return {
"status": "ok",
"path": f"/tmp/{output_file_simple}",
"filename": output_file_simple,
}
def get_model_current(self):
return [
{
"key": "defaultTune",
"val": self.settings.tran,
},
{
"key": "defaultIndexRatio",
"val": self.settings.indexRatio,
},
{
"key": "defaultProtect",
"val": self.settings.protect,
},
]
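
A worked example of the buffer arithmetic in generate_input above. All sizes are illustrative, 160 is the hop size at 16 kHz, and 40000 is an assumed model sampling rate.

hop = 160

input_size = 4096
crossfade_size = 1024
sola_search_frame = 512
extra_frame = 8192

convert_size = input_size + crossfade_size + sola_search_frame + extra_frame  # 13824
if convert_size % hop != 0:
    # round up so the hop size divides the buffer exactly
    convert_size += hop - (convert_size % hop)  # 13920

model_sr = 40000
out_size = int(((convert_size - extra_frame) / 16000) * model_sr)
print(convert_size, out_size)  # 13920 14320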

View File

@ -0,0 +1,17 @@
import os
from data.ModelSlot import EasyVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator
class EasyVCModelSlotGenerator(ModelSlotGenerator):
@classmethod
def loadModel(cls, props: LoadModelParams):
slotInfo: EasyVCModelSlot = EasyVCModelSlot()
for file in props.files:
if file.kind == "easyVCModel":
slotInfo.modelFile = file.name
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
slotInfo.slotIndex = props.slot
return slotInfo

View File

@ -0,0 +1,33 @@
from dataclasses import dataclass, field
from const import PitchExtractorType
@dataclass
class EasyVCSettings:
gpu: int = -9999
dstId: int = 0
f0Detector: PitchExtractorType = "rmvpe_onnx" # e.g. dio, harvest, rmvpe_onnx
tran: int = 12
silentThreshold: float = 0.00001
extraConvertSize: int = 1024 * 4
indexRatio: float = 0
protect: float = 0.5
rvcQuality: int = 0
silenceFront: int = 1 # 0:off, 1:on
modelSamplingRate: int = 48000
speakers: dict[str, int] = field(default_factory=lambda: {})
intData = [
"gpu",
"dstId",
"tran",
"extraConvertSize",
"rvcQuality",
"silenceFront",
]
floatData = ["silentThreshold", "indexRatio", "protect"]
strData = ["f0Detector"]

View File

@ -0,0 +1,237 @@
from typing import Any
import math
import torch
import torch.nn.functional as F
from torch.cuda.amp import autocast
from Exceptions import (
DeviceCannotSupportHalfPrecisionException,
DeviceChangingException,
HalfPrecisionChangingException,
NotEnoughDataExtimateF0,
)
from mods.log_control import VoiceChangaerLogger
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.utils.Timer import Timer2
logger = VoiceChangaerLogger.get_instance().getLogger()
class Pipeline(object):
embedder: Embedder
inferencer: Inferencer
pitchExtractor: PitchExtractor
index: Any | None
big_npy: Any | None
# feature: Any | None
targetSR: int
device: torch.device
isHalf: bool
def __init__(
self,
embedder: Embedder,
inferencer: Inferencer,
pitchExtractor: PitchExtractor,
targetSR,
device,
isHalf,
):
self.embedder = embedder
self.inferencer = inferencer
self.pitchExtractor = pitchExtractor
logger.info("GENERATE INFERENCER" + str(self.inferencer))
logger.info("GENERATE EMBEDDER" + str(self.embedder))
logger.info("GENERATE PITCH EXTRACTOR" + str(self.pitchExtractor))
self.targetSR = targetSR
self.device = device
self.isHalf = isHalf
self.sr = 16000
self.window = 160
def getPipelineInfo(self):
inferencerInfo = self.inferencer.getInferencerInfo() if self.inferencer else {}
embedderInfo = self.embedder.getEmbedderInfo()
pitchExtractorInfo = self.pitchExtractor.getPitchExtractorInfo()
return {"inferencer": inferencerInfo, "embedder": embedderInfo, "pitchExtractor": pitchExtractorInfo, "isHalf": self.isHalf}
def setPitchExtractor(self, pitchExtractor: PitchExtractor):
self.pitchExtractor = pitchExtractor
def extractPitch(self, audio_pad, if_f0, pitchf, f0_up_key, silence_front):
try:
if if_f0 == 1:
pitch, pitchf = self.pitchExtractor.extract(
audio_pad,
pitchf,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
# pitch = pitch[:p_len]
# pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
else:
pitch = None
pitchf = None
except IndexError as e: # NOQA
print(e)
import traceback
traceback.print_exc()
raise NotEnoughDataExtimateF0()
return pitch, pitchf
def extractFeatures(self, feats):
with autocast(enabled=self.isHalf):
try:
feats = self.embedder.extractFeatures(feats)
if torch.isnan(feats).all():
raise DeviceCannotSupportHalfPrecisionException()
return feats
except RuntimeError as e:
if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException()
elif "same device" in e.__str__():
raise DeviceChangingException()
else:
raise e
def infer(self, feats, p_len, pitch, pitchf, sid, out_size):
try:
with torch.no_grad():
with autocast(enabled=self.isHalf):
audio1 = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
audio1 = (audio1 * 32767.5).data.to(dtype=torch.int16)
return audio1
except RuntimeError as e:
if "HALF" in e.__str__().upper():
print("HalfPresicion Error:", e)
raise HalfPrecisionChangingException()
else:
raise e
def exec(
self,
sid,
audio, # torch.tensor [n]
pitchf, # np.array [m]
feature, # np.array [m, feat]
f0_up_key,
index_rate,
if_f0,
silence_front,
repeat,
out_size=None,
):
# print(f"pipeline exec input, audio:{audio.shape}, pitchf:{pitchf.shape}, feature:{feature.shape}")
# print(f"pipeline exec input, silence_front:{silence_front}, out_size:{out_size}")
enablePipelineTimer = False
with Timer2("Pipeline-Exec", enablePipelineTimer) as t: # NOQA
# Data comes in at a 16000 sampling rate; everything from here on runs at 16000.
# self.t_pad = self.sr * repeat # 1 second
# self.t_pad_tgt = self.targetSR * repeat # 1 second; trimmed at output (output comes at the model's sampling rate)
audio = audio.unsqueeze(0)
quality_padding_sec = (repeat * (audio.shape[1] - 1)) / self.sr # the reflect padding must be smaller than the original size.
self.t_pad = round(self.sr * quality_padding_sec) # audio added before and after
self.t_pad_tgt = round(self.targetSR * quality_padding_sec) # audio added before and after; trimmed at output (output comes at the model's sampling rate)
audio_pad = F.pad(audio, (self.t_pad, self.t_pad), mode="reflect").squeeze(0)
p_len = audio_pad.shape[0] // self.window
sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
# # Turn silence_front off when RVC Quality is on.
# silence_front = silence_front if repeat == 0 else 0
# pitchf = pitchf if repeat == 0 else np.zeros(p_len)
# out_size = out_size if repeat == 0 else None
# adjust tensor types
feats = audio_pad
if feats.dim() == 2: # double channels
feats = feats.mean(-1)
assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1)
t.record("pre-process")
# pitch detection
pitch, pitchf = self.extractPitch(audio_pad, if_f0, pitchf, f0_up_key, silence_front)
t.record("extract-pitch")
# embedding
feats = self.extractFeatures(feats)
t.record("extract-feats")
feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
# if protect < 0.5 and search_index:
# feats0 = feats.clone()
# adjust the pitch size
p_len = audio_pad.shape[0] // self.window
if feats.shape[1] < p_len:
p_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, :p_len]
pitchf = pitchf[:, :p_len]
feats_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, -feats_len:]
pitchf = pitchf[:, -feats_len:]
p_len = torch.tensor([feats_len], device=self.device).long()
# apply silent front for inference
if type(self.inferencer) in [OnnxRVCInferencer, OnnxRVCInferencerNono]:
npyOffset = math.floor(silence_front * 16000) // 360
feats = feats[:, npyOffset * 2 :, :] # NOQA
feats_len = feats.shape[1]
if pitch is not None and pitchf is not None:
pitch = pitch[:, -feats_len:]
pitchf = pitchf[:, -feats_len:]
p_len = torch.tensor([feats_len], device=self.device).long()
t.record("mid-precess")
# run inference
audio1 = self.infer(feats, p_len, pitch, pitchf, sid, out_size)
t.record("infer")
feats_buffer = feats.squeeze(0).detach().cpu()
if pitchf is not None:
pitchf_buffer = pitchf.squeeze(0).detach().cpu()
else:
pitchf_buffer = None
del p_len, pitch, pitchf, feats
# torch.cuda.empty_cache()
# infer outputs at the model's sampling rate.
# when fed into the pipeline, audio is 16k for hubert
if self.t_pad_tgt != 0:
offset = self.t_pad_tgt
end = -1 * self.t_pad_tgt
audio1 = audio1[offset:end]
del sid
t.record("post-process")
# torch.cuda.empty_cache()
# print("EXEC AVERAGE:", t.avrSecs)
return audio1, pitchf_buffer, feats_buffer
def __del__(self):
del self.embedder
del self.inferencer
del self.pitchExtractor
print("Pipeline has been deleted")

View File

@ -0,0 +1,52 @@
import os
import traceback
from Exceptions import PipelineCreateException
from const import EnumInferenceTypes, PitchExtractorType
from data.ModelSlot import EasyVCModelSlot
from voice_changer.EasyVC.pipeline.Pipeline import Pipeline
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
from voice_changer.RVC.inferencer.InferencerManager import InferencerManager
from voice_changer.RVC.pitchExtractor.PitchExtractorManager import PitchExtractorManager
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
def createPipeline(params: VoiceChangerParams, modelSlot: EasyVCModelSlot, gpu: int, f0Detector: PitchExtractorType):
dev = DeviceManager.get_instance().getDevice(gpu)
half = DeviceManager.get_instance().halfPrecisionAvailable(gpu)
# create the Inferencer
try:
modelPath = os.path.join(params.model_dir, str(modelSlot.slotIndex), os.path.basename(modelSlot.modelFile))
inferencer = InferencerManager.getInferencer(EnumInferenceTypes.easyVC, modelPath, gpu, modelSlot.version)
except Exception as e:
print("[Voice Changer] exception! loading inferencer", e)
traceback.print_exc()
raise PipelineCreateException("[Voice Changer] exception! loading inferencer")
# create the Embedder
try:
embedder = EmbedderManager.getEmbedder(
"whisper",
half,
dev,
)
except Exception as e:
print("[Voice Changer] exception! loading embedder", e, dev)
traceback.print_exc()
raise PipelineCreateException("[Voice Changer] exception! loading embedder")
# pitchExtractor
pitchExtractor = PitchExtractorManager.getPitchExtractor(f0Detector, gpu)
pipeline = Pipeline(
embedder,
inferencer,
pitchExtractor,
modelSlot.samplingRate,
dev,
half,
)
return pipeline

View File

@ -1,6 +1,6 @@
import os
from data.ModelSlot import BeatriceModelSlot, LLVCModelSlot
from data.ModelSlot import LLVCModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.ModelSlotGenerator import ModelSlotGenerator

View File

@ -1,6 +1,7 @@
"""
For VoiceChangerV2
"""
from dataclasses import asdict
import numpy as np
import torch
@ -59,13 +60,9 @@ class RVCr2(VoiceChangerModel):
# create the pipeline
try:
self.pipeline = createPipeline(
self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector
)
self.pipeline = createPipeline(self.params, self.slotInfo, self.settings.gpu, self.settings.f0Detector)
except PipelineCreateException as e: # NOQA
logger.error(
"[Voice Changer] pipeline create failed. check your model is valid."
)
logger.error("[Voice Changer] pipeline create failed. check your model is valid.")
return
# other settings
@ -91,9 +88,7 @@ class RVCr2(VoiceChangerModel):
elif key in self.settings.strData:
setattr(self.settings, key, str(val))
if key == "f0Detector" and self.pipeline is not None:
pitchExtractor = PitchExtractorManager.getPitchExtractor(
self.settings.f0Detector, self.settings.gpu
)
pitchExtractor = PitchExtractorManager.getPitchExtractor(self.settings.f0Detector, self.settings.gpu)
self.pipeline.setPitchExtractor(pitchExtractor)
else:
return False
@ -127,9 +122,7 @@ class RVCr2(VoiceChangerModel):
# concatenate onto past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)
if self.slotInfo.f0:
self.pitchf_buffer = np.concatenate(
[self.pitchf_buffer, np.zeros(newFeatureLength)], 0
)
self.pitchf_buffer = np.concatenate([self.pitchf_buffer, np.zeros(newFeatureLength)], 0)
self.feature_buffer = np.concatenate(
[
self.feature_buffer,
@ -141,27 +134,19 @@ class RVCr2(VoiceChangerModel):
self.audio_buffer = newData
if self.slotInfo.f0:
self.pitchf_buffer = np.zeros(newFeatureLength)
self.feature_buffer = np.zeros(
[newFeatureLength, self.slotInfo.embChannels]
)
self.feature_buffer = np.zeros([newFeatureLength, self.slotInfo.embChannels])
convertSize = inputSize + crossfadeSize + solaSearchFrame + extra_frame
if convertSize % 160 != 0: # pad, since the model's output hop size would otherwise cause truncation.
convertSize = convertSize + (160 - (convertSize % 160))
outSize = int(
((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate
)
outSize = int(((convertSize - extra_frame) / 16000) * self.slotInfo.samplingRate)
# pad with zeros when the buffer has not accumulated enough
if self.audio_buffer.shape[0] < convertSize:
self.audio_buffer = np.concatenate(
[np.zeros([convertSize]), self.audio_buffer]
)
self.audio_buffer = np.concatenate([np.zeros([convertSize]), self.audio_buffer])
if self.slotInfo.f0:
self.pitchf_buffer = np.concatenate(
[np.zeros([convertSize // 160]), self.pitchf_buffer]
)
self.pitchf_buffer = np.concatenate([np.zeros([convertSize // 160]), self.pitchf_buffer])
self.feature_buffer = np.concatenate(
[
np.zeros([convertSize // 160, self.slotInfo.embChannels]),
@ -194,9 +179,7 @@ class RVCr2(VoiceChangerModel):
outSize,
)
def inference(
self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int
):
def inference(self, receivedData: AudioInOut, crossfade_frame: int, sola_search_frame: int):
if self.pipeline is None:
logger.info("[Voice Changer] Pipeline is not initialized.")
raise PipelineNotInitializedException()
@ -208,18 +191,15 @@ class RVCr2(VoiceChangerModel):
receivedData,
self.inputSampleRate,
16000,
filter="kaiser_fast",
),
)
crossfade_frame = int((crossfade_frame / self.inputSampleRate) * 16000)
sola_search_frame = int((sola_search_frame / self.inputSampleRate) * 16000)
extra_frame = int(
(self.settings.extraConvertSize / self.inputSampleRate) * 16000
)
extra_frame = int((self.settings.extraConvertSize / self.inputSampleRate) * 16000)
# generate input data
data = self.generate_input(
receivedData, crossfade_frame, sola_search_frame, extra_frame
)
data = self.generate_input(receivedData, crossfade_frame, sola_search_frame, extra_frame)
audio = data[0]
pitchf = data[1]
@ -254,9 +234,7 @@ class RVCr2(VoiceChangerModel):
index_rate,
if_f0,
# 0,
self.settings.extraConvertSize / self.inputSampleRate
if self.settings.silenceFront
else 0.0, # extraDataSize in seconds, computed at the input sampling rate
self.settings.extraConvertSize / self.inputSampleRate if self.settings.silenceFront else 0.0, # extraDataSize in seconds, computed at the input sampling rate
embOutputLayer,
useFinalProj,
repeat,
@ -272,14 +250,13 @@ class RVCr2(VoiceChangerModel):
result,
self.slotInfo.samplingRate,
self.outputSampleRate,
filter="kaiser_fast",
),
)
return result
except DeviceCannotSupportHalfPrecisionException as e: # NOQA
logger.warn(
"[Device Manager] Device cannot support half precision. Fallback to float...."
)
logger.warn("[Device Manager] Device cannot support half precision. Fallback to float....")
self.deviceManager.setForceTensor(True)
self.initialize()
# raise e

View File

@ -15,14 +15,6 @@ class Embedder(EmbedderProtocol):
self.model: Any | None = None
def loadModel(self, file: str, dev: device, isHalf: bool = True):
...
def extractFeatures(
self, feats: torch.Tensor, embOutputLayer=9, useFinalProj=True
) -> torch.Tensor:
...
def getEmbedderInfo(self):
return {
"embedderType": self.embedderType,

View File

@ -6,6 +6,7 @@ from voice_changer.RVC.embedder.FairseqContentvec import FairseqContentvec
from voice_changer.RVC.embedder.FairseqHubert import FairseqHubert
from voice_changer.RVC.embedder.FairseqHubertJp import FairseqHubertJp
from voice_changer.RVC.embedder.OnnxContentvec import OnnxContentvec
from voice_changer.RVC.embedder.Whisper import Whisper
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@ -18,9 +19,7 @@ class EmbedderManager:
cls.params = params
@classmethod
def getEmbedder(
cls, embederType: EmbedderType, isHalf: bool, dev: device
) -> Embedder:
def getEmbedder(cls, embederType: EmbedderType, isHalf: bool, dev: device) -> Embedder:
if cls.currentEmbedder is None:
print("[Voice Changer] generate new embedder. (no embedder)")
cls.currentEmbedder = cls.loadEmbedder(embederType, isHalf, dev)
@ -36,9 +35,7 @@ class EmbedderManager:
return cls.currentEmbedder
@classmethod
def loadEmbedder(
cls, embederType: EmbedderType, isHalf: bool, dev: device
) -> Embedder:
def loadEmbedder(cls, embederType: EmbedderType, isHalf: bool, dev: device) -> Embedder:
if embederType == "hubert_base":
try:
if cls.params.content_vec_500_onnx_on is False:
@ -62,5 +59,8 @@ class EmbedderManager:
print(e)
file = cls.params.hubert_base
return FairseqContentvec().loadModel(file, dev, isHalf)
elif embederType == "whisper":
file = cls.params.whisper_tiny
return Whisper().loadModel(file, dev, isHalf)
else:
return FairseqHubert().loadModel(file, dev, isHalf)

View File

@ -0,0 +1,53 @@
import torch
from torch import device
from voice_changer.RVC.embedder.Embedder import Embedder
from voice_changer.RVC.embedder.whisper.audio import log_mel_spectrogram
from .whisper.whisper import load_model
import numpy as np
import torch.nn.functional as F
class Whisper(Embedder):
def loadModel(self, file: str, dev: device, isHalf: bool = True) -> Embedder:
super().setProps("whisper", file, dev, isHalf)
whisper = load_model(file).to(dev)
self.model = whisper
return self
def extractFeatures(self, audio: torch.Tensor) -> torch.Tensor:
try:
if isinstance(audio, np.ndarray):
audio = torch.from_numpy(audio.astype(np.float32))
audio = audio.to(self.dev)
# if self.isHalf and audio.dtype != torch.float16:
# audio = audio.half()
if self.isHalf is False and audio.dtype != torch.float32:
audio = audio.float()
if audio.dim() != 1:
audio = audio.squeeze(0)
if audio.dim() != 1:
raise RuntimeError(f"Exeption in {self.__class__.__name__} audio.dim is not 1 (size :{audio.dim()}, {audio.shape})")
audln = audio.shape[0]
ppgln = audln // 320
mel = log_mel_spectrogram(audio).to(self.model.device)
# print(f"[whisper_ppg] audio:{audio.shape}({audio.shape[0]/16000}ms) -> ppg:{ppgln}")
# print(f"[whisper_ppg] mel:{mel.shape}({mel.dtype})")
with torch.no_grad():
ppg = self.model.encoder(mel.unsqueeze(0))
padding = (0, 384)
ppg_padded = F.pad(ppg, padding, "constant", 0)
ppg_padded = ppg_padded.data
# print(f"[whisper_ppg] ppg:{ppg.shape}")
except Exception as e:
print(e)
raise RuntimeError(f"Exeption in {self.__class__.__name__}", e)
# raise EmbedderProcessException(f"Exeption in {self.__class__.__name__}", e)
return ppg_padded
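
A shape sketch of the F.pad step above. Assuming the checkpoint is the tiny Whisper model with 384-dim encoder states (an assumption, not something stated in this diff), padding (0, 384) widens the last dimension to 768, the channel count of the feature buffers in EasyVC.generate_input.

import torch
import torch.nn.functional as F

ppg = torch.randn(1, 1500, 384)  # (batch, frames, assumed encoder width)
ppg_padded = F.pad(ppg, (0, 384), "constant", 0)
print(ppg_padded.shape)  # torch.Size([1, 1500, 768])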

View File

@ -0,0 +1,120 @@
import os
from functools import lru_cache
from typing import Union
import numpy as np
import torch
import torch.nn.functional as F
from voice_changer.RVC.embedder.whisper.utils import exact_div
# hard-coded audio hyperparameters
SAMPLE_RATE = 16000
N_FFT = 400
N_MELS = 80
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000: number of samples in a chunk
N_FRAMES = exact_div(N_SAMPLES, HOP_LENGTH) # 3000: number of frames in a mel spectrogram input
# def load_audio(file: str, sr: int = SAMPLE_RATE):
# """
# Open an audio file and read as mono waveform, resampling as necessary
# Parameters
# ----------
# file: str
# The audio file to open
# sr: int
# The sample rate to resample the audio if necessary
# Returns
# -------
# A NumPy array containing the audio waveform, in float32 dtype.
# """
# try:
# # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
# # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
# out, _ = ffmpeg.input(file, threads=0).output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr).run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
# except ffmpeg.Error as e:
# raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
# return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
"""
Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
"""
if torch.is_tensor(array):
if array.shape[axis] > length:
array = array.index_select(dim=axis, index=torch.arange(length, device=array.device))
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = F.pad(array, [pad for sizes in pad_widths[::-1] for pad in sizes])
else:
if array.shape[axis] > length:
array = array.take(indices=range(length), axis=axis)
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = np.pad(array, pad_widths)
return array
@lru_cache(maxsize=None)
def mel_filters(device, n_mels: int = N_MELS) -> torch.Tensor:
"""
load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
Allows decoupling librosa dependency; saved using:
np.savez_compressed(
"mel_filters.npz",
mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
)
"""
assert n_mels == 80, f"Unsupported n_mels: {n_mels}"
with np.load(os.path.join(os.path.dirname(__file__), "assets", "mel_filters.npz")) as f:
return torch.from_numpy(f[f"mel_{n_mels}"]).to(device)
def log_mel_spectrogram(audio: Union[str, np.ndarray, torch.Tensor], n_mels: int = N_MELS):
"""
Compute the log-Mel spectrogram of the given audio
Parameters
----------
audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz
n_mels: int
The number of Mel-frequency filters, only 80 is supported
Returns
-------
torch.Tensor, shape = (80, n_frames)
A Tensor that contains the Mel spectrogram
"""
if not torch.is_tensor(audio):
if isinstance(audio, str):
audio = load_audio(audio)
audio = torch.from_numpy(audio)
window = torch.hann_window(N_FFT).to(audio.device) # type: ignore
stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True) # type: ignore
magnitudes = stft[..., :-1].abs() ** 2
filters = mel_filters(audio.device, n_mels) # type: ignore
mel_spec = filters @ magnitudes
log_spec = torch.clamp(mel_spec, min=1e-10).log10()
log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
log_spec = (log_spec + 4.0) / 4.0
return log_spec
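
A quick check of the hard-coded constants above: a 30 s chunk at 16 kHz is 480000 samples, and a 160-sample hop yields exactly 3000 mel frames, which is the division exact_div asserts.

SAMPLE_RATE = 16000
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE
assert N_SAMPLES == 480000 and N_SAMPLES % HOP_LENGTH == 0
print(N_SAMPLES // HOP_LENGTH)  # 3000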

View File

@ -0,0 +1,222 @@
from dataclasses import dataclass
from typing import Dict
from typing import Iterable, Optional
import numpy as np
import torch
import torch.nn.functional as F
from torch import Tensor
from torch import nn
# from .decoding import detect_language as detect_language_function, decode as decode_function
@dataclass
class ModelDimensions:
n_mels: int
n_audio_ctx: int
n_audio_state: int
n_audio_head: int
n_audio_layer: int
n_vocab: int
n_text_ctx: int
n_text_state: int
n_text_head: int
n_text_layer: int
class LayerNorm(nn.LayerNorm):
def forward(self, x: Tensor) -> Tensor:
return super().forward(x.float()).type(x.dtype)
class Linear(nn.Linear):
def forward(self, x: Tensor) -> Tensor:
return F.linear(x, self.weight.to(x.dtype), None if self.bias is None else self.bias.to(x.dtype))
class Conv1d(nn.Conv1d):
def _conv_forward(self, x: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor:
return super()._conv_forward(x, weight.to(x.dtype), None if bias is None else bias.to(x.dtype))
def sinusoids(length, channels, max_timescale=10000):
"""Returns sinusoids for positional embedding"""
assert channels % 2 == 0
log_timescale_increment = np.log(max_timescale) / (channels // 2 - 1)
inv_timescales = torch.exp(-log_timescale_increment * torch.arange(channels // 2))
scaled_time = torch.arange(length)[:, np.newaxis] * inv_timescales[np.newaxis, :]
return torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], dim=1)
class MultiHeadAttention(nn.Module):
def __init__(self, n_state: int, n_head: int):
super().__init__()
self.n_head = n_head
self.query = Linear(n_state, n_state)
self.key = Linear(n_state, n_state, bias=False)
self.value = Linear(n_state, n_state)
self.out = Linear(n_state, n_state)
def forward(
self,
x: Tensor,
xa: Optional[Tensor] = None,
mask: Optional[Tensor] = None,
kv_cache: Optional[dict] = None,
):
q = self.query(x)
if kv_cache is None or xa is None or self.key not in kv_cache:
# hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
# otherwise, perform key/value projections for self- or cross-attention as usual.
k = self.key(x if xa is None else xa)
v = self.value(x if xa is None else xa)
else:
# for cross-attention, calculate keys and values once and reuse in subsequent calls.
k = kv_cache[self.key]
v = kv_cache[self.value]
wv, qk = self.qkv_attention(q, k, v, mask)
return self.out(wv), qk
def qkv_attention(self, q: Tensor, k: Tensor, v: Tensor, mask: Optional[Tensor] = None):
n_batch, n_ctx, n_state = q.shape
scale = (n_state // self.n_head) ** -0.25
q = q.view(*q.shape[:2], self.n_head, -1).permute(0, 2, 1, 3) * scale
k = k.view(*k.shape[:2], self.n_head, -1).permute(0, 2, 3, 1) * scale
v = v.view(*v.shape[:2], self.n_head, -1).permute(0, 2, 1, 3)
qk = q @ k
if mask is not None:
qk = qk + mask[:n_ctx, :n_ctx]
qk = qk.float()
w = F.softmax(qk, dim=-1).to(q.dtype)
return (w @ v).permute(0, 2, 1, 3).flatten(start_dim=2), qk.detach()
class ResidualAttentionBlock(nn.Module):
def __init__(self, n_state: int, n_head: int, cross_attention: bool = False):
super().__init__()
self.attn = MultiHeadAttention(n_state, n_head)
self.attn_ln = LayerNorm(n_state)
self.cross_attn = MultiHeadAttention(n_state, n_head) if cross_attention else None
self.cross_attn_ln = LayerNorm(n_state) if cross_attention else None
n_mlp = n_state * 4
self.mlp = nn.Sequential(Linear(n_state, n_mlp), nn.GELU(), Linear(n_mlp, n_state))
self.mlp_ln = LayerNorm(n_state)
def forward(
self,
x: Tensor,
xa: Optional[Tensor] = None,
mask: Optional[Tensor] = None,
kv_cache: Optional[dict] = None,
):
x = x + self.attn(self.attn_ln(x), mask=mask, kv_cache=kv_cache)[0]
if self.cross_attn and self.cross_attn_ln:
x = x + self.cross_attn(self.cross_attn_ln(x), xa, kv_cache=kv_cache)[0]
x = x + self.mlp(self.mlp_ln(x))
return x
class AudioEncoder(nn.Module):
def __init__(self, n_mels: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
super().__init__()
self.conv1 = Conv1d(n_mels, n_state, kernel_size=3, padding=1)
self.conv2 = Conv1d(n_state, n_state, kernel_size=3, stride=2, padding=1)
self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))
self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList([ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)])
self.ln_post = LayerNorm(n_state)
def forward(self, x: Tensor):
"""
x : torch.Tensor, shape = (batch_size, n_mels, n_ctx)
the mel spectrogram of the audio
"""
x = F.gelu(self.conv1(x))
x = F.gelu(self.conv2(x))
x = x.permute(0, 2, 1)
x = (x[:, :, :] + self.positional_embedding[: x.shape[1], :]).to(x.dtype)
for j, block in enumerate(self.blocks):
x = block(x)
x = self.ln_post(x)
return x
class TextDecoder(nn.Module):
def __init__(self, n_vocab: int, n_ctx: int, n_state: int, n_head: int, n_layer: int):
super().__init__()
self.token_embedding = nn.Embedding(n_vocab, n_state)
self.positional_embedding = nn.Parameter(torch.empty(n_ctx, n_state))
self.blocks: Iterable[ResidualAttentionBlock] = nn.ModuleList([ResidualAttentionBlock(n_state, n_head, cross_attention=True) for _ in range(n_layer)])
self.ln = LayerNorm(n_state)
mask = torch.empty(n_ctx, n_ctx).fill_(-np.inf).triu_(1)
self.register_buffer("mask", mask, persistent=False)
def forward(self, x: Tensor, xa: Tensor, kv_cache: Optional[dict] = None):
"""
x : torch.LongTensor, shape = (batch_size, <= n_ctx)
the text tokens
xa : torch.Tensor, shape = (batch_size, n_mels, n_audio_ctx)
the encoded audio features to be attended on
"""
offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0
x = self.token_embedding(x) + self.positional_embedding[offset : offset + x.shape[-1]]
x = x.to(xa.dtype)
for block in self.blocks:
x = block(x, xa, mask=self.mask, kv_cache=kv_cache)
x = self.ln(x)
logits = (x @ torch.transpose(self.token_embedding.weight.to(x.dtype), 0, 1)).float()
return logits
class Whisper(nn.Module):
def __init__(self, dims: ModelDimensions):
super().__init__()
self.dims = dims
self.encoder = AudioEncoder(
self.dims.n_mels,
self.dims.n_audio_ctx,
self.dims.n_audio_state,
self.dims.n_audio_head,
self.dims.n_audio_layer,
)
self.decoder = TextDecoder(
self.dims.n_vocab,
self.dims.n_text_ctx,
self.dims.n_text_state,
self.dims.n_text_head,
self.dims.n_text_layer,
)
def embed_audio(self, mel: torch.Tensor):
return self.encoder(mel)
def logits(self, tokens: torch.Tensor, audio_features: torch.Tensor):
return self.decoder(tokens, audio_features)
def forward(self, mel: torch.Tensor, tokens: torch.Tensor) -> Dict[str, torch.Tensor]:
return self.decoder(tokens, self.encoder(mel))
@property
def device(self):
return next(self.parameters()).device
@property
def is_multilingual(self):
return self.dims.n_vocab == 51865
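
A hedged illustration of constructing the model, assuming the ModelDimensions and Whisper classes above are in scope. The values are the commonly published dimensions of the multilingual "tiny" checkpoint, an assumption rather than something read from this repo.

dims = ModelDimensions(
    n_mels=80,
    n_audio_ctx=1500,
    n_audio_state=384,
    n_audio_head=6,
    n_audio_layer=4,
    n_vocab=51865,
    n_text_ctx=448,
    n_text_state=384,
    n_text_head=6,
    n_text_layer=4,
)
model = Whisper(dims)
print(model.is_multilingual)  # True, since n_vocab == 51865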

View File

@ -0,0 +1,22 @@
import sys
system_encoding = sys.getdefaultencoding()
if system_encoding != "utf-8":
def make_safe(string):
# replaces any character not representable using the system default encoding with an '?',
# avoiding UnicodeEncodeError (https://github.com/openai/whisper/discussions/729).
return string.encode(system_encoding, errors="replace").decode(system_encoding)
else:
def make_safe(string):
# utf-8 can encode any Unicode code point, so no need to do the round-trip encoding
return string
def exact_div(x, y):
assert x % y == 0
return x // y

View File

@ -0,0 +1,208 @@
# from whisper_ppg.model import Whisper, ModelDimensions
# from whisper_ppg_custom._LightWhisper import LightWhisper
# from whisper_ppg_custom.Timer import Timer2
# from whisper_ppg_custom.whisper_ppg.audio import load_audio, pad_or_trim, log_mel_spectrogram
# from whisper_ppg_custom.whisper_ppg.model import Whisper, ModelDimensions
import torch
# import numpy as np
# from easy_vc_dev.utils.whisper.audio import load_audio, pad_or_trim
from .model import ModelDimensions, Whisper
# import onnx
# from onnxsim import simplify
# import json
# import onnxruntime
def load_model(path) -> Whisper:
device = "cpu"
checkpoint = torch.load(path, map_location=device)
dims = ModelDimensions(**checkpoint["dims"])
model = Whisper(dims)
model.load_state_dict(checkpoint["model_state_dict"])
model = model.to(device)
return model
# def pred_ppg(whisper: Whisper, wavPath: str, ppgPath: str):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# audio = load_audio(wavPath)
# audln = audio.shape[0]
# ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# audio = audio[:400000]
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to(whisper.device)
# with torch.no_grad():
# ppg = whisper.encoder(mel.unsqueeze(0)).squeeze().data.cpu().float().numpy()
# print("ppg.shape", ppg.shape)
# ppg = ppg[:ppgln,]
# print(ppg.shape)
# np.save(ppgPath, ppg, allow_pickle=False)
# t.record("fin")
# print("res", ppg)
# def pred_ppg_onnx(wavPath, ppgPath):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# whisper = load_model("tiny.pt")
# audio = load_audio(wavPath)
# # audln = audio.shape[0]
# # ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# audio = audio[:1000]
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# onnx_session = onnxruntime.InferenceSession(
# "wencoder_sim.onnx",
# providers=["CPUExecutionProvider"],
# provider_options=[
# {
# "intra_op_num_threads": 8,
# "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
# "inter_op_num_threads": 8,
# }
# ],
# )
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to(whisper.device).unsqueeze(0)
# onnx_res = onnx_session.run(
# ["ppg"],
# {
# "mel": mel.cpu().numpy(),
# },
# )
# t.record("fin")
# print("onnx_res", onnx_res)
# def export_encoder(wavPath, ppgPath):
# print("pred")
# # whisper = load_model("base.pt") # "base": "https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
# whisper = load_model("tiny.pt")
# audio = load_audio(wavPath)
# # audln = audio.shape[0]
# # ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# print("audio.shape2", audio.shape)
# print(f"whisper.device {whisper.device}")
# mel = log_mel_spectrogram(audio).to(whisper.device).unsqueeze(0)
# input_names = ["mel"]
# output_names = ["ppg"]
# torch.onnx.export(
# whisper.encoder,
# (mel,),
# "wencoder.onnx",
# dynamic_axes={
# "mel": [2],
# },
# do_constant_folding=False,
# opset_version=17,
# verbose=False,
# input_names=input_names,
# output_names=output_names,
# )
# metadata = {
# "application": "VC_CLIENT",
# "version": "2.1",
# }
# model_onnx2 = onnx.load("wencoder.onnx")
# model_simp, check = simplify(model_onnx2)
# meta = model_simp.metadata_props.add()
# meta.key = "metadata"
# meta.value = json.dumps(metadata)
# onnx.save(model_simp, "wencoder_sim.onnx")
# def pred_ppg_onnx_w(wavPath, ppgPath):
# print("pred")
# audio = load_audio(wavPath)
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# print("audio.shape2", audio.shape)
# onnx_session = onnxruntime.InferenceSession(
# "wencoder_sim.onnx",
# providers=["CPUExecutionProvider"],
# provider_options=[
# {
# "intra_op_num_threads": 8,
# "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
# "inter_op_num_threads": 8,
# }
# ],
# )
# for i in range(5):
# with Timer2("mainPorcess timer", True) as t:
# mel = log_mel_spectrogram(audio).to("cpu").unsqueeze(0)
# # mel = mel[:, :, 1500:]
# mel = mel[:, :, 2500:]
# # mel[0, 79, 1499] = 0.1
# print("x.shape", mel.shape)
# onnx_res = onnx_session.run(
# ["ppg"],
# {
# "mel": mel.cpu().numpy(),
# },
# )
# t.record("fin")
# print("onnx_res", onnx_res)
# def export_wrapped_encoder(wavPath, ppgPath):
# print("pred")
# whisper = LightWhisper("tiny.pt")
# audio = load_audio(wavPath)
# # audln = audio.shape[0]
# # ppgln = audln // 320
# print("audio.shape1", audio.shape, audio.shape[0] / 16000)
# audio = pad_or_trim(audio)
# print("audio.shape2", audio.shape)
# mel = log_mel_spectrogram(audio).to("cpu").unsqueeze(0)
# mel = mel[:, :, 1500:]
# input_names = ["mel"]
# output_names = ["ppg"]
# torch.onnx.export(
# whisper,
# (mel,),
# "wencoder.onnx",
# dynamic_axes={
# "mel": [2],
# },
# do_constant_folding=True,
# opset_version=17,
# verbose=False,
# input_names=input_names,
# output_names=output_names,
# )
# metadata = {
# "application": "VC_CLIENT",
# "version": "2.1",
# }
# model_onnx2 = onnx.load("wencoder.onnx")
# model_simp, check = simplify(model_onnx2)
# meta = model_simp.metadata_props.add()
# meta.key = "metadata"
# meta.value = json.dumps(metadata)
# onnx.save(model_simp, "wencoder_sim.onnx")

View File

@ -0,0 +1,46 @@
import torch
import numpy as np
from const import EnumInferenceTypes
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
class EasyVCInferencerONNX(OnnxRVCInferencer):
def loadModel(self, file: str, gpu: int, inferencerTypeVersion: str | None = None):
super().loadModel(file, gpu, inferencerTypeVersion)
self.setProps(EnumInferenceTypes.easyVC, file, self.isHalf, gpu)
return self
def infer(
self,
feats: torch.Tensor,
pitch_length: torch.Tensor,
pitch: torch.Tensor | None,
pitchf: torch.Tensor | None,
sid: torch.Tensor,
convert_length: int | None,
) -> torch.Tensor:
if self.isHalf:
audio1 = self.model.run(
["audio"],
{
"feats": feats.cpu().numpy().astype(np.float16),
"p_len": pitch_length.cpu().numpy().astype(np.int64),
},
)
else:
audio1 = self.model.run(
["audio"],
{
"feats": feats.cpu().numpy().astype(np.float32),
"p_len": pitch_length.cpu().numpy().astype(np.int64),
},
)
res = audio1[0][0][0]
# if self.inferencerTypeVersion == "v2.1" or self.inferencerTypeVersion == "v1.1":
# res = audio1[0]
# else:
# res = np.array(audio1)[0][0, 0]
# res = np.clip(res, -1.0, 1.0)
return torch.tensor(res)

View File

@ -1,4 +1,5 @@
from const import EnumInferenceTypes
from voice_changer.RVC.inferencer.EasyVCInferencerONNX import EasyVCInferencerONNX
from voice_changer.RVC.inferencer.Inferencer import Inferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencer import OnnxRVCInferencer
from voice_changer.RVC.inferencer.OnnxRVCInferencerNono import OnnxRVCInferencerNono
@ -42,6 +43,7 @@ class InferencerManager:
elif inferencerType == EnumInferenceTypes.pyTorchVoRASbeta or inferencerType == EnumInferenceTypes.pyTorchVoRASbeta.value:
if sys.platform.startswith("darwin") is False:
from voice_changer.RVC.inferencer.VorasInferencebeta import VoRASInferencer
return VoRASInferencer().loadModel(file, gpu)
else:
raise RuntimeError("[Voice Changer] VoRAS is not supported on macOS")
@ -55,5 +57,8 @@ class InferencerManager:
return OnnxRVCInferencer().loadModel(file, gpu, inferencerTypeVersion)
elif inferencerType == EnumInferenceTypes.onnxRVCNono or inferencerType == EnumInferenceTypes.onnxRVCNono.value:
return OnnxRVCInferencerNono().loadModel(file, gpu, inferencerTypeVersion)
elif inferencerType == EnumInferenceTypes.easyVC or inferencerType == EnumInferenceTypes.easyVC.value:
return EasyVCInferencerONNX().loadModel(file, gpu)
else:
raise RuntimeError("[Voice Changer] Inferencer not found", inferencerType)

View File

@ -0,0 +1,44 @@
import numpy as np
from const import PitchExtractorType
from voice_changer.RVC.deviceManager.DeviceManager import DeviceManager
from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
import torchfcpe
class FcpePitchExtractor(PitchExtractor):
def __init__(self, gpu: int):
super().__init__()
self.pitchExtractorType: PitchExtractorType = "fcpe"
self.device = DeviceManager.get_instance().getDevice(gpu)
self.fcpe = torchfcpe.spawn_bundled_infer_model(self.device)
# I merged the code of Voice-Changer-CrepePitchExtractor and RVC-fcpe-infer; sorry, I don't know how to optimize the function yet.
def extract(self, audio, pitchf, f0_up_key, sr, window, silence_front=0):
start_frame = int(silence_front * sr / window)
real_silence_front = start_frame * window / sr
silence_front_offset = int(np.round(real_silence_front * sr))
audio = audio[silence_front_offset:]
f0_min = 50
f0_max = 1100
f0_mel_min = 1127 * np.log(1 + f0_min / 700)
f0_mel_max = 1127 * np.log(1 + f0_max / 700)
f0 = self.fcpe.infer(
audio.to(self.device).unsqueeze(0).float(),
sr=16000,
decoder_mode="local_argmax",
threshold=0.006,
)
f0 = f0.squeeze()
f0 *= pow(2, f0_up_key / 12)
pitchf[-f0.shape[0]:] = f0.detach().cpu().numpy()[:pitchf.shape[0]]
f0bak = pitchf.copy()
f0_mel = 1127.0 * np.log(1.0 + f0bak / 700.0)
f0_mel = np.clip(
(f0_mel - f0_mel_min) * 254.0 / (f0_mel_max - f0_mel_min) + 1.0, 1.0, 255.0
)
pitch_coarse = f0_mel.astype(int)
return pitch_coarse, pitchf
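
A worked example of the coarse-pitch mapping above, runnable with NumPy alone; 220 Hz is an illustrative input.

import numpy as np

f0_min, f0_max = 50, 1100
f0_mel_min = 1127 * np.log(1 + f0_min / 700)  # ~77.8
f0_mel_max = 1127 * np.log(1 + f0_max / 700)  # ~1064.4

f0 = np.array([220.0])
f0_mel = 1127.0 * np.log(1.0 + f0 / 700.0)  # ~308.0
coarse = np.clip(
    (f0_mel - f0_mel_min) * 254.0 / (f0_mel_max - f0_mel_min) + 1.0, 1.0, 255.0
).astype(int)
print(coarse)  # [60]; 220 Hz lands in the lower-middle of the 1..255 scale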

View File

@ -8,6 +8,7 @@ from voice_changer.RVC.pitchExtractor.PitchExtractor import PitchExtractor
from voice_changer.RVC.pitchExtractor.RMVPEOnnxPitchExtractor import RMVPEOnnxPitchExtractor
from voice_changer.RVC.pitchExtractor.RMVPEPitchExtractor import RMVPEPitchExtractor
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.RVC.pitchExtractor.FcpePitchExtractor import FcpePitchExtractor
class PitchExtractorManager(Protocol):
@ -43,6 +44,9 @@ class PitchExtractorManager(Protocol):
return RMVPEPitchExtractor(cls.params.rmvpe, gpu)
elif pitchExtractorType == "rmvpe_onnx":
return RMVPEOnnxPitchExtractor(cls.params.rmvpe_onnx, gpu)
elif pitchExtractorType == "fcpe":
# add the FcpePitchExtractor
return FcpePitchExtractor(gpu)
else:
# return hubert as default
print("[Voice Changer] PitchExctractor not found", pitchExtractorType)

View File

@ -206,6 +206,12 @@ class VoiceChangerManager(ServerDeviceCallbacks):
slotInfo = LLVCModelSlotGenerator.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "EasyVC":
from voice_changer.EasyVC.EasyVCModelSlotGenerator import EasyVCModelSlotGenerator
slotInfo = EasyVCModelSlotGenerator.loadModel(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
logger.info(f"params, {params}")
def get_info(self):
@ -307,6 +313,15 @@ class VoiceChangerManager(ServerDeviceCallbacks):
self.voiceChanger.setModel(self.voiceChangerModel)
pass
elif slotInfo.voiceChangerType == "EasyVC":
logger.info("................EasyVC")
from voice_changer.EasyVC.EasyVC import EasyVC
self.voiceChangerModel = EasyVC(self.params, slotInfo)
self.voiceChanger = VoiceChangerV2(self.params)
self.voiceChanger.setModel(self.voiceChangerModel)
pass
else:
logger.info(f"[Voice Changer] unknown voice changer model: {slotInfo.voiceChangerType}")
if hasattr(self, "voiceChangerModel"):

View File

@ -216,8 +216,8 @@ class VoiceChangerV2(VoiceChangerIF):
try:
if self.voiceChanger is None:
raise VoiceChangerIsNotSelectedException("Voice Changer is not selected.")
with Timer2("main-process", False) as t:
enableMainprocessTimer = False
with Timer2("main-process", enableMainprocessTimer) as t:
processing_sampling_rate = self.voiceChanger.get_processing_sampling_rate()
if self.noCrossFade: # Beatrice, LLVC
@ -234,12 +234,14 @@ class VoiceChangerV2(VoiceChangerIF):
block_frame = receivedData.shape[0]
crossfade_frame = min(self.settings.crossFadeOverlapSize, block_frame)
self._generate_strength(crossfade_frame)
t.record("generate_strength")
audio = self.voiceChanger.inference(
receivedData,
crossfade_frame=crossfade_frame,
sola_search_frame=sola_search_frame,
)
t.record("inference")
if hasattr(self, "sola_buffer") is True:
np.set_printoptions(threshold=10000)
@ -271,6 +273,8 @@ class VoiceChangerV2(VoiceChangerIF):
logger.info("[Voice Changer] warming up... generating sola buffer.")
result = np.zeros(4096).astype(np.int16)
t.record("sora")
if hasattr(self, "sola_buffer") is True and sola_offset < sola_search_frame:
offset = -1 * (sola_search_frame + crossfade_frame - sola_offset)
end = -1 * (sola_search_frame - sola_offset)
@ -280,6 +284,8 @@ class VoiceChangerV2(VoiceChangerIF):
self.sola_buffer = audio[-crossfade_frame:] * self.np_prev_strength
# self.sola_buffer = audio[- crossfade_frame:]
t.record("post")
mainprocess_time = t.secs
# post-processing

View File

@ -22,6 +22,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
"beatriceModel",
"llvcModel",
"llvcConfig",
"easyVCModel",
]

View File

@ -57,7 +57,7 @@ class Timer2(object):
if self.enable is False:
return
self.maxStores = 10
self.maxStores = 1
current_frame = inspect.currentframe()
caller_frame = inspect.getouterframes(current_frame, 2)
@ -72,6 +72,7 @@ class Timer2(object):
if self.enable is False:
return self
self.current = time.time()
self.start = time.time()
return self
def record(self, lapname: str):
@ -88,9 +89,11 @@ class Timer2(object):
def __exit__(self, *_):
if self.enable is False:
return
self.end = time.time()
self.elapsed = (self.end - self.start) * 1000
title = self.key.split("_")[-1]
print(f"---- {title} ----")
print(f"---- {title}(elapsed:{round(self.elapsed,1)}ms) ----")
for key, val in self.storedSecs[self.key].items():
section = key.split("_")[-1]
milisecAvr = sum(val) / len(val) * 1000
print(f"{section}: {milisecAvr} msec")
print(f"{section}: {round(milisecAvr,1)} msec, {val[-1]}")

View File

@ -16,3 +16,4 @@ class VoiceChangerParams:
crepe_onnx_tiny: str
rmvpe: str
rmvpe_onnx: str
whisper_tiny: str

View File

@ -87,6 +87,94 @@
"created_at": "2023-11-24T07:25:45Z",
"repoId": 527419347,
"pullRequestNo": 1017
},
{
"name": "Poleyn",
"id": 77776703,
"comment_id": 1873414904,
"created_at": "2024-01-01T17:41:58Z",
"repoId": 527419347,
"pullRequestNo": 1057
},
{
"name": "sonphantrung",
"id": 94152483,
"comment_id": 1876688569,
"created_at": "2024-01-04T08:20:38Z",
"repoId": 527419347,
"pullRequestNo": 1063
},
{
"name": "icecoins",
"id": 92659856,
"comment_id": 1887227712,
"created_at": "2024-01-11T14:04:56Z",
"repoId": 527419347,
"pullRequestNo": 1077
},
{
"name": "brandonkovacs",
"id": 938900,
"comment_id": 1970265905,
"created_at": "2024-02-29T02:04:59Z",
"repoId": 527419347,
"pullRequestNo": 1137
},
{
"name": "deiteris",
"id": 6103913,
"comment_id": 2002160613,
"created_at": "2024-03-16T22:24:35Z",
"repoId": 527419347,
"pullRequestNo": 1153
},
{
"name": "mrs1669",
"id": 40351476,
"comment_id": 2036836572,
"created_at": "2024-04-04T10:52:11Z",
"repoId": 527419347,
"pullRequestNo": 1171
},
{
"name": "mrs1669",
"id": 40351476,
"comment_id": 2036838458,
"created_at": "2024-04-04T10:53:14Z",
"repoId": 527419347,
"pullRequestNo": 1171
},
{
"name": "vitaliylag",
"id": 12641771,
"comment_id": 2143259823,
"created_at": "2024-06-01T03:13:56Z",
"repoId": 527419347,
"pullRequestNo": 1224
},
{
"name": "Nick088Official",
"id": 91847579,
"comment_id": 2170165841,
"created_at": "2024-06-15T16:27:32Z",
"repoId": 527419347,
"pullRequestNo": 1241
},
{
"name": "QweRezOn",
"id": 131252058,
"comment_id": 2349461674,
"created_at": "2024-09-13T17:04:41Z",
"repoId": 527419347,
"pullRequestNo": 1347
},
{
"name": "mallocfree009",
"id": 211442169,
"comment_id": 2888281620,
"created_at": "2025-05-17T10:31:47Z",
"repoId": 527419347,
"pullRequestNo": 1504
}
]
}