support rvc v2 onnx

wataru 2023-05-21 04:21:54 +09:00
parent 3e32ced744
commit 25bb1ee078
14 changed files with 1967 additions and 38 deletions

View File

@@ -1 +1,10 @@
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
<!DOCTYPE html>
<html style="width: 100%; height: 100%; overflow: hidden">
    <head>
        <meta charset="utf-8" />
        <title>Voice Changer Client Demo</title>
        <script defer src="index.js"></script></head>
    <body style="width: 100%; height: 100%; margin: 0px">
        <div id="app" style="width: 100%; height: 100%"></div>
    </body>
</html>

File diff suppressed because one or more lines are too long

View File

@@ -1,31 +0,0 @@
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
/**
* @license React
* react-dom.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* react.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
/**
* @license React
* scheduler.production.min.js
*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/

View File

@@ -19,6 +19,10 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
        }, 1000 * 2)
    }

    const onUpdateDefaultClicked = async () => {
        await appState.serverSetting.updateModelDefault()
    }

    const options = appState.serverSetting.serverSetting.modelSlots.map((x, index) => {
        let filename = ""
@@ -50,7 +54,7 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
    return (
        <>
            <div className="body-row split-3-7 left-padding-1 guided">
            <div className="body-row split-3-4-3 left-padding-1 guided">
                <div className="body-item-title left-padding-1">Switch Model</div>
                <div className="body-input-container">
                    <select className="body-select" value={slot} onChange={(e) => {
@@ -60,6 +64,9 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
                    </select>
                    {selectedTermOfUseUrlLink}
                </div>
                <div className="body-button-container">
                    <div className="body-button" onClick={onUpdateDefaultClicked}>update default</div>
                </div>
            </div>
        </>
    )

View File

@@ -202,4 +202,18 @@ export class ServerConfigurator {
        return await info
    }

    updateModelDefault = async () => {
        const url = this.serverUrl + "/update_model_default"
        const info = new Promise<ServerInfo>(async (resolve) => {
            const request = new Request(url, {
                method: 'POST',
            });
            const res = await (await fetch(request)).json() as ServerInfo
            console.log("RESPONSE", res)
            resolve(res)
        })
        return await info
    }
}

View File

@@ -276,6 +276,9 @@ export class VoiceChangerClient {
    mergeModel = async (req: MergeModelRequest) => {
        return this.configurator.mergeModel(req)
    }

    updateModelDefault = async () => {
        return this.configurator.updateModelDefault()
    }

    updateServerSettings = (key: ServerSettingKey, val: string) => {
        return this.configurator.updateSettings(key, val)

View File

@@ -94,7 +94,7 @@ export type ServerSettingState = {
    getOnnx: () => Promise<OnnxExporterInfo>
    mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>
    // updateDefaultTune: (slot: number, tune: number) => void
    updateModelDefault: () => Promise<ServerInfo>
}
@@ -493,6 +493,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
        return serverInfo
    }

    const updateModelDefault = async () => {
        const serverInfo = await props.voiceChangerClient!.updateModelDefault()
        setServerSetting(serverInfo)
        return serverInfo
    }

    return {
        serverSetting,
        updateServerSettings,
@@ -506,6 +512,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
        isUploading,
        getOnnx,
        mergeModel,
        // updateDefaultTune,
        updateModelDefault,
    }
}

View File

@@ -40,6 +40,9 @@ class MMVC_Rest_Fileuploader:
        self.router.add_api_route(
            "/merge_model", self.post_merge_models, methods=["POST"]
        )
        self.router.add_api_route(
            "/update_model_default", self.post_update_model_default, methods=["POST"]
        )

    def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
        res = upload_file(UPLOAD_DIR, file, filename)
@@ -120,3 +123,8 @@ class MMVC_Rest_Fileuploader:
        info = self.voiceChangerManager.merge_models(request)
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)

    def post_update_model_default(self):
        info = self.voiceChangerManager.update_model_default()
        json_compatible_item_data = jsonable_encoder(info)
        return JSONResponse(content=json_compatible_item_data)
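
For a quick smoke test of the new route, a minimal sketch of calling it from a script; the server address (http://localhost:18888) and the use of the requests package are assumptions, not part of this commit:

import requests

# POST /update_model_default takes no body and returns the refreshed server info.
res = requests.post("http://localhost:18888/update_model_default")
res.raise_for_status()
info = res.json()
print(info.get("modelSlots", info))  # slots now carry the persisted defaults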

View File

@@ -416,3 +416,17 @@ class RVC:
        self.prepareModel(targetSlot)
        self.settings.modelSlotIndex = targetSlot
        self.currentSlot = self.settings.modelSlotIndex

    def update_model_default(self):
        print("[voiceeeeee] UPDATE MODEL DEFAULT!!")
        slotDir = os.path.join(
            self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot)
        )
        params = json.load(
            open(os.path.join(slotDir, "params.json"), "r", encoding="utf-8")
        )
        params["defaultTune"] = self.settings.tran
        params["defaultIndexRatio"] = self.settings.indexRatio
        json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
        self.loadSlots()
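
For clarity, a standalone sketch of the same persistence step, written with context managers; the slot directory and the values 12 / 0.5 are hypothetical stand-ins for the current settings.tran and settings.indexRatio:

import json
import os

slot_dir = "model_dir/rvc/0"  # hypothetical slot directory
params_path = os.path.join(slot_dir, "params.json")

# Read the slot's metadata, overwrite the defaults, write it back.
with open(params_path, "r", encoding="utf-8") as f:
    params = json.load(f)

params["defaultTune"] = 12         # current pitch shift
params["defaultIndexRatio"] = 0.5  # current feature index ratio

with open(params_path, "w", encoding="utf-8") as f:
    json.dump(params, f)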

View File

@@ -0,0 +1,96 @@
from torch import nn
from infer_pack.models import (  # type:ignore
    TextEncoder768,
    GeneratorNSF,
    PosteriorEncoder,
    ResidualCouplingBlock,
)
import torch


class SynthesizerTrnMs768NSFsid_ONNX(nn.Module):
    def __init__(
        self,
        spec_channels,
        segment_size,
        inter_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        spk_embed_dim,
        gin_channels,
        sr,
        **kwargs
    ):
        super().__init__()
        self.spec_channels = spec_channels
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.segment_size = segment_size
        self.gin_channels = gin_channels
        # self.hop_length = hop_length#
        self.spk_embed_dim = spk_embed_dim
        self.enc_p = TextEncoder768(
            inter_channels,
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            p_dropout,
        )
        self.dec = GeneratorNSF(
            inter_channels,
            resblock,
            resblock_kernel_sizes,
            resblock_dilation_sizes,
            upsample_rates,
            upsample_initial_channel,
            upsample_kernel_sizes,
            gin_channels=gin_channels,
            sr=sr,
            is_half=kwargs["is_half"],
        )
        self.enc_q = PosteriorEncoder(
            spec_channels,
            inter_channels,
            hidden_channels,
            5,
            1,
            16,
            gin_channels=gin_channels,
        )
        self.flow = ResidualCouplingBlock(
            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
        )
        self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)

    def forward(self, phone, phone_lengths, pitch, nsff0, sid, max_len=None):
        g = self.emb_g(sid).unsqueeze(-1)
        m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
        z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
        z = self.flow(z_p, x_mask, g=g, reverse=True)
        o = self.dec((z * x_mask)[:, :, :max_len], nsff0, g=g)
        return o, x_mask, (z, z_p, m_p, logs_p)
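
To show how this wrapper is typically driven, a hedged export sketch: the dummy shapes, the tensor names (feats, p_len, pitch, pitchf, sid), the output name and the opset are illustrative assumptions and may differ from what this commit's exporter actually emits. RVC v2 models consume 768-dimensional content features, hence the last dimension below.

import torch

def export_v2_f0_sketch(net_g_onnx: torch.nn.Module, output_path: str) -> None:
    # Dummy inputs matching forward(phone, phone_lengths, pitch, nsff0, sid).
    t = 200  # arbitrary frame count; the time axis is marked dynamic below
    feats = torch.zeros((1, t, 768), dtype=torch.float32)
    p_len = torch.tensor([t], dtype=torch.int64)
    pitch = torch.zeros((1, t), dtype=torch.int64)      # coarse pitch indices
    pitchf = torch.zeros((1, t), dtype=torch.float32)   # continuous f0 for the NSF decoder
    sid = torch.tensor([0], dtype=torch.int64)           # speaker id
    torch.onnx.export(
        net_g_onnx.eval(),
        (feats, p_len, pitch, pitchf, sid),
        output_path,
        input_names=["feats", "p_len", "pitch", "pitchf", "sid"],
        output_names=["audio"],
        dynamic_axes={"feats": [1], "pitch": [1], "pitchf": [1]},
        opset_version=17,
    )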

View File

@@ -0,0 +1,95 @@
from torch import nn
from infer_pack.models import (  # type:ignore
    TextEncoder768,
    PosteriorEncoder,
    ResidualCouplingBlock,
    Generator,
)
import torch


class SynthesizerTrnMs768NSFsid_nono_ONNX(nn.Module):
    def __init__(
        self,
        spec_channels,
        segment_size,
        inter_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        spk_embed_dim,
        gin_channels,
        sr=None,
        **kwargs
    ):
        super().__init__()
        self.spec_channels = spec_channels
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.segment_size = segment_size
        self.gin_channels = gin_channels
        # self.hop_length = hop_length#
        self.spk_embed_dim = spk_embed_dim
        self.enc_p = TextEncoder768(
            inter_channels,
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            p_dropout,
            f0=False,
        )
        self.dec = Generator(
            inter_channels,
            resblock,
            resblock_kernel_sizes,
            resblock_dilation_sizes,
            upsample_rates,
            upsample_initial_channel,
            upsample_kernel_sizes,
            gin_channels=gin_channels,
        )
        self.enc_q = PosteriorEncoder(
            spec_channels,
            inter_channels,
            hidden_channels,
            5,
            1,
            16,
            gin_channels=gin_channels,
        )
        self.flow = ResidualCouplingBlock(
            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
        )
        self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)

    def forward(self, phone, phone_lengths, sid, max_len=None):
        g = self.emb_g(sid).unsqueeze(-1)
        m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
        z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
        z = self.flow(z_p, x_mask, g=g, reverse=True)
        o = self.dec((z * x_mask)[:, :, :max_len], g=g)
        return o, x_mask, (z, z_p, m_p, logs_p)
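
The no-F0 variant only differs in its forward signature, so the same hedged export sketch applies with the pitch tensors dropped (tensor names again assumed, not taken from this commit):

import torch

def export_v2_nof0_sketch(net_g_nono: torch.nn.Module, output_path: str) -> None:
    t = 200
    feats = torch.zeros((1, t, 768), dtype=torch.float32)
    p_len = torch.tensor([t], dtype=torch.int64)
    sid = torch.tensor([0], dtype=torch.int64)
    torch.onnx.export(
        net_g_nono.eval(),
        (feats, p_len, sid),
        output_path,
        input_names=["feats", "p_len", "sid"],
        output_names=["audio"],
        dynamic_axes={"feats": [1]},
        opset_version=17,
    )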

View File

@@ -12,6 +12,12 @@ from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_nono_ONNX import (
    SynthesizerTrnMs256NSFsid_nono_ONNX,
)
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs768NSFsid_ONNX import (
    SynthesizerTrnMs768NSFsid_ONNX,
)
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs768NSFsid_nono_ONNX import (
    SynthesizerTrnMs768NSFsid_nono_ONNX,
)
from voice_changer.RVC.onnxExporter.SynthesizerTrnMsNSFsidNono_webui_ONNX import (
    SynthesizerTrnMsNSFsidNono_webui_ONNX,
)
@@ -69,11 +75,15 @@ def _export2onnx(input_model, output_model, output_model_simple, is_half, metada
        net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
    elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value:
        net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
    elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCv2.value:
        net_g_onnx = SynthesizerTrnMs768NSFsid_ONNX(*cpt["config"], is_half=is_half)
    elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCv2Nono.value:
        net_g_onnx = SynthesizerTrnMs768NSFsid_nono_ONNX(*cpt["config"])
    else:
        print(
            "unknwon::::: ",
            metadata["modelType"],
            EnumInferenceTypes.pyTorchWebUI.value,
            EnumInferenceTypes.pyTorchRVCv2.value,
        )

    net_g_onnx.eval().to(dev)
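
After export and simplification, a hedged sanity check with onnxruntime; the input names mirror the assumed export sketches above and are not guaranteed to match the graph this exporter actually produces:

import numpy as np
import onnxruntime

def check_v2_onnx(path: str) -> None:
    # Load the exported graph on CPU and run one dummy inference.
    sess = onnxruntime.InferenceSession(path, providers=["CPUExecutionProvider"])
    t = 100
    feeds = {
        "feats": np.zeros((1, t, 768), dtype=np.float32),
        "p_len": np.array([t], dtype=np.int64),
        "pitch": np.zeros((1, t), dtype=np.int64),
        "pitchf": np.zeros((1, t), dtype=np.float32),
        "sid": np.array([0], dtype=np.int64),
    }
    outputs = sess.run(None, feeds)
    print("output shapes:", [o.shape for o in outputs])  # first output is the waveform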

View File

@@ -550,6 +550,10 @@ class VoiceChanger:
        self.voiceChanger.merge_models(request)
        return self.get_info()

    def update_model_default(self):
        self.voiceChanger.update_model_default()
        return self.get_info()


PRINT_CONVERT_PROCESSING: bool = False
# PRINT_CONVERT_PROCESSING = True

View File

@@ -66,3 +66,6 @@ class VoiceChangerManager(object):
    def merge_models(self, request: str):
        return self.voiceChanger.merge_models(request)

    def update_model_default(self):
        return self.voiceChanger.update_model_default()