support rvc v2 onnx
This commit is contained in:
parent
3e32ced744
commit
25bb1ee078
11
client/demo/dist/index.html
vendored
11
client/demo/dist/index.html
vendored
@ -1 +1,10 @@
|
|||||||
<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
|
<!DOCTYPE html>
|
||||||
|
<html style="width: 100%; height: 100%; overflow: hidden">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<title>Voice Changer Client Demo</title>
|
||||||
|
<script defer src="index.js"></script></head>
|
||||||
|
<body style="width: 100%; height: 100%; margin: 0px">
|
||||||
|
<div id="app" style="width: 100%; height: 100%"></div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
1695
client/demo/dist/index.js
vendored
1695
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
31
client/demo/dist/index.js.LICENSE.txt
vendored
31
client/demo/dist/index.js.LICENSE.txt
vendored
@ -1,31 +0,0 @@
|
|||||||
/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @license React
|
|
||||||
* react-dom.production.min.js
|
|
||||||
*
|
|
||||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
||||||
*
|
|
||||||
* This source code is licensed under the MIT license found in the
|
|
||||||
* LICENSE file in the root directory of this source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @license React
|
|
||||||
* react.production.min.js
|
|
||||||
*
|
|
||||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
||||||
*
|
|
||||||
* This source code is licensed under the MIT license found in the
|
|
||||||
* LICENSE file in the root directory of this source tree.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @license React
|
|
||||||
* scheduler.production.min.js
|
|
||||||
*
|
|
||||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
||||||
*
|
|
||||||
* This source code is licensed under the MIT license found in the
|
|
||||||
* LICENSE file in the root directory of this source tree.
|
|
||||||
*/
|
|
@ -19,6 +19,10 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
|
|||||||
}, 1000 * 2)
|
}, 1000 * 2)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Click handler for the "update default" button.
 *
 * Calls `appState.serverSetting.updateModelDefault()` and waits for it to
 * finish. The promise returned by an event handler is not awaited by the
 * caller, so a failure is caught and logged here instead of surfacing as an
 * unhandled promise rejection.
 */
const onUpdateDefaultClicked = async () => {
    try {
        await appState.serverSetting.updateModelDefault()
    } catch (e) {
        console.error("Failed to update model default", e)
    }
}
|
||||||
|
|
||||||
|
|
||||||
const options = appState.serverSetting.serverSetting.modelSlots.map((x, index) => {
|
const options = appState.serverSetting.serverSetting.modelSlots.map((x, index) => {
|
||||||
let filename = ""
|
let filename = ""
|
||||||
@ -50,7 +54,7 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
|
|||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<div className="body-row split-3-7 left-padding-1 guided">
|
<div className="body-row split-3-4-3 left-padding-1 guided">
|
||||||
<div className="body-item-title left-padding-1">Switch Model</div>
|
<div className="body-item-title left-padding-1">Switch Model</div>
|
||||||
<div className="body-input-container">
|
<div className="body-input-container">
|
||||||
<select className="body-select" value={slot} onChange={(e) => {
|
<select className="body-select" value={slot} onChange={(e) => {
|
||||||
@ -60,6 +64,9 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
|
|||||||
</select>
|
</select>
|
||||||
{selectedTermOfUseUrlLink}
|
{selectedTermOfUseUrlLink}
|
||||||
</div>
|
</div>
|
||||||
|
<div className="body-button-container">
|
||||||
|
<div className="body-button" onClick={onUpdateDefaultClicked}>update default</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</>
|
</>
|
||||||
)
|
)
|
||||||
|
@ -202,4 +202,18 @@ export class ServerConfigurator {
|
|||||||
return await info
|
return await info
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * POSTs to `<serverUrl>/update_model_default` and returns the JSON body of
 * the response, typed as `ServerInfo`.
 *
 * The request is awaited directly rather than inside a
 * `new Promise(async ...)` executor: with that wrapper a failed `fetch` or a
 * non-JSON body would throw inside the executor, never call `resolve`, and
 * leave the caller waiting forever. Here such failures reject the returned
 * promise so callers can handle them.
 *
 * @returns the parsed server response.
 * @throws whatever `fetch` or `Response.json()` rejects with.
 */
updateModelDefault = async (): Promise<ServerInfo> => {
    const url = this.serverUrl + "/update_model_default"
    const request = new Request(url, {
        method: 'POST',
    })
    const response = await fetch(request)
    const res = await response.json() as ServerInfo
    console.log("RESPONSE", res)
    return res
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -276,6 +276,9 @@ export class VoiceChangerClient {
|
|||||||
mergeModel = async (req: MergeModelRequest) => {
|
mergeModel = async (req: MergeModelRequest) => {
|
||||||
return this.configurator.mergeModel(req)
|
return this.configurator.mergeModel(req)
|
||||||
}
|
}
|
||||||
|
/**
 * Forwards the "update model default" request to the configurator and
 * resolves with whatever the configurator returns.
 */
updateModelDefault = async () => {
    const serverInfo = await this.configurator.updateModelDefault()
    return serverInfo
}
|
||||||
|
|
||||||
updateServerSettings = (key: ServerSettingKey, val: string) => {
|
updateServerSettings = (key: ServerSettingKey, val: string) => {
|
||||||
return this.configurator.updateSettings(key, val)
|
return this.configurator.updateSettings(key, val)
|
||||||
|
@ -94,7 +94,7 @@ export type ServerSettingState = {
|
|||||||
|
|
||||||
getOnnx: () => Promise<OnnxExporterInfo>
|
getOnnx: () => Promise<OnnxExporterInfo>
|
||||||
mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>
|
mergeModel: (request: MergeModelRequest) => Promise<ServerInfo>
|
||||||
// updateDefaultTune: (slot: number, tune: number) => void
|
updateModelDefault: () => Promise<ServerInfo>
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -493,6 +493,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
return serverInfo
|
return serverInfo
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Asks the voice changer client to update the model default, stores the
 * returned server info in the hook state via `setServerSetting`, and hands
 * the same object back to the caller.
 */
const updateModelDefault = async () => {
    // The client is asserted non-null here, exactly as the call site always did.
    const client = props.voiceChangerClient!
    const updatedInfo = await client.updateModelDefault()
    setServerSetting(updatedInfo)
    return updatedInfo
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
serverSetting,
|
serverSetting,
|
||||||
updateServerSettings,
|
updateServerSettings,
|
||||||
@ -506,6 +512,6 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
|||||||
isUploading,
|
isUploading,
|
||||||
getOnnx,
|
getOnnx,
|
||||||
mergeModel,
|
mergeModel,
|
||||||
// updateDefaultTune,
|
updateModelDefault,
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -40,6 +40,9 @@ class MMVC_Rest_Fileuploader:
|
|||||||
self.router.add_api_route(
|
self.router.add_api_route(
|
||||||
"/merge_model", self.post_merge_models, methods=["POST"]
|
"/merge_model", self.post_merge_models, methods=["POST"]
|
||||||
)
|
)
|
||||||
|
self.router.add_api_route(
|
||||||
|
"/update_model_default", self.post_update_model_default, methods=["POST"]
|
||||||
|
)
|
||||||
|
|
||||||
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
|
def post_upload_file(self, file: UploadFile = File(...), filename: str = Form(...)):
|
||||||
res = upload_file(UPLOAD_DIR, file, filename)
|
res = upload_file(UPLOAD_DIR, file, filename)
|
||||||
@ -120,3 +123,8 @@ class MMVC_Rest_Fileuploader:
|
|||||||
info = self.voiceChangerManager.merge_models(request)
|
info = self.voiceChangerManager.merge_models(request)
|
||||||
json_compatible_item_data = jsonable_encoder(info)
|
json_compatible_item_data = jsonable_encoder(info)
|
||||||
return JSONResponse(content=json_compatible_item_data)
|
return JSONResponse(content=json_compatible_item_data)
|
||||||
|
|
||||||
|
def post_update_model_default(self):
    """Handle POST /update_model_default.

    Delegates to ``self.voiceChangerManager.update_model_default()`` and
    returns its result, passed through ``jsonable_encoder``, as a
    ``JSONResponse``.
    """
    updated_info = self.voiceChangerManager.update_model_default()
    return JSONResponse(content=jsonable_encoder(updated_info))
|
||||||
|
@ -416,3 +416,17 @@ class RVC:
|
|||||||
self.prepareModel(targetSlot)
|
self.prepareModel(targetSlot)
|
||||||
self.settings.modelSlotIndex = targetSlot
|
self.settings.modelSlotIndex = targetSlot
|
||||||
self.currentSlot = self.settings.modelSlotIndex
|
self.currentSlot = self.settings.modelSlotIndex
|
||||||
|
|
||||||
|
def update_model_default(self):
    """Store the current tune and index ratio as defaults of the current slot.

    Reads ``params.json`` from the current slot's directory
    (``<model_dir>/<RVC_MODEL_DIRNAME>/<currentSlot>``), overwrites its
    ``defaultTune`` and ``defaultIndexRatio`` entries with
    ``self.settings.tran`` and ``self.settings.indexRatio``, writes the file
    back, and finally calls ``self.loadSlots()``.

    Raises:
        OSError: if ``params.json`` cannot be opened for reading or writing.
        json.JSONDecodeError: if the existing file is not valid JSON.
    """
    print("[voiceeeeee] UPDATE MODEL DEFAULT!!")
    slotDir = os.path.join(
        self.params.model_dir, RVC_MODEL_DIRNAME, str(self.currentSlot)
    )
    paramsPath = os.path.join(slotDir, "params.json")

    # Context managers close both handles deterministically, so the written
    # data is flushed to disk before loadSlots() runs.
    with open(paramsPath, "r", encoding="utf-8") as f:
        params = json.load(f)

    params["defaultTune"] = self.settings.tran
    params["defaultIndexRatio"] = self.settings.indexRatio

    # Write with the same encoding that is used for reading.
    with open(paramsPath, "w", encoding="utf-8") as f:
        json.dump(params, f)

    # NOTE(review): presumably loadSlots() re-reads the slot files so the new
    # defaults become visible -- confirm against its implementation.
    self.loadSlots()
|
||||||
|
@ -0,0 +1,96 @@
|
|||||||
|
from torch import nn
|
||||||
|
from infer_pack.models import ( # type:ignore
|
||||||
|
TextEncoder768,
|
||||||
|
GeneratorNSF,
|
||||||
|
PosteriorEncoder,
|
||||||
|
ResidualCouplingBlock,
|
||||||
|
)
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
class SynthesizerTrnMs768NSFsid_ONNX(nn.Module):
    """Synthesizer variant whose ``forward`` takes pitch inputs, for ONNX export.

    The constructor records the hyper-parameters as attributes and builds, in
    this order, a ``TextEncoder768`` (``enc_p``), a ``GeneratorNSF`` decoder
    (``dec``), a ``PosteriorEncoder`` (``enc_q``), a ``ResidualCouplingBlock``
    (``flow``) and a speaker embedding table (``emb_g``).

    ``kwargs`` must contain ``is_half``; it is forwarded to ``GeneratorNSF``.
    """

    def __init__(
        self,
        spec_channels,
        segment_size,
        inter_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        spk_embed_dim,
        gin_channels,
        sr,
        **kwargs
    ):
        super().__init__()

        # --- plain hyper-parameter bookkeeping (no modules created here) ---
        self.spec_channels = spec_channels
        self.segment_size = segment_size
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.gin_channels = gin_channels
        self.spk_embed_dim = spk_embed_dim

        # --- sub-modules; the registration order is kept as enc_p, dec,
        # enc_q, flow, emb_g ---
        self.enc_p = TextEncoder768(
            inter_channels,
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            p_dropout,
        )
        self.dec = GeneratorNSF(
            inter_channels,
            resblock,
            resblock_kernel_sizes,
            resblock_dilation_sizes,
            upsample_rates,
            upsample_initial_channel,
            upsample_kernel_sizes,
            gin_channels=gin_channels,
            sr=sr,
            is_half=kwargs["is_half"],
        )
        # enc_q is constructed but not used by forward() below.
        self.enc_q = PosteriorEncoder(
            spec_channels, inter_channels, hidden_channels, 5, 1, 16,
            gin_channels=gin_channels,
        )
        self.flow = ResidualCouplingBlock(
            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
        )
        self.emb_g = nn.Embedding(spk_embed_dim, gin_channels)

        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)

    def forward(self, phone, phone_lengths, pitch, nsff0, sid, max_len=None):
        """Run the encoder, the reversed flow and the decoder.

        Returns a tuple ``(o, x_mask, (z, z_p, m_p, logs_p))``: the decoder
        output, the mask produced by ``enc_p``, and the intermediate tensors.
        """
        # Speaker embedding with a trailing singleton axis appended.
        speaker_cond = self.emb_g(sid).unsqueeze(-1)

        mean_p, log_scale_p, mask = self.enc_p(phone, pitch, phone_lengths)

        # Perturb the encoder mean with random noise scaled by exp(log_scale_p)
        # and the fixed factor 0.66666, then apply the mask.
        scale_p = torch.exp(log_scale_p)
        noise = torch.randn_like(mean_p)
        latent_p = (mean_p + scale_p * noise * 0.66666) * mask

        latent = self.flow(latent_p, mask, g=speaker_cond, reverse=True)

        # Truncate the last axis to max_len (no truncation when max_len is None).
        decoder_input = (latent * mask)[:, :, :max_len]
        decoded = self.dec(decoder_input, nsff0, g=speaker_cond)

        return decoded, mask, (latent, latent_p, mean_p, log_scale_p)
|
@ -0,0 +1,95 @@
|
|||||||
|
from torch import nn
|
||||||
|
from infer_pack.models import ( # type:ignore
|
||||||
|
TextEncoder768,
|
||||||
|
PosteriorEncoder,
|
||||||
|
ResidualCouplingBlock,
|
||||||
|
Generator,
|
||||||
|
)
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
class SynthesizerTrnMs768NSFsid_nono_ONNX(nn.Module):
    """Synthesizer variant whose ``forward`` takes no pitch inputs, for ONNX export.

    The constructor records the hyper-parameters as attributes and builds, in
    this order, a ``TextEncoder768`` created with ``f0=False`` (``enc_p``), a
    ``Generator`` decoder (``dec``), a ``PosteriorEncoder`` (``enc_q``), a
    ``ResidualCouplingBlock`` (``flow``) and a speaker embedding table
    (``emb_g``).

    ``sr`` and any extra keyword arguments are accepted but not used.
    """

    def __init__(
        self,
        spec_channels,
        segment_size,
        inter_channels,
        hidden_channels,
        filter_channels,
        n_heads,
        n_layers,
        kernel_size,
        p_dropout,
        resblock,
        resblock_kernel_sizes,
        resblock_dilation_sizes,
        upsample_rates,
        upsample_initial_channel,
        upsample_kernel_sizes,
        spk_embed_dim,
        gin_channels,
        sr=None,
        **kwargs
    ):
        super().__init__()

        # --- plain hyper-parameter bookkeeping (no modules created here) ---
        self.spec_channels = spec_channels
        self.segment_size = segment_size
        self.inter_channels = inter_channels
        self.hidden_channels = hidden_channels
        self.filter_channels = filter_channels
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.kernel_size = kernel_size
        self.p_dropout = p_dropout
        self.resblock = resblock
        self.resblock_kernel_sizes = resblock_kernel_sizes
        self.resblock_dilation_sizes = resblock_dilation_sizes
        self.upsample_rates = upsample_rates
        self.upsample_initial_channel = upsample_initial_channel
        self.upsample_kernel_sizes = upsample_kernel_sizes
        self.gin_channels = gin_channels
        self.spk_embed_dim = spk_embed_dim

        # --- sub-modules; the registration order is kept as enc_p, dec,
        # enc_q, flow, emb_g ---
        self.enc_p = TextEncoder768(
            inter_channels,
            hidden_channels,
            filter_channels,
            n_heads,
            n_layers,
            kernel_size,
            p_dropout,
            f0=False,
        )
        self.dec = Generator(
            inter_channels,
            resblock,
            resblock_kernel_sizes,
            resblock_dilation_sizes,
            upsample_rates,
            upsample_initial_channel,
            upsample_kernel_sizes,
            gin_channels=gin_channels,
        )
        # enc_q is constructed but not used by forward() below.
        self.enc_q = PosteriorEncoder(
            spec_channels, inter_channels, hidden_channels, 5, 1, 16,
            gin_channels=gin_channels,
        )
        self.flow = ResidualCouplingBlock(
            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
        )
        self.emb_g = nn.Embedding(spk_embed_dim, gin_channels)

        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)

    def forward(self, phone, phone_lengths, sid, max_len=None):
        """Run the encoder, the reversed flow and the decoder.

        Returns a tuple ``(o, x_mask, (z, z_p, m_p, logs_p))``: the decoder
        output, the mask produced by ``enc_p``, and the intermediate tensors.
        """
        # Speaker embedding with a trailing singleton axis appended.
        speaker_cond = self.emb_g(sid).unsqueeze(-1)

        # No pitch input for this variant, so None is passed in its place.
        mean_p, log_scale_p, mask = self.enc_p(phone, None, phone_lengths)

        # Perturb the encoder mean with random noise scaled by exp(log_scale_p)
        # and the fixed factor 0.66666, then apply the mask.
        scale_p = torch.exp(log_scale_p)
        noise = torch.randn_like(mean_p)
        latent_p = (mean_p + scale_p * noise * 0.66666) * mask

        latent = self.flow(latent_p, mask, g=speaker_cond, reverse=True)

        # Truncate the last axis to max_len (no truncation when max_len is None).
        decoder_input = (latent * mask)[:, :, :max_len]
        decoded = self.dec(decoder_input, g=speaker_cond)

        return decoded, mask, (latent, latent_p, mean_p, log_scale_p)
|
@ -12,6 +12,12 @@ from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_ONNX import (
|
|||||||
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_nono_ONNX import (
|
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs256NSFsid_nono_ONNX import (
|
||||||
SynthesizerTrnMs256NSFsid_nono_ONNX,
|
SynthesizerTrnMs256NSFsid_nono_ONNX,
|
||||||
)
|
)
|
||||||
|
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs768NSFsid_ONNX import (
|
||||||
|
SynthesizerTrnMs768NSFsid_ONNX,
|
||||||
|
)
|
||||||
|
from voice_changer.RVC.onnxExporter.SynthesizerTrnMs768NSFsid_nono_ONNX import (
|
||||||
|
SynthesizerTrnMs768NSFsid_nono_ONNX,
|
||||||
|
)
|
||||||
from voice_changer.RVC.onnxExporter.SynthesizerTrnMsNSFsidNono_webui_ONNX import (
|
from voice_changer.RVC.onnxExporter.SynthesizerTrnMsNSFsidNono_webui_ONNX import (
|
||||||
SynthesizerTrnMsNSFsidNono_webui_ONNX,
|
SynthesizerTrnMsNSFsidNono_webui_ONNX,
|
||||||
)
|
)
|
||||||
@ -69,11 +75,15 @@ def _export2onnx(input_model, output_model, output_model_simple, is_half, metada
|
|||||||
net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
|
net_g_onnx = SynthesizerTrnMs256NSFsid_nono_ONNX(*cpt["config"])
|
||||||
elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value:
|
elif metadata["modelType"] == EnumInferenceTypes.pyTorchWebUINono.value:
|
||||||
net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
|
net_g_onnx = SynthesizerTrnMsNSFsidNono_webui_ONNX(**cpt["params"])
|
||||||
|
elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCv2.value:
|
||||||
|
net_g_onnx = SynthesizerTrnMs768NSFsid_ONNX(*cpt["config"], is_half=is_half)
|
||||||
|
elif metadata["modelType"] == EnumInferenceTypes.pyTorchRVCv2Nono.value:
|
||||||
|
net_g_onnx = SynthesizerTrnMs768NSFsid_nono_ONNX(*cpt["config"])
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
"unknwon::::: ",
|
"unknwon::::: ",
|
||||||
metadata["modelType"],
|
metadata["modelType"],
|
||||||
EnumInferenceTypes.pyTorchWebUI.value,
|
EnumInferenceTypes.pyTorchRVCv2.value,
|
||||||
)
|
)
|
||||||
|
|
||||||
net_g_onnx.eval().to(dev)
|
net_g_onnx.eval().to(dev)
|
||||||
|
@ -550,6 +550,10 @@ class VoiceChanger:
|
|||||||
self.voiceChanger.merge_models(request)
|
self.voiceChanger.merge_models(request)
|
||||||
return self.get_info()
|
return self.get_info()
|
||||||
|
|
||||||
|
def update_model_default(self):
    """Update the model default on the wrapped voice changer, then report state.

    Calls ``self.voiceChanger.update_model_default()`` (its return value is
    discarded) and returns the result of ``self.get_info()``.
    """
    self.voiceChanger.update_model_default()
    current_info = self.get_info()
    return current_info
|
||||||
|
|
||||||
|
|
||||||
PRINT_CONVERT_PROCESSING: bool = False
|
PRINT_CONVERT_PROCESSING: bool = False
|
||||||
# PRINT_CONVERT_PROCESSING = True
|
# PRINT_CONVERT_PROCESSING = True
|
||||||
|
@ -66,3 +66,6 @@ class VoiceChangerManager(object):
|
|||||||
|
|
||||||
def merge_models(self, request: str):
|
def merge_models(self, request: str):
|
||||||
return self.voiceChanger.merge_models(request)
|
return self.voiceChanger.merge_models(request)
|
||||||
|
|
||||||
|
def update_model_default(self):
    """Delegate to ``self.voiceChanger.update_model_default()`` and return its result."""
    result = self.voiceChanger.update_model_default()
    return result
|
||||||
|
Loading…
x
Reference in New Issue
Block a user