WIP: integrate vcs to new gui 2
This commit is contained in:
parent
b453e5fd85
commit
b6996a15fe
4
client/demo/dist/index.js
vendored
4
client/demo/dist/index.js
vendored
File diff suppressed because one or more lines are too long
@ -1,6 +1,6 @@
|
||||
import React, { useEffect, useMemo, useState } from "react";
|
||||
import { useAppState } from "../../001_provider/001_AppStateProvider";
|
||||
import { FileUploadSetting, InitialFileUploadSetting, ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
|
||||
import { ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
|
||||
import { useMessageBuilder } from "../../hooks/useMessageBuilder";
|
||||
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
|
||||
import { checkExtention, trimfileName } from "../../utils/utils";
|
||||
@ -54,15 +54,31 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
|
||||
|
||||
const checkModelSetting = (setting: ModelUploadSetting) => {
|
||||
if (setting.voiceChangerType == "RVC") {
|
||||
// const enough = !!setting.files.find(x => { return x.kind == "rvcModel" }) &&
|
||||
// !!setting.files.find(x => { return x.kind == "rvcIndex" })
|
||||
// return enough
|
||||
const enough = !!setting.files.find(x => { return x.kind == "rvcModel" })
|
||||
return enough
|
||||
} else if (setting.voiceChangerType == "MMVCv13") {
|
||||
const enough = !!setting.files.find(x => { return x.kind == "mmvcv13Model" }) &&
|
||||
!!setting.files.find(x => { return x.kind == "mmvcv13Config" })
|
||||
return enough
|
||||
} else if (setting.voiceChangerType == "MMVCv15") {
|
||||
const enough = !!setting.files.find(x => { return x.kind == "mmvcv15Model" }) &&
|
||||
!!setting.files.find(x => { return x.kind == "mmvcv15Config" })
|
||||
return enough
|
||||
} else if (setting.voiceChangerType == "so-vits-svc-40") {
|
||||
const enough = !!setting.files.find(x => { return x.kind == "soVitsSvc40Config" }) &&
|
||||
!!setting.files.find(x => { return x.kind == "soVitsSvc40Model" })
|
||||
return enough
|
||||
} else if (setting.voiceChangerType == "DDSP-SVC") {
|
||||
const enough = !!setting.files.find(x => { return x.kind == "ddspSvcModel" }) &&
|
||||
!!setting.files.find(x => { return x.kind == "ddspSvcModelConfig" }) &&
|
||||
!!setting.files.find(x => { return x.kind == "ddspSvcDiffusion" }) &&
|
||||
!!setting.files.find(x => { return x.kind == "ddspSvcDiffusionConfig" })
|
||||
return enough
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[]) => {
|
||||
const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[], dir: string = "") => {
|
||||
const selectedFile = setting.files.find(x => { return x.kind == kind })
|
||||
const selectedFilename = selectedFile?.file.name || ""
|
||||
return (
|
||||
@ -81,7 +97,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
|
||||
if (selectedFile) {
|
||||
selectedFile.file = file
|
||||
} else {
|
||||
setting.files.push({ kind: kind, file: file })
|
||||
setting.files.push({ kind: kind, file: file, dir: dir })
|
||||
}
|
||||
setUploadSetting({ ...setting })
|
||||
}}>
|
||||
@ -96,6 +112,21 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
|
||||
if (vcType == "RVC") {
|
||||
rows.push(generateFileRow(uploadSetting!, "Model", "rvcModel", ["pth", "onnx"]))
|
||||
rows.push(generateFileRow(uploadSetting!, "Index", "rvcIndex", ["index", "bin"]))
|
||||
} else if (vcType == "MMVCv13") {
|
||||
rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv13Config", ["json"]))
|
||||
rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv13Model", ["pth", "onnx"]))
|
||||
} else if (vcType == "MMVCv15") {
|
||||
rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv15Config", ["json"]))
|
||||
rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv15Model", ["pth", "onnx"]))
|
||||
} else if (vcType == "so-vits-svc-40") {
|
||||
rows.push(generateFileRow(uploadSetting!, "Config", "soVitsSvc40Config", ["json"]))
|
||||
rows.push(generateFileRow(uploadSetting!, "Model", "soVitsSvc40Model", ["pth"]))
|
||||
rows.push(generateFileRow(uploadSetting!, "Cluster", "soVitsSvc40Cluster", ["pth", "pt"]))
|
||||
} else if (vcType == "DDSP-SVC") {
|
||||
rows.push(generateFileRow(uploadSetting!, "Config", "ddspSvcModelConfig", ["yaml"], "model/"))
|
||||
rows.push(generateFileRow(uploadSetting!, "Model", "ddspSvcModel", ["pth", "pt"], "model/"))
|
||||
rows.push(generateFileRow(uploadSetting!, "Config(diff)", "ddspSvcDiffusionConfig", ["yaml"], "diff/"))
|
||||
rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"))
|
||||
}
|
||||
return rows
|
||||
}
|
||||
|
@ -57,13 +57,13 @@ export class ServerConfigurator {
|
||||
return info
|
||||
}
|
||||
|
||||
uploadFile2 = async (file: File, onprogress: (progress: number, end: boolean) => void) => {
|
||||
uploadFile2 = async (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
|
||||
const url = this.serverUrl + "/upload_file"
|
||||
onprogress(0, false)
|
||||
const size = 1024 * 1024;
|
||||
let index = 0; // index値
|
||||
const fileLength = file.size
|
||||
const filename = file.name
|
||||
const filename = dir + file.name
|
||||
const fileChunkNum = Math.ceil(fileLength / size)
|
||||
|
||||
while (true) {
|
||||
|
@ -290,8 +290,8 @@ export class VoiceChangerClient {
|
||||
uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => {
|
||||
return this.configurator.uploadFile(buf, filename, onprogress)
|
||||
}
|
||||
uploadFile2 = (file: File, onprogress: (progress: number, end: boolean) => void) => {
|
||||
return this.configurator.uploadFile2(file, onprogress)
|
||||
uploadFile2 = (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
|
||||
return this.configurator.uploadFile2(dir, file, onprogress)
|
||||
}
|
||||
concatUploadedFile = (filename: string, chunkNum: number) => {
|
||||
return this.configurator.concatUploadedFile(filename, chunkNum)
|
||||
|
@ -41,6 +41,7 @@ export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
|
||||
export type ModelFile = {
|
||||
file: File,
|
||||
kind: ModelFileKind
|
||||
dir: string
|
||||
}
|
||||
|
||||
export type ModelUploadSetting = {
|
||||
@ -296,7 +297,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
if (!props.voiceChangerClient) return
|
||||
console.log("uploading..1.", file)
|
||||
console.log("uploading..2.", file.name)
|
||||
const num = await props.voiceChangerClient.uploadFile2(file, onprogress)
|
||||
const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress)
|
||||
const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num)
|
||||
console.log("uploaded", num, res)
|
||||
}
|
||||
@ -319,11 +320,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
|
||||
const progOffset = 100 * i * progRate
|
||||
await _uploadFile2(setting.files[i].file, (progress: number, _end: boolean) => {
|
||||
setUploadProgress(progress * progRate + progOffset)
|
||||
})
|
||||
}, setting.files[i].dir)
|
||||
}
|
||||
}
|
||||
const params: ModelUploadSettingForServer = {
|
||||
...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind } })
|
||||
...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind, dir: f.dir } })
|
||||
}
|
||||
|
||||
const loadPromise = props.voiceChangerClient.loadModel(
|
||||
|
@ -38,7 +38,72 @@ class RVCModelSlot(ModelSlot):
|
||||
iconFile: str = ""
|
||||
|
||||
|
||||
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
|
||||
@dataclass
|
||||
class MMVCv13ModelSlot(ModelSlot):
|
||||
voiceChangerType: VoiceChangerType = "MMVCv13"
|
||||
modelFile: str = ""
|
||||
configFile: str = ""
|
||||
srcId: int = 107
|
||||
dstId: int = 100
|
||||
isONNX: bool = False
|
||||
samplingRate: int = 24000
|
||||
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
iconFile: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class MMVCv15ModelSlot(ModelSlot):
|
||||
voiceChangerType: VoiceChangerType = "MMVCv15"
|
||||
modelFile: str = ""
|
||||
configFile: str = ""
|
||||
srcId: int = 0
|
||||
dstId: int = 101
|
||||
isONNX: bool = False
|
||||
samplingRate: int = 24000
|
||||
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
iconFile: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class SoVitsSvc40ModelSlot(ModelSlot):
|
||||
voiceChangerType: VoiceChangerType = "so-vits-svc-40"
|
||||
modelFile: str = ""
|
||||
configFile: str = ""
|
||||
clusterFile: str = ""
|
||||
dstId: int = 0
|
||||
isONNX: bool = False
|
||||
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
credit: str = ""
|
||||
termsOfUseUrl: str = ""
|
||||
sampleId: str = ""
|
||||
iconFile: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class DDSPSVCModelSlot(ModelSlot):
|
||||
voiceChangerType: VoiceChangerType = "DDSP-SVC"
|
||||
modelFile: str = ""
|
||||
configFile: str = ""
|
||||
diffModelFile: str = ""
|
||||
diffConfigFile: str = ""
|
||||
dstId: int = 0
|
||||
isONNX: bool = False
|
||||
|
||||
name: str = ""
|
||||
description: str = ""
|
||||
credit: str = ""
|
||||
termsOfUseUrl: str = ""
|
||||
sampleId: str = ""
|
||||
iconFile: str = ""
|
||||
|
||||
|
||||
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot]
|
||||
|
||||
|
||||
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
|
||||
@ -50,6 +115,14 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
|
||||
slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
|
||||
if slotInfo.voiceChangerType == "RVC":
|
||||
return RVCModelSlot(**jsonDict)
|
||||
elif slotInfo.voiceChangerType == "MMVCv13":
|
||||
return MMVCv13ModelSlot(**jsonDict)
|
||||
elif slotInfo.voiceChangerType == "MMVCv15":
|
||||
return MMVCv15ModelSlot(**jsonDict)
|
||||
elif slotInfo.voiceChangerType == "so-vits-svc-40":
|
||||
return SoVitsSvc40ModelSlot(**jsonDict)
|
||||
elif slotInfo.voiceChangerType == "DDSP-SVC":
|
||||
return DDSPSVCModelSlot(**jsonDict)
|
||||
else:
|
||||
return ModelSlot()
|
||||
|
||||
|
@ -3,6 +3,7 @@ import os
|
||||
from dataclasses import asdict
|
||||
import numpy as np
|
||||
import torch
|
||||
from data.ModelSlot import DDSPSVCModelSlot
|
||||
from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
|
||||
|
||||
from voice_changer.DDSP_SVC.deviceManager.DeviceManager import DeviceManager
|
||||
@ -21,7 +22,7 @@ from diffusion.infer_gt_mel import DiffGtMel # type: ignore
|
||||
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
|
||||
from voice_changer.DDSP_SVC.DDSP_SVCSetting import DDSP_SVCSettings
|
||||
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
|
||||
|
||||
@ -44,11 +45,7 @@ def phase_vocoder(a, b, fade_out, fade_in):
|
||||
deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
|
||||
w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
|
||||
t = torch.arange(n).unsqueeze(-1).to(a) / n
|
||||
result = (
|
||||
a * (fade_out**2)
|
||||
+ b * (fade_in**2)
|
||||
+ torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
|
||||
)
|
||||
result = a * (fade_out**2) + b * (fade_in**2) + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
|
||||
return result
|
||||
|
||||
|
||||
@ -102,9 +99,7 @@ class DDSP_SVC:
|
||||
def reloadModel(self):
|
||||
self.device = self.deviceManager.getDevice(self.settings.gpu)
|
||||
modelFile = self.settings.modelSlots[self.settings.modelSlotIndex].modelFile
|
||||
diffusionFile = self.settings.modelSlots[
|
||||
self.settings.modelSlotIndex
|
||||
].diffusionFile
|
||||
diffusionFile = self.settings.modelSlots[self.settings.modelSlotIndex].diffusionFile
|
||||
|
||||
self.svc_model = SvcDDSP()
|
||||
self.svc_model.setVCParams(self.params)
|
||||
@ -144,15 +139,11 @@ class DDSP_SVC:
|
||||
# newData = newData.astype(np.float32)
|
||||
|
||||
if self.audio_buffer is not None:
|
||||
self.audio_buffer = np.concatenate(
|
||||
[self.audio_buffer, newData], 0
|
||||
) # 過去のデータに連結
|
||||
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # 過去のデータに連結
|
||||
else:
|
||||
self.audio_buffer = newData
|
||||
|
||||
convertSize = (
|
||||
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
)
|
||||
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
|
||||
# if convertSize % self.hop_size != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||
# convertSize = convertSize + (self.hop_size - (convertSize % self.hop_size))
|
||||
@ -187,8 +178,7 @@ class DDSP_SVC:
|
||||
f0_min=50,
|
||||
f0_max=1100,
|
||||
# safe_prefix_pad_length=0, # TBD なにこれ?
|
||||
safe_prefix_pad_length=self.settings.extraConvertSize
|
||||
/ self.svc_model.args.data.sampling_rate,
|
||||
safe_prefix_pad_length=self.settings.extraConvertSize / self.svc_model.args.data.sampling_rate,
|
||||
diff_model=self.diff_model,
|
||||
diff_acc=self.settings.diffAcc, # TBD なにこれ?
|
||||
diff_spk_id=self.settings.diffSpkId,
|
||||
@ -196,9 +186,7 @@ class DDSP_SVC:
|
||||
# diff_use_dpm=True if self.settings.useDiffDpm == 1 else False, # TBD なにこれ?
|
||||
method=self.settings.diffMethod,
|
||||
k_step=self.settings.kStep, # TBD なにこれ?
|
||||
diff_silence=True
|
||||
if self.settings.useDiffSilence == 1
|
||||
else False, # TBD なにこれ?
|
||||
diff_silence=True if self.settings.useDiffSilence == 1 else False, # TBD なにこれ?
|
||||
)
|
||||
|
||||
return _audio.cpu().numpy() * 32768.0
|
||||
@ -210,9 +198,21 @@ class DDSP_SVC:
|
||||
audio = self._pyTorch_inference(data)
|
||||
return audio
|
||||
|
||||
# def destroy(self):
|
||||
# del self.net_g
|
||||
# del self.onnx_session
|
||||
@classmethod
|
||||
def loadModel2(cls, props: LoadModelParams2):
|
||||
slotInfo: DDSPSVCModelSlot = DDSPSVCModelSlot()
|
||||
for file in props.files:
|
||||
if file.kind == "ddspSvcModelConfig":
|
||||
slotInfo.configFile = file.name
|
||||
elif file.kind == "ddspSvcModel":
|
||||
slotInfo.modelFile = file.name
|
||||
elif file.kind == "ddspSvcDiffusionConfig":
|
||||
slotInfo.diffConfigFile = file.name
|
||||
elif file.kind == "ddspSvcDiffusion":
|
||||
slotInfo.diffModelFile = file.name
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
|
||||
return slotInfo
|
||||
|
||||
def __del__(self):
|
||||
del self.net_g
|
||||
|
@ -1,7 +1,8 @@
|
||||
import sys
|
||||
import os
|
||||
from data.ModelSlot import MMVCv13ModelSlot
|
||||
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
|
||||
if sys.platform.startswith("darwin"):
|
||||
@ -77,13 +78,7 @@ class MMVCv13:
|
||||
|
||||
# PyTorchモデル生成
|
||||
if self.settings.pyTorchModelFile is not None:
|
||||
self.net_g = SynthesizerTrn(
|
||||
len(symbols),
|
||||
self.hps.data.filter_length // 2 + 1,
|
||||
self.hps.train.segment_size // self.hps.data.hop_length,
|
||||
n_speakers=self.hps.data.n_speakers,
|
||||
**self.hps.model
|
||||
)
|
||||
self.net_g = SynthesizerTrn(len(symbols), self.hps.data.filter_length // 2 + 1, self.hps.train.segment_size // self.hps.data.hop_length, n_speakers=self.hps.data.n_speakers, **self.hps.model)
|
||||
self.net_g.eval()
|
||||
load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
|
||||
|
||||
@ -154,9 +149,7 @@ class MMVCv13:
|
||||
def get_info(self):
|
||||
data = asdict(self.settings)
|
||||
|
||||
data["onnxExecutionProviders"] = (
|
||||
self.onnx_session.get_providers() if self.onnx_session is not None else []
|
||||
)
|
||||
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
|
||||
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
|
||||
for f in files:
|
||||
if data[f] is not None and os.path.exists(data[f]):
|
||||
@ -193,9 +186,7 @@ class MMVCv13:
|
||||
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
|
||||
|
||||
if self.audio_buffer is not None:
|
||||
self.audio_buffer = np.concatenate(
|
||||
[self.audio_buffer, newData], 0
|
||||
) # 過去のデータに連結
|
||||
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # 過去のデータに連結
|
||||
else:
|
||||
self.audio_buffer = newData
|
||||
|
||||
@ -204,9 +195,7 @@ class MMVCv13:
|
||||
# if convertSize < 8192:
|
||||
# convertSize = 8192
|
||||
if convertSize % self.hps.data.hop_length != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||
convertSize = convertSize + (
|
||||
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
|
||||
)
|
||||
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
|
||||
|
||||
convertOffset = -1 * convertSize
|
||||
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
|
||||
@ -238,7 +227,9 @@ class MMVCv13:
|
||||
"sid_src": sid_src.numpy(),
|
||||
"sid_tgt": sid_tgt1.numpy(),
|
||||
},
|
||||
)[0][0, 0]
|
||||
)[
|
||||
0
|
||||
][0, 0]
|
||||
* self.hps.data.max_wav_value
|
||||
)
|
||||
return audio1
|
||||
@ -254,19 +245,10 @@ class MMVCv13:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
|
||||
with torch.no_grad():
|
||||
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
|
||||
x.to(dev) for x in data
|
||||
]
|
||||
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.to(dev) for x in data]
|
||||
sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
|
||||
|
||||
audio1 = (
|
||||
self.net_g.to(dev)
|
||||
.voice_conversion(
|
||||
spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target
|
||||
)[0, 0]
|
||||
.data
|
||||
* self.hps.data.max_wav_value
|
||||
)
|
||||
audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target)[0, 0].data * self.hps.data.max_wav_value
|
||||
result = audio1.float().cpu().numpy()
|
||||
|
||||
return result
|
||||
@ -278,6 +260,18 @@ class MMVCv13:
|
||||
audio = self._pyTorch_inference(data)
|
||||
return audio
|
||||
|
||||
@classmethod
|
||||
def loadModel2(cls, props: LoadModelParams2):
|
||||
slotInfo: MMVCv13ModelSlot = MMVCv13ModelSlot()
|
||||
for file in props.files:
|
||||
if file.kind == "mmvcv13Model":
|
||||
slotInfo.modelFile = file.name
|
||||
elif file.kind == "mmvcv13Config":
|
||||
slotInfo.configFile = file.name
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
|
||||
return slotInfo
|
||||
|
||||
def __del__(self):
|
||||
del self.net_g
|
||||
del self.onnx_session
|
||||
|
@ -1,7 +1,8 @@
|
||||
import sys
|
||||
import os
|
||||
from data.ModelSlot import MMVCv15ModelSlot
|
||||
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
|
||||
if sys.platform.startswith("darwin"):
|
||||
@ -172,12 +173,7 @@ class MMVCv15:
|
||||
def get_info(self):
|
||||
data = asdict(self.settings)
|
||||
|
||||
data["onnxExecutionProviders"] = (
|
||||
self.onnx_session.get_providers()
|
||||
if self.settings.onnxModelFile != ""
|
||||
and self.settings.onnxModelFile is not None
|
||||
else []
|
||||
)
|
||||
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.settings.onnxModelFile != "" and self.settings.onnxModelFile is not None else []
|
||||
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
|
||||
for f in files:
|
||||
if data[f] is not None and os.path.exists(data[f]):
|
||||
@ -195,9 +191,7 @@ class MMVCv15:
|
||||
def _get_f0(self, detector: str, newData: AudioInOut):
|
||||
audio_norm_np = newData.astype(np.float64)
|
||||
if detector == "dio":
|
||||
_f0, _time = pw.dio(
|
||||
audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5
|
||||
)
|
||||
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
|
||||
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
|
||||
else:
|
||||
f0, t = pw.harvest(
|
||||
@ -207,9 +201,7 @@ class MMVCv15:
|
||||
f0_floor=71.0,
|
||||
f0_ceil=1000.0,
|
||||
)
|
||||
f0 = convert_continuos_f0(
|
||||
f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length)
|
||||
)
|
||||
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
|
||||
f0 = torch.from_numpy(f0.astype(np.float32))
|
||||
return f0
|
||||
|
||||
@ -237,9 +229,7 @@ class MMVCv15:
|
||||
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
|
||||
|
||||
if self.audio_buffer is not None:
|
||||
self.audio_buffer = np.concatenate(
|
||||
[self.audio_buffer, newData], 0
|
||||
) # 過去のデータに連結
|
||||
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # 過去のデータに連結
|
||||
else:
|
||||
self.audio_buffer = newData
|
||||
|
||||
@ -248,9 +238,7 @@ class MMVCv15:
|
||||
# if convertSize < 8192:
|
||||
# convertSize = 8192
|
||||
if convertSize % self.hps.data.hop_length != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||
convertSize = convertSize + (
|
||||
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
|
||||
)
|
||||
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
|
||||
|
||||
# ONNX は固定長
|
||||
if self.settings.framework == "ONNX":
|
||||
@ -290,16 +278,15 @@ class MMVCv15:
|
||||
"sid_src": sid_src.numpy(),
|
||||
"sid_tgt": sid_tgt1.numpy(),
|
||||
},
|
||||
)[0][0, 0]
|
||||
)[
|
||||
0
|
||||
][0, 0]
|
||||
* self.hps.data.max_wav_value
|
||||
)
|
||||
return audio1
|
||||
|
||||
def _pyTorch_inference(self, data):
|
||||
if (
|
||||
self.settings.pyTorchModelFile == ""
|
||||
or self.settings.pyTorchModelFile is None
|
||||
):
|
||||
if self.settings.pyTorchModelFile == "" or self.settings.pyTorchModelFile is None:
|
||||
print("[Voice Changer] No pyTorch session.")
|
||||
raise NoModeLoadedException("pytorch")
|
||||
|
||||
@ -316,12 +303,7 @@ class MMVCv15:
|
||||
sid_src = sid_src.to(dev)
|
||||
sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
|
||||
|
||||
audio1 = (
|
||||
self.net_g.to(dev)
|
||||
.voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0]
|
||||
.data
|
||||
* self.hps.data.max_wav_value
|
||||
)
|
||||
audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
|
||||
result = audio1.float().cpu().numpy()
|
||||
return result
|
||||
|
||||
@ -336,6 +318,18 @@ class MMVCv15:
|
||||
print(_e)
|
||||
raise ONNXInputArgumentException()
|
||||
|
||||
@classmethod
|
||||
def loadModel2(cls, props: LoadModelParams2):
|
||||
slotInfo: MMVCv15ModelSlot = MMVCv15ModelSlot()
|
||||
for file in props.files:
|
||||
if file.kind == "mmvcv15Model":
|
||||
slotInfo.modelFile = file.name
|
||||
elif file.kind == "mmvcv15Config":
|
||||
slotInfo.configFile = file.name
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
|
||||
return slotInfo
|
||||
|
||||
def __del__(self):
|
||||
del self.net_g
|
||||
del self.onnx_session
|
||||
|
@ -1,7 +1,8 @@
|
||||
import sys
|
||||
import os
|
||||
from data.ModelSlot import SoVitsSvc40ModelSlot
|
||||
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams
|
||||
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
|
||||
from voice_changer.utils.VoiceChangerModel import AudioInOut
|
||||
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
|
||||
|
||||
@ -97,11 +98,7 @@ class SoVitsSvc40:
|
||||
self.settings.pyTorchModelFile = modelFile
|
||||
self.settings.onnxModelFile = None
|
||||
|
||||
clusterTorchModel = (
|
||||
params["files"]["soVitsSvc40Cluster"]
|
||||
if "soVitsSvc40Cluster" in params["files"]
|
||||
else None
|
||||
)
|
||||
clusterTorchModel = params["files"]["soVitsSvc40Cluster"] if "soVitsSvc40Cluster" in params["files"] else None
|
||||
|
||||
content_vec_path = self.params.content_vec_500
|
||||
content_vec_onnx_path = self.params.content_vec_500_onnx
|
||||
@ -212,9 +209,7 @@ class SoVitsSvc40:
|
||||
def get_info(self):
|
||||
data = asdict(self.settings)
|
||||
|
||||
data["onnxExecutionProviders"] = (
|
||||
self.onnx_session.get_providers() if self.onnx_session is not None else []
|
||||
)
|
||||
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
|
||||
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
|
||||
for f in files:
|
||||
if data[f] is not None and os.path.exists(data[f]):
|
||||
@ -246,9 +241,7 @@ class SoVitsSvc40:
|
||||
)
|
||||
|
||||
if wav_44k.shape[0] % self.hps.data.hop_length != 0:
|
||||
print(
|
||||
f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}"
|
||||
)
|
||||
print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")
|
||||
|
||||
f0, uv = utils.interpolate_f0(f0)
|
||||
f0 = torch.FloatTensor(f0)
|
||||
@ -257,14 +250,10 @@ class SoVitsSvc40:
|
||||
f0 = f0.unsqueeze(0)
|
||||
uv = uv.unsqueeze(0)
|
||||
|
||||
wav16k_numpy = librosa.resample(
|
||||
audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000
|
||||
)
|
||||
wav16k_numpy = librosa.resample(audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000)
|
||||
wav16k_tensor = torch.from_numpy(wav16k_numpy)
|
||||
|
||||
if (
|
||||
self.settings.gpu < 0 or self.gpu_num == 0
|
||||
) or self.settings.framework == "ONNX":
|
||||
if (self.settings.gpu < 0 or self.gpu_num == 0) or self.settings.framework == "ONNX":
|
||||
dev = torch.device("cpu")
|
||||
else:
|
||||
dev = torch.device("cuda", index=self.settings.gpu)
|
||||
@ -282,44 +271,27 @@ class SoVitsSvc40:
|
||||
if self.hps.model.ssl_dim == 768:
|
||||
self.hubert_model = self.hubert_model.to(dev)
|
||||
wav16k_tensor = wav16k_tensor.to(dev)
|
||||
c = get_hubert_content_layer9(
|
||||
self.hubert_model, wav_16k_tensor=wav16k_tensor
|
||||
)
|
||||
c = get_hubert_content_layer9(self.hubert_model, wav_16k_tensor=wav16k_tensor)
|
||||
else:
|
||||
self.hubert_model = self.hubert_model.to(dev)
|
||||
wav16k_tensor = wav16k_tensor.to(dev)
|
||||
c = utils.get_hubert_content(
|
||||
self.hubert_model, wav_16k_tensor=wav16k_tensor
|
||||
)
|
||||
c = utils.get_hubert_content(self.hubert_model, wav_16k_tensor=wav16k_tensor)
|
||||
|
||||
uv = uv.to(dev)
|
||||
f0 = f0.to(dev)
|
||||
|
||||
c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1])
|
||||
|
||||
if (
|
||||
self.settings.clusterInferRatio != 0
|
||||
and hasattr(self, "cluster_model")
|
||||
and self.cluster_model is not None
|
||||
):
|
||||
speaker = [
|
||||
key
|
||||
for key, value in self.settings.speakers.items()
|
||||
if value == self.settings.dstId
|
||||
]
|
||||
if self.settings.clusterInferRatio != 0 and hasattr(self, "cluster_model") and self.cluster_model is not None:
|
||||
speaker = [key for key, value in self.settings.speakers.items() if value == self.settings.dstId]
|
||||
if len(speaker) != 1:
|
||||
pass
|
||||
# print("not only one speaker found.", speaker)
|
||||
else:
|
||||
cluster_c = cluster.get_cluster_center_result(
|
||||
self.cluster_model, c.cpu().numpy().T, speaker[0]
|
||||
).T
|
||||
cluster_c = cluster.get_cluster_center_result(self.cluster_model, c.cpu().numpy().T, speaker[0]).T
|
||||
cluster_c = torch.FloatTensor(cluster_c).to(dev)
|
||||
c = c.to(dev)
|
||||
c = (
|
||||
self.settings.clusterInferRatio * cluster_c
|
||||
+ (1 - self.settings.clusterInferRatio) * c
|
||||
)
|
||||
c = self.settings.clusterInferRatio * cluster_c + (1 - self.settings.clusterInferRatio) * c
|
||||
|
||||
c = c.unsqueeze(0)
|
||||
return c, f0, uv
|
||||
@ -334,20 +306,14 @@ class SoVitsSvc40:
|
||||
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
|
||||
|
||||
if self.audio_buffer is not None:
|
||||
self.audio_buffer = np.concatenate(
|
||||
[self.audio_buffer, newData], 0
|
||||
) # 過去のデータに連結
|
||||
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # 過去のデータに連結
|
||||
else:
|
||||
self.audio_buffer = newData
|
||||
|
||||
convertSize = (
|
||||
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
)
|
||||
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
|
||||
|
||||
if convertSize % self.hps.data.hop_length != 0: # モデルの出力のホップサイズで切り捨てが発生するので補う。
|
||||
convertSize = convertSize + (
|
||||
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
|
||||
)
|
||||
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
|
||||
|
||||
convertOffset = -1 * convertSize
|
||||
self.audio_buffer = self.audio_buffer[convertOffset:] # 変換対象の部分だけ抽出
|
||||
@ -389,9 +355,7 @@ class SoVitsSvc40:
|
||||
"f0": f0.astype(np.float32),
|
||||
"uv": uv.astype(np.float32),
|
||||
"g": sid_target.astype(np.int64),
|
||||
"noise_scale": np.array([self.settings.noiseScale]).astype(
|
||||
np.float32
|
||||
),
|
||||
"noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
|
||||
# "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
|
||||
},
|
||||
)[0][0, 0]
|
||||
@ -457,6 +421,20 @@ class SoVitsSvc40:
|
||||
|
||||
return audio
|
||||
|
||||
@classmethod
|
||||
def loadModel2(cls, props: LoadModelParams2):
|
||||
slotInfo: SoVitsSvc40ModelSlot = SoVitsSvc40ModelSlot()
|
||||
for file in props.files:
|
||||
if file.kind == "soVitsSvc40Config":
|
||||
slotInfo.configFile = file.name
|
||||
elif file.kind == "soVitsSvc40Model":
|
||||
slotInfo.modelFile = file.name
|
||||
elif file.kind == "soVitsSvc40Cluster":
|
||||
slotInfo.clusterFile = file.name
|
||||
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
|
||||
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
|
||||
return slotInfo
|
||||
|
||||
def __del__(self):
|
||||
del self.net_g
|
||||
del self.onnx_session
|
||||
|
@ -95,22 +95,48 @@ class VoiceChangerManager(ServerDeviceCallbacks):
|
||||
# Dataを展開
|
||||
params = LoadModelParams2(**paramDict)
|
||||
params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
|
||||
|
||||
# ファイルをslotにコピー
|
||||
for file in params.files:
|
||||
print("FILE", file)
|
||||
srcPath = os.path.join(UPLOAD_DIR, file.name)
|
||||
dstDir = os.path.join(self.params.model_dir, str(params.slot))
|
||||
srcPath = os.path.join(UPLOAD_DIR, file.dir, file.name)
|
||||
dstDir = os.path.join(
|
||||
self.params.model_dir,
|
||||
str(params.slot),
|
||||
file.dir,
|
||||
)
|
||||
dstPath = os.path.join(dstDir, file.name)
|
||||
os.makedirs(dstDir, exist_ok=True)
|
||||
print(f"move to {srcPath} -> {dstPath}")
|
||||
shutil.move(srcPath, dstPath)
|
||||
file.name = dstPath
|
||||
|
||||
# メタデータ作成(各VCで定義)
|
||||
if params.voiceChangerType == "RVC":
|
||||
from voice_changer.RVC.RVC import RVC # 起動時にインポートするとパラメータが取れない。
|
||||
|
||||
slotInfo = RVC.loadModel2(params)
|
||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||
elif params.voiceChangerType == "MMVCv13":
|
||||
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
|
||||
|
||||
slotInfo = MMVCv13.loadModel2(params)
|
||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||
elif params.voiceChangerType == "MMVCv15":
|
||||
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
|
||||
|
||||
slotInfo = MMVCv15.loadModel2(params)
|
||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||
elif params.voiceChangerType == "so-vits-svc-40":
|
||||
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
|
||||
|
||||
slotInfo = SoVitsSvc40.loadModel2(params)
|
||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||
elif params.voiceChangerType == "DDSP-SVC":
|
||||
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
|
||||
|
||||
slotInfo = DDSP_SVC.loadModel2(params)
|
||||
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
|
||||
print("params", params)
|
||||
|
||||
else:
|
||||
|
@ -43,6 +43,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
|
||||
class LoadModelParamFile:
|
||||
name: str
|
||||
kind: LoadModelParamFileKind
|
||||
dir: str
|
||||
|
||||
|
||||
@dataclass
|
||||
|
Loading…
x
Reference in New Issue
Block a user