WIP: integrate vcs to new gui 2

wataru 2023-06-20 06:39:39 +09:00
parent b453e5fd85
commit b6996a15fe
12 changed files with 251 additions and 153 deletions

File diff suppressed because one or more lines are too long


@ -1,6 +1,6 @@
import React, { useEffect, useMemo, useState } from "react";
import { useAppState } from "../../001_provider/001_AppStateProvider";
import { FileUploadSetting, InitialFileUploadSetting, ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
import { ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
import { useMessageBuilder } from "../../hooks/useMessageBuilder";
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
import { checkExtention, trimfileName } from "../../utils/utils";
@ -54,15 +54,31 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
const checkModelSetting = (setting: ModelUploadSetting) => {
if (setting.voiceChangerType == "RVC") {
// const enough = !!setting.files.find(x => { return x.kind == "rvcModel" }) &&
// !!setting.files.find(x => { return x.kind == "rvcIndex" })
// return enough
const enough = !!setting.files.find(x => { return x.kind == "rvcModel" })
return enough
} else if (setting.voiceChangerType == "MMVCv13") {
const enough = !!setting.files.find(x => { return x.kind == "mmvcv13Model" }) &&
!!setting.files.find(x => { return x.kind == "mmvcv13Config" })
return enough
} else if (setting.voiceChangerType == "MMVCv15") {
const enough = !!setting.files.find(x => { return x.kind == "mmvcv15Model" }) &&
!!setting.files.find(x => { return x.kind == "mmvcv15Config" })
return enough
} else if (setting.voiceChangerType == "so-vits-svc-40") {
const enough = !!setting.files.find(x => { return x.kind == "soVitsSvc40Config" }) &&
!!setting.files.find(x => { return x.kind == "soVitsSvc40Model" })
return enough
} else if (setting.voiceChangerType == "DDSP-SVC") {
const enough = !!setting.files.find(x => { return x.kind == "ddspSvcModel" }) &&
!!setting.files.find(x => { return x.kind == "ddspSvcModelConfig" }) &&
!!setting.files.find(x => { return x.kind == "ddspSvcDiffusion" }) &&
!!setting.files.find(x => { return x.kind == "ddspSvcDiffusionConfig" })
return enough
}
return false
}
const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[]) => {
const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[], dir: string = "") => {
const selectedFile = setting.files.find(x => { return x.kind == kind })
const selectedFilename = selectedFile?.file.name || ""
return (
@ -81,7 +97,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
if (selectedFile) {
selectedFile.file = file
} else {
setting.files.push({ kind: kind, file: file })
setting.files.push({ kind: kind, file: file, dir: dir })
}
setUploadSetting({ ...setting })
}}>
@ -96,6 +112,21 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
if (vcType == "RVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "rvcModel", ["pth", "onnx"]))
rows.push(generateFileRow(uploadSetting!, "Index", "rvcIndex", ["index", "bin"]))
} else if (vcType == "MMVCv13") {
rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv13Config", ["json"]))
rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv13Model", ["pth", "onnx"]))
} else if (vcType == "MMVCv15") {
rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv15Config", ["json"]))
rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv15Model", ["pth", "onnx"]))
} else if (vcType == "so-vits-svc-40") {
rows.push(generateFileRow(uploadSetting!, "Config", "soVitsSvc40Config", ["json"]))
rows.push(generateFileRow(uploadSetting!, "Model", "soVitsSvc40Model", ["pth"]))
rows.push(generateFileRow(uploadSetting!, "Cluster", "soVitsSvc40Cluster", ["pth", "pt"]))
} else if (vcType == "DDSP-SVC") {
rows.push(generateFileRow(uploadSetting!, "Config", "ddspSvcModelConfig", ["yaml"], "model/"))
rows.push(generateFileRow(uploadSetting!, "Model", "ddspSvcModel", ["pth", "pt"], "model/"))
rows.push(generateFileRow(uploadSetting!, "Config(diff)", "ddspSvcDiffusionConfig", ["yaml"], "diff/"))
rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"))
}
return rows
}
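
checkModelSetting above encodes which uploaded file kinds are mandatory per voice changer type (the RVC index file and the so-vits-svc-40 cluster file remain optional). A minimal table-driven sketch of the same rule, written in Python for brevity; REQUIRED_KINDS and check_model_setting are hypothetical names, not part of this commit:

    # Hypothetical mirror of the client-side checkModelSetting guard.
    REQUIRED_KINDS = {
        "RVC": {"rvcModel"},  # rvcIndex is optional
        "MMVCv13": {"mmvcv13Model", "mmvcv13Config"},
        "MMVCv15": {"mmvcv15Model", "mmvcv15Config"},
        "so-vits-svc-40": {"soVitsSvc40Model", "soVitsSvc40Config"},  # cluster is optional
        "DDSP-SVC": {"ddspSvcModel", "ddspSvcModelConfig", "ddspSvcDiffusion", "ddspSvcDiffusionConfig"},
    }

    def check_model_setting(voice_changer_type: str, kinds: set) -> bool:
        # "enough" files are present when every required kind was selected
        required = REQUIRED_KINDS.get(voice_changer_type)
        return required is not None and required.issubset(kinds)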


@ -57,13 +57,13 @@ export class ServerConfigurator {
return info
}
uploadFile2 = async (file: File, onprogress: (progress: number, end: boolean) => void) => {
uploadFile2 = async (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
const url = this.serverUrl + "/upload_file"
onprogress(0, false)
const size = 1024 * 1024;
let index = 0; // index value
const fileLength = file.size
const filename = file.name
const filename = dir + file.name
const fileChunkNum = Math.ceil(fileLength / size)
while (true) {
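
uploadFile2 now takes a dir prefix and folds it into the upload name (filename = dir + file.name) before slicing the file into 1 MiB chunks; concatUploadedFile(dir + file.name, chunkNum), visible in the hook further down, reassembles the chunks server-side. A sketch of the same chunking arithmetic, assuming nothing beyond what the diff shows; plan_upload is an illustrative name:

    import math

    CHUNK_SIZE = 1024 * 1024  # 1 MiB, matching the client constant above

    def plan_upload(dir_prefix: str, filename: str, file_length: int):
        upload_name = dir_prefix + filename  # e.g. "model/" + "somemodel.pth"
        chunk_num = math.ceil(file_length / CHUNK_SIZE)
        # byte ranges of each sequential chunk
        ranges = [(i * CHUNK_SIZE, min((i + 1) * CHUNK_SIZE, file_length))
                  for i in range(chunk_num)]
        return upload_name, chunk_num, ranges

    # plan_upload("model/", "somemodel.pth", 3_500_000) -> ("model/somemodel.pth", 4, [...])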


@ -290,8 +290,8 @@ export class VoiceChangerClient {
uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile(buf, filename, onprogress)
}
uploadFile2 = (file: File, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile2(file, onprogress)
uploadFile2 = (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile2(dir, file, onprogress)
}
concatUploadedFile = (filename: string, chunkNum: number) => {
return this.configurator.concatUploadedFile(filename, chunkNum)


@ -41,6 +41,7 @@ export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
export type ModelFile = {
file: File,
kind: ModelFileKind
dir: string
}
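
ModelFile gains a dir field so each uploaded file can target a subfolder of its model slot; the DDSP-SVC rows in the uploader pass "model/" and "diff/". Serialized for the server (the LoadModelParamFile dataclass at the end of this commit carries the same three fields), one entry might look like this sketch; the filename is illustrative:

    # Illustrative wire form of one ModelFile / LoadModelParamFile entry.
    model_file = {
        "name": "somemodel.pth",  # hypothetical filename
        "kind": "ddspSvcModel",
        "dir": "model/",          # subfolder inside the model slot
    }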
export type ModelUploadSetting = {
@ -296,7 +297,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
if (!props.voiceChangerClient) return
console.log("uploading..1.", file)
console.log("uploading..2.", file.name)
const num = await props.voiceChangerClient.uploadFile2(file, onprogress)
const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress)
const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num)
console.log("uploaded", num, res)
}
@ -319,11 +320,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
const progOffset = 100 * i * progRate
await _uploadFile2(setting.files[i].file, (progress: number, _end: boolean) => {
setUploadProgress(progress * progRate + progOffset)
})
}, setting.files[i].dir)
}
}
const params: ModelUploadSettingForServer = {
...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind } })
...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind, dir: f.dir } })
}
const loadPromise = props.voiceChangerClient.loadModel(
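
The upload loop reports a single aggregate percentage: each file's 0-100 progress is scaled by progRate and shifted by 100 * i * progRate. progRate is defined outside this hunk; assuming it is 1 / fileCount, the weighting works out as in this sketch (Python for brevity; aggregate_progress is a hypothetical name):

    def aggregate_progress(file_index: int, file_count: int, file_progress: float) -> float:
        # file_progress is 0..100 for the file currently uploading
        prog_rate = 1.0 / file_count                # assumption: equal weight per file
        prog_offset = 100.0 * file_index * prog_rate
        return file_progress * prog_rate + prog_offset

    # with three files, finishing file 0 reports ~33.3, finishing file 1 reports ~66.7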


@ -38,7 +38,72 @@ class RVCModelSlot(ModelSlot):
iconFile: str = ""
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
@dataclass
class MMVCv13ModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "MMVCv13"
modelFile: str = ""
configFile: str = ""
srcId: int = 107
dstId: int = 100
isONNX: bool = False
samplingRate: int = 24000
name: str = ""
description: str = ""
iconFile: str = ""
@dataclass
class MMVCv15ModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "MMVCv15"
modelFile: str = ""
configFile: str = ""
srcId: int = 0
dstId: int = 101
isONNX: bool = False
samplingRate: int = 24000
name: str = ""
description: str = ""
iconFile: str = ""
@dataclass
class SoVitsSvc40ModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "so-vits-svc-40"
modelFile: str = ""
configFile: str = ""
clusterFile: str = ""
dstId: int = 0
isONNX: bool = False
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
@dataclass
class DDSPSVCModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "DDSP-SVC"
modelFile: str = ""
configFile: str = ""
diffModelFile: str = ""
diffConfigFile: str = ""
dstId: int = 0
isONNX: bool = False
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot]
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
@ -50,6 +115,14 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
if slotInfo.voiceChangerType == "RVC":
return RVCModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "MMVCv13":
return MMVCv13ModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "MMVCv15":
return MMVCv15ModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "so-vits-svc-40":
return SoVitsSvc40ModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "DDSP-SVC":
return DDSPSVCModelSlot(**jsonDict)
else:
return ModelSlot()
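
loadSlotInfo first parses the slot JSON into the base ModelSlot (filtering the dict down to the base class's annotated fields) just to read voiceChangerType, then re-parses the full dict into the concrete dataclass. A condensed sketch of the same dispatch as a type-to-class table; SLOT_CLASSES and parse_slot are hypothetical names, and dataclasses.fields() is used because __annotations__ on a subclass omits inherited fields:

    from dataclasses import fields

    SLOT_CLASSES = {
        "RVC": RVCModelSlot,
        "MMVCv13": MMVCv13ModelSlot,
        "MMVCv15": MMVCv15ModelSlot,
        "so-vits-svc-40": SoVitsSvc40ModelSlot,
        "DDSP-SVC": DDSPSVCModelSlot,
    }

    def parse_slot(json_dict: dict) -> ModelSlots:
        cls = SLOT_CLASSES.get(json_dict.get("voiceChangerType"), ModelSlot)
        # keep only keys the target dataclass declares, guarding against stale JSON
        known = {f.name for f in fields(cls)}
        return cls(**{k: v for k, v in json_dict.items() if k in known})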


@ -3,6 +3,7 @@ import os
from dataclasses import asdict
import numpy as np
import torch
from data.ModelSlot import DDSPSVCModelSlot
from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
from voice_changer.DDSP_SVC.deviceManager.DeviceManager import DeviceManager
@ -21,7 +22,7 @@ from diffusion.infer_gt_mel import DiffGtMel # type: ignore
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.DDSP_SVC.DDSP_SVCSetting import DDSP_SVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
@ -44,11 +45,7 @@ def phase_vocoder(a, b, fade_out, fade_in):
deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
t = torch.arange(n).unsqueeze(-1).to(a) / n
result = (
a * (fade_out**2)
+ b * (fade_in**2)
+ torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
)
result = a * (fade_out**2) + b * (fade_in**2) + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
return result
@ -102,9 +99,7 @@ class DDSP_SVC:
def reloadModel(self):
self.device = self.deviceManager.getDevice(self.settings.gpu)
modelFile = self.settings.modelSlots[self.settings.modelSlotIndex].modelFile
diffusionFile = self.settings.modelSlots[
self.settings.modelSlotIndex
].diffusionFile
diffusionFile = self.settings.modelSlots[self.settings.modelSlotIndex].diffusionFile
self.svc_model = SvcDDSP()
self.svc_model.setVCParams(self.params)
@ -144,15 +139,11 @@ class DDSP_SVC:
# newData = newData.astype(np.float32)
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
convertSize = (
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
)
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
# if convertSize % self.hop_size != 0: # pad, because truncation occurs at the model's output hop size.
# convertSize = convertSize + (self.hop_size - (convertSize % self.hop_size))
@ -187,8 +178,7 @@ class DDSP_SVC:
f0_min=50,
f0_max=1100,
# safe_prefix_pad_length=0, # TBD: what is this?
safe_prefix_pad_length=self.settings.extraConvertSize
/ self.svc_model.args.data.sampling_rate,
safe_prefix_pad_length=self.settings.extraConvertSize / self.svc_model.args.data.sampling_rate,
diff_model=self.diff_model,
diff_acc=self.settings.diffAcc, # TBD: what is this?
diff_spk_id=self.settings.diffSpkId,
@ -196,9 +186,7 @@ class DDSP_SVC:
# diff_use_dpm=True if self.settings.useDiffDpm == 1 else False, # TBD: what is this?
method=self.settings.diffMethod,
k_step=self.settings.kStep, # TBD: what is this?
diff_silence=True
if self.settings.useDiffSilence == 1
else False, # TBD: what is this?
diff_silence=True if self.settings.useDiffSilence == 1 else False, # TBD: what is this?
)
return _audio.cpu().numpy() * 32768.0
@ -210,9 +198,21 @@ class DDSP_SVC:
audio = self._pyTorch_inference(data)
return audio
# def destroy(self):
# del self.net_g
# del self.onnx_session
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: DDSPSVCModelSlot = DDSPSVCModelSlot()
for file in props.files:
if file.kind == "ddspSvcModelConfig":
slotInfo.configFile = file.name
elif file.kind == "ddspSvcModel":
slotInfo.modelFile = file.name
elif file.kind == "ddspSvcDiffusionConfig":
slotInfo.diffConfigFile = file.name
elif file.kind == "ddspSvcDiffusion":
slotInfo.diffModelFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
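
MMVCv13, MMVCv15, SoVitsSvc40 and DDSP_SVC all gain the same shape of loadModel2: a classmethod that maps each uploaded file's kind onto a slot attribute, then derives isONNX and the display name from the model filename, without loading any weights. The shared pattern, factored out as a sketch (KIND_TO_ATTR and build_slot_info are hypothetical names, shown here for the DDSP-SVC kinds):

    import os

    KIND_TO_ATTR = {
        "ddspSvcModel": "modelFile",
        "ddspSvcModelConfig": "configFile",
        "ddspSvcDiffusion": "diffModelFile",
        "ddspSvcDiffusionConfig": "diffConfigFile",
    }

    def build_slot_info(slot_info, files):
        for f in files:
            attr = KIND_TO_ATTR.get(f.kind)
            if attr is not None:
                setattr(slot_info, attr, f.name)
        slot_info.isONNX = slot_info.modelFile.endswith(".onnx")
        # display name = model filename without directory or extension
        slot_info.name = os.path.splitext(os.path.basename(slot_info.modelFile))[0]
        return slot_info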


@ -1,7 +1,8 @@
import sys
import os
from data.ModelSlot import MMVCv13ModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
if sys.platform.startswith("darwin"):
@ -77,13 +78,7 @@ class MMVCv13:
# create PyTorch model
if self.settings.pyTorchModelFile is not None:
self.net_g = SynthesizerTrn(
len(symbols),
self.hps.data.filter_length // 2 + 1,
self.hps.train.segment_size // self.hps.data.hop_length,
n_speakers=self.hps.data.n_speakers,
**self.hps.model
)
self.net_g = SynthesizerTrn(len(symbols), self.hps.data.filter_length // 2 + 1, self.hps.train.segment_size // self.hps.data.hop_length, n_speakers=self.hps.data.n_speakers, **self.hps.model)
self.net_g.eval()
load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
@ -154,9 +149,7 @@ class MMVCv13:
def get_info(self):
data = asdict(self.settings)
data["onnxExecutionProviders"] = (
self.onnx_session.get_providers() if self.onnx_session is not None else []
)
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files:
if data[f] is not None and os.path.exists(data[f]):
@ -193,9 +186,7 @@ class MMVCv13:
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
@ -204,9 +195,7 @@ class MMVCv13:
# if convertSize < 8192:
# convertSize = 8192
if convertSize % self.hps.data.hop_length != 0: # pad, because truncation occurs at the model's output hop size.
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
)
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:] # extract only the portion to be converted
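
This buffering logic is shared across the voice changers in this commit: new audio is appended to a rolling buffer, the conversion window is sized as inputSize + crossfadeSize + solaSearchFrame + extraConvertSize, rounded up to a multiple of the model's hop length so nothing is truncated at the output, and only the buffer tail is converted. As a worked sketch:

    import numpy as np

    def prepare_convert_window(buffer, new_data, input_size, crossfade_size,
                               sola_search_frame, extra_convert_size, hop_length):
        buffer = new_data if buffer is None else np.concatenate([buffer, new_data], 0)
        convert_size = input_size + crossfade_size + sola_search_frame + extra_convert_size
        if convert_size % hop_length != 0:
            # round up: e.g. hop_length 256 turns 5000 into 5120
            convert_size += hop_length - (convert_size % hop_length)
        return buffer, buffer[-convert_size:]  # only the tail gets converted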
@ -238,7 +227,9 @@ class MMVCv13:
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy(),
},
)[0][0, 0]
)[
0
][0, 0]
* self.hps.data.max_wav_value
)
return audio1
@ -254,19 +245,10 @@ class MMVCv13:
dev = torch.device("cuda", index=self.settings.gpu)
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.to(dev) for x in data
]
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.to(dev) for x in data]
sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
audio1 = (
self.net_g.to(dev)
.voice_conversion(
spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target
)[0, 0]
.data
* self.hps.data.max_wav_value
)
audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target)[0, 0].data * self.hps.data.max_wav_value
result = audio1.float().cpu().numpy()
return result
@ -278,6 +260,18 @@ class MMVCv13:
audio = self._pyTorch_inference(data)
return audio
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: MMVCv13ModelSlot = MMVCv13ModelSlot()
for file in props.files:
if file.kind == "mmvcv13Model":
slotInfo.modelFile = file.name
elif file.kind == "mmvcv13Config":
slotInfo.configFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
del self.onnx_session


@ -1,7 +1,8 @@
import sys
import os
from data.ModelSlot import MMVCv15ModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
if sys.platform.startswith("darwin"):
@ -172,12 +173,7 @@ class MMVCv15:
def get_info(self):
data = asdict(self.settings)
data["onnxExecutionProviders"] = (
self.onnx_session.get_providers()
if self.settings.onnxModelFile != ""
and self.settings.onnxModelFile is not None
else []
)
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.settings.onnxModelFile != "" and self.settings.onnxModelFile is not None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files:
if data[f] is not None and os.path.exists(data[f]):
@ -195,9 +191,7 @@ class MMVCv15:
def _get_f0(self, detector: str, newData: AudioInOut):
audio_norm_np = newData.astype(np.float64)
if detector == "dio":
_f0, _time = pw.dio(
audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5
)
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
else:
f0, t = pw.harvest(
@ -207,9 +201,7 @@ class MMVCv15:
f0_floor=71.0,
f0_ceil=1000.0,
)
f0 = convert_continuos_f0(
f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length)
)
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
f0 = torch.from_numpy(f0.astype(np.float32))
return f0
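
_get_f0 picks between two pyworld pitch trackers: dio (fast, refined by stonemask) or harvest (slower, smoother), and the harvest branch additionally fills unvoiced gaps via convert_continuos_f0. A minimal standalone sketch of the two paths, using only the pyworld calls visible in the diff; extract_f0 is an illustrative name:

    import numpy as np
    import pyworld as pw

    def extract_f0(audio: np.ndarray, sampling_rate: int, detector: str = "dio") -> np.ndarray:
        x = audio.astype(np.float64)
        if detector == "dio":
            _f0, _time = pw.dio(x, sampling_rate, frame_period=5.5)
            f0 = pw.stonemask(x, _f0, _time, sampling_rate)  # refine the raw dio track
        else:
            f0, _time = pw.harvest(x, sampling_rate, f0_floor=71.0, f0_ceil=1000.0)
        return f0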
@ -237,9 +229,7 @@ class MMVCv15:
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
@ -248,9 +238,7 @@ class MMVCv15:
# if convertSize < 8192:
# convertSize = 8192
if convertSize % self.hps.data.hop_length != 0: # pad, because truncation occurs at the model's output hop size.
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
)
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
# ONNX uses a fixed length
if self.settings.framework == "ONNX":
@ -290,16 +278,15 @@ class MMVCv15:
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy(),
},
)[0][0, 0]
)[
0
][0, 0]
* self.hps.data.max_wav_value
)
return audio1
def _pyTorch_inference(self, data):
if (
self.settings.pyTorchModelFile == ""
or self.settings.pyTorchModelFile is None
):
if self.settings.pyTorchModelFile == "" or self.settings.pyTorchModelFile is None:
print("[Voice Changer] No pyTorch session.")
raise NoModeLoadedException("pytorch")
@ -316,12 +303,7 @@ class MMVCv15:
sid_src = sid_src.to(dev)
sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
audio1 = (
self.net_g.to(dev)
.voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0]
.data
* self.hps.data.max_wav_value
)
audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
result = audio1.float().cpu().numpy()
return result
@ -336,6 +318,18 @@ class MMVCv15:
print(_e)
raise ONNXInputArgumentException()
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: MMVCv15ModelSlot = MMVCv15ModelSlot()
for file in props.files:
if file.kind == "mmvcv15Model":
slotInfo.modelFile = file.name
elif file.kind == "mmvcv15Config":
slotInfo.configFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
del self.onnx_session


@ -1,7 +1,8 @@
import sys
import os
from data.ModelSlot import SoVitsSvc40ModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@ -97,11 +98,7 @@ class SoVitsSvc40:
self.settings.pyTorchModelFile = modelFile
self.settings.onnxModelFile = None
clusterTorchModel = (
params["files"]["soVitsSvc40Cluster"]
if "soVitsSvc40Cluster" in params["files"]
else None
)
clusterTorchModel = params["files"]["soVitsSvc40Cluster"] if "soVitsSvc40Cluster" in params["files"] else None
content_vec_path = self.params.content_vec_500
content_vec_onnx_path = self.params.content_vec_500_onnx
@ -212,9 +209,7 @@ class SoVitsSvc40:
def get_info(self):
data = asdict(self.settings)
data["onnxExecutionProviders"] = (
self.onnx_session.get_providers() if self.onnx_session is not None else []
)
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files:
if data[f] is not None and os.path.exists(data[f]):
@ -246,9 +241,7 @@ class SoVitsSvc40:
)
if wav_44k.shape[0] % self.hps.data.hop_length != 0:
print(
f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}"
)
print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")
f0, uv = utils.interpolate_f0(f0)
f0 = torch.FloatTensor(f0)
@ -257,14 +250,10 @@ class SoVitsSvc40:
f0 = f0.unsqueeze(0)
uv = uv.unsqueeze(0)
wav16k_numpy = librosa.resample(
audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000
)
wav16k_numpy = librosa.resample(audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000)
wav16k_tensor = torch.from_numpy(wav16k_numpy)
if (
self.settings.gpu < 0 or self.gpu_num == 0
) or self.settings.framework == "ONNX":
if (self.settings.gpu < 0 or self.gpu_num == 0) or self.settings.framework == "ONNX":
dev = torch.device("cpu")
else:
dev = torch.device("cuda", index=self.settings.gpu)
@ -282,44 +271,27 @@ class SoVitsSvc40:
if self.hps.model.ssl_dim == 768:
self.hubert_model = self.hubert_model.to(dev)
wav16k_tensor = wav16k_tensor.to(dev)
c = get_hubert_content_layer9(
self.hubert_model, wav_16k_tensor=wav16k_tensor
)
c = get_hubert_content_layer9(self.hubert_model, wav_16k_tensor=wav16k_tensor)
else:
self.hubert_model = self.hubert_model.to(dev)
wav16k_tensor = wav16k_tensor.to(dev)
c = utils.get_hubert_content(
self.hubert_model, wav_16k_tensor=wav16k_tensor
)
c = utils.get_hubert_content(self.hubert_model, wav_16k_tensor=wav16k_tensor)
uv = uv.to(dev)
f0 = f0.to(dev)
c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1])
if (
self.settings.clusterInferRatio != 0
and hasattr(self, "cluster_model")
and self.cluster_model is not None
):
speaker = [
key
for key, value in self.settings.speakers.items()
if value == self.settings.dstId
]
if self.settings.clusterInferRatio != 0 and hasattr(self, "cluster_model") and self.cluster_model is not None:
speaker = [key for key, value in self.settings.speakers.items() if value == self.settings.dstId]
if len(speaker) != 1:
pass
# print("not only one speaker found.", speaker)
else:
cluster_c = cluster.get_cluster_center_result(
self.cluster_model, c.cpu().numpy().T, speaker[0]
).T
cluster_c = cluster.get_cluster_center_result(self.cluster_model, c.cpu().numpy().T, speaker[0]).T
cluster_c = torch.FloatTensor(cluster_c).to(dev)
c = c.to(dev)
c = (
self.settings.clusterInferRatio * cluster_c
+ (1 - self.settings.clusterInferRatio) * c
)
c = self.settings.clusterInferRatio * cluster_c + (1 - self.settings.clusterInferRatio) * c
c = c.unsqueeze(0)
return c, f0, uv
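
When a cluster model is loaded and clusterInferRatio is non-zero, the HuBERT content features are pulled toward the target speaker's cluster centers by plain linear interpolation, c = r * cluster_c + (1 - r) * c. As a one-function illustration (blend_with_cluster is a hypothetical name):

    import torch

    def blend_with_cluster(c: torch.Tensor, cluster_c: torch.Tensor, ratio: float) -> torch.Tensor:
        # ratio 0 keeps the raw content features; ratio 1 snaps fully to the
        # cluster centers, trading naturalness for target-speaker timbre
        return ratio * cluster_c + (1.0 - ratio) * c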
@ -334,20 +306,14 @@ class SoVitsSvc40:
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
convertSize = (
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
)
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
if convertSize % self.hps.data.hop_length != 0: # pad, because truncation occurs at the model's output hop size.
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
)
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:] # extract only the portion to be converted
@ -389,9 +355,7 @@ class SoVitsSvc40:
"f0": f0.astype(np.float32),
"uv": uv.astype(np.float32),
"g": sid_target.astype(np.int64),
"noise_scale": np.array([self.settings.noiseScale]).astype(
np.float32
),
"noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
# "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
},
)[0][0, 0]
@ -457,6 +421,20 @@ class SoVitsSvc40:
return audio
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: SoVitsSvc40ModelSlot = SoVitsSvc40ModelSlot()
for file in props.files:
if file.kind == "soVitsSvc40Config":
slotInfo.configFile = file.name
elif file.kind == "soVitsSvc40Model":
slotInfo.modelFile = file.name
elif file.kind == "soVitsSvc40Cluster":
slotInfo.clusterFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
del self.onnx_session


@ -95,22 +95,48 @@ class VoiceChangerManager(ServerDeviceCallbacks):
# unpack the data
params = LoadModelParams2(**paramDict)
params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
# copy the files into the slot
for file in params.files:
print("FILE", file)
srcPath = os.path.join(UPLOAD_DIR, file.name)
dstDir = os.path.join(self.params.model_dir, str(params.slot))
srcPath = os.path.join(UPLOAD_DIR, file.dir, file.name)
dstDir = os.path.join(
self.params.model_dir,
str(params.slot),
file.dir,
)
dstPath = os.path.join(dstDir, file.name)
os.makedirs(dstDir, exist_ok=True)
print(f"move to {srcPath} -> {dstPath}")
shutil.move(srcPath, dstPath)
file.name = dstPath
# create metadata (defined per VC)
if params.voiceChangerType == "RVC":
from voice_changer.RVC.RVC import RVC # importing at startup means the parameters cannot be obtained.
slotInfo = RVC.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
slotInfo = MMVCv13.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
slotInfo = MMVCv15.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "so-vits-svc-40":
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
slotInfo = SoVitsSvc40.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
slotInfo = DDSP_SVC.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
print("params", params)
else:
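
With dir threaded through the whole pipeline, an uploaded file now travels from UPLOAD_DIR/&lt;dir&gt;/&lt;name&gt; to &lt;model_dir&gt;/&lt;slot&gt;/&lt;dir&gt;/&lt;name&gt;, so a DDSP-SVC slot keeps its main model under model/ and its diffusion model under diff/. A sketch of the move (place_uploaded_file is a hypothetical name; paths are illustrative):

    import os
    import shutil

    def place_uploaded_file(upload_dir, model_dir, slot, file_dir, file_name):
        src = os.path.join(upload_dir, file_dir, file_name)
        dst_dir = os.path.join(model_dir, str(slot), file_dir)
        os.makedirs(dst_dir, exist_ok=True)
        dst = os.path.join(dst_dir, file_name)
        shutil.move(src, dst)
        return dst  # e.g. <model_dir>/3/diff/diffusion.pt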


@ -43,6 +43,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
class LoadModelParamFile:
name: str
kind: LoadModelParamFileKind
dir: str
@dataclass