WIP: integrate vcs to new gui 2

wataru 2023-06-20 06:39:39 +09:00
parent b453e5fd85
commit b6996a15fe
12 changed files with 251 additions and 153 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,6 @@
 import React, { useEffect, useMemo, useState } from "react";
 import { useAppState } from "../../001_provider/001_AppStateProvider";
-import { FileUploadSetting, InitialFileUploadSetting, ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
+import { ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
 import { useMessageBuilder } from "../../hooks/useMessageBuilder";
 import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
 import { checkExtention, trimfileName } from "../../utils/utils";
@@ -54,15 +54,31 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
     const checkModelSetting = (setting: ModelUploadSetting) => {
         if (setting.voiceChangerType == "RVC") {
-            // const enough = !!setting.files.find(x => { return x.kind == "rvcModel" }) &&
-            //     !!setting.files.find(x => { return x.kind == "rvcIndex" })
-            // return enough
             const enough = !!setting.files.find(x => { return x.kind == "rvcModel" })
             return enough
+        } else if (setting.voiceChangerType == "MMVCv13") {
+            const enough = !!setting.files.find(x => { return x.kind == "mmvcv13Model" }) &&
+                !!setting.files.find(x => { return x.kind == "mmvcv13Config" })
+            return enough
+        } else if (setting.voiceChangerType == "MMVCv15") {
+            const enough = !!setting.files.find(x => { return x.kind == "mmvcv15Model" }) &&
+                !!setting.files.find(x => { return x.kind == "mmvcv15Config" })
+            return enough
+        } else if (setting.voiceChangerType == "so-vits-svc-40") {
+            const enough = !!setting.files.find(x => { return x.kind == "soVitsSvc40Config" }) &&
+                !!setting.files.find(x => { return x.kind == "soVitsSvc40Model" })
+            return enough
+        } else if (setting.voiceChangerType == "DDSP-SVC") {
+            const enough = !!setting.files.find(x => { return x.kind == "ddspSvcModel" }) &&
+                !!setting.files.find(x => { return x.kind == "ddspSvcModelConfig" }) &&
+                !!setting.files.find(x => { return x.kind == "ddspSvcDiffusion" }) &&
+                !!setting.files.find(x => { return x.kind == "ddspSvcDiffusionConfig" })
+            return enough
         }
+        return false
     }
-    const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[]) => {
+    const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[], dir: string = "") => {
         const selectedFile = setting.files.find(x => { return x.kind == kind })
         const selectedFilename = selectedFile?.file.name || ""
         return (
@@ -81,7 +97,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
                 if (selectedFile) {
                     selectedFile.file = file
                 } else {
-                    setting.files.push({ kind: kind, file: file })
+                    setting.files.push({ kind: kind, file: file, dir: dir })
                 }
                 setUploadSetting({ ...setting })
             }}>
@@ -96,6 +112,21 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
         if (vcType == "RVC") {
             rows.push(generateFileRow(uploadSetting!, "Model", "rvcModel", ["pth", "onnx"]))
             rows.push(generateFileRow(uploadSetting!, "Index", "rvcIndex", ["index", "bin"]))
+        } else if (vcType == "MMVCv13") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv13Config", ["json"]))
+            rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv13Model", ["pth", "onnx"]))
+        } else if (vcType == "MMVCv15") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv15Config", ["json"]))
+            rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv15Model", ["pth", "onnx"]))
+        } else if (vcType == "so-vits-svc-40") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "soVitsSvc40Config", ["json"]))
+            rows.push(generateFileRow(uploadSetting!, "Model", "soVitsSvc40Model", ["pth"]))
+            rows.push(generateFileRow(uploadSetting!, "Cluster", "soVitsSvc40Cluster", ["pth", "pt"]))
+        } else if (vcType == "DDSP-SVC") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "ddspSvcModelConfig", ["yaml"], "model/"))
+            rows.push(generateFileRow(uploadSetting!, "Model", "ddspSvcModel", ["pth", "pt"], "model/"))
+            rows.push(generateFileRow(uploadSetting!, "Config(diff)", "ddspSvcDiffusionConfig", ["yaml"], "diff/"))
+            rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"))
         }
         return rows
     }
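
The branches above encode which file kinds each voice changer type must supply before upload is allowed. Restated as data, a minimal Python sketch (illustrative only, not part of this commit; optional kinds such as rvcIndex and soVitsSvc40Cluster are omitted on purpose):

# Required file kinds per voice changer type, as checked in checkModelSetting above.
REQUIRED_KINDS = {
    "RVC": ["rvcModel"],
    "MMVCv13": ["mmvcv13Model", "mmvcv13Config"],
    "MMVCv15": ["mmvcv15Model", "mmvcv15Config"],
    "so-vits-svc-40": ["soVitsSvc40Config", "soVitsSvc40Model"],
    "DDSP-SVC": ["ddspSvcModel", "ddspSvcModelConfig", "ddspSvcDiffusion", "ddspSvcDiffusionConfig"],
}

def check_model_setting(vc_type: str, uploaded_kinds: set) -> bool:
    """True when every required kind for vc_type has been selected."""
    return all(kind in uploaded_kinds for kind in REQUIRED_KINDS.get(vc_type, []))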

View File

@@ -57,13 +57,13 @@ export class ServerConfigurator {
         return info
     }

-    uploadFile2 = async (file: File, onprogress: (progress: number, end: boolean) => void) => {
+    uploadFile2 = async (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
         const url = this.serverUrl + "/upload_file"
         onprogress(0, false)

         const size = 1024 * 1024;
         let index = 0; // index value
         const fileLength = file.size
-        const filename = file.name
+        const filename = dir + file.name
         const fileChunkNum = Math.ceil(fileLength / size)
         while (true) {
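
uploadFile2 streams the file in 1 MiB chunks and now prefixes the server-side name with dir, so a model's files keep their subdirectory (for example model/ or diff/ for DDSP-SVC). A minimal Python sketch of the same naming and chunk-count arithmetic (illustrative; the real client is the TypeScript above):

import math
import os

CHUNK_SIZE = 1024 * 1024  # 1 MiB, matching `size` above

def chunk_plan(path: str, dir_prefix: str):
    """Return the server-side name and chunk count for one upload."""
    file_length = os.path.getsize(path)
    filename = dir_prefix + os.path.basename(path)   # dir + file.name
    chunk_num = math.ceil(file_length / CHUNK_SIZE)  # fileChunkNum
    return filename, chunk_num

# e.g. chunk_plan("diffusion.pt", "diff/") -> ("diff/diffusion.pt", 42) for a ~42 MiB file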

View File

@@ -290,8 +290,8 @@ export class VoiceChangerClient {
     uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => {
         return this.configurator.uploadFile(buf, filename, onprogress)
     }
-    uploadFile2 = (file: File, onprogress: (progress: number, end: boolean) => void) => {
-        return this.configurator.uploadFile2(file, onprogress)
+    uploadFile2 = (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
+        return this.configurator.uploadFile2(dir, file, onprogress)
     }
     concatUploadedFile = (filename: string, chunkNum: number) => {
         return this.configurator.concatUploadedFile(filename, chunkNum)

View File

@@ -41,6 +41,7 @@ export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
 export type ModelFile = {
     file: File,
     kind: ModelFileKind
+    dir: string
 }

 export type ModelUploadSetting = {
@@ -296,7 +297,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
         if (!props.voiceChangerClient) return
         console.log("uploading..1.", file)
         console.log("uploading..2.", file.name)
-        const num = await props.voiceChangerClient.uploadFile2(file, onprogress)
+        const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress)
         const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num)
         console.log("uploaded", num, res)
     }
@@ -319,11 +320,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                 const progOffset = 100 * i * progRate
                 await _uploadFile2(setting.files[i].file, (progress: number, _end: boolean) => {
                     setUploadProgress(progress * progRate + progOffset)
-                })
+                }, setting.files[i].dir)
             }
         }

         const params: ModelUploadSettingForServer = {
-            ...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind } })
+            ...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind, dir: f.dir } })
         }
         const loadPromise = props.voiceChangerClient.loadModel(
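
Each file's 0..100 progress is folded into one overall figure via progRate and progOffset. Assuming progRate = 1 / fileCount (defined outside this hunk), the arithmetic is:

def overall_progress(file_index: int, file_progress: float, file_count: int) -> float:
    """Map one file's 0..100 progress onto the 0..100 total."""
    prog_rate = 1 / file_count             # assumed; set outside the hunk shown
    prog_offset = 100 * file_index * prog_rate
    return file_progress * prog_rate + prog_offset

# overall_progress(1, 50, 4) == 37.5: halfway through the second of four files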

View File

@@ -38,7 +38,72 @@ class RVCModelSlot(ModelSlot):
     iconFile: str = ""


-ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
+@dataclass
+class MMVCv13ModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "MMVCv13"
+    modelFile: str = ""
+    configFile: str = ""
+    srcId: int = 107
+    dstId: int = 100
+    isONNX: bool = False
+    samplingRate: int = 24000
+    name: str = ""
+    description: str = ""
+    iconFile: str = ""
+
+
+@dataclass
+class MMVCv15ModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "MMVCv15"
+    modelFile: str = ""
+    configFile: str = ""
+    srcId: int = 0
+    dstId: int = 101
+    isONNX: bool = False
+    samplingRate: int = 24000
+    name: str = ""
+    description: str = ""
+    iconFile: str = ""
+
+
+@dataclass
+class SoVitsSvc40ModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "so-vits-svc-40"
+    modelFile: str = ""
+    configFile: str = ""
+    clusterFile: str = ""
+    dstId: int = 0
+    isONNX: bool = False
+    name: str = ""
+    description: str = ""
+    credit: str = ""
+    termsOfUseUrl: str = ""
+    sampleId: str = ""
+    iconFile: str = ""
+
+
+@dataclass
+class DDSPSVCModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "DDSP-SVC"
+    modelFile: str = ""
+    configFile: str = ""
+    diffModelFile: str = ""
+    diffConfigFile: str = ""
+    dstId: int = 0
+    isONNX: bool = False
+    name: str = ""
+    description: str = ""
+    credit: str = ""
+    termsOfUseUrl: str = ""
+    sampleId: str = ""
+    iconFile: str = ""
+
+
+ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot]


 def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
@@ -50,6 +115,14 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
     slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
     if slotInfo.voiceChangerType == "RVC":
         return RVCModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "MMVCv13":
+        return MMVCv13ModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "MMVCv15":
+        return MMVCv15ModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "so-vits-svc-40":
+        return SoVitsSvc40ModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "DDSP-SVC":
+        return DDSPSVCModelSlot(**jsonDict)
     else:
         return ModelSlot()
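
loadSlotInfo first parses only the shared ModelSlot fields, then re-parses the same JSON with the concrete dataclass selected by voiceChangerType. A hedged round-trip sketch (the on-disk file name params.json is an assumption, not shown in this hunk):

import json
import os
from dataclasses import asdict

from data.ModelSlot import DDSPSVCModelSlot, loadSlotInfo

slot = DDSPSVCModelSlot(modelFile="model/model.pt", diffModelFile="diff/diffusion.pt")
slot_dir = os.path.join("model_dir", "3")
os.makedirs(slot_dir, exist_ok=True)
with open(os.path.join(slot_dir, "params.json"), "w") as f:  # file name assumed
    json.dump(asdict(slot), f)

revived = loadSlotInfo("model_dir", 3)  # -> DDSPSVCModelSlot(...), if the layout assumption holds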

View File

@@ -3,6 +3,7 @@ import os
 from dataclasses import asdict

 import numpy as np
 import torch
+from data.ModelSlot import DDSPSVCModelSlot
 from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
 from voice_changer.DDSP_SVC.deviceManager.DeviceManager import DeviceManager
@@ -21,7 +22,7 @@ from diffusion.infer_gt_mel import DiffGtMel  # type: ignore
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.DDSP_SVC.DDSP_SVCSetting import DDSP_SVCSettings
 from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
@@ -44,11 +45,7 @@ def phase_vocoder(a, b, fade_out, fade_in):
     deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
     w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
     t = torch.arange(n).unsqueeze(-1).to(a) / n
-    result = (
-        a * (fade_out**2)
-        + b * (fade_in**2)
-        + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
-    )
+    result = a * (fade_out**2) + b * (fade_in**2) + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
     return result
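
Reflowed onto one line, the crossfade is easier to read as math. With fade envelopes $f_{\mathrm{out}}$, $f_{\mathrm{in}}$ and with absab, phia computed from the STFTs of a and b earlier in the function (not shown in this hunk):

$$\mathrm{result} = a\,f_{\mathrm{out}}^{2} + b\,f_{\mathrm{in}}^{2} + \frac{f_{\mathrm{out}}\,f_{\mathrm{in}}}{n}\sum_{k} \mathrm{absab}_{k}\,\cos(w_{k} t + \mathrm{phia}_{k})$$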
@@ -102,9 +99,7 @@ class DDSP_SVC:
     def reloadModel(self):
         self.device = self.deviceManager.getDevice(self.settings.gpu)
         modelFile = self.settings.modelSlots[self.settings.modelSlotIndex].modelFile
-        diffusionFile = self.settings.modelSlots[
-            self.settings.modelSlotIndex
-        ].diffusionFile
+        diffusionFile = self.settings.modelSlots[self.settings.modelSlotIndex].diffusionFile

         self.svc_model = SvcDDSP()
         self.svc_model.setVCParams(self.params)
@@ -144,15 +139,11 @@ class DDSP_SVC:
         # newData = newData.astype(np.float32)

         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with past data
         else:
             self.audio_buffer = newData

-        convertSize = (
-            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        )
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize

         # if convertSize % self.hop_size != 0:  # pad because truncation occurs at the model's output hop size
         #     convertSize = convertSize + (self.hop_size - (convertSize % self.hop_size))
@@ -187,8 +178,7 @@ class DDSP_SVC:
             f0_min=50,
             f0_max=1100,
             # safe_prefix_pad_length=0,  # TBD what is this?
-            safe_prefix_pad_length=self.settings.extraConvertSize
-            / self.svc_model.args.data.sampling_rate,
+            safe_prefix_pad_length=self.settings.extraConvertSize / self.svc_model.args.data.sampling_rate,
             diff_model=self.diff_model,
             diff_acc=self.settings.diffAcc,  # TBD what is this?
             diff_spk_id=self.settings.diffSpkId,
@@ -196,9 +186,7 @@ class DDSP_SVC:
             # diff_use_dpm=True if self.settings.useDiffDpm == 1 else False,  # TBD what is this?
             method=self.settings.diffMethod,
             k_step=self.settings.kStep,  # TBD what is this?
-            diff_silence=True
-            if self.settings.useDiffSilence == 1
-            else False,  # TBD what is this?
+            diff_silence=True if self.settings.useDiffSilence == 1 else False,  # TBD what is this?
         )
         return _audio.cpu().numpy() * 32768.0
@@ -210,9 +198,21 @@ class DDSP_SVC:
         audio = self._pyTorch_inference(data)
         return audio

-    # def destroy(self):
-    #     del self.net_g
-    #     del self.onnx_session
+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: DDSPSVCModelSlot = DDSPSVCModelSlot()
+        for file in props.files:
+            if file.kind == "ddspSvcModelConfig":
+                slotInfo.configFile = file.name
+            elif file.kind == "ddspSvcModel":
+                slotInfo.modelFile = file.name
+            elif file.kind == "ddspSvcDiffusionConfig":
+                slotInfo.diffConfigFile = file.name
+            elif file.kind == "ddspSvcDiffusion":
+                slotInfo.diffModelFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo

     def __del__(self):
         del self.net_g
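
loadModel2 has the same shape in every voice changer in this commit: map each uploaded file's kind onto a slot field, then derive isONNX and name from the model file. A generic restatement of the DDSP-SVC mapping (illustrative sketch, not the project's code):

import os

# kind -> slot field, mirroring the branches above
DDSP_FIELD_BY_KIND = {
    "ddspSvcModelConfig": "configFile",
    "ddspSvcModel": "modelFile",
    "ddspSvcDiffusionConfig": "diffConfigFile",
    "ddspSvcDiffusion": "diffModelFile",
}

def fill_slot(slot, files):
    """files: objects with .kind and .name, e.g. LoadModelParamFile."""
    for f in files:
        field = DDSP_FIELD_BY_KIND.get(f.kind)
        if field is not None:
            setattr(slot, field, f.name)
    slot.isONNX = slot.modelFile.endswith(".onnx")
    slot.name = os.path.splitext(os.path.basename(slot.modelFile))[0]
    return slot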

View File

@@ -1,7 +1,8 @@
 import sys
 import os
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from data.ModelSlot import MMVCv13ModelSlot
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.utils.VoiceChangerModel import AudioInOut

 if sys.platform.startswith("darwin"):
@@ -77,13 +78,7 @@ class MMVCv13:
         # create the PyTorch model
         if self.settings.pyTorchModelFile is not None:
-            self.net_g = SynthesizerTrn(
-                len(symbols),
-                self.hps.data.filter_length // 2 + 1,
-                self.hps.train.segment_size // self.hps.data.hop_length,
-                n_speakers=self.hps.data.n_speakers,
-                **self.hps.model
-            )
+            self.net_g = SynthesizerTrn(len(symbols), self.hps.data.filter_length // 2 + 1, self.hps.train.segment_size // self.hps.data.hop_length, n_speakers=self.hps.data.n_speakers, **self.hps.model)
             self.net_g.eval()
             load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
@@ -154,9 +149,7 @@ class MMVCv13:
     def get_info(self):
         data = asdict(self.settings)
-        data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers() if self.onnx_session is not None else []
-        )
+        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] is not None and os.path.exists(data[f]):
@@ -193,9 +186,7 @@ class MMVCv13:
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value

         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with past data
         else:
             self.audio_buffer = newData
@@ -204,9 +195,7 @@ class MMVCv13:
         # if convertSize < 8192:
         #     convertSize = 8192
         if convertSize % self.hps.data.hop_length != 0:  # pad because truncation occurs at the model's output hop size
-            convertSize = convertSize + (
-                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
-            )
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))

         convertOffset = -1 * convertSize
         self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the portion to convert
@@ -238,7 +227,9 @@ class MMVCv13:
                     "sid_src": sid_src.numpy(),
                     "sid_tgt": sid_tgt1.numpy(),
                 },
-            )[0][0, 0]
+            )[
+                0
+            ][0, 0]
             * self.hps.data.max_wav_value
         )
         return audio1
@@ -254,19 +245,10 @@ class MMVCv13:
             dev = torch.device("cuda", index=self.settings.gpu)

         with torch.no_grad():
-            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
-                x.to(dev) for x in data
-            ]
+            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.to(dev) for x in data]
             sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
-            audio1 = (
-                self.net_g.to(dev)
-                .voice_conversion(
-                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target
-                )[0, 0]
-                .data
-                * self.hps.data.max_wav_value
-            )
+            audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target)[0, 0].data * self.hps.data.max_wav_value
             result = audio1.float().cpu().numpy()
         return result
@@ -278,6 +260,18 @@ class MMVCv13:
             audio = self._pyTorch_inference(data)
         return audio

+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: MMVCv13ModelSlot = MMVCv13ModelSlot()
+        for file in props.files:
+            if file.kind == "mmvcv13Model":
+                slotInfo.modelFile = file.name
+            elif file.kind == "mmvcv13Config":
+                slotInfo.configFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
+
     def __del__(self):
         del self.net_g
         del self.onnx_session

View File

@@ -1,7 +1,8 @@
 import sys
 import os
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from data.ModelSlot import MMVCv15ModelSlot
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.utils.VoiceChangerModel import AudioInOut

 if sys.platform.startswith("darwin"):
@@ -172,12 +173,7 @@ class MMVCv15:
     def get_info(self):
         data = asdict(self.settings)
-        data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers()
-            if self.settings.onnxModelFile != ""
-            and self.settings.onnxModelFile is not None
-            else []
-        )
+        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.settings.onnxModelFile != "" and self.settings.onnxModelFile is not None else []
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] is not None and os.path.exists(data[f]):
@@ -195,9 +191,7 @@ class MMVCv15:
     def _get_f0(self, detector: str, newData: AudioInOut):
         audio_norm_np = newData.astype(np.float64)
         if detector == "dio":
-            _f0, _time = pw.dio(
-                audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5
-            )
+            _f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
             f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
         else:
             f0, t = pw.harvest(
@@ -207,9 +201,7 @@ class MMVCv15:
                 f0_floor=71.0,
                 f0_ceil=1000.0,
             )
-        f0 = convert_continuos_f0(
-            f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length)
-        )
+        f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
         f0 = torch.from_numpy(f0.astype(np.float32))
         return f0
@@ -237,9 +229,7 @@ class MMVCv15:
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value

         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with past data
         else:
             self.audio_buffer = newData
@@ -248,9 +238,7 @@ class MMVCv15:
         # if convertSize < 8192:
         #     convertSize = 8192
         if convertSize % self.hps.data.hop_length != 0:  # pad because truncation occurs at the model's output hop size
-            convertSize = convertSize + (
-                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
-            )
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))

         # ONNX is fixed length
         if self.settings.framework == "ONNX":
@@ -290,16 +278,15 @@ class MMVCv15:
                     "sid_src": sid_src.numpy(),
                     "sid_tgt": sid_tgt1.numpy(),
                 },
-            )[0][0, 0]
+            )[
+                0
+            ][0, 0]
             * self.hps.data.max_wav_value
         )
         return audio1

     def _pyTorch_inference(self, data):
-        if (
-            self.settings.pyTorchModelFile == ""
-            or self.settings.pyTorchModelFile is None
-        ):
+        if self.settings.pyTorchModelFile == "" or self.settings.pyTorchModelFile is None:
             print("[Voice Changer] No pyTorch session.")
             raise NoModeLoadedException("pytorch")
@@ -316,12 +303,7 @@ class MMVCv15:
             sid_src = sid_src.to(dev)
             sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
-            audio1 = (
-                self.net_g.to(dev)
-                .voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0]
-                .data
-                * self.hps.data.max_wav_value
-            )
+            audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
             result = audio1.float().cpu().numpy()
         return result
@@ -336,6 +318,18 @@ class MMVCv15:
             print(_e)
             raise ONNXInputArgumentException()

+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: MMVCv15ModelSlot = MMVCv15ModelSlot()
+        for file in props.files:
+            if file.kind == "mmvcv15Model":
+                slotInfo.modelFile = file.name
+            elif file.kind == "mmvcv15Config":
+                slotInfo.configFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
+
     def __del__(self):
         del self.net_g
         del self.onnx_session

View File

@@ -1,7 +1,8 @@
 import sys
 import os
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from data.ModelSlot import SoVitsSvc40ModelSlot
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@@ -97,11 +98,7 @@ class SoVitsSvc40:
             self.settings.pyTorchModelFile = modelFile
             self.settings.onnxModelFile = None

-            clusterTorchModel = (
-                params["files"]["soVitsSvc40Cluster"]
-                if "soVitsSvc40Cluster" in params["files"]
-                else None
-            )
+            clusterTorchModel = params["files"]["soVitsSvc40Cluster"] if "soVitsSvc40Cluster" in params["files"] else None

             content_vec_path = self.params.content_vec_500
             content_vec_onnx_path = self.params.content_vec_500_onnx
@@ -212,9 +209,7 @@ class SoVitsSvc40:
     def get_info(self):
         data = asdict(self.settings)
-        data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers() if self.onnx_session is not None else []
-        )
+        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] is not None and os.path.exists(data[f]):
@@ -246,9 +241,7 @@ class SoVitsSvc40:
         )
         if wav_44k.shape[0] % self.hps.data.hop_length != 0:
-            print(
-                f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}"
-            )
+            print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")

         f0, uv = utils.interpolate_f0(f0)
         f0 = torch.FloatTensor(f0)
@@ -257,14 +250,10 @@ class SoVitsSvc40:
         f0 = f0.unsqueeze(0)
         uv = uv.unsqueeze(0)

-        wav16k_numpy = librosa.resample(
-            audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000
-        )
+        wav16k_numpy = librosa.resample(audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000)
         wav16k_tensor = torch.from_numpy(wav16k_numpy)

-        if (
-            self.settings.gpu < 0 or self.gpu_num == 0
-        ) or self.settings.framework == "ONNX":
+        if (self.settings.gpu < 0 or self.gpu_num == 0) or self.settings.framework == "ONNX":
             dev = torch.device("cpu")
         else:
             dev = torch.device("cuda", index=self.settings.gpu)
@@ -282,44 +271,27 @@ class SoVitsSvc40:
         if self.hps.model.ssl_dim == 768:
             self.hubert_model = self.hubert_model.to(dev)
             wav16k_tensor = wav16k_tensor.to(dev)
-            c = get_hubert_content_layer9(
-                self.hubert_model, wav_16k_tensor=wav16k_tensor
-            )
+            c = get_hubert_content_layer9(self.hubert_model, wav_16k_tensor=wav16k_tensor)
         else:
             self.hubert_model = self.hubert_model.to(dev)
             wav16k_tensor = wav16k_tensor.to(dev)
-            c = utils.get_hubert_content(
-                self.hubert_model, wav_16k_tensor=wav16k_tensor
-            )
+            c = utils.get_hubert_content(self.hubert_model, wav_16k_tensor=wav16k_tensor)
         uv = uv.to(dev)
         f0 = f0.to(dev)

         c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1])

-        if (
-            self.settings.clusterInferRatio != 0
-            and hasattr(self, "cluster_model")
-            and self.cluster_model is not None
-        ):
-            speaker = [
-                key
-                for key, value in self.settings.speakers.items()
-                if value == self.settings.dstId
-            ]
+        if self.settings.clusterInferRatio != 0 and hasattr(self, "cluster_model") and self.cluster_model is not None:
+            speaker = [key for key, value in self.settings.speakers.items() if value == self.settings.dstId]
             if len(speaker) != 1:
                 pass
                 # print("not only one speaker found.", speaker)
             else:
-                cluster_c = cluster.get_cluster_center_result(
-                    self.cluster_model, c.cpu().numpy().T, speaker[0]
-                ).T
+                cluster_c = cluster.get_cluster_center_result(self.cluster_model, c.cpu().numpy().T, speaker[0]).T
                 cluster_c = torch.FloatTensor(cluster_c).to(dev)
                 c = c.to(dev)
-                c = (
-                    self.settings.clusterInferRatio * cluster_c
-                    + (1 - self.settings.clusterInferRatio) * c
-                )
+                c = self.settings.clusterInferRatio * cluster_c + (1 - self.settings.clusterInferRatio) * c

         c = c.unsqueeze(0)
         return c, f0, uv
@@ -334,20 +306,14 @@ class SoVitsSvc40:
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value

         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with past data
         else:
             self.audio_buffer = newData

-        convertSize = (
-            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        )
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize

         if convertSize % self.hps.data.hop_length != 0:  # pad because truncation occurs at the model's output hop size
-            convertSize = convertSize + (
-                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
-            )
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))

         convertOffset = -1 * convertSize
         self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the portion to convert
@@ -389,9 +355,7 @@ class SoVitsSvc40:
                     "f0": f0.astype(np.float32),
                     "uv": uv.astype(np.float32),
                     "g": sid_target.astype(np.int64),
-                    "noise_scale": np.array([self.settings.noiseScale]).astype(
-                        np.float32
-                    ),
+                    "noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
                     # "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
                 },
             )[0][0, 0]
@@ -457,6 +421,20 @@ class SoVitsSvc40:
         return audio

+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: SoVitsSvc40ModelSlot = SoVitsSvc40ModelSlot()
+        for file in props.files:
+            if file.kind == "soVitsSvc40Config":
+                slotInfo.configFile = file.name
+            elif file.kind == "soVitsSvc40Model":
+                slotInfo.modelFile = file.name
+            elif file.kind == "soVitsSvc40Cluster":
+                slotInfo.clusterFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
+
     def __del__(self):
         del self.net_g
         del self.onnx_session

View File

@@ -95,22 +95,48 @@ class VoiceChangerManager(ServerDeviceCallbacks):
             # unpack the data
             params = LoadModelParams2(**paramDict)
             params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]

             # copy the files into the slot
             for file in params.files:
                 print("FILE", file)
-                srcPath = os.path.join(UPLOAD_DIR, file.name)
-                dstDir = os.path.join(self.params.model_dir, str(params.slot))
+                srcPath = os.path.join(UPLOAD_DIR, file.dir, file.name)
+                dstDir = os.path.join(
+                    self.params.model_dir,
+                    str(params.slot),
+                    file.dir,
+                )
                 dstPath = os.path.join(dstDir, file.name)
                 os.makedirs(dstDir, exist_ok=True)
                 print(f"move to {srcPath} -> {dstPath}")
                 shutil.move(srcPath, dstPath)
                 file.name = dstPath

             # create the metadata (defined per VC)
             if params.voiceChangerType == "RVC":
                 from voice_changer.RVC.RVC import RVC  # importing at startup prevents the parameters from being read

                 slotInfo = RVC.loadModel2(params)
                 self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+            elif params.voiceChangerType == "MMVCv13":
+                from voice_changer.MMVCv13.MMVCv13 import MMVCv13
+
+                slotInfo = MMVCv13.loadModel2(params)
+                self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+            elif params.voiceChangerType == "MMVCv15":
+                from voice_changer.MMVCv15.MMVCv15 import MMVCv15
+
+                slotInfo = MMVCv15.loadModel2(params)
+                self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+            elif params.voiceChangerType == "so-vits-svc-40":
+                from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
+
+                slotInfo = SoVitsSvc40.loadModel2(params)
+                self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+            elif params.voiceChangerType == "DDSP-SVC":
+                from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
+
+                slotInfo = DDSP_SVC.loadModel2(params)
+                self.modelSlotManager.save_model_slot(params.slot, slotInfo)

             print("params", params)
         else:
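
With the new dir component, an upload is staged under UPLOAD_DIR/<dir>/<name> and moved to <model_dir>/<slot>/<dir>/<name>, so DDSP-SVC's model/ and diff/ files keep their relative layout. A sketch of the path arithmetic (directory names illustrative):

import os

UPLOAD_DIR = "upload_dir"  # assumed name for illustration
model_dir = "model_dir"    # assumed name for illustration
slot, d, name = 3, "diff/", "diffusion.pt"

srcPath = os.path.join(UPLOAD_DIR, d, name)     # upload_dir/diff/diffusion.pt
dstDir = os.path.join(model_dir, str(slot), d)  # model_dir/3/diff/
dstPath = os.path.join(dstDir, name)            # model_dir/3/diff/diffusion.pt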

View File

@@ -43,6 +43,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
 class LoadModelParamFile:
     name: str
     kind: LoadModelParamFileKind
+    dir: str


 @dataclass