WIP: integrate vcs to new gui 2
parent b453e5fd85
commit b6996a15fe

client/demo/dist/index.js (vendored)
File diff suppressed because one or more lines are too long
@@ -1,6 +1,6 @@
 import React, { useEffect, useMemo, useState } from "react";
 import { useAppState } from "../../001_provider/001_AppStateProvider";
-import { FileUploadSetting, InitialFileUploadSetting, ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
+import { ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
 import { useMessageBuilder } from "../../hooks/useMessageBuilder";
 import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
 import { checkExtention, trimfileName } from "../../utils/utils";
@@ -54,15 +54,31 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
 
     const checkModelSetting = (setting: ModelUploadSetting) => {
         if (setting.voiceChangerType == "RVC") {
-            // const enough = !!setting.files.find(x => { return x.kind == "rvcModel" }) &&
-            //     !!setting.files.find(x => { return x.kind == "rvcIndex" })
-            // return enough
             const enough = !!setting.files.find(x => { return x.kind == "rvcModel" })
             return enough
+        } else if (setting.voiceChangerType == "MMVCv13") {
+            const enough = !!setting.files.find(x => { return x.kind == "mmvcv13Model" }) &&
+                !!setting.files.find(x => { return x.kind == "mmvcv13Config" })
+            return enough
+        } else if (setting.voiceChangerType == "MMVCv15") {
+            const enough = !!setting.files.find(x => { return x.kind == "mmvcv15Model" }) &&
+                !!setting.files.find(x => { return x.kind == "mmvcv15Config" })
+            return enough
+        } else if (setting.voiceChangerType == "so-vits-svc-40") {
+            const enough = !!setting.files.find(x => { return x.kind == "soVitsSvc40Config" }) &&
+                !!setting.files.find(x => { return x.kind == "soVitsSvc40Model" })
+            return enough
+        } else if (setting.voiceChangerType == "DDSP-SVC") {
+            const enough = !!setting.files.find(x => { return x.kind == "ddspSvcModel" }) &&
+                !!setting.files.find(x => { return x.kind == "ddspSvcModelConfig" }) &&
+                !!setting.files.find(x => { return x.kind == "ddspSvcDiffusion" }) &&
+                !!setting.files.find(x => { return x.kind == "ddspSvcDiffusionConfig" })
+            return enough
         }
+        return false
     }
 
-    const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[]) => {
+    const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[], dir: string = "") => {
         const selectedFile = setting.files.find(x => { return x.kind == kind })
         const selectedFilename = selectedFile?.file.name || ""
         return (
@@ -81,7 +97,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
                     if (selectedFile) {
                         selectedFile.file = file
                     } else {
-                        setting.files.push({ kind: kind, file: file })
+                        setting.files.push({ kind: kind, file: file, dir: dir })
                     }
                     setUploadSetting({ ...setting })
                 }}>
@@ -96,6 +112,21 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
         if (vcType == "RVC") {
             rows.push(generateFileRow(uploadSetting!, "Model", "rvcModel", ["pth", "onnx"]))
             rows.push(generateFileRow(uploadSetting!, "Index", "rvcIndex", ["index", "bin"]))
+        } else if (vcType == "MMVCv13") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv13Config", ["json"]))
+            rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv13Model", ["pth", "onnx"]))
+        } else if (vcType == "MMVCv15") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv15Config", ["json"]))
+            rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv15Model", ["pth", "onnx"]))
+        } else if (vcType == "so-vits-svc-40") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "soVitsSvc40Config", ["json"]))
+            rows.push(generateFileRow(uploadSetting!, "Model", "soVitsSvc40Model", ["pth"]))
+            rows.push(generateFileRow(uploadSetting!, "Cluster", "soVitsSvc40Cluster", ["pth", "pt"]))
+        } else if (vcType == "DDSP-SVC") {
+            rows.push(generateFileRow(uploadSetting!, "Config", "ddspSvcModelConfig", ["yaml"], "model/"))
+            rows.push(generateFileRow(uploadSetting!, "Model", "ddspSvcModel", ["pth", "pt"], "model/"))
+            rows.push(generateFileRow(uploadSetting!, "Config(diff)", "ddspSvcDiffusionConfig", ["yaml"], "diff/"))
+            rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"))
         }
         return rows
     }
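
The checkModelSetting branch above encodes, per voice changer type, which uploaded file kinds are mandatory before an upload may start. A minimal Python restatement of the same rule (type strings and file kinds taken from the diff; the RVC index and the so-vits-svc-40 cluster stay optional):

    # Required file kinds per voiceChangerType, mirroring checkModelSetting.
    REQUIRED_KINDS = {
        "RVC": {"rvcModel"},
        "MMVCv13": {"mmvcv13Model", "mmvcv13Config"},
        "MMVCv15": {"mmvcv15Model", "mmvcv15Config"},
        "so-vits-svc-40": {"soVitsSvc40Model", "soVitsSvc40Config"},
        "DDSP-SVC": {"ddspSvcModel", "ddspSvcModelConfig", "ddspSvcDiffusion", "ddspSvcDiffusionConfig"},
    }

    def check_model_setting(voice_changer_type: str, provided: set) -> bool:
        required = REQUIRED_KINDS.get(voice_changer_type)
        if required is None:
            return False  # unknown type, mirrors the trailing "return false"
        return required.issubset(provided)

    # check_model_setting("MMVCv13", {"mmvcv13Model"}) -> False (config missing)
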
@@ -57,13 +57,13 @@ export class ServerConfigurator {
         return info
     }
 
-    uploadFile2 = async (file: File, onprogress: (progress: number, end: boolean) => void) => {
+    uploadFile2 = async (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
         const url = this.serverUrl + "/upload_file"
         onprogress(0, false)
         const size = 1024 * 1024;
         let index = 0; // chunk index
         const fileLength = file.size
-        const filename = file.name
+        const filename = dir + file.name
         const fileChunkNum = Math.ceil(fileLength / size)
 
         while (true) {
@@ -290,8 +290,8 @@ export class VoiceChangerClient {
     uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => {
         return this.configurator.uploadFile(buf, filename, onprogress)
     }
-    uploadFile2 = (file: File, onprogress: (progress: number, end: boolean) => void) => {
-        return this.configurator.uploadFile2(file, onprogress)
+    uploadFile2 = (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
+        return this.configurator.uploadFile2(dir, file, onprogress)
     }
     concatUploadedFile = (filename: string, chunkNum: number) => {
         return this.configurator.concatUploadedFile(filename, chunkNum)
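
Taken together, the two hunks above thread a new dir argument through the chunked-upload path: the server-side filename becomes dir + file.name, so an uploaded file can land in a sub-directory of the upload area. A hedged Python sketch of the arithmetic (endpoint and method names from the diff; the exact per-chunk naming is not shown in these hunks):

    import math

    CHUNK_SIZE = 1024 * 1024  # 1 MiB, matching `size` in uploadFile2

    def plan_upload(dir: str, filename: str, file_length: int):
        stored_name = dir + filename          # e.g. "diff/" + "model.pt"
        chunk_num = math.ceil(file_length / CHUNK_SIZE)
        # Each chunk is POSTed to /upload_file; concatUploadedFile then asks the
        # server to reassemble chunk_num chunks under stored_name.
        return stored_name, chunk_num

    # plan_upload("diff/", "model.pt", 3_500_000) -> ("diff/model.pt", 4)
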
@@ -41,6 +41,7 @@ export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
 export type ModelFile = {
     file: File,
     kind: ModelFileKind
+    dir: string
 }
 
 export type ModelUploadSetting = {
@@ -296,7 +297,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
             if (!props.voiceChangerClient) return
             console.log("uploading..1.", file)
             console.log("uploading..2.", file.name)
-            const num = await props.voiceChangerClient.uploadFile2(file, onprogress)
+            const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress)
             const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num)
             console.log("uploaded", num, res)
         }
@@ -319,11 +320,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                 const progOffset = 100 * i * progRate
                 await _uploadFile2(setting.files[i].file, (progress: number, _end: boolean) => {
                     setUploadProgress(progress * progRate + progOffset)
-                })
+                }, setting.files[i].dir)
             }
         }
         const params: ModelUploadSettingForServer = {
-            ...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind } })
+            ...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind, dir: f.dir } })
         }
 
         const loadPromise = props.voiceChangerClient.loadModel(
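
The loop above spreads the 0-100 progress range evenly across the files in the upload set; each file reports its own 0-100 progress, which is scaled by progRate and shifted by progOffset. A small Python restatement of that arithmetic (it assumes progRate = 1 / fileCount, a definition this hunk does not show):

    def overall_progress(file_index: int, file_count: int, file_progress: float) -> float:
        prog_rate = 1.0 / file_count                  # assumed definition of progRate
        prog_offset = 100.0 * file_index * prog_rate  # progOffset in the diff
        return file_progress * prog_rate + prog_offset

    # 2nd of 4 files at 50% -> overall_progress(1, 4, 50.0) == 37.5
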
@@ -38,7 +38,72 @@ class RVCModelSlot(ModelSlot):
     iconFile: str = ""
 
 
-ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
+@dataclass
+class MMVCv13ModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "MMVCv13"
+    modelFile: str = ""
+    configFile: str = ""
+    srcId: int = 107
+    dstId: int = 100
+    isONNX: bool = False
+    samplingRate: int = 24000
+
+    name: str = ""
+    description: str = ""
+    iconFile: str = ""
+
+
+@dataclass
+class MMVCv15ModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "MMVCv15"
+    modelFile: str = ""
+    configFile: str = ""
+    srcId: int = 0
+    dstId: int = 101
+    isONNX: bool = False
+    samplingRate: int = 24000
+
+    name: str = ""
+    description: str = ""
+    iconFile: str = ""
+
+
+@dataclass
+class SoVitsSvc40ModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "so-vits-svc-40"
+    modelFile: str = ""
+    configFile: str = ""
+    clusterFile: str = ""
+    dstId: int = 0
+    isONNX: bool = False
+
+    name: str = ""
+    description: str = ""
+    credit: str = ""
+    termsOfUseUrl: str = ""
+    sampleId: str = ""
+    iconFile: str = ""
+
+
+@dataclass
+class DDSPSVCModelSlot(ModelSlot):
+    voiceChangerType: VoiceChangerType = "DDSP-SVC"
+    modelFile: str = ""
+    configFile: str = ""
+    diffModelFile: str = ""
+    diffConfigFile: str = ""
+    dstId: int = 0
+    isONNX: bool = False
+
+    name: str = ""
+    description: str = ""
+    credit: str = ""
+    termsOfUseUrl: str = ""
+    sampleId: str = ""
+    iconFile: str = ""
+
+
+ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot]
 
 
 def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
@@ -50,6 +115,14 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
     slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
     if slotInfo.voiceChangerType == "RVC":
         return RVCModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "MMVCv13":
+        return MMVCv13ModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "MMVCv15":
+        return MMVCv15ModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "so-vits-svc-40":
+        return SoVitsSvc40ModelSlot(**jsonDict)
+    elif slotInfo.voiceChangerType == "DDSP-SVC":
+        return DDSPSVCModelSlot(**jsonDict)
     else:
         return ModelSlot()
 
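
loadSlotInfo uses a two-step hydrate-then-dispatch pattern: construct the generic ModelSlot from only the keys the base class declares (so stale JSON keys cannot break it), read the voiceChangerType discriminator, then rebuild the concrete dataclass from the full dict. A self-contained sketch of the pattern (field sets simplified):

    from dataclasses import dataclass

    @dataclass
    class ModelSlot:
        voiceChangerType: str = ""

    @dataclass
    class RVCModelSlot(ModelSlot):
        modelFile: str = ""

    def load_slot(jsonDict: dict) -> ModelSlot:
        # Step 1: hydrate the base class from the keys it declares.
        base = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
        # Step 2: dispatch on the discriminator to the concrete dataclass.
        if base.voiceChangerType == "RVC":
            return RVCModelSlot(**jsonDict)
        return ModelSlot()

    print(load_slot({"voiceChangerType": "RVC", "modelFile": "a.pth"}))
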
@@ -3,6 +3,7 @@ import os
 from dataclasses import asdict
 import numpy as np
 import torch
+from data.ModelSlot import DDSPSVCModelSlot
 from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
 
 from voice_changer.DDSP_SVC.deviceManager.DeviceManager import DeviceManager
@@ -21,7 +22,7 @@ from diffusion.infer_gt_mel import DiffGtMel  # type: ignore
 
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.DDSP_SVC.DDSP_SVCSetting import DDSP_SVCSettings
 from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
 
@@ -44,11 +45,7 @@ def phase_vocoder(a, b, fade_out, fade_in):
     deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
     w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
     t = torch.arange(n).unsqueeze(-1).to(a) / n
-    result = (
-        a * (fade_out**2)
-        + b * (fade_in**2)
-        + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
-    )
+    result = a * (fade_out**2) + b * (fade_in**2) + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
     return result
 
 
@@ -102,9 +99,7 @@ class DDSP_SVC:
     def reloadModel(self):
         self.device = self.deviceManager.getDevice(self.settings.gpu)
         modelFile = self.settings.modelSlots[self.settings.modelSlotIndex].modelFile
-        diffusionFile = self.settings.modelSlots[
-            self.settings.modelSlotIndex
-        ].diffusionFile
+        diffusionFile = self.settings.modelSlots[self.settings.modelSlotIndex].diffusionFile
 
         self.svc_model = SvcDDSP()
         self.svc_model.setVCParams(self.params)
@@ -144,15 +139,11 @@ class DDSP_SVC:
         # newData = newData.astype(np.float32)
 
         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with the past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
         else:
             self.audio_buffer = newData
 
-        convertSize = (
-            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        )
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
 
         # if convertSize % self.hop_size != 0:  # pad, because truncation occurs at the model's output hop size
         #     convertSize = convertSize + (self.hop_size - (convertSize % self.hop_size))
@@ -187,8 +178,7 @@ class DDSP_SVC:
             f0_min=50,
             f0_max=1100,
             # safe_prefix_pad_length=0,  # TBD what is this?
-            safe_prefix_pad_length=self.settings.extraConvertSize
-            / self.svc_model.args.data.sampling_rate,
+            safe_prefix_pad_length=self.settings.extraConvertSize / self.svc_model.args.data.sampling_rate,
             diff_model=self.diff_model,
             diff_acc=self.settings.diffAcc,  # TBD what is this?
             diff_spk_id=self.settings.diffSpkId,
@@ -196,9 +186,7 @@ class DDSP_SVC:
             # diff_use_dpm=True if self.settings.useDiffDpm == 1 else False,  # TBD what is this?
             method=self.settings.diffMethod,
             k_step=self.settings.kStep,  # TBD what is this?
-            diff_silence=True
-            if self.settings.useDiffSilence == 1
-            else False,  # TBD what is this?
+            diff_silence=True if self.settings.useDiffSilence == 1 else False,  # TBD what is this?
         )
 
         return _audio.cpu().numpy() * 32768.0
@@ -210,9 +198,21 @@ class DDSP_SVC:
         audio = self._pyTorch_inference(data)
         return audio
 
-    # def destroy(self):
-    #     del self.net_g
-    #     del self.onnx_session
+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: DDSPSVCModelSlot = DDSPSVCModelSlot()
+        for file in props.files:
+            if file.kind == "ddspSvcModelConfig":
+                slotInfo.configFile = file.name
+            elif file.kind == "ddspSvcModel":
+                slotInfo.modelFile = file.name
+            elif file.kind == "ddspSvcDiffusionConfig":
+                slotInfo.diffConfigFile = file.name
+            elif file.kind == "ddspSvcDiffusion":
+                slotInfo.diffModelFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
 
     def __del__(self):
         del self.net_g
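
Every voice changer now exposes the same loadModel2 shape: map each uploaded file's kind onto a slot field, then derive isONNX and a display name from the model file path. A hedged usage sketch (SimpleNamespace stands in for LoadModelParams2 and its file entries; the paths are hypothetical):

    from types import SimpleNamespace

    props = SimpleNamespace(files=[
        SimpleNamespace(kind="ddspSvcModel", name="model_dir/3/model/model.pt"),
        SimpleNamespace(kind="ddspSvcModelConfig", name="model_dir/3/model/config.yaml"),
        SimpleNamespace(kind="ddspSvcDiffusion", name="model_dir/3/diff/diff.pt"),
        SimpleNamespace(kind="ddspSvcDiffusionConfig", name="model_dir/3/diff/config.yaml"),
    ])

    # slotInfo = DDSP_SVC.loadModel2(props)
    # slotInfo.name   -> "model"  (basename of modelFile, extension stripped)
    # slotInfo.isONNX -> False    (".pt", not ".onnx")
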
@@ -1,7 +1,8 @@
 import sys
 import os
+from data.ModelSlot import MMVCv13ModelSlot
 
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 
 if sys.platform.startswith("darwin"):
@@ -77,13 +78,7 @@ class MMVCv13:
 
         # build the PyTorch model
         if self.settings.pyTorchModelFile is not None:
-            self.net_g = SynthesizerTrn(
-                len(symbols),
-                self.hps.data.filter_length // 2 + 1,
-                self.hps.train.segment_size // self.hps.data.hop_length,
-                n_speakers=self.hps.data.n_speakers,
-                **self.hps.model
-            )
+            self.net_g = SynthesizerTrn(len(symbols), self.hps.data.filter_length // 2 + 1, self.hps.train.segment_size // self.hps.data.hop_length, n_speakers=self.hps.data.n_speakers, **self.hps.model)
             self.net_g.eval()
             load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
 
@@ -154,9 +149,7 @@ class MMVCv13:
     def get_info(self):
         data = asdict(self.settings)
 
-        data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers() if self.onnx_session is not None else []
-        )
+        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] is not None and os.path.exists(data[f]):
@@ -193,9 +186,7 @@ class MMVCv13:
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with the past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
         else:
             self.audio_buffer = newData
 
@@ -204,9 +195,7 @@ class MMVCv13:
         # if convertSize < 8192:
         #     convertSize = 8192
         if convertSize % self.hps.data.hop_length != 0:  # pad, because truncation occurs at the model's output hop size
-            convertSize = convertSize + (
-                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
-            )
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
 
         convertOffset = -1 * convertSize
         self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the part to convert
@@ -238,7 +227,9 @@ class MMVCv13:
                     "sid_src": sid_src.numpy(),
                     "sid_tgt": sid_tgt1.numpy(),
                 },
-            )[0][0, 0]
+            )[
+                0
+            ][0, 0]
             * self.hps.data.max_wav_value
         )
         return audio1
@@ -254,19 +245,10 @@ class MMVCv13:
             dev = torch.device("cuda", index=self.settings.gpu)
 
        with torch.no_grad():
-            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
-                x.to(dev) for x in data
-            ]
+            x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.to(dev) for x in data]
            sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
 
-            audio1 = (
-                self.net_g.to(dev)
-                .voice_conversion(
-                    spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target
-                )[0, 0]
-                .data
-                * self.hps.data.max_wav_value
-            )
+            audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target)[0, 0].data * self.hps.data.max_wav_value
            result = audio1.float().cpu().numpy()
 
            return result
@@ -278,6 +260,18 @@ class MMVCv13:
         audio = self._pyTorch_inference(data)
         return audio
 
+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: MMVCv13ModelSlot = MMVCv13ModelSlot()
+        for file in props.files:
+            if file.kind == "mmvcv13Model":
+                slotInfo.modelFile = file.name
+            elif file.kind == "mmvcv13Config":
+                slotInfo.configFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
+
     def __del__(self):
         del self.net_g
         del self.onnx_session
@@ -1,7 +1,8 @@
 import sys
 import os
+from data.ModelSlot import MMVCv15ModelSlot
 
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 
 if sys.platform.startswith("darwin"):
@@ -172,12 +173,7 @@ class MMVCv15:
     def get_info(self):
         data = asdict(self.settings)
 
-        data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers()
-            if self.settings.onnxModelFile != ""
-            and self.settings.onnxModelFile is not None
-            else []
-        )
+        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.settings.onnxModelFile != "" and self.settings.onnxModelFile is not None else []
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] is not None and os.path.exists(data[f]):
@@ -195,9 +191,7 @@ class MMVCv15:
     def _get_f0(self, detector: str, newData: AudioInOut):
         audio_norm_np = newData.astype(np.float64)
         if detector == "dio":
-            _f0, _time = pw.dio(
-                audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5
-            )
+            _f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
             f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
         else:
             f0, t = pw.harvest(
@@ -207,9 +201,7 @@ class MMVCv15:
                 f0_floor=71.0,
                 f0_ceil=1000.0,
             )
-        f0 = convert_continuos_f0(
-            f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length)
-        )
+        f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
         f0 = torch.from_numpy(f0.astype(np.float32))
         return f0
 
@@ -237,9 +229,7 @@ class MMVCv15:
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with the past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
         else:
             self.audio_buffer = newData
 
@@ -248,9 +238,7 @@ class MMVCv15:
         # if convertSize < 8192:
         #     convertSize = 8192
         if convertSize % self.hps.data.hop_length != 0:  # pad, because truncation occurs at the model's output hop size
-            convertSize = convertSize + (
-                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
-            )
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
 
         # ONNX input length is fixed
         if self.settings.framework == "ONNX":
@@ -290,16 +278,15 @@ class MMVCv15:
                     "sid_src": sid_src.numpy(),
                     "sid_tgt": sid_tgt1.numpy(),
                 },
-            )[0][0, 0]
+            )[
+                0
+            ][0, 0]
             * self.hps.data.max_wav_value
         )
         return audio1
 
     def _pyTorch_inference(self, data):
-        if (
-            self.settings.pyTorchModelFile == ""
-            or self.settings.pyTorchModelFile is None
-        ):
+        if self.settings.pyTorchModelFile == "" or self.settings.pyTorchModelFile is None:
             print("[Voice Changer] No pyTorch session.")
             raise NoModeLoadedException("pytorch")
 
@@ -316,12 +303,7 @@ class MMVCv15:
             sid_src = sid_src.to(dev)
             sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
 
-            audio1 = (
-                self.net_g.to(dev)
-                .voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0]
-                .data
-                * self.hps.data.max_wav_value
-            )
+            audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
             result = audio1.float().cpu().numpy()
             return result
 
@@ -336,6 +318,18 @@ class MMVCv15:
             print(_e)
             raise ONNXInputArgumentException()
 
+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: MMVCv15ModelSlot = MMVCv15ModelSlot()
+        for file in props.files:
+            if file.kind == "mmvcv15Model":
+                slotInfo.modelFile = file.name
+            elif file.kind == "mmvcv15Config":
+                slotInfo.configFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
+
     def __del__(self):
         del self.net_g
         del self.onnx_session
@@ -1,7 +1,8 @@
 import sys
 import os
+from data.ModelSlot import SoVitsSvc40ModelSlot
 
-from voice_changer.utils.LoadModelParams import LoadModelParams
+from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
 from voice_changer.utils.VoiceChangerModel import AudioInOut
 from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 
@@ -97,11 +98,7 @@ class SoVitsSvc40:
         self.settings.pyTorchModelFile = modelFile
         self.settings.onnxModelFile = None
 
-        clusterTorchModel = (
-            params["files"]["soVitsSvc40Cluster"]
-            if "soVitsSvc40Cluster" in params["files"]
-            else None
-        )
+        clusterTorchModel = params["files"]["soVitsSvc40Cluster"] if "soVitsSvc40Cluster" in params["files"] else None
 
         content_vec_path = self.params.content_vec_500
         content_vec_onnx_path = self.params.content_vec_500_onnx
@@ -212,9 +209,7 @@ class SoVitsSvc40:
     def get_info(self):
         data = asdict(self.settings)
 
-        data["onnxExecutionProviders"] = (
-            self.onnx_session.get_providers() if self.onnx_session is not None else []
-        )
+        data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
         files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
         for f in files:
             if data[f] is not None and os.path.exists(data[f]):
@@ -246,9 +241,7 @@ class SoVitsSvc40:
         )
 
         if wav_44k.shape[0] % self.hps.data.hop_length != 0:
-            print(
-                f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}"
-            )
+            print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")
 
         f0, uv = utils.interpolate_f0(f0)
         f0 = torch.FloatTensor(f0)
@@ -257,14 +250,10 @@ class SoVitsSvc40:
         f0 = f0.unsqueeze(0)
         uv = uv.unsqueeze(0)
 
-        wav16k_numpy = librosa.resample(
-            audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000
-        )
+        wav16k_numpy = librosa.resample(audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000)
         wav16k_tensor = torch.from_numpy(wav16k_numpy)
 
-        if (
-            self.settings.gpu < 0 or self.gpu_num == 0
-        ) or self.settings.framework == "ONNX":
+        if (self.settings.gpu < 0 or self.gpu_num == 0) or self.settings.framework == "ONNX":
             dev = torch.device("cpu")
         else:
             dev = torch.device("cuda", index=self.settings.gpu)
@@ -282,44 +271,27 @@ class SoVitsSvc40:
         if self.hps.model.ssl_dim == 768:
             self.hubert_model = self.hubert_model.to(dev)
             wav16k_tensor = wav16k_tensor.to(dev)
-            c = get_hubert_content_layer9(
-                self.hubert_model, wav_16k_tensor=wav16k_tensor
-            )
+            c = get_hubert_content_layer9(self.hubert_model, wav_16k_tensor=wav16k_tensor)
         else:
             self.hubert_model = self.hubert_model.to(dev)
             wav16k_tensor = wav16k_tensor.to(dev)
-            c = utils.get_hubert_content(
-                self.hubert_model, wav_16k_tensor=wav16k_tensor
-            )
+            c = utils.get_hubert_content(self.hubert_model, wav_16k_tensor=wav16k_tensor)
 
         uv = uv.to(dev)
         f0 = f0.to(dev)
 
         c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1])
 
-        if (
-            self.settings.clusterInferRatio != 0
-            and hasattr(self, "cluster_model")
-            and self.cluster_model is not None
-        ):
-            speaker = [
-                key
-                for key, value in self.settings.speakers.items()
-                if value == self.settings.dstId
-            ]
+        if self.settings.clusterInferRatio != 0 and hasattr(self, "cluster_model") and self.cluster_model is not None:
+            speaker = [key for key, value in self.settings.speakers.items() if value == self.settings.dstId]
             if len(speaker) != 1:
                 pass
                 # print("not only one speaker found.", speaker)
             else:
-                cluster_c = cluster.get_cluster_center_result(
-                    self.cluster_model, c.cpu().numpy().T, speaker[0]
-                ).T
+                cluster_c = cluster.get_cluster_center_result(self.cluster_model, c.cpu().numpy().T, speaker[0]).T
                 cluster_c = torch.FloatTensor(cluster_c).to(dev)
                 c = c.to(dev)
-                c = (
-                    self.settings.clusterInferRatio * cluster_c
-                    + (1 - self.settings.clusterInferRatio) * c
-                )
+                c = self.settings.clusterInferRatio * cluster_c + (1 - self.settings.clusterInferRatio) * c
 
         c = c.unsqueeze(0)
         return c, f0, uv
@@ -334,20 +306,14 @@ class SoVitsSvc40:
         newData = newData.astype(np.float32) / self.hps.data.max_wav_value
 
         if self.audio_buffer is not None:
-            self.audio_buffer = np.concatenate(
-                [self.audio_buffer, newData], 0
-            )  # concatenate with the past data
+            self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0)  # concatenate with the past data
         else:
             self.audio_buffer = newData
 
-        convertSize = (
-            inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
-        )
+        convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
 
         if convertSize % self.hps.data.hop_length != 0:  # pad, because truncation occurs at the model's output hop size
-            convertSize = convertSize + (
-                self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
-            )
+            convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
 
         convertOffset = -1 * convertSize
         self.audio_buffer = self.audio_buffer[convertOffset:]  # extract only the part to convert
@@ -389,9 +355,7 @@ class SoVitsSvc40:
                 "f0": f0.astype(np.float32),
                 "uv": uv.astype(np.float32),
                 "g": sid_target.astype(np.int64),
-                "noise_scale": np.array([self.settings.noiseScale]).astype(
-                    np.float32
-                ),
+                "noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
                 # "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
             },
         )[0][0, 0]
@@ -457,6 +421,20 @@ class SoVitsSvc40:
 
         return audio
 
+    @classmethod
+    def loadModel2(cls, props: LoadModelParams2):
+        slotInfo: SoVitsSvc40ModelSlot = SoVitsSvc40ModelSlot()
+        for file in props.files:
+            if file.kind == "soVitsSvc40Config":
+                slotInfo.configFile = file.name
+            elif file.kind == "soVitsSvc40Model":
+                slotInfo.modelFile = file.name
+            elif file.kind == "soVitsSvc40Cluster":
+                slotInfo.clusterFile = file.name
+        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
+        slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
+        return slotInfo
+
     def __del__(self):
         del self.net_g
         del self.onnx_session
@@ -95,22 +95,48 @@ class VoiceChangerManager(ServerDeviceCallbacks):
         # unpack the data
         params = LoadModelParams2(**paramDict)
         params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
 
         # copy the files into the slot
         for file in params.files:
             print("FILE", file)
-            srcPath = os.path.join(UPLOAD_DIR, file.name)
-            dstDir = os.path.join(self.params.model_dir, str(params.slot))
+            srcPath = os.path.join(UPLOAD_DIR, file.dir, file.name)
+            dstDir = os.path.join(
+                self.params.model_dir,
+                str(params.slot),
+                file.dir,
+            )
             dstPath = os.path.join(dstDir, file.name)
             os.makedirs(dstDir, exist_ok=True)
             print(f"move to {srcPath} -> {dstPath}")
             shutil.move(srcPath, dstPath)
             file.name = dstPath
 
         # create the metadata (defined by each VC)
         if params.voiceChangerType == "RVC":
             from voice_changer.RVC.RVC import RVC  # importing at startup fails to pick up the parameters
 
             slotInfo = RVC.loadModel2(params)
             self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+        elif params.voiceChangerType == "MMVCv13":
+            from voice_changer.MMVCv13.MMVCv13 import MMVCv13
+
+            slotInfo = MMVCv13.loadModel2(params)
+            self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+        elif params.voiceChangerType == "MMVCv15":
+            from voice_changer.MMVCv15.MMVCv15 import MMVCv15
+
+            slotInfo = MMVCv15.loadModel2(params)
+            self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+        elif params.voiceChangerType == "so-vits-svc-40":
+            from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
+
+            slotInfo = SoVitsSvc40.loadModel2(params)
+            self.modelSlotManager.save_model_slot(params.slot, slotInfo)
+        elif params.voiceChangerType == "DDSP-SVC":
+            from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
+
+            slotInfo = DDSP_SVC.loadModel2(params)
+            self.modelSlotManager.save_model_slot(params.slot, slotInfo)
         print("params", params)
 
         else:
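
The manager now preserves the per-file sub-directory on both ends of the move: the reassembled upload is read from UPLOAD_DIR/<dir>/<name> and copied to <model_dir>/<slot>/<dir>/<name>, which is how the DDSP-SVC "model/" and "diff/" pairs keep their two config.yaml files apart. A runnable sketch of just the path arithmetic (the directory values are placeholders):

    import os

    UPLOAD_DIR = "upload_dir"  # placeholder
    model_dir = "model_dir"    # placeholder

    def slot_paths(slot: int, file_dir: str, file_name: str):
        srcPath = os.path.join(UPLOAD_DIR, file_dir, file_name)
        dstDir = os.path.join(model_dir, str(slot), file_dir)
        return srcPath, dstDir, os.path.join(dstDir, file_name)

    print(slot_paths(3, "diff/", "config.yaml"))
    # ('upload_dir/diff/config.yaml', 'model_dir/3/diff/', 'model_dir/3/diff/config.yaml')
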
@@ -43,6 +43,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
 class LoadModelParamFile:
     name: str
     kind: LoadModelParamFileKind
+    dir: str
 
 
 @dataclass
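
With dir added, each entry in the client payload ({ name, kind, dir }) hydrates directly into LoadModelParamFile via LoadModelParamFile(**x), as shown in the manager hunk above. A minimal sketch of that round-trip (fields from the diff; kind is typed as a plain str here instead of the LoadModelParamFileKind Literal; payload values hypothetical):

    from dataclasses import dataclass

    @dataclass
    class LoadModelParamFile:
        name: str
        kind: str  # LoadModelParamFileKind (a Literal alias) in the real code
        dir: str

    paramDict = {"files": [{"name": "model.pth", "kind": "rvcModel", "dir": ""}]}
    files = [LoadModelParamFile(**x) for x in paramDict["files"]]
    print(files[0])  # LoadModelParamFile(name='model.pth', kind='rvcModel', dir='')
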