WIP:improve model selector (MMVCv13)

Author: wataru
Date: 2023-05-08 18:02:15 +09:00
parent 3e0772d955
commit 19e70606c8
17 changed files with 269 additions and 557 deletions

View File

@@ -32,22 +32,8 @@
     ],
     "modelSetting": [
         {
-            "name": "modelUploader",
-            "options": {
-                "showConfig": true,
-                "showOnnx": false,
-                "showPyTorch": true,
-                "showCorrespondence": false,
-                "showPyTorchCluster": false,
-                "showFeature": false,
-                "showIndex": false,
-                "showHalfPrecision": false,
-                "showPyTorchEnableCheckBox": true,
-                "defaultEnablePyTorch": true,
-                "showOnnxExportButton": false
-            }
+            "name": "modelUploaderv2",
+            "options": {}
        },
        {
            "name": "commonFileSelect",
@@ -84,16 +70,6 @@
        {
            "name": "modelUploadButtonRow2",
            "options": {}
-        },
-        {
-            "name": "framework",
-            "options": {
-                "showFramework": true
-            }
-        },
-        {
-            "name": "modelSamplingRate",
-            "options": {}
        }
    ],
    "lab": [],

View File

@@ -32,23 +32,28 @@
     ],
     "modelSetting": [
         {
-            "name": "modelUploader",
+            "name": "modelUploaderv2",
+            "options": {}
+        },
+        {
+            "name": "commonFileSelect",
             "options": {
-                "showConfig": true,
-                "showOnnx": true,
-                "showPyTorch": true,
-                "showCorrespondence": false,
-                "showPyTorchCluster": false,
-                "showPyTorchEnableCheckBox": true,
-                "defaultEnablePyTorch": false
+                "title": "Config(.json)",
+                "acceptExtentions": ["json"],
+                "fileKind": "mmvcv13Config"
             }
         },
         {
-            "name": "framework",
+            "name": "commonFileSelect",
             "options": {
-                "showFramework": true
+                "title": "Model(.pt,.pth,.onxx)",
+                "acceptExtentions": ["pt", "pth", "onnx"],
+                "fileKind": "mmvcv13Model"
             }
+        },
+        {
+            "name": "modelUploadButtonRow2",
+            "options": {}
         }
     ],
     "lab": [],

File diff suppressed because one or more lines are too long

View File

@@ -32,22 +32,8 @@
     ],
     "modelSetting": [
         {
-            "name": "modelUploader",
-            "options": {
-                "showConfig": true,
-                "showOnnx": false,
-                "showPyTorch": true,
-                "showCorrespondence": false,
-                "showPyTorchCluster": false,
-                "showFeature": false,
-                "showIndex": false,
-                "showHalfPrecision": false,
-                "showPyTorchEnableCheckBox": true,
-                "defaultEnablePyTorch": true,
-                "showOnnxExportButton": false
-            }
+            "name": "modelUploaderv2",
+            "options": {}
        },
        {
            "name": "commonFileSelect",
@@ -84,16 +70,6 @@
        {
            "name": "modelUploadButtonRow2",
            "options": {}
-        },
-        {
-            "name": "framework",
-            "options": {
-                "showFramework": true
-            }
-        },
-        {
-            "name": "modelSamplingRate",
-            "options": {}
        }
    ],
    "lab": [],

View File

@@ -32,23 +32,28 @@
     ],
     "modelSetting": [
         {
-            "name": "modelUploader",
+            "name": "modelUploaderv2",
+            "options": {}
+        },
+        {
+            "name": "commonFileSelect",
             "options": {
-                "showConfig": true,
-                "showOnnx": true,
-                "showPyTorch": true,
-                "showCorrespondence": false,
-                "showPyTorchCluster": false,
-                "showPyTorchEnableCheckBox": true,
-                "defaultEnablePyTorch": false
+                "title": "Config(.json)",
+                "acceptExtentions": ["json"],
+                "fileKind": "mmvcv13Config"
             }
         },
         {
-            "name": "framework",
+            "name": "commonFileSelect",
             "options": {
-                "showFramework": true
+                "title": "Model(.pt,.pth,.onxx)",
+                "acceptExtentions": ["pt", "pth", "onnx"],
+                "fileKind": "mmvcv13Model"
             }
+        },
+        {
+            "name": "modelUploadButtonRow2",
+            "options": {}
         }
     ],
     "lab": [],

View File

@@ -50,6 +50,7 @@ import { AudioDeviceModeRow, AudioDeviceModeRowProps } from "./components/410_Au
 import { IOBufferRow, IOBufferRowProps } from "./components/411_IOBufferRow"
 import { CommonFileSelectRow, CommonFileSelectRowProps } from "./components/301-e_CommonFileSelectRow"
 import { ModelUploadButtonRow2, ModelUploadButtonRow2Props } from "./components/301-f_ModelUploadButtonRow"
+import { ModelUploaderRowv2, ModelUploaderRowv2Props } from "./components/301_ModelUploaderRowv2"

 export const catalog: { [key: string]: (props: any) => JSX.Element } = {}

@@ -81,6 +82,7 @@ const initialize = () => {
     addToCatalog("modelUploader", (props: ModelUploaderRowProps) => { return <ModelUploaderRow {...props} /> })
+    addToCatalog("modelUploaderv2", (props: ModelUploaderRowv2Props) => { return <ModelUploaderRowv2 {...props} /> })
     addToCatalog("framework", (props: FrameworkRowProps) => { return <FrameworkRow {...props} /> })
     addToCatalog("modelSamplingRate", (props: ModelSamplingRateRowProps) => { return <ModelSamplingRateRow {...props} /> })
     addToCatalog("commonFileSelect", (props: CommonFileSelectRowProps) => { return <CommonFileSelectRow {...props} /> })

View File

@@ -47,7 +47,7 @@ export const PerformanceRow = (_props: PerformanceRowProps) => {
                 setTimeout(updatePerformance, 1000 * 2)
             }
         }
-        updatePerformance()
+        // updatePerformance()
         return () => {
             execNext = false
         }

View File

@@ -10,6 +10,8 @@ export type CommonFileSelectRowProps = {
 }

 export const Filekinds = {
+    "mmvcv13Config": "mmvcv13Config",
+    "mmvcv13Model": "mmvcv13Model",
     "ddspSvcModel": "ddspSvcModel",
     "ddspSvcModelConfig": "ddspSvcModelConfig",
     "ddspSvcDiffusion": "ddspSvcDiffusion",

View File

@@ -0,0 +1,24 @@
+import React, { useMemo } from "react"
+import { useGuiState } from "../001_GuiStateProvider"
+
+export type ModelUploaderRowv2Props = {}
+
+export const ModelUploaderRowv2 = (_props: ModelUploaderRowv2Props) => {
+    const guiState = useGuiState()
+
+    const modelUploaderRow = useMemo(() => {
+        return (
+            <div className="body-row split-3-3-4 left-padding-1 guided">
+                <div className="body-item-title left-padding-1">Model Uploader</div>
+                <div className="body-item-text">
+                    <div></div>
+                </div>
+                <div className="body-item-text">
+                </div>
+            </div>
+        )
+    }, [guiState.showPyTorchModelUpload])
+
+    return modelUploaderRow
+}

View File

@@ -31,7 +31,7 @@ export const AudioDeviceModeRow = (_props: AudioDeviceModeRowProps) => {
                 </div>
                 <div className="left-padding-1">
                     <input className="left-padding-1" type="radio" id="server-device" name="device-mode" checked={serverChecked} onChange={() => { onDeviceModeChanged(1) }} />
-                    <label htmlFor="server-device">server device</label>
+                    <label htmlFor="server-device">server device(exp.)</label>
                 </div>
             </div>
             <div></div>

View File

@@ -25,6 +25,9 @@ export type FileUploadSetting = {
     framework: Framework
     params: string

+    mmvcv13Config: ModelData | null
+    mmvcv13Model: ModelData | null
+
     ddspSvcModel: ModelData | null
     ddspSvcModelConfig: ModelData | null
     ddspSvcDiffusion: ModelData | null
@@ -41,17 +44,21 @@ const InitialFileUploadSetting: FileUploadSetting = {
     feature: null,
     index: null,

-    ddspSvcModel: null,
-    ddspSvcModelConfig: null,
-    ddspSvcDiffusion: null,
-    ddspSvcDiffusionConfig: null,
-
     isHalf: true,
     uploaded: false,
     defaultTune: 0,
     framework: Framework.PyTorch,
     params: "{}",
+
+    mmvcv13Config: null,
+    mmvcv13Model: null,
+
+    ddspSvcModel: null,
+    ddspSvcModelConfig: null,
+    ddspSvcDiffusion: null,
+    ddspSvcDiffusionConfig: null,
 }

@@ -213,7 +220,16 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
     const loadModel = useMemo(() => {
         return async (slot: number) => {
-            if (props.clientType == "DDSP-SVC") {
+            if (props.clientType == "MMVCv13") {
+                if (!fileUploadSettings[slot].mmvcv13Config) {
+                    alert("Configファイルを指定する必要があります。")
+                    return
+                }
+                if (!fileUploadSettings[slot].mmvcv13Model) {
+                    alert("モデルファイルを指定する必要があります。")
+                    return
+                }
+            } else if (props.clientType == "DDSP-SVC") {
                 if (!fileUploadSettings[slot].ddspSvcModel) {
                     alert("DDSPモデルを指定する必要があります。")
                     return
@@ -304,6 +320,22 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                 })
             }

+            // MMVCv13
+            const mmvcv13Models = [fileUploadSetting.mmvcv13Config, fileUploadSetting.mmvcv13Model].filter(x => { return x != null }) as ModelData[]
+            for (let i = 0; i < mmvcv13Models.length; i++) {
+                if (!mmvcv13Models[i].data) {
+                    mmvcv13Models[i].data = await mmvcv13Models[i].file!.arrayBuffer()
+                    mmvcv13Models[i].filename = await mmvcv13Models[i].file!.name
+                }
+            }
+            for (let i = 0; i < mmvcv13Models.length; i++) {
+                const progRate = 1 / mmvcv13Models.length
+                const progOffset = 100 * i * progRate
+                await _uploadFile(mmvcv13Models[i], (progress: number, _end: boolean) => {
+                    setUploadProgress(progress * progRate + progOffset)
+                })
+            }
+
             // DDSP-SVC
             const ddspSvcModels = [fileUploadSetting.ddspSvcModel, fileUploadSetting.ddspSvcModelConfig, fileUploadSetting.ddspSvcDiffusion, fileUploadSetting.ddspSvcDiffusionConfig].filter(x => { return x != null }) as ModelData[]
             for (let i = 0; i < ddspSvcModels.length; i++) {
@@ -325,6 +357,8 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
             const params = JSON.stringify({
                 trans: fileUploadSetting.defaultTune || 0,
                 files: {
+                    mmvcv13Config: fileUploadSetting.mmvcv13Config?.filename || "",
+                    mmvcv13Models: fileUploadSetting.mmvcv13Model?.filename || "",
                     ddspSvcModel: fileUploadSetting.ddspSvcModel?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModel?.filename : "",
                     ddspSvcModelConfig: fileUploadSetting.ddspSvcModelConfig?.filename ? "ddsp_mod/" + fileUploadSetting.ddspSvcModelConfig?.filename : "",
                     ddspSvcDiffusion: fileUploadSetting.ddspSvcDiffusion?.filename ? "ddsp_diff/" + fileUploadSetting.ddspSvcDiffusion?.filename : "",
@@ -396,6 +430,10 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                 defaultTune: fileUploadSetting.defaultTune,
                 framework: fileUploadSetting.framework,
                 params: fileUploadSetting.params,
+
+                mmvcv13Config: fileUploadSetting.mmvcv13Config ? { data: fileUploadSetting.mmvcv13Config.data, filename: fileUploadSetting.mmvcv13Config.filename } : null,
+                mmvcv13Model: fileUploadSetting.mmvcv13Model ? { data: fileUploadSetting.mmvcv13Model.data, filename: fileUploadSetting.mmvcv13Model.filename } : null,
+
                 ddspSvcModel: fileUploadSetting.ddspSvcModel ? { data: fileUploadSetting.ddspSvcModel.data, filename: fileUploadSetting.ddspSvcModel.filename } : null,
                 ddspSvcModelConfig: fileUploadSetting.ddspSvcModelConfig ? { data: fileUploadSetting.ddspSvcModelConfig.data, filename: fileUploadSetting.ddspSvcModelConfig.filename } : null,
                 ddspSvcDiffusion: fileUploadSetting.ddspSvcDiffusion ? { data: fileUploadSetting.ddspSvcDiffusion.data, filename: fileUploadSetting.ddspSvcDiffusion.filename } : null,
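
Aside on the upload-progress arithmetic added above: each file gets an equal 1/n share of the overall progress bar, and the per-file callback (which reports 0-100) is scaled into that share. A minimal standalone sketch of the same arithmetic, written in Python for brevity; the names are illustrative, not part of the commit:

def aggregate_progress(file_index: int, file_count: int, file_progress: float) -> float:
    # Each file owns an equal 1/file_count share of the overall bar.
    prog_rate = 1 / file_count
    # Offset contributed by the files uploaded before this one.
    prog_offset = 100 * file_index * prog_rate
    # Scale this file's 0-100 progress into its share and shift it.
    return file_progress * prog_rate + prog_offset

# With two files queued, a half-uploaded second file shows 75% overall.
assert aggregate_progress(1, 2, 50) == 75.0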

View File

@@ -1,11 +1,11 @@
+import json
 import sys
 import os
 from dataclasses import asdict

 import numpy as np
 import torch
-from torchaudio.transforms import Resample
-from torch.nn import functional as F
+from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
+from voice_changer.DDSP_SVC.deviceManager.DeviceManager import DeviceManager

 if sys.platform.startswith("darwin"):
     baseDir = [x for x in sys.path if x.endswith("Contents/MacOS")]
@@ -17,9 +17,6 @@ if sys.platform.startswith("darwin"):
 else:
     sys.path.append("DDSP-SVC")

-import ddsp.vocoder as vo  # type:ignore
-from ddsp.core import upsample  # type:ignore
-from enhancer import Enhancer  # type:ignore
 from diffusion.infer_gt_mel import DiffGtMel  # type: ignore

 from voice_changer.utils.VoiceChangerModel import AudioInOut
@@ -27,18 +24,11 @@ from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
 from voice_changer.utils.LoadModelParams import LoadModelParams
 from voice_changer.DDSP_SVC.DDSP_SVCSetting import DDSP_SVCSettings
 from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
-from Exceptions import NoModeLoadedException
+
+# from Exceptions import NoModeLoadedException
 from voice_changer.DDSP_SVC.SvcDDSP import SvcDDSP

-providers = [
-    "OpenVINOExecutionProvider",
-    "CUDAExecutionProvider",
-    "DmlExecutionProvider",
-    "CPUExecutionProvider",
-]
-

 def phase_vocoder(a, b, fade_out, fade_in):
     fa = torch.fft.rfft(a)
     fb = torch.fft.rfft(b)
@@ -67,6 +57,8 @@ class DDSP_SVC:
     settings: DDSP_SVCSettings = DDSP_SVCSettings()
     diff_model: DiffGtMel = DiffGtMel()
     svc_model: SvcDDSP = SvcDDSP()
+    deviceManager = DeviceManager.get_instance()
+
     # diff_model: DiffGtMel = DiffGtMel()
     audio_buffer: AudioInOut | None = None
@@ -80,147 +72,62 @@ class DDSP_SVC:
         EmbedderManager.initialize(params)
         print("DDSP-SVC initialization:", params)

-    # def useDevice(self):
-    #     if self.settings.gpu >= 0 and torch.cuda.is_available():
-    #         return torch.device("cuda", index=self.settings.gpu)
-    #     else:
-    #         return torch.device("cpu")
-
     def loadModel(self, props: LoadModelParams):
-        # target_slot_idx = props.slot
-        self.device = torch.device("cuda", index=0)
+        target_slot_idx = props.slot
         params = props.params
         modelFile = params["files"]["ddspSvcModel"]
         diffusionFile = params["files"]["ddspSvcDiffusion"]
-        self.svc_model.update_model(modelFile)
+        modelSlot = ModelSlot(
+            modelFile=modelFile,
+            diffusionFile=diffusionFile,
+            defaultTrans=params["trans"] if "trans" in params else 0,
+        )
+        self.settings.modelSlots[target_slot_idx] = modelSlot

-        print("diffusion file", diffusionFile)
-        self.diff_model.flush_model(diffusionFile, ddsp_config=self.svc_model.args)
+        # 初回のみロード
+        # if self.initialLoad:
+        #     self.prepareModel(target_slot_idx)
+        #     self.settings.modelSlotIndex = target_slot_idx
+        #     self.switchModel()
+        #     self.initialLoad = False
+        # elif target_slot_idx == self.currentSlot:
+        #     self.prepareModel(target_slot_idx)
+        self.settings.modelSlotIndex = target_slot_idx
+        self.reloadModel()
         print("params:", params)
-        # print("params_arg:", self.args)
-
-        # self.settings.pyTorchModelFile = props.files.pyTorchModelFilename
-        # # model
-        # model, args = vo.load_model(
-        #     self.settings.pyTorchModelFile, device=self.useDevice()
-        # )
-        # self.model = model
-        # self.args = args
-        # self.sampling_rate = args.data.sampling_rate
-        # self.hop_size = int(
-        #     self.args.data.block_size
-        #     * self.sampling_rate
-        #     / self.args.data.sampling_rate
-        # )
-
-        # # hubert
-        # self.vec_path = self.params.hubert_soft
-        # self.encoder = vo.Units_Encoder(
-        #     self.args.data.encoder,
-        #     self.vec_path,
-        #     self.args.data.encoder_sample_rate,
-        #     self.args.data.encoder_hop_size,
-        #     device=self.useDevice(),
-        # )
-
-        # # f0dec
-        # self.f0_detector = vo.F0_Extractor(
-        #     # "crepe",
-        #     self.settings.f0Detector,
-        #     self.sampling_rate,
-        #     self.hop_size,
-        #     float(50),
-        #     float(1100),
-        # )
-        # self.volume_extractor = vo.Volume_Extractor(self.hop_size)
-        # self.enhancer_path = self.params.nsf_hifigan
-        # self.enhancer = Enhancer(
-        #     self.args.enhancer.type, self.enhancer_path, device=self.useDevice()
-        # )
         return self.get_info()

+    def reloadModel(self):
+        self.device = self.deviceManager.getDevice(self.settings.gpu)
+        modelFile = self.settings.modelSlots[self.settings.modelSlotIndex].modelFile
+        diffusionFile = self.settings.modelSlots[
+            self.settings.modelSlotIndex
+        ].diffusionFile
+
+        self.svc_model = SvcDDSP()
+        self.svc_model.setVCParams(self.params)
+        self.svc_model.update_model(modelFile, self.device)
+        self.diff_model = DiffGtMel(device=self.device)
+        self.diff_model.flush_model(diffusionFile, ddsp_config=self.svc_model.args)
+
     def update_settings(self, key: str, val: int | float | str):
-        # if key == "onnxExecutionProvider" and self.onnx_session is not None:
-        #     if val == "CUDAExecutionProvider":
-        #         if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
-        #             self.settings.gpu = 0
-        #         provider_options = [{"device_id": self.settings.gpu}]
-        #         self.onnx_session.set_providers(
-        #             providers=[val], provider_options=provider_options
-        #         )
-        #     else:
-        #         self.onnx_session.set_providers(providers=[val])
-        # elif key in self.settings.intData:
-        #     val = int(val)
-        #     setattr(self.settings, key, val)
-        #     if (
-        #         key == "gpu"
-        #         and val >= 0
-        #         and val < self.gpu_num
-        #         and self.onnx_session is not None
-        #     ):
-        #         providers = self.onnx_session.get_providers()
-        #         print("Providers:", providers)
-        #         if "CUDAExecutionProvider" in providers:
-        #             provider_options = [{"device_id": self.settings.gpu}]
-        #             self.onnx_session.set_providers(
-        #                 providers=["CUDAExecutionProvider"],
-        #                 provider_options=provider_options,
-        #             )
-        #     if key == "gpu" and len(self.settings.pyTorchModelFile) > 0:
-        #         model, _args = vo.load_model(
-        #             self.settings.pyTorchModelFile, device=self.useDevice()
-        #         )
-        #         self.model = model
-        #         self.enhancer = Enhancer(
-        #             self.args.enhancer.type, self.enhancer_path, device=self.useDevice()
-        #         )
-        #         self.encoder = vo.Units_Encoder(
-        #             self.args.data.encoder,
-        #             self.vec_path,
-        #             self.args.data.encoder_sample_rate,
-        #             self.args.data.encoder_hop_size,
-        #             device=self.useDevice(),
-        #         )
-        # elif key in self.settings.floatData:
-        #     setattr(self.settings, key, float(val))
-        # elif key in self.settings.strData:
-        #     setattr(self.settings, key, str(val))
-        #     if key == "f0Detector":
-        #         print("f0Detector update", val)
-        #         # if val == "dio":
-        #         #     val = "parselmouth"
-        #         if hasattr(self, "sampling_rate") is False:
-        #             self.sampling_rate = 44100
-        #             self.hop_size = 512
-        #         self.f0_detector = vo.F0_Extractor(
-        #             val, self.sampling_rate, self.hop_size, float(50), float(1100)
-        #         )
-        # else:
-        #     return False
+        if key in self.settings.intData:
+            val = int(val)
+            setattr(self.settings, key, val)
+            if key == "gpu":
+                self.reloadModel()
+        elif key in self.settings.floatData:
+            setattr(self.settings, key, float(val))
+        elif key in self.settings.strData:
+            setattr(self.settings, key, str(val))
+        else:
+            return False
         return True

     def get_info(self):
-        # data = asdict(self.settings)
-        # data["onnxExecutionProviders"] = (
-        #     self.onnx_session.get_providers() if self.onnx_session is not None else []
-        # )
-        # files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
-        # for f in files:
-        #     if data[f] is not None and os.path.exists(data[f]):
-        #         data[f] = os.path.basename(data[f])
-        #     else:
-        #         data[f] = ""
-        data = {}
+        data = asdict(self.settings)
         return data

     def get_processing_sampling_rate(self):
@@ -252,45 +159,7 @@ class DDSP_SVC:
             convertOffset = -1 * convertSize
             self.audio_buffer = self.audio_buffer[convertOffset:]  # 変換対象の部分だけ抽出

-        # # f0
-        # f0 = self.f0_detector.extract(
-        #     self.audio_buffer * 32768.0,
-        #     uv_interp=True,
-        #     silence_front=self.settings.extraConvertSize / self.sampling_rate,
-        # )
-        # f0 = torch.from_numpy(f0).float().unsqueeze(-1).unsqueeze(0)
-        # f0 = f0 * 2 ** (float(self.settings.tran) / 12)
-
-        # # volume, mask
-        # volume = self.volume_extractor.extract(self.audio_buffer)
-        # mask = (volume > 10 ** (float(-60) / 20)).astype("float")
-        # mask = np.pad(mask, (4, 4), constant_values=(mask[0], mask[-1]))
-        # mask = np.array(
-        #     [np.max(mask[n : n + 9]) for n in range(len(mask) - 8)]  # noqa: E203
-        # )
-        # mask = torch.from_numpy(mask).float().unsqueeze(-1).unsqueeze(0)
-        # mask = upsample(mask, self.args.data.block_size).squeeze(-1)
-        # volume = torch.from_numpy(volume).float().unsqueeze(-1).unsqueeze(0)
-
-        # # embed
-        # audio = (
-        #     torch.from_numpy(self.audio_buffer)
-        #     .float()
-        #     .to(self.useDevice())
-        #     .unsqueeze(0)
-        # )
-        # seg_units = self.encoder.encode(audio, self.sampling_rate, self.hop_size)
-
-        # cropOffset = -1 * (inputSize + crossfadeSize)
-        # cropEnd = -1 * (crossfadeSize)
-        # crop = self.audio_buffer[cropOffset:cropEnd]
-        # rms = np.sqrt(np.square(crop).mean(axis=0))
-        # vol = max(rms, self.prevVol * 0.0)
-        # self.prevVol = vol
-
-        return (self.audio_buffer, inputSize, crossfadeSize, solaSearchFrame)
+        return (self.audio_buffer,)

     # def _onnx_inference(self, data):
     #     if hasattr(self, "onnx_session") is False or self.onnx_session is None:
@@ -305,32 +174,21 @@ class DDSP_SVC:
     #         raise NoModeLoadedException("pytorch")

         input_wav = data[0]
-        # inputSize = data[1]
-        # crossfadeSize = data[2]
-        # solaSearchFrame = data[3]
-        # last_delay_frame = int(0.02 * self.svc_model.args.data.sampling_rate)
-        # fade_in_window = (
-        #     torch.sin(
-        #         np.pi * torch.arange(0, 1, 1 / crossfadeSize, device=self.device) / 2
-        #     )
-        #     ** 2
-        # )
-        # fade_out_window = 1 - fade_in_window

         _audio, _model_sr = self.svc_model.infer(
             input_wav,
-            44100,
+            self.svc_model.args.data.sampling_rate,
             spk_id=1,
             threhold=-45,
-            pitch_adjust=10,
+            pitch_adjust=self.settings.tran,
             use_spk_mix=False,
             spk_mix_dict=None,
             use_enhancer=False,
-            pitch_extractor_type="harvest",
+            pitch_extractor_type=self.settings.f0Detector,
             f0_min=50,
             f0_max=1100,
-            safe_prefix_pad_length=0,  # TBD なにこれ?
+            # safe_prefix_pad_length=0,  # TBD なにこれ?
+            safe_prefix_pad_length=self.settings.extraConvertSize
+            / self.svc_model.args.data.sampling_rate,
             diff_model=self.diff_model,
             diff_acc=20,  # TBD なにこれ?
             diff_spk_id=1,
@@ -340,94 +198,8 @@ class DDSP_SVC:
             diff_silence=False,  # TBD なにこれ?
         )

-        print(" _model_sr", _model_sr)
-        print("_audio", _audio.shape)
-        print("_audio", _audio)
         return _audio.cpu().numpy() * 32768.0

-        # if _model_sr != self.svc_model.args.data.sampling_rate:
-        #     key_str = str(_model_sr) + "_" + str(self.svc_model.args.data.sampling_rate)
-        #     if key_str not in self.resample_kernel:
-        #         self.resample_kernel[key_str] = Resample(
-        #             _model_sr,
-        #             self.svc_model.args.data.sampling_rate,
-        #             lowpass_filter_width=128,
-        #         ).to(self.device)
-        #     _audio = self.resample_kernel[key_str](_audio)
-
-        # temp_wav = _audio[
-        #     -inputSize
-        #     - crossfadeSize
-        #     - solaSearchFrame
-        #     - last_delay_frame : -last_delay_frame
-        # ]
-
-        # # sola shift
-        # conv_input = temp_wav[None, None, : crossfadeSize + solaSearchFrame]
-        # cor_nom = F.conv1d(conv_input, self.sola_buffer[None, None, :])
-        # cor_den = torch.sqrt(
-        #     F.conv1d(
-        #         conv_input**2,
-        #         torch.ones(1, 1, crossfadeSize, device=self.device),
-        #     )
-        #     + 1e-8
-        # )
-        # sola_shift = torch.argmax(cor_nom[0, 0] / cor_den[0, 0])
-        # temp_wav = temp_wav[sola_shift : sola_shift + inputSize + crossfadeSize]
-        # print("sola_shift: " + str(int(sola_shift)))
-
-        # # phase vocoder
-        # # if self.config.use_phase_vocoder:
-        # if False:
-        #     temp_wav[:crossfadeSize] = phase_vocoder(
-        #         self.sola_buffer,
-        #         temp_wav[:crossfadeSize],
-        #         fade_out_window,
-        #         fade_in_window,
-        #     )
-        # else:
-        #     temp_wav[:crossfadeSize] *= fade_in_window
-        #     temp_wav[:crossfadeSize] += self.sola_buffer * fade_out_window
-
-        # self.sola_buffer = temp_wav[-crossfadeSize:]
-        # result = temp_wav[:-crossfadeSize, None].repeat(1, 2).cpu().numpy()
-
-        ###########################################
-        # c = data[0].to(self.useDevice())
-        # f0 = data[1].to(self.useDevice())
-        # volume = data[2].to(self.useDevice())
-        # mask = data[3].to(self.useDevice())
-
-        # # convertSize = data[4]
-        # # vol = data[5]
-        # # if vol < self.settings.silentThreshold:
-        # #     print("threshold")
-        # #     return np.zeros(convertSize).astype(np.int16)
-
-        # with torch.no_grad():
-        #     spk_id = torch.LongTensor(np.array([[self.settings.dstId]])).to(
-        #         self.useDevice()
-        #     )
-        #     seg_output, _, (s_h, s_n) = self.model(
-        #         c, f0, volume, spk_id=spk_id, spk_mix_dict=None
-        #     )
-        #     seg_output *= mask
-
-        #     if self.settings.enableEnhancer:
-        #         seg_output, output_sample_rate = self.enhancer.enhance(
-        #             seg_output,
-        #             self.args.data.sampling_rate,
-        #             f0,
-        #             self.args.data.block_size,
-        #             # adaptive_key=float(self.settings.enhancerTune),
-        #             adaptive_key="auto",
-        #             silence_front=self.settings.extraConvertSize / self.sampling_rate,
-        #         )
-
-        # result = seg_output.squeeze().cpu().numpy() * 32768.0
-        # return np.array(result).astype(np.int16)

     def inference(self, data):
         if self.settings.framework == "ONNX":
             audio = self._onnx_inference(data)
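
The rewrite above moves device selection out of loadModel (which previously hard-coded torch.device("cuda", index=0)) into reloadModel via DeviceManager.getDevice(self.settings.gpu), so changing the gpu setting rebuilds both SvcDDSP and DiffGtMel on the right device. DeviceManager itself is not part of this diff; a plausible sketch of what getDevice is assumed to do, based on the deleted useDevice comment:

import torch

def get_device(gpu: int) -> torch.device:
    # gpu < 0 means "CPU"; otherwise use the given CUDA index when CUDA is
    # actually available. Hypothetical stand-in for DeviceManager.getDevice,
    # whose real implementation lives outside this commit.
    if gpu >= 0 and torch.cuda.is_available():
        return torch.device("cuda", index=gpu)
    return torch.device("cpu")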

View File

@@ -1,14 +1,15 @@
 from dataclasses import dataclass, field

+from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
+

 @dataclass
 class DDSP_SVCSettings:
     gpu: int = 0
-    dstId: int = 0
+    dstId: int = 1

-    f0Detector: str = "dio"  # dio or harvest # parselmouth
+    f0Detector: str = "dio"  # dio or harvest or crepe # parselmouth
     tran: int = 20
-    predictF0: int = 0  # 0:False, 1:True
     silentThreshold: float = 0.00001
     extraConvertSize: int = 1024 * 32
@@ -21,16 +22,16 @@ class DDSP_SVCSettings:
     configFile: str = ""

     speakers: dict[str, int] = field(default_factory=lambda: {})
+    modelSlotIndex: int = -1
+    modelSlots: list[ModelSlot] = field(default_factory=lambda: [ModelSlot()])

     # ↓mutableな物だけ列挙
     intData = [
         "gpu",
         "dstId",
         "tran",
-        "predictF0",
         "extraConvertSize",
         "enableEnhancer",
         "enhancerTune",
     ]
-    floatData = ["silentThreshold", "clusterInferRatio"]
+    floatData = ["silentThreshold"]
     strData = ["framework", "f0Detector"]

View File

@@ -1,16 +1,8 @@
-from const import EnumInferenceTypes, EnumEmbedderTypes
 from dataclasses import dataclass


 @dataclass
 class ModelSlot:
-    pyTorchModelFile: str = ""
-    pyTorchDiffusionModelFile: str = ""
+    modelFile: str = ""
+    diffusionFile: str = ""
     defaultTrans: int = 0
-
-    # modelType: EnumDDSPSVCInferenceTypes = EnumDDSPSVCInferenceTypes.pyTorchRVC
-    # samplingRate: int = -1
-    # f0: bool = True
-    # embChannels: int = 256
-    # deprecated: bool = False
-    # embedder: EnumEmbedderTypes = EnumEmbedderTypes.hubert

View File

@ -1,107 +0,0 @@
from const import EnumEmbedderTypes, EnumInferenceTypes
from voice_changer.RVC.ModelSlot import ModelSlot
from voice_changer.utils.LoadModelParams import FilePaths
import torch
import onnxruntime
import json
def generateModelSlot(files: FilePaths, params):
modelSlot = ModelSlot()
modelSlot.pyTorchModelFile = files.pyTorchModelFilename
modelSlot.onnxModelFile = files.onnxModelFilename
modelSlot.featureFile = files.featureFilename
modelSlot.indexFile = files.indexFilename
modelSlot.defaultTrans = params["trans"] if "trans" in params else 0
modelSlot.isONNX = True if modelSlot.onnxModelFile is not None else False
if modelSlot.isONNX:
_setInfoByONNX(modelSlot, modelSlot.onnxModelFile)
else:
_setInfoByPytorch(modelSlot, modelSlot.pyTorchModelFile)
return modelSlot
def _setInfoByPytorch(slot: ModelSlot, file: str):
cpt = torch.load(file, map_location="cpu")
config_len = len(cpt["config"])
if config_len == 18:
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = (
EnumInferenceTypes.pyTorchRVC
if slot.f0
else EnumInferenceTypes.pyTorchRVCNono
)
slot.embChannels = 256
slot.embedder = EnumEmbedderTypes.hubert
else:
slot.f0 = True if cpt["f0"] == 1 else False
slot.modelType = (
EnumInferenceTypes.pyTorchWebUI
if slot.f0
else EnumInferenceTypes.pyTorchWebUINono
)
slot.embChannels = cpt["config"][17]
slot.embedder = cpt["embedder_name"]
if slot.embedder.endswith("768"):
slot.embedder = slot.embedder[:-3]
if slot.embedder == EnumEmbedderTypes.hubert.value:
slot.embedder = EnumEmbedderTypes.hubert
elif slot.embedder == EnumEmbedderTypes.contentvec.value:
slot.embedder = EnumEmbedderTypes.contentvec
elif slot.embedder == EnumEmbedderTypes.hubert_jp.value:
slot.embedder = EnumEmbedderTypes.hubert_jp
else:
raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.samplingRate = cpt["config"][-1]
del cpt
def _setInfoByONNX(slot: ModelSlot, file: str):
tmp_onnx_session = onnxruntime.InferenceSession(
file, providers=["CPUExecutionProvider"]
)
modelmeta = tmp_onnx_session.get_modelmeta()
try:
metadata = json.loads(modelmeta.custom_metadata_map["metadata"])
# slot.modelType = metadata["modelType"]
slot.embChannels = metadata["embChannels"]
if "embedder" not in metadata:
slot.embedder = EnumEmbedderTypes.hubert
elif metadata["embedder"] == EnumEmbedderTypes.hubert.value:
slot.embedder = EnumEmbedderTypes.hubert
elif metadata["embedder"] == EnumEmbedderTypes.contentvec.value:
slot.embedder = EnumEmbedderTypes.contentvec
elif metadata["embedder"] == EnumEmbedderTypes.hubert_jp.value:
slot.embedder = EnumEmbedderTypes.hubert_jp
else:
raise RuntimeError("[Voice Changer][setInfoByONNX] unknown embedder")
slot.f0 = metadata["f0"]
slot.modelType = (
EnumInferenceTypes.onnxRVC if slot.f0 else EnumInferenceTypes.onnxRVCNono
)
slot.samplingRate = metadata["samplingRate"]
slot.deprecated = False
except Exception as e:
slot.modelType = EnumInferenceTypes.onnxRVC
slot.embChannels = 256
slot.embedder = EnumEmbedderTypes.hubert
slot.f0 = True
slot.samplingRate = 48000
slot.deprecated = True
print("[Voice Changer] setInfoByONNX", e)
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
print("[Voice Changer] This onnxfie is depricated. Please regenerate onnxfile.")
print("[Voice Changer] ############## !!!! CAUTION !!!! ####################")
del tmp_onnx_session

View File

@@ -21,8 +21,8 @@ class SvcDDSP:
     def setVCParams(self, params: VoiceChangerParams):
         self.params = params

-    def update_model(self, model_path):
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+    def update_model(self, model_path: str, device: torch.device):
+        self.device = device

         # load ddsp model
         if self.model is None or self.model_path != model_path:
@@ -42,35 +42,33 @@ class SvcDDSP:
             else:
                 cnhubertsoft_gate = 10

-            # if self.args.data.encoder == "hubertsoft":
-            #     encoderPath = self.params.hubert_soft
-            # elif self.args.data.encoder == "hubertbase":
-            #     encoderPath = self.params.hubert_base
-            # elif self.args.data.encoder == "hubertbase768":
-            #     encoderPath = self.params.hubert_base
-            # elif self.args.data.encoder == "hubertbase768l12":
-            #     encoderPath = self.params.hubert_base
-            # elif self.args.data.encoder == "hubertlarge1024l24":
-            #     encoderPath = self.params.hubert_base
-            # elif self.args.data.encoder == "contentvec":
-            #     encoderPath = self.params.hubert_base
-            # elif self.args.data.encoder == "contentvec768":
-            #     encoderPath = self.params.hubert_base
-            # elif self.args.data.encoder == "contentvec768l12":
-            #     encoderPath = self.params.hubert_base
+            if self.args.data.encoder == "hubertsoft":
+                encoderPath = self.params.hubert_soft
+            elif self.args.data.encoder == "hubertbase":
+                encoderPath = self.params.hubert_base
+            elif self.args.data.encoder == "hubertbase768":
+                encoderPath = self.params.hubert_base
+            elif self.args.data.encoder == "hubertbase768l12":
+                encoderPath = self.params.hubert_base
+            elif self.args.data.encoder == "hubertlarge1024l24":
+                encoderPath = self.params.hubert_base
+            elif self.args.data.encoder == "contentvec":
+                encoderPath = self.params.hubert_base
+            elif self.args.data.encoder == "contentvec768":
+                encoderPath = self.params.hubert_base
+            elif self.args.data.encoder == "contentvec768l12":
+                encoderPath = self.params.hubert_base

             self.units_encoder = Units_Encoder(
                 self.args.data.encoder,
-                # encoderPath,
-                self.args.data.encoder_ckpt,
+                encoderPath,
                 self.args.data.encoder_sample_rate,
                 self.args.data.encoder_hop_size,
                 cnhubertsoft_gate=cnhubertsoft_gate,
                 device=self.device,
             )
             self.encoder_type = self.args.data.encoder
-            # self.encoder_ckpt = encoderPath
-            self.encoder_ckpt = self.args.data.encoder_ckpt
+            self.encoder_ckpt = encoderPath

         # load enhancer
         if (
@@ -109,8 +107,8 @@ class SvcDDSP:
         diff_silence=False,
         audio_alignment=False,
     ):
-        print("Infering...")
-        print("audio", audio)
+        # print("Infering...")
+        # print("audio", audio)
         # load input
         # audio, sample_rate = librosa.load(input_wav, sr=None, mono=True)
         hop_size = (
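
Every branch of the newly enabled encoder selection above resolves to params.hubert_base except "hubertsoft", which uses params.hubert_soft. A compact table-lookup equivalent, shown as a hypothetical refactoring rather than code from the commit:

# Maps encoder names to the attribute on params holding the checkpoint path;
# anything not listed falls back to "hubert_base".
ENCODER_PARAM_ATTR = {"hubertsoft": "hubert_soft"}

def encoder_checkpoint(params, encoder: str) -> str:
    return getattr(params, ENCODER_PARAM_ATTR.get(encoder, "hubert_base"))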

View File

@@ -32,13 +32,6 @@ from voice_changer.MMVCv13.TrainerFunctions import (
 from Exceptions import NoModeLoadedException

-providers = [
-    "OpenVINOExecutionProvider",
-    "CUDAExecutionProvider",
-    "DmlExecutionProvider",
-    "CPUExecutionProvider",
-]
-

 @dataclass
 class MMVCv13Settings:
@@ -69,11 +62,18 @@ class MMVCv13:
         self.text_norm = torch.LongTensor([0, 6, 0])

     def loadModel(self, props: LoadModelParams):
-        self.settings.configFile = props.files.configFilename
+        params = props.params
+        self.settings.configFile = params["files"]["mmvcv13Config"]
         self.hps = get_hparams_from_file(self.settings.configFile)

-        self.settings.pyTorchModelFile = props.files.pyTorchModelFilename
-        self.settings.onnxModelFile = props.files.onnxModelFilename
+        modelFile = params["files"]["mmvcv13Models"]
+        if modelFile.endswith(".onnx"):
+            self.settings.pyTorchModelFile = None
+            self.settings.onnxModelFile = modelFile
+        else:
+            self.settings.pyTorchModelFile = modelFile
+            self.settings.onnxModelFile = None

         # PyTorchモデル生成
         if self.settings.pyTorchModelFile is not None:
@@ -89,41 +89,58 @@ class MMVCv13:

         # ONNXモデル生成
         if self.settings.onnxModelFile is not None:
-            ort_options = onnxruntime.SessionOptions()
-            ort_options.intra_op_num_threads = 8
+            # ort_options = onnxruntime.SessionOptions()
+            # ort_options.intra_op_num_threads = 8
+            # ort_options.execution_mode = ort_options.ExecutionMode.ORT_PARALLEL
+            # ort_options.inter_op_num_threads = 8
+            providers, options = self.getOnnxExecutionProvider()
             self.onnx_session = onnxruntime.InferenceSession(
-                self.settings.onnxModelFile, providers=providers
+                self.settings.onnxModelFile,
+                providers=providers,
+                provider_options=options,
             )
         return self.get_info()

+    def getOnnxExecutionProvider(self):
+        if self.settings.gpu >= 0:
+            return ["CUDAExecutionProvider"], [{"device_id": self.settings.gpu}]
+        elif "DmlExecutionProvider" in onnxruntime.get_available_providers():
+            return ["DmlExecutionProvider"], []
+        else:
+            return ["CPUExecutionProvider"], [
+                {
+                    "intra_op_num_threads": 8,
+                    "execution_mode": onnxruntime.ExecutionMode.ORT_PARALLEL,
+                    "inter_op_num_threads": 8,
+                }
+            ]
+
+    def isOnnx(self):
+        if self.settings.onnxModelFile is not None:
+            return True
+        else:
+            return False
+
     def update_settings(self, key: str, val: int | float | str):
-        if key == "onnxExecutionProvider" and self.onnx_session is not None:
-            if val == "CUDAExecutionProvider":
-                if self.settings.gpu < 0 or self.settings.gpu >= self.gpu_num:
-                    self.settings.gpu = 0
-                provider_options = [{"device_id": self.settings.gpu}]
-                self.onnx_session.set_providers(
-                    providers=[val], provider_options=provider_options
-                )
-            else:
-                self.onnx_session.set_providers(providers=[val])
-        elif key in self.settings.intData:
+        if key in self.settings.intData:
             val = int(val)
             setattr(self.settings, key, val)
-            if (
-                key == "gpu"
-                and val >= 0
-                and val < self.gpu_num
-                and self.onnx_session is not None
-            ):
-                providers = self.onnx_session.get_providers()
-                print("Providers:", providers)
-                if "CUDAExecutionProvider" in providers:
-                    provider_options = [{"device_id": self.settings.gpu}]
-                    self.onnx_session.set_providers(
-                        providers=["CUDAExecutionProvider"],
-                        provider_options=provider_options,
-                    )
+
+            if key == "gpu" and self.isOnnx():
+                providers, options = self.getOnnxExecutionProvider()
+                self.onnx_session = onnxruntime.InferenceSession(
+                    self.settings.onnxModelFile,
+                    providers=providers,
+                    provider_options=options,
+                )
+                # providers = self.onnx_session.get_providers()
+                # print("Providers:", providers)
+                # if "CUDAExecutionProvider" in providers:
+                #     provider_options = [{"device_id": self.settings.gpu}]
+                #     self.onnx_session.set_providers(
+                #         providers=["CUDAExecutionProvider"],
+                #         provider_options=provider_options,
+                #     )
         elif key in self.settings.floatData:
             setattr(self.settings, key, float(val))
         elif key in self.settings.strData:
@@ -254,7 +271,7 @@ class MMVCv13:
         return result

     def inference(self, data):
-        if self.settings.framework == "ONNX":
+        if self.isOnnx():
             audio = self._onnx_inference(data)
         else:
             audio = self._pyTorch_inference(data)
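
The new getOnnxExecutionProvider replaces the old fixed providers list with a runtime fallback: CUDA when a GPU index is configured, DirectML when onnxruntime reports it as available, otherwise a multi-threaded CPU provider. A standalone sketch of the same fallback order, simplified (the commit additionally passes per-provider options such as {"device_id": gpu} for CUDA, and this sketch adds an availability check for CUDA that the commit does not perform):

import onnxruntime

def pick_providers(gpu: int) -> list[str]:
    # Fallback order: CUDA -> DirectML -> CPU.
    available = onnxruntime.get_available_providers()
    if gpu >= 0 and "CUDAExecutionProvider" in available:
        return ["CUDAExecutionProvider"]
    if "DmlExecutionProvider" in available:
        return ["DmlExecutionProvider"]
    return ["CPUExecutionProvider"]

# Usage: session = onnxruntime.InferenceSession(model_path, providers=pick_providers(0))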