WIP: integrate vcs to new gui 2

wataru 2023-06-20 06:39:39 +09:00
parent b453e5fd85
commit b6996a15fe
12 changed files with 251 additions and 153 deletions

File diff suppressed because one or more lines are too long


@ -1,6 +1,6 @@
import React, { useEffect, useMemo, useState } from "react";
import { useAppState } from "../../001_provider/001_AppStateProvider";
import { FileUploadSetting, InitialFileUploadSetting, ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
import { ModelFileKind, ModelUploadSetting, VoiceChangerType, fileSelector } from "@dannadori/voice-changer-client-js";
import { useMessageBuilder } from "../../hooks/useMessageBuilder";
import { ModelSlotManagerDialogScreen } from "./904_ModelSlotManagerDialog";
import { checkExtention, trimfileName } from "../../utils/utils";
@ -54,15 +54,31 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
const checkModelSetting = (setting: ModelUploadSetting) => {
if (setting.voiceChangerType == "RVC") {
// const enough = !!setting.files.find(x => { return x.kind == "rvcModel" }) &&
// !!setting.files.find(x => { return x.kind == "rvcIndex" })
// return enough
const enough = !!setting.files.find(x => { return x.kind == "rvcModel" })
return enough
} else if (setting.voiceChangerType == "MMVCv13") {
const enough = !!setting.files.find(x => { return x.kind == "mmvcv13Model" }) &&
!!setting.files.find(x => { return x.kind == "mmvcv13Config" })
return enough
} else if (setting.voiceChangerType == "MMVCv15") {
const enough = !!setting.files.find(x => { return x.kind == "mmvcv15Model" }) &&
!!setting.files.find(x => { return x.kind == "mmvcv15Config" })
return enough
} else if (setting.voiceChangerType == "so-vits-svc-40") {
const enough = !!setting.files.find(x => { return x.kind == "soVitsSvc40Config" }) &&
!!setting.files.find(x => { return x.kind == "soVitsSvc40Model" })
return enough
} else if (setting.voiceChangerType == "DDSP-SVC") {
const enough = !!setting.files.find(x => { return x.kind == "ddspSvcModel" }) &&
!!setting.files.find(x => { return x.kind == "ddspSvcModelConfig" }) &&
!!setting.files.find(x => { return x.kind == "ddspSvcDiffusion" }) &&
!!setting.files.find(x => { return x.kind == "ddspSvcDiffusionConfig" })
return enough
}
return false
}
const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[]) => {
const generateFileRow = (setting: ModelUploadSetting, title: string, kind: ModelFileKind, ext: string[], dir: string = "") => {
const selectedFile = setting.files.find(x => { return x.kind == kind })
const selectedFilename = selectedFile?.file.name || ""
return (
@ -81,7 +97,7 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
if (selectedFile) {
selectedFile.file = file
} else {
setting.files.push({ kind: kind, file: file })
setting.files.push({ kind: kind, file: file, dir: dir })
}
setUploadSetting({ ...setting })
}}>
@ -96,6 +112,21 @@ export const FileUploaderScreen = (props: FileUploaderScreenProps) => {
if (vcType == "RVC") {
rows.push(generateFileRow(uploadSetting!, "Model", "rvcModel", ["pth", "onnx"]))
rows.push(generateFileRow(uploadSetting!, "Index", "rvcIndex", ["index", "bin"]))
} else if (vcType == "MMVCv13") {
rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv13Config", ["json"]))
rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv13Model", ["pth", "onnx"]))
} else if (vcType == "MMVCv15") {
rows.push(generateFileRow(uploadSetting!, "Config", "mmvcv15Config", ["json"]))
rows.push(generateFileRow(uploadSetting!, "Model", "mmvcv15Model", ["pth", "onnx"]))
} else if (vcType == "so-vits-svc-40") {
rows.push(generateFileRow(uploadSetting!, "Config", "soVitsSvc40Config", ["json"]))
rows.push(generateFileRow(uploadSetting!, "Model", "soVitsSvc40Model", ["pth"]))
rows.push(generateFileRow(uploadSetting!, "Cluster", "soVitsSvc40Cluster", ["pth", "pt"]))
} else if (vcType == "DDSP-SVC") {
rows.push(generateFileRow(uploadSetting!, "Config", "ddspSvcModelConfig", ["yaml"], "model/"))
rows.push(generateFileRow(uploadSetting!, "Model", "ddspSvcModel", ["pth", "pt"], "model/"))
rows.push(generateFileRow(uploadSetting!, "Config(diff)", "ddspSvcDiffusionConfig", ["yaml"], "diff/"))
rows.push(generateFileRow(uploadSetting!, "Model(diff)", "ddspSvcDiffusion", ["pth", "pt"], "diff/"))
}
return rows
}
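
checkModelSetting above encodes which uploaded file kinds are mandatory per voice changer type (the RVC index file and the so-vits-svc-40 cluster file remain optional). A minimal table-driven sketch of the same rule, written in Python for brevity; REQUIRED_KINDS and check_model_setting are hypothetical names, not part of this commit:

    # Hypothetical mirror of the client-side checkModelSetting guard.
    REQUIRED_KINDS = {
        "RVC": {"rvcModel"},  # rvcIndex is optional
        "MMVCv13": {"mmvcv13Model", "mmvcv13Config"},
        "MMVCv15": {"mmvcv15Model", "mmvcv15Config"},
        "so-vits-svc-40": {"soVitsSvc40Model", "soVitsSvc40Config"},  # cluster is optional
        "DDSP-SVC": {"ddspSvcModel", "ddspSvcModelConfig", "ddspSvcDiffusion", "ddspSvcDiffusionConfig"},
    }

    def check_model_setting(voice_changer_type: str, kinds: set) -> bool:
        # "enough" files are present when every required kind was selected
        required = REQUIRED_KINDS.get(voice_changer_type)
        return required is not None and required.issubset(kinds)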


@ -57,13 +57,13 @@ export class ServerConfigurator {
return info
}
uploadFile2 = async (file: File, onprogress: (progress: number, end: boolean) => void) => {
uploadFile2 = async (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
const url = this.serverUrl + "/upload_file"
onprogress(0, false)
const size = 1024 * 1024;
let index = 0; // index value
const fileLength = file.size
const filename = file.name
const filename = dir + file.name
const fileChunkNum = Math.ceil(fileLength / size)
while (true) {
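
uploadFile2 now takes a dir prefix and folds it into the upload name (filename = dir + file.name) before slicing the file into 1 MiB chunks; concatUploadedFile(dir + file.name, chunkNum), visible in the hook further down, reassembles the chunks server-side. A sketch of the same chunking arithmetic, assuming nothing beyond what the diff shows; plan_upload is an illustrative name:

    import math

    CHUNK_SIZE = 1024 * 1024  # 1 MiB, matching the client constant above

    def plan_upload(dir_prefix: str, filename: str, file_length: int):
        upload_name = dir_prefix + filename  # e.g. "model/" + "somemodel.pth"
        chunk_num = math.ceil(file_length / CHUNK_SIZE)
        # byte ranges of each sequential chunk
        ranges = [(i * CHUNK_SIZE, min((i + 1) * CHUNK_SIZE, file_length))
                  for i in range(chunk_num)]
        return upload_name, chunk_num, ranges

    # plan_upload("model/", "somemodel.pth", 3_500_000) -> ("model/somemodel.pth", 4, [...])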


@ -290,8 +290,8 @@ export class VoiceChangerClient {
uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile(buf, filename, onprogress)
}
uploadFile2 = (file: File, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile2(file, onprogress)
uploadFile2 = (dir: string, file: File, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile2(dir, file, onprogress)
}
concatUploadedFile = (filename: string, chunkNum: number) => {
return this.configurator.concatUploadedFile(filename, chunkNum)


@ -41,6 +41,7 @@ export type ModelFileKind = typeof ModelFileKind[keyof typeof ModelFileKind]
export type ModelFile = {
file: File,
kind: ModelFileKind
dir: string
}
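
ModelFile gains a dir field so each uploaded file can target a subfolder of its model slot; the DDSP-SVC rows in the uploader pass "model/" and "diff/". Serialized for the server (the LoadModelParamFile dataclass at the end of this commit carries the same three fields), one entry might look like this sketch; the filename is illustrative:

    # Illustrative wire form of one ModelFile / LoadModelParamFile entry.
    model_file = {
        "name": "somemodel.pth",  # hypothetical filename
        "kind": "ddspSvcModel",
        "dir": "model/",          # subfolder inside the model slot
    }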
export type ModelUploadSetting = {
@ -296,7 +297,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
if (!props.voiceChangerClient) return
console.log("uploading..1.", file)
console.log("uploading..2.", file.name)
const num = await props.voiceChangerClient.uploadFile2(file, onprogress)
const num = await props.voiceChangerClient.uploadFile2(dir, file, onprogress)
const res = await props.voiceChangerClient.concatUploadedFile(dir + file.name, num)
console.log("uploaded", num, res)
}
@ -319,11 +320,11 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
const progOffset = 100 * i * progRate
await _uploadFile2(setting.files[i].file, (progress: number, _end: boolean) => {
setUploadProgress(progress * progRate + progOffset)
})
}, setting.files[i].dir)
}
}
const params: ModelUploadSettingForServer = {
...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind } })
...setting, files: setting.files.map((f) => { return { name: f.file.name, kind: f.kind, dir: f.dir } })
}
const loadPromise = props.voiceChangerClient.loadModel(
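
The upload loop reports a single aggregate percentage: each file's 0-100 progress is scaled by progRate and shifted by 100 * i * progRate. progRate is defined outside this hunk; assuming it is 1 / fileCount, the weighting works out as in this sketch (Python for brevity; aggregate_progress is a hypothetical name):

    def aggregate_progress(file_index: int, file_count: int, file_progress: float) -> float:
        # file_progress is 0..100 for the file currently uploading
        prog_rate = 1.0 / file_count                # assumption: equal weight per file
        prog_offset = 100.0 * file_index * prog_rate
        return file_progress * prog_rate + prog_offset

    # with three files, finishing file 0 reports ~33.3, finishing file 1 reports ~66.7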


@ -38,7 +38,72 @@ class RVCModelSlot(ModelSlot):
iconFile: str = ""
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot]
@dataclass
class MMVCv13ModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "MMVCv13"
modelFile: str = ""
configFile: str = ""
srcId: int = 107
dstId: int = 100
isONNX: bool = False
samplingRate: int = 24000
name: str = ""
description: str = ""
iconFile: str = ""
@dataclass
class MMVCv15ModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "MMVCv15"
modelFile: str = ""
configFile: str = ""
srcId: int = 0
dstId: int = 101
isONNX: bool = False
samplingRate: int = 24000
name: str = ""
description: str = ""
iconFile: str = ""
@dataclass
class SoVitsSvc40ModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "so-vits-svc-40"
modelFile: str = ""
configFile: str = ""
clusterFile: str = ""
dstId: int = 0
isONNX: bool = False
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
@dataclass
class DDSPSVCModelSlot(ModelSlot):
voiceChangerType: VoiceChangerType = "DDSP-SVC"
modelFile: str = ""
configFile: str = ""
diffModelFile: str = ""
diffConfigFile: str = ""
dstId: int = 0
isONNX: bool = False
name: str = ""
description: str = ""
credit: str = ""
termsOfUseUrl: str = ""
sampleId: str = ""
iconFile: str = ""
ModelSlots: TypeAlias = Union[ModelSlot, RVCModelSlot, MMVCv13ModelSlot, MMVCv15ModelSlot, SoVitsSvc40ModelSlot, DDSPSVCModelSlot]
def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
@ -50,6 +115,14 @@ def loadSlotInfo(model_dir: str, slotIndex: int) -> ModelSlots:
slotInfo = ModelSlot(**{k: v for k, v in jsonDict.items() if k in ModelSlot.__annotations__})
if slotInfo.voiceChangerType == "RVC":
return RVCModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "MMVCv13":
return MMVCv13ModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "MMVCv15":
return MMVCv15ModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "so-vits-svc-40":
return SoVitsSvc40ModelSlot(**jsonDict)
elif slotInfo.voiceChangerType == "DDSP-SVC":
return DDSPSVCModelSlot(**jsonDict)
else:
return ModelSlot()
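
loadSlotInfo first parses the slot JSON into the base ModelSlot (filtering the dict down to the base class's annotated fields) just to read voiceChangerType, then re-parses the full dict into the concrete dataclass. A condensed sketch of the same dispatch as a type-to-class table; SLOT_CLASSES and parse_slot are hypothetical names, and dataclasses.fields() is used because __annotations__ on a subclass omits inherited fields:

    from dataclasses import fields

    SLOT_CLASSES = {
        "RVC": RVCModelSlot,
        "MMVCv13": MMVCv13ModelSlot,
        "MMVCv15": MMVCv15ModelSlot,
        "so-vits-svc-40": SoVitsSvc40ModelSlot,
        "DDSP-SVC": DDSPSVCModelSlot,
    }

    def parse_slot(json_dict: dict) -> ModelSlots:
        cls = SLOT_CLASSES.get(json_dict.get("voiceChangerType"), ModelSlot)
        # keep only keys the target dataclass declares, guarding against stale JSON
        known = {f.name for f in fields(cls)}
        return cls(**{k: v for k, v in json_dict.items() if k in known})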


@ -3,6 +3,7 @@ import os
from dataclasses import asdict
import numpy as np
import torch
from data.ModelSlot import DDSPSVCModelSlot
from voice_changer.DDSP_SVC.ModelSlot import ModelSlot
from voice_changer.DDSP_SVC.deviceManager.DeviceManager import DeviceManager
@ -21,7 +22,7 @@ from diffusion.infer_gt_mel import DiffGtMel # type: ignore
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.DDSP_SVC.DDSP_SVCSetting import DDSP_SVCSettings
from voice_changer.RVC.embedder.EmbedderManager import EmbedderManager
@ -44,11 +45,7 @@ def phase_vocoder(a, b, fade_out, fade_in):
deltaphase = deltaphase - 2 * np.pi * torch.floor(deltaphase / 2 / np.pi + 0.5)
w = 2 * np.pi * torch.arange(n // 2 + 1).to(a) + deltaphase
t = torch.arange(n).unsqueeze(-1).to(a) / n
result = (
a * (fade_out**2)
+ b * (fade_in**2)
+ torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
)
result = a * (fade_out**2) + b * (fade_in**2) + torch.sum(absab * torch.cos(w * t + phia), -1) * fade_out * fade_in / n
return result
@ -102,9 +99,7 @@ class DDSP_SVC:
def reloadModel(self):
self.device = self.deviceManager.getDevice(self.settings.gpu)
modelFile = self.settings.modelSlots[self.settings.modelSlotIndex].modelFile
diffusionFile = self.settings.modelSlots[
self.settings.modelSlotIndex
].diffusionFile
diffusionFile = self.settings.modelSlots[self.settings.modelSlotIndex].diffusionFile
self.svc_model = SvcDDSP()
self.svc_model.setVCParams(self.params)
@ -144,15 +139,11 @@ class DDSP_SVC:
# newData = newData.astype(np.float32)
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
convertSize = (
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
)
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
# if convertSize % self.hop_size != 0: # pad, because truncation occurs at the model's output hop size.
# convertSize = convertSize + (self.hop_size - (convertSize % self.hop_size))
@ -187,8 +178,7 @@ class DDSP_SVC:
f0_min=50,
f0_max=1100,
# safe_prefix_pad_length=0, # TBD: what is this?
safe_prefix_pad_length=self.settings.extraConvertSize
/ self.svc_model.args.data.sampling_rate,
safe_prefix_pad_length=self.settings.extraConvertSize / self.svc_model.args.data.sampling_rate,
diff_model=self.diff_model,
diff_acc=self.settings.diffAcc, # TBD: what is this?
diff_spk_id=self.settings.diffSpkId,
@ -196,9 +186,7 @@ class DDSP_SVC:
# diff_use_dpm=True if self.settings.useDiffDpm == 1 else False, # TBD: what is this?
method=self.settings.diffMethod,
k_step=self.settings.kStep, # TBD: what is this?
diff_silence=True
if self.settings.useDiffSilence == 1
else False, # TBD: what is this?
diff_silence=True if self.settings.useDiffSilence == 1 else False, # TBD: what is this?
)
return _audio.cpu().numpy() * 32768.0
@ -210,9 +198,21 @@ class DDSP_SVC:
audio = self._pyTorch_inference(data)
return audio
# def destroy(self):
# del self.net_g
# del self.onnx_session
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: DDSPSVCModelSlot = DDSPSVCModelSlot()
for file in props.files:
if file.kind == "ddspSvcModelConfig":
slotInfo.configFile = file.name
elif file.kind == "ddspSvcModel":
slotInfo.modelFile = file.name
elif file.kind == "ddspSvcDiffusionConfig":
slotInfo.diffConfigFile = file.name
elif file.kind == "ddspSvcDiffusion":
slotInfo.diffModelFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
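
MMVCv13, MMVCv15, SoVitsSvc40 and DDSP_SVC all gain the same shape of loadModel2: a classmethod that maps each uploaded file's kind onto a slot attribute, then derives isONNX and the display name from the model filename, without loading any weights. The shared pattern, factored out as a sketch (KIND_TO_ATTR and build_slot_info are hypothetical names, shown here for the DDSP-SVC kinds):

    import os

    KIND_TO_ATTR = {
        "ddspSvcModel": "modelFile",
        "ddspSvcModelConfig": "configFile",
        "ddspSvcDiffusion": "diffModelFile",
        "ddspSvcDiffusionConfig": "diffConfigFile",
    }

    def build_slot_info(slot_info, files):
        for f in files:
            attr = KIND_TO_ATTR.get(f.kind)
            if attr is not None:
                setattr(slot_info, attr, f.name)
        slot_info.isONNX = slot_info.modelFile.endswith(".onnx")
        # display name = model filename without directory or extension
        slot_info.name = os.path.splitext(os.path.basename(slot_info.modelFile))[0]
        return slot_info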


@ -1,7 +1,8 @@
import sys
import os
from data.ModelSlot import MMVCv13ModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
if sys.platform.startswith("darwin"):
@ -77,13 +78,7 @@ class MMVCv13:
# create PyTorch model
if self.settings.pyTorchModelFile is not None:
self.net_g = SynthesizerTrn(
len(symbols),
self.hps.data.filter_length // 2 + 1,
self.hps.train.segment_size // self.hps.data.hop_length,
n_speakers=self.hps.data.n_speakers,
**self.hps.model
)
self.net_g = SynthesizerTrn(len(symbols), self.hps.data.filter_length // 2 + 1, self.hps.train.segment_size // self.hps.data.hop_length, n_speakers=self.hps.data.n_speakers, **self.hps.model)
self.net_g.eval()
load_checkpoint(self.settings.pyTorchModelFile, self.net_g, None)
@ -154,9 +149,7 @@ class MMVCv13:
def get_info(self):
data = asdict(self.settings)
data["onnxExecutionProviders"] = (
self.onnx_session.get_providers() if self.onnx_session is not None else []
)
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files:
if data[f] is not None and os.path.exists(data[f]):
@ -193,9 +186,7 @@ class MMVCv13:
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
@ -204,9 +195,7 @@ class MMVCv13:
# if convertSize < 8192:
# convertSize = 8192
if convertSize % self.hps.data.hop_length != 0: # pad, because truncation occurs at the model's output hop size.
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
)
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:] # extract only the portion to be converted
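
This buffering logic is shared across the voice changers in this commit: new audio is appended to a rolling buffer, the conversion window is sized as inputSize + crossfadeSize + solaSearchFrame + extraConvertSize, rounded up to a multiple of the model's hop length so nothing is truncated at the output, and only the buffer tail is converted. As a worked sketch:

    import numpy as np

    def prepare_convert_window(buffer, new_data, input_size, crossfade_size,
                               sola_search_frame, extra_convert_size, hop_length):
        buffer = new_data if buffer is None else np.concatenate([buffer, new_data], 0)
        convert_size = input_size + crossfade_size + sola_search_frame + extra_convert_size
        if convert_size % hop_length != 0:
            # round up: e.g. hop_length 256 turns 5000 into 5120
            convert_size += hop_length - (convert_size % hop_length)
        return buffer, buffer[-convert_size:]  # only the tail gets converted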
@ -238,7 +227,9 @@ class MMVCv13:
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy(),
},
)[0][0, 0]
)[
0
][0, 0]
* self.hps.data.max_wav_value
)
return audio1
@ -254,19 +245,10 @@ class MMVCv13:
dev = torch.device("cuda", index=self.settings.gpu)
with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [
x.to(dev) for x in data
]
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.to(dev) for x in data]
sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
audio1 = (
self.net_g.to(dev)
.voice_conversion(
spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target
)[0, 0]
.data
* self.hps.data.max_wav_value
)
audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, sid_src=sid_src, sid_tgt=sid_target)[0, 0].data * self.hps.data.max_wav_value
result = audio1.float().cpu().numpy()
return result
@ -278,6 +260,18 @@ class MMVCv13:
audio = self._pyTorch_inference(data)
return audio
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: MMVCv13ModelSlot = MMVCv13ModelSlot()
for file in props.files:
if file.kind == "mmvcv13Model":
slotInfo.modelFile = file.name
elif file.kind == "mmvcv13Config":
slotInfo.configFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
del self.onnx_session


@ -1,7 +1,8 @@
import sys
import os
from data.ModelSlot import MMVCv15ModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
if sys.platform.startswith("darwin"):
@ -172,12 +173,7 @@ class MMVCv15:
def get_info(self):
data = asdict(self.settings)
data["onnxExecutionProviders"] = (
self.onnx_session.get_providers()
if self.settings.onnxModelFile != ""
and self.settings.onnxModelFile is not None
else []
)
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.settings.onnxModelFile != "" and self.settings.onnxModelFile is not None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files:
if data[f] is not None and os.path.exists(data[f]):
@ -195,9 +191,7 @@ class MMVCv15:
def _get_f0(self, detector: str, newData: AudioInOut):
audio_norm_np = newData.astype(np.float64)
if detector == "dio":
_f0, _time = pw.dio(
audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5
)
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
else:
f0, t = pw.harvest(
@ -207,9 +201,7 @@ class MMVCv15:
f0_floor=71.0,
f0_ceil=1000.0,
)
f0 = convert_continuos_f0(
f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length)
)
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
f0 = torch.from_numpy(f0.astype(np.float32))
return f0
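
_get_f0 picks between two pyworld pitch trackers: dio (fast, refined by stonemask) or harvest (slower, smoother), and the harvest branch additionally fills unvoiced gaps via convert_continuos_f0. A minimal standalone sketch of the two paths, using only the pyworld calls visible in the diff; extract_f0 is an illustrative name:

    import numpy as np
    import pyworld as pw

    def extract_f0(audio: np.ndarray, sampling_rate: int, detector: str = "dio") -> np.ndarray:
        x = audio.astype(np.float64)
        if detector == "dio":
            _f0, _time = pw.dio(x, sampling_rate, frame_period=5.5)
            f0 = pw.stonemask(x, _f0, _time, sampling_rate)  # refine the raw dio track
        else:
            f0, _time = pw.harvest(x, sampling_rate, f0_floor=71.0, f0_ceil=1000.0)
        return f0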
@ -237,9 +229,7 @@ class MMVCv15:
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
@ -248,9 +238,7 @@ class MMVCv15:
# if convertSize < 8192:
# convertSize = 8192
if convertSize % self.hps.data.hop_length != 0: # pad, because truncation occurs at the model's output hop size.
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
)
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
# ONNX uses a fixed length
if self.settings.framework == "ONNX":
@ -290,16 +278,15 @@ class MMVCv15:
"sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy(),
},
)[0][0, 0]
)[
0
][0, 0]
* self.hps.data.max_wav_value
)
return audio1
def _pyTorch_inference(self, data):
if (
self.settings.pyTorchModelFile == ""
or self.settings.pyTorchModelFile is None
):
if self.settings.pyTorchModelFile == "" or self.settings.pyTorchModelFile is None:
print("[Voice Changer] No pyTorch session.")
raise NoModeLoadedException("pytorch")
@ -316,12 +303,7 @@ class MMVCv15:
sid_src = sid_src.to(dev)
sid_target = torch.LongTensor([self.settings.dstId]).to(dev)
audio1 = (
self.net_g.to(dev)
.voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0]
.data
* self.hps.data.max_wav_value
)
audio1 = self.net_g.to(dev).voice_conversion(spec, spec_lengths, f0, sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
result = audio1.float().cpu().numpy()
return result
@ -336,6 +318,18 @@ class MMVCv15:
print(_e)
raise ONNXInputArgumentException()
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: MMVCv15ModelSlot = MMVCv15ModelSlot()
for file in props.files:
if file.kind == "mmvcv15Model":
slotInfo.modelFile = file.name
elif file.kind == "mmvcv15Config":
slotInfo.configFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
del self.onnx_session


@ -1,7 +1,8 @@
import sys
import os
from data.ModelSlot import SoVitsSvc40ModelSlot
from voice_changer.utils.LoadModelParams import LoadModelParams
from voice_changer.utils.LoadModelParams import LoadModelParams, LoadModelParams2
from voice_changer.utils.VoiceChangerModel import AudioInOut
from voice_changer.utils.VoiceChangerParams import VoiceChangerParams
@ -97,11 +98,7 @@ class SoVitsSvc40:
self.settings.pyTorchModelFile = modelFile
self.settings.onnxModelFile = None
clusterTorchModel = (
params["files"]["soVitsSvc40Cluster"]
if "soVitsSvc40Cluster" in params["files"]
else None
)
clusterTorchModel = params["files"]["soVitsSvc40Cluster"] if "soVitsSvc40Cluster" in params["files"] else None
content_vec_path = self.params.content_vec_500
content_vec_onnx_path = self.params.content_vec_500_onnx
@ -212,9 +209,7 @@ class SoVitsSvc40:
def get_info(self):
data = asdict(self.settings)
data["onnxExecutionProviders"] = (
self.onnx_session.get_providers() if self.onnx_session is not None else []
)
data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session is not None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files:
if data[f] is not None and os.path.exists(data[f]):
@ -246,9 +241,7 @@ class SoVitsSvc40:
)
if wav_44k.shape[0] % self.hps.data.hop_length != 0:
print(
f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}"
)
print(f" !!! !!! !!! wav size not multiple of hopsize: {wav_44k.shape[0] / self.hps.data.hop_length}")
f0, uv = utils.interpolate_f0(f0)
f0 = torch.FloatTensor(f0)
@ -257,14 +250,10 @@ class SoVitsSvc40:
f0 = f0.unsqueeze(0)
uv = uv.unsqueeze(0)
wav16k_numpy = librosa.resample(
audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000
)
wav16k_numpy = librosa.resample(audio_buffer, orig_sr=self.hps.data.sampling_rate, target_sr=16000)
wav16k_tensor = torch.from_numpy(wav16k_numpy)
if (
self.settings.gpu < 0 or self.gpu_num == 0
) or self.settings.framework == "ONNX":
if (self.settings.gpu < 0 or self.gpu_num == 0) or self.settings.framework == "ONNX":
dev = torch.device("cpu")
else:
dev = torch.device("cuda", index=self.settings.gpu)
@ -282,44 +271,27 @@ class SoVitsSvc40:
if self.hps.model.ssl_dim == 768:
self.hubert_model = self.hubert_model.to(dev)
wav16k_tensor = wav16k_tensor.to(dev)
c = get_hubert_content_layer9(
self.hubert_model, wav_16k_tensor=wav16k_tensor
)
c = get_hubert_content_layer9(self.hubert_model, wav_16k_tensor=wav16k_tensor)
else:
self.hubert_model = self.hubert_model.to(dev)
wav16k_tensor = wav16k_tensor.to(dev)
c = utils.get_hubert_content(
self.hubert_model, wav_16k_tensor=wav16k_tensor
)
c = utils.get_hubert_content(self.hubert_model, wav_16k_tensor=wav16k_tensor)
uv = uv.to(dev)
f0 = f0.to(dev)
c = utils.repeat_expand_2d(c.squeeze(0), f0.shape[1])
if (
self.settings.clusterInferRatio != 0
and hasattr(self, "cluster_model")
and self.cluster_model is not None
):
speaker = [
key
for key, value in self.settings.speakers.items()
if value == self.settings.dstId
]
if self.settings.clusterInferRatio != 0 and hasattr(self, "cluster_model") and self.cluster_model is not None:
speaker = [key for key, value in self.settings.speakers.items() if value == self.settings.dstId]
if len(speaker) != 1:
pass
# print("not only one speaker found.", speaker)
else:
cluster_c = cluster.get_cluster_center_result(
self.cluster_model, c.cpu().numpy().T, speaker[0]
).T
cluster_c = cluster.get_cluster_center_result(self.cluster_model, c.cpu().numpy().T, speaker[0]).T
cluster_c = torch.FloatTensor(cluster_c).to(dev)
c = c.to(dev)
c = (
self.settings.clusterInferRatio * cluster_c
+ (1 - self.settings.clusterInferRatio) * c
)
c = self.settings.clusterInferRatio * cluster_c + (1 - self.settings.clusterInferRatio) * c
c = c.unsqueeze(0)
return c, f0, uv
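
When a cluster model is loaded and clusterInferRatio is non-zero, the HuBERT content features are pulled toward the target speaker's cluster centers by plain linear interpolation, c = r * cluster_c + (1 - r) * c. As a one-function illustration (blend_with_cluster is a hypothetical name):

    import torch

    def blend_with_cluster(c: torch.Tensor, cluster_c: torch.Tensor, ratio: float) -> torch.Tensor:
        # ratio 0 keeps the raw content features; ratio 1 snaps fully to the
        # cluster centers, trading naturalness for target-speaker timbre
        return ratio * cluster_c + (1.0 - ratio) * c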
@ -334,20 +306,14 @@ class SoVitsSvc40:
newData = newData.astype(np.float32) / self.hps.data.max_wav_value
if self.audio_buffer is not None:
self.audio_buffer = np.concatenate(
[self.audio_buffer, newData], 0
) # concatenate with past data
self.audio_buffer = np.concatenate([self.audio_buffer, newData], 0) # concatenate with past data
else:
self.audio_buffer = newData
convertSize = (
inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
)
convertSize = inputSize + crossfadeSize + solaSearchFrame + self.settings.extraConvertSize
if convertSize % self.hps.data.hop_length != 0: # pad, because truncation occurs at the model's output hop size.
convertSize = convertSize + (
self.hps.data.hop_length - (convertSize % self.hps.data.hop_length)
)
convertSize = convertSize + (self.hps.data.hop_length - (convertSize % self.hps.data.hop_length))
convertOffset = -1 * convertSize
self.audio_buffer = self.audio_buffer[convertOffset:] # extract only the portion to be converted
@ -389,9 +355,7 @@ class SoVitsSvc40:
"f0": f0.astype(np.float32),
"uv": uv.astype(np.float32),
"g": sid_target.astype(np.int64),
"noise_scale": np.array([self.settings.noiseScale]).astype(
np.float32
),
"noise_scale": np.array([self.settings.noiseScale]).astype(np.float32),
# "predict_f0": np.array([self.settings.dstId]).astype(np.int64),
},
)[0][0, 0]
@ -457,6 +421,20 @@ class SoVitsSvc40:
return audio
@classmethod
def loadModel2(cls, props: LoadModelParams2):
slotInfo: SoVitsSvc40ModelSlot = SoVitsSvc40ModelSlot()
for file in props.files:
if file.kind == "soVitsSvc40Config":
slotInfo.configFile = file.name
elif file.kind == "soVitsSvc40Model":
slotInfo.modelFile = file.name
elif file.kind == "soVitsSvc40Cluster":
slotInfo.clusterFile = file.name
slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")
slotInfo.name = os.path.splitext(os.path.basename(slotInfo.modelFile))[0]
return slotInfo
def __del__(self):
del self.net_g
del self.onnx_session


@ -95,22 +95,48 @@ class VoiceChangerManager(ServerDeviceCallbacks):
# unpack the data
params = LoadModelParams2(**paramDict)
params.files = [LoadModelParamFile(**x) for x in paramDict["files"]]
# copy the files into the slot
for file in params.files:
print("FILE", file)
srcPath = os.path.join(UPLOAD_DIR, file.name)
dstDir = os.path.join(self.params.model_dir, str(params.slot))
srcPath = os.path.join(UPLOAD_DIR, file.dir, file.name)
dstDir = os.path.join(
self.params.model_dir,
str(params.slot),
file.dir,
)
dstPath = os.path.join(dstDir, file.name)
os.makedirs(dstDir, exist_ok=True)
print(f"move to {srcPath} -> {dstPath}")
shutil.move(srcPath, dstPath)
file.name = dstPath
# create metadata (defined per VC)
if params.voiceChangerType == "RVC":
from voice_changer.RVC.RVC import RVC # importing at startup means the parameters cannot be obtained.
slotInfo = RVC.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv13":
from voice_changer.MMVCv13.MMVCv13 import MMVCv13
slotInfo = MMVCv13.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "MMVCv15":
from voice_changer.MMVCv15.MMVCv15 import MMVCv15
slotInfo = MMVCv15.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "so-vits-svc-40":
from voice_changer.SoVitsSvc40.SoVitsSvc40 import SoVitsSvc40
slotInfo = SoVitsSvc40.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
elif params.voiceChangerType == "DDSP-SVC":
from voice_changer.DDSP_SVC.DDSP_SVC import DDSP_SVC
slotInfo = DDSP_SVC.loadModel2(params)
self.modelSlotManager.save_model_slot(params.slot, slotInfo)
print("params", params)
else:
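
With dir threaded through the whole pipeline, an uploaded file now travels from UPLOAD_DIR/&lt;dir&gt;/&lt;name&gt; to &lt;model_dir&gt;/&lt;slot&gt;/&lt;dir&gt;/&lt;name&gt;, so a DDSP-SVC slot keeps its main model under model/ and its diffusion model under diff/. A sketch of the move (place_uploaded_file is a hypothetical name; paths are illustrative):

    import os
    import shutil

    def place_uploaded_file(upload_dir, model_dir, slot, file_dir, file_name):
        src = os.path.join(upload_dir, file_dir, file_name)
        dst_dir = os.path.join(model_dir, str(slot), file_dir)
        os.makedirs(dst_dir, exist_ok=True)
        dst = os.path.join(dst_dir, file_name)
        shutil.move(src, dst)
        return dst  # e.g. <model_dir>/3/diff/diffusion.pt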


@ -43,6 +43,7 @@ LoadModelParamFileKind: TypeAlias = Literal[
class LoadModelParamFile:
name: str
kind: LoadModelParamFileKind
dir: str
@dataclass