// (*1) The chunk size is defined as 128 samples, i.e. 256 bytes (int16).
// (*2) 256 bytes (the number of samples decimated from the minimum buffer size of 256, x 2 bytes) are managed as one chunk.
// 24000 samples -> 1 sec, 128 samples (1 chunk) -> 5.333 msec
// 187.5 chunks -> 1 sec
2023-01-05 02:28:36 +09:00
2023-06-19 11:40:16 +09:00
/**
 * Identifiers of the voice changer types.
 * Every key maps to the identical string value.
 */
export const VoiceChangerType = {
    MMVCv15: "MMVCv15",
    MMVCv13: "MMVCv13",
    "so-vits-svc-40": "so-vits-svc-40",
    "DDSP-SVC": "DDSP-SVC",
    RVC: "RVC",
    "Diffusion-SVC": "Diffusion-SVC",
    Beatrice: "Beatrice",
    LLVC: "LLVC",
    WebModel: "WebModel",
    EasyVC: "EasyVC",
} as const;
/** Union of the string values of the `VoiceChangerType` object. */
export type VoiceChangerType = (typeof VoiceChangerType)[keyof typeof VoiceChangerType];
2023-06-19 11:40:16 +09:00
2023-11-04 04:34:43 +09:00
/** Identifiers of static models; usable in place of a numeric slot index (see `slotIndex` / `modelSlotIndex`). */
export const StaticModel = {
    BeatriceJVS: "Beatrice-JVS",
} as const;
/** Union of the string values of the `StaticModel` object. */
export type StaticModel = (typeof StaticModel)[keyof typeof StaticModel];
///////////////////////
// Server settings
///////////////////////
/** Selectable input sample rates, keyed by their decimal string. */
export const InputSampleRate = {
    "48000": 48000,
    "44100": 44100,
    "24000": 24000,
} as const;
/** Union of the numeric values of the `InputSampleRate` object. */
export type InputSampleRate = (typeof InputSampleRate)[keyof typeof InputSampleRate];
2023-04-08 04:39:04 +09:00
/** Selectable model sampling rates, keyed by their decimal string. */
export const ModelSamplingRate = {
    "48000": 48000,
    "40000": 40000,
    "32000": 32000,
} as const;
/**
 * Union of the numeric values of the `ModelSamplingRate` object
 * (48000 | 40000 | 32000).
 *
 * The alias used to be derived from `InputSampleRate`, which made it
 * 48000 | 44100 | 24000 and rejected the 40000 and 32000 rates listed above.
 */
export type ModelSamplingRate = (typeof ModelSamplingRate)[keyof typeof ModelSamplingRate];
2023-04-08 04:39:04 +09:00
/** Selectable cross-fade overlap sizes, keyed by their decimal string. */
export const CrossFadeOverlapSize = {
    "128": 128,
    "256": 256,
    "512": 512,
    "1024": 1024,
    "2048": 2048,
    "4096": 4096,
} as const;
/** Union of the numeric values of the `CrossFadeOverlapSize` object. */
export type CrossFadeOverlapSize = (typeof CrossFadeOverlapSize)[keyof typeof CrossFadeOverlapSize];
2023-02-14 22:32:25 +09:00
/**
 * Identifiers of the selectable F0 detectors.
 * Every key maps to the identical string value.
 */
export const F0Detector = {
    dio: "dio",
    harvest: "harvest",
    crepe: "crepe",
    crepe_full: "crepe_full",
    crepe_tiny: "crepe_tiny",
    rmvpe: "rmvpe",
    rmvpe_onnx: "rmvpe_onnx",
    fcpe: "fcpe",
} as const;
/** Union of the string values of the `F0Detector` object. */
export type F0Detector = (typeof F0Detector)[keyof typeof F0Detector];
2023-05-14 01:04:29 +09:00
/**
 * Identifiers of the selectable diffusion methods.
 * Every key maps to the identical string value.
 */
export const DiffMethod = {
    pndm: "pndm",
    "dpm-solver": "dpm-solver",
} as const;
/** Union of the string values of the `DiffMethod` object. */
export type DiffMethod = (typeof DiffMethod)[keyof typeof DiffMethod];
2023-02-19 19:22:00 +09:00
2023-06-19 11:40:16 +09:00
/**
 * Identifiers of the RVC model types.
 * Every key maps to the identical string value.
 */
export const RVCModelType = {
    pyTorchRVC: "pyTorchRVC",
    pyTorchRVCNono: "pyTorchRVCNono",
    pyTorchRVCv2: "pyTorchRVCv2",
    pyTorchRVCv2Nono: "pyTorchRVCv2Nono",
    pyTorchWebUI: "pyTorchWebUI",
    pyTorchWebUINono: "pyTorchWebUINono",
    onnxRVC: "onnxRVC",
    onnxRVCNono: "onnxRVCNono",
} as const;
/** Union of the string values of the `RVCModelType` object. */
export type RVCModelType = (typeof RVCModelType)[keyof typeof RVCModelType];
2023-02-19 19:22:00 +09:00
/**
 * Names of the server setting keys.
 * Every key maps to the identical string value.
 */
export const ServerSettingKey = {
    passThrough: "passThrough",
    srcId: "srcId",
    dstId: "dstId",
    gpu: "gpu",

    crossFadeOffsetRate: "crossFadeOffsetRate",
    crossFadeEndRate: "crossFadeEndRate",
    crossFadeOverlapSize: "crossFadeOverlapSize",

    framework: "framework",
    onnxExecutionProvider: "onnxExecutionProvider",

    f0Factor: "f0Factor",
    f0Detector: "f0Detector",
    recordIO: "recordIO",

    enableServerAudio: "enableServerAudio",
    serverAudioStated: "serverAudioStated",
    serverAudioSampleRate: "serverAudioSampleRate",
    serverInputAudioSampleRate: "serverInputAudioSampleRate",
    serverOutputAudioSampleRate: "serverOutputAudioSampleRate",
    serverMonitorAudioSampleRate: "serverMonitorAudioSampleRate",
    serverInputAudioBufferSize: "serverInputAudioBufferSize",
    serverOutputAudioBufferSize: "serverOutputAudioBufferSize",
    serverInputDeviceId: "serverInputDeviceId",
    serverOutputDeviceId: "serverOutputDeviceId",
    serverMonitorDeviceId: "serverMonitorDeviceId",
    serverReadChunkSize: "serverReadChunkSize",
    serverInputAudioGain: "serverInputAudioGain",
    serverOutputAudioGain: "serverOutputAudioGain",
    serverMonitorAudioGain: "serverMonitorAudioGain",

    tran: "tran",
    noiseScale: "noiseScale",
    predictF0: "predictF0",
    silentThreshold: "silentThreshold",
    extraConvertSize: "extraConvertSize",
    clusterInferRatio: "clusterInferRatio",

    indexRatio: "indexRatio",
    protect: "protect",
    rvcQuality: "rvcQuality",
    modelSamplingRate: "modelSamplingRate",
    silenceFront: "silenceFront",
    modelSlotIndex: "modelSlotIndex",

    useEnhancer: "useEnhancer",
    useDiff: "useDiff",
    // "useDiffDpm": "useDiffDpm",
    diffMethod: "diffMethod",
    useDiffSilence: "useDiffSilence",
    diffAcc: "diffAcc",
    diffSpkId: "diffSpkId",
    kStep: "kStep",
    threshold: "threshold",

    speedUp: "speedUp",
    skipDiffusion: "skipDiffusion",

    inputSampleRate: "inputSampleRate",
    enableDirectML: "enableDirectML",
} as const;
/** Union of the string values of the `ServerSettingKey` object. */
export type ServerSettingKey = (typeof ServerSettingKey)[keyof typeof ServerSettingKey];
/**
 * Server-side settings of the voice changer.
 * Several on/off switches are typed as numbers (0: off, 1: on), as noted per field.
 */
export type VoiceChangerServerSetting = {
    passThrough: boolean;
    srcId: number;
    dstId: number;
    gpu: number;

    crossFadeOffsetRate: number;
    crossFadeEndRate: number;
    crossFadeOverlapSize: CrossFadeOverlapSize;

    f0Factor: number;
    f0Detector: F0Detector; // one of the F0Detector values
    recordIO: number; // 0:off, 1:on

    enableServerAudio: number; // 0:off, 1:on
    serverAudioStated: number; // 0:off, 1:on
    serverAudioSampleRate: number;
    serverInputAudioSampleRate: number;
    serverOutputAudioSampleRate: number;
    serverMonitorAudioSampleRate: number;
    serverInputAudioBufferSize: number;
    serverOutputAudioBufferSize: number;
    serverInputDeviceId: number;
    serverOutputDeviceId: number;
    serverMonitorDeviceId: number;
    serverReadChunkSize: number;
    serverInputAudioGain: number;
    serverOutputAudioGain: number;
    serverMonitorAudioGain: number;

    tran: number; // so-vits-svc
    noiseScale: number; // so-vits-svc
    predictF0: number; // so-vits-svc
    silentThreshold: number; // so-vits-svc
    extraConvertSize: number; // so-vits-svc
    clusterInferRatio: number; // so-vits-svc

    indexRatio: number; // RVC
    protect: number; // RVC
    rvcQuality: number; // 0:low, 1:high
    silenceFront: number; // 0:off, 1:on
    modelSamplingRate: ModelSamplingRate; // 32000,40000,48000
    modelSlotIndex: number | StaticModel;

    useEnhancer: number; // DDSP-SVC
    useDiff: number; // DDSP-SVC
    // useDiffDpm: number// DDSP-SVC
    diffMethod: DiffMethod; // DDSP-SVC
    useDiffSilence: number; // DDSP-SVC
    diffAcc: number; // DDSP-SVC
    diffSpkId: number; // DDSP-SVC
    kStep: number; // DDSP-SVC
    threshold: number; // DDSP-SVC

    speedUp: number; // Diffusion-SVC
    skipDiffusion: number; // Diffusion-SVC 0:off, 1:on

    inputSampleRate: InputSampleRate;
    enableDirectML: number;
};
2023-01-05 02:28:36 +09:00
/** Fields shared by every model slot type; the specific slot types intersect with this. */
type ModelSlot = {
    slotIndex: number | StaticModel;
    voiceChangerType: VoiceChangerType;
    name: string;
    description: string;
    credit: string;
    termsOfUseUrl: string;
    iconFile: string;
    speakers: { [key: number]: string };
};
2023-06-19 11:40:16 +09:00
/** Model slot for an RVC model: the common `ModelSlot` fields plus RVC-specific ones. */
export type RVCModelSlot = ModelSlot & {
    modelFile: string;
    indexFile: string;
    defaultIndexRatio: number;
    defaultProtect: number;
    defaultTune: number;
    modelType: RVCModelType;

    embChannels: number;
    f0: boolean;
    samplingRate: number;
    deprecated: boolean;
};
2023-06-21 03:32:18 +09:00
/** Model slot for an MMVCv13 model: the common `ModelSlot` fields plus MMVCv13-specific ones. */
export type MMVCv13ModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    srcId: number;
    dstId: number;

    samplingRate: number;
    // Same declaration as on ModelSlot; repeated here as in the original.
    speakers: { [key: number]: string };
};
2023-06-21 03:32:18 +09:00
/** Model slot for an MMVCv15 model: the common `ModelSlot` fields plus MMVCv15-specific ones. */
export type MMVCv15ModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    srcId: number;
    dstId: number;
    f0Factor: number;
    samplingRate: number;
    // Numeric key -> numeric value map (unlike the boolean `f0` flag of RVCModelSlot).
    f0: { [key: number]: number };
};
2023-06-21 03:32:18 +09:00
/** Model slot for a so-vits-svc-40 model: the common `ModelSlot` fields plus model-specific ones. */
export type SoVitsSvc40ModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    clusterFile: string;
    dstId: number;

    samplingRate: number;

    defaultTune: number;
    defaultClusterInferRatio: number;
    noiseScale: number;
    // Same declaration as on ModelSlot; repeated here as in the original.
    speakers: { [key: number]: string };
};
2023-06-21 03:32:18 +09:00
/** Model slot for a DDSP-SVC model: the common `ModelSlot` fields plus model-specific ones. */
export type DDSPSVCModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;
    diffModelFile: string;
    diffConfigFile: string;
    dstId: number;

    samplingRate: number;

    defaultTune: number;
    enhancer: boolean;
    diffusion: boolean;
    acc: number;
    kstep: number;
    // Same declaration as on ModelSlot; repeated here as in the original.
    speakers: { [key: number]: string };
};
2023-07-15 18:35:11 +09:00
/** Model slot for a Diffusion-SVC model: the common `ModelSlot` fields plus model-specific ones. */
export type DiffusionSVCModelSlot = ModelSlot & {
    modelFile: string;
    dstId: number;

    samplingRate: number;

    defaultTune: number;
    defaultKstep: number;
    defaultSpeedup: number;
    kStepMax: number;
    nLayers: number;
    nnLayers: number;
    // Same declaration as on ModelSlot; repeated here as in the original.
    speakers: { [key: number]: string };
};
2023-08-09 16:55:59 +09:00
/** Model slot for a Beatrice model: the common `ModelSlot` fields plus model-specific ones. */
export type BeatriceModelSlot = ModelSlot & {
    modelFile: string;
    dstId: number;

    // Same declaration as on ModelSlot; repeated here as in the original.
    speakers: { [key: number]: string };
};
2023-08-09 16:55:59 +09:00
2023-11-12 23:10:58 +09:00
/** Model slot for an LLVC model: the common `ModelSlot` fields plus model-specific ones. */
export type LLVCModelSlot = ModelSlot & {
    modelFile: string;
    configFile: string;

    // Same declaration as on ModelSlot; repeated here as in the original.
    speakers: { [key: number]: string };
};
/** Model slot for a WebModel: the common `ModelSlot` fields plus model-specific ones. */
export type WebModelSlot = ModelSlot & {
    modelFile: string;
    defaultTune: number;
    modelType: RVCModelType;
    f0: boolean;
    samplingRate: number;
};
2024-02-28 23:23:22 +09:00
/** Any of the concrete model slot types. */
export type ModelSlotUnion =
    | RVCModelSlot
    | MMVCv13ModelSlot
    | MMVCv15ModelSlot
    | SoVitsSvc40ModelSlot
    | DDSPSVCModelSlot
    | DiffusionSVCModelSlot
    | BeatriceModelSlot
    | LLVCModelSlot
    | WebModelSlot;
2023-06-19 11:40:16 +09:00
2023-05-07 04:18:18 +09:00
/** Description of one server-side audio device, either an input or an output. */
type ServerAudioDevice = {
    kind: "audioinput" | "audiooutput";
    index: number;
    name: string;
    hostAPI: string;
};
2023-05-07 04:18:18 +09:00
/**
 * Server settings plus additional information about the server.
 * Per the original comment, the extra fields are not configurable and can
 * only be obtained via getInfo.
 */
export type ServerInfo = VoiceChangerServerSetting & {
    // Not configurable (information that can only be obtained via getInfo)
    status: string;
    modelSlots: ModelSlotUnion[];
    serverAudioInputDevices: ServerAudioDevice[];
    serverAudioOutputDevices: ServerAudioDevice[];
    sampleModels: (RVCSampleModel | DiffusionSVCSampleModel)[];
    gpus: {
        id: number;
        name: string;
        memory: number;
    }[];
    maxInputLength: number; // MMVCv15
    voiceChangerParams: {
        model_dir: string;
    };
};
2023-07-21 18:25:28 +09:00
/** Fields shared by every sample model description. */
export type SampleModel = {
    id: string;
    voiceChangerType: VoiceChangerType;
    lang: string;
    tag: string[];
    name: string;
    modelUrl: string;
    termsOfUseUrl: string;
    icon: string;
    credit: string;
    description: string;
    sampleRate: number;
    modelType: string;
    f0: boolean;
};
/** Sample model description for RVC: the common `SampleModel` fields plus index and feature URLs. */
export type RVCSampleModel = SampleModel & {
    indexUrl: string;
    featureUrl: string;
};
/** Sample model description for Diffusion-SVC: the common `SampleModel` fields plus layer counts and the maximum k-step. */
export type DiffusionSVCSampleModel = SampleModel & {
    numOfDiffLayers: number;
    numOfNativeLayers: number;
    maxKStep: number;
};
2023-07-21 18:25:28 +09:00
2023-04-20 17:17:43 +09:00
/** Default values for every field of `ServerInfo`. */
export const DefaultServerSetting: ServerInfo = {
    // VC Common
    passThrough: false,
    inputSampleRate: 48000,

    crossFadeOffsetRate: 0.0,
    crossFadeEndRate: 1.0,
    crossFadeOverlapSize: CrossFadeOverlapSize[1024],

    recordIO: 0,

    enableServerAudio: 0,
    serverAudioStated: 0,
    serverAudioSampleRate: 48000,
    serverInputAudioSampleRate: 48000,
    serverOutputAudioSampleRate: 48000,
    serverMonitorAudioSampleRate: 48000,
    serverInputAudioBufferSize: 1024 * 24,
    serverOutputAudioBufferSize: 1024 * 24,
    serverInputDeviceId: -1, // -1 for all three device ids
    serverOutputDeviceId: -1,
    serverMonitorDeviceId: -1,
    serverReadChunkSize: 256,
    serverInputAudioGain: 1.0,
    serverOutputAudioGain: 1.0,
    serverMonitorAudioGain: 1.0,

    // VC Specific
    srcId: 0,
    dstId: 1,
    gpu: 0,

    f0Factor: 1.0,
    f0Detector: F0Detector.rmvpe_onnx,

    tran: 0,
    noiseScale: 0,
    predictF0: 0,
    silentThreshold: 0,
    extraConvertSize: 0,
    clusterInferRatio: 0,

    indexRatio: 0,
    protect: 0.5,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    silenceFront: 1,
    modelSlotIndex: 0,
    sampleModels: [],
    gpus: [],

    useEnhancer: 0,
    useDiff: 1,
    diffMethod: "dpm-solver",
    useDiffSilence: 0,
    diffAcc: 20,
    diffSpkId: 1,
    kStep: 120,
    threshold: -45,

    speedUp: 10,
    skipDiffusion: 1,

    enableDirectML: 0,
    //
    status: "ok",
    modelSlots: [],
    serverAudioInputDevices: [],
    serverAudioOutputDevices: [],

    maxInputLength: 128 * 2048,
    voiceChangerParams: {
        model_dir: "",
    },
};
2023-04-06 04:03:09 +09:00
///////////////////////
// Worklet settings
///////////////////////
2023-01-12 04:52:01 +09:00
/**
 * Settings of the worklet.
 * NOTE: the property names are misspelled ("Trancate", "Treshold"); they are
 * part of the public shape and are kept unchanged.
 */
export type WorkletSetting = {
    numTrancateTreshold: number;
    volTrancateThreshold: number;
    volTrancateLength: number;
};
2023-02-19 14:20:37 +09:00
///////////////////////
// Worklet Node settings
///////////////////////
2023-01-05 11:45:42 +09:00
/**
 * Identifiers of the selectable protocols.
 * Every key maps to the identical string value.
 */
export const Protocol = {
    sio: "sio",
    rest: "rest",
    internal: "internal",
} as const;
/** Union of the string values of the `Protocol` object. */
export type Protocol = (typeof Protocol)[keyof typeof Protocol];
2023-01-05 02:28:36 +09:00
2023-02-19 14:20:37 +09:00
/** Selectable sending sample rates, keyed by their decimal string. */
export const SendingSampleRate = {
    "48000": 48000,
    "44100": 44100,
    "24000": 24000,
} as const;
/** Union of the numeric values of the `SendingSampleRate` object. */
export type SendingSampleRate = (typeof SendingSampleRate)[keyof typeof SendingSampleRate];
2023-01-05 02:28:36 +09:00
2023-02-14 22:32:25 +09:00
/**
 * Identifiers of the selectable down-sampling modes.
 * Every key maps to the identical string value.
 */
export const DownSamplingMode = {
    decimate: "decimate",
    average: "average",
} as const;
/** Union of the string values of the `DownSamplingMode` object. */
export type DownSamplingMode = (typeof DownSamplingMode)[keyof typeof DownSamplingMode];
2023-02-19 14:20:37 +09:00
2023-02-20 02:21:51 +09:00
/** Settings of the worklet node: server URL, protocol, sending sample rate, input chunk count and down-sampling mode. */
export type WorkletNodeSetting = {
    serverUrl: string;
    protocol: Protocol;
    sendingSampleRate: SendingSampleRate;
    inputChunkNum: number;
    downSamplingMode: DownSamplingMode;
};
2023-04-06 04:03:09 +09:00
2023-02-19 14:20:37 +09:00
///////////////////////
// Client settings
///////////////////////
2023-01-05 02:28:36 +09:00
/** Selectable client sample rates; 48000 is the only entry. */
export const SampleRate = {
    "48000": 48000,
} as const;
/** Union of the numeric values of the `SampleRate` object (currently just 48000). */
export type SampleRate = (typeof SampleRate)[keyof typeof SampleRate];
2023-01-05 02:28:36 +09:00
2023-02-19 14:20:37 +09:00
/** Client-side settings of the voice changer: audio input, sample rate, input processing flags and gains. */
export type VoiceChangerClientSetting = {
    // A string, a MediaStream, or null.
    audioInput: string | MediaStream | null;
    sampleRate: SampleRate; // 48000Hz
    echoCancel: boolean;
    noiseSuppression: boolean;
    noiseSuppression2: boolean;

    inputGain: number;
    outputGain: number;
    monitorGain: number;

    passThroughConfirmationSkip: boolean;
};
2023-01-05 11:45:42 +09:00
2023-06-25 16:39:18 +09:00
///////////////////////
// Client settings
///////////////////////
/** Aggregate of the worklet, worklet node and voice changer client settings. */
export type ClientSetting = {
    workletSetting: WorkletSetting;
    workletNodeSetting: WorkletNodeSetting;
    voiceChangerClientSetting: VoiceChangerClientSetting;
};
2023-06-25 16:39:18 +09:00
/**
 * Default values for every field of `ClientSetting`.
 * NOTE: the exported name is misspelled ("Settng"); it is kept unchanged so
 * existing importers keep working.
 */
export const DefaultClientSettng: ClientSetting = {
    workletSetting: {
        // numTrancateTreshold: 512 * 2,
        numTrancateTreshold: 100,
        volTrancateThreshold: 0.0005,
        volTrancateLength: 32,
    },
    workletNodeSetting: {
        serverUrl: "",
        protocol: "sio",
        sendingSampleRate: 48000,
        inputChunkNum: 192,
        downSamplingMode: "average",
    },
    voiceChangerClientSetting: {
        audioInput: null,
        sampleRate: 48000,
        echoCancel: false,
        noiseSuppression: false,
        noiseSuppression2: false,
        inputGain: 1.0,
        outputGain: 1.0,
        monitorGain: 1.0,
        passThroughConfirmationSkip: false,
    },
};
2023-01-07 20:07:39 +09:00
2023-02-19 14:20:37 +09:00
////////////////////////////////////
// Exceptions
////////////////////////////////////
2023-01-05 11:45:42 +09:00
/**
 * Identifiers of the voice changer client exceptions.
 * Every key maps to the identical string value.
 */
export const VOICE_CHANGER_CLIENT_EXCEPTION = {
    ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
    ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
    ERR_REST_INVALID_RESPONSE: "ERR_REST_INVALID_RESPONSE",
    ERR_MIC_STREAM_NOT_INITIALIZED: "ERR_MIC_STREAM_NOT_INITIALIZED",
    ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED: "ERR_INTERNAL_AUDIO_PROCESS_CALLBACK_IS_NOT_INITIALIZED",
} as const;
/** Union of the string values of the `VOICE_CHANGER_CLIENT_EXCEPTION` object. */
export type VOICE_CHANGER_CLIENT_EXCEPTION = (typeof VOICE_CHANGER_CLIENT_EXCEPTION)[keyof typeof VOICE_CHANGER_CLIENT_EXCEPTION];
2023-01-05 02:28:36 +09:00
2023-01-29 09:42:45 +09:00
////////////////////////////////////
// indexedDB
////////////////////////////////////
2023-09-25 13:25:07 +09:00
// String constants for indexedDB: application name, database name, and the
// keys for client, server and model data.
export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER";
export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB";
export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT";
export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER";
export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA";
2023-01-29 15:25:44 +09:00
2023-04-13 08:00:28 +09:00
// ONNX
/** Information about an ONNX export: a status string plus the path and filename. */
export type OnnxExporterInfo = {
    status: string;
    path: string;
    filename: string;
};
2023-04-21 15:48:12 +09:00
2023-05-01 02:34:01 +09:00
// Merge
/** One entry of a model merge request: a slot index and its strength. */
export type MergeElement = {
    slotIndex: number;
    strength: number;
};
2023-05-01 02:34:01 +09:00
/** Request to merge models; `command` is always the literal "mix" and `files` lists the elements to merge. */
export type MergeModelRequest = {
    voiceChangerType: VoiceChangerType;
    command: "mix";
    files: MergeElement[];
};