2023-06-18 22:23:50 +09:00
|
|
|
from typing import *
|
|
|
|
|
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
class TrainConfigTrain(BaseModel):
    """Schema for the ``train`` section of a ``TrainConfig``.

    No field declares a default value; pydantic validates each value
    against the annotation beside it.  The grouping comments below are
    inferred from the field names only and should be confirmed against
    the code that consumes this config.
    """

    # Run bookkeeping -- presumably logging cadence, RNG seed and the
    # number of epochs to train for (TODO confirm).
    log_interval: int
    seed: int
    epochs: int

    # Presumably optimizer hyperparameters; ``betas`` is a list of floats
    # of unspecified length (verify against the optimizer construction).
    learning_rate: float
    betas: List[float]
    eps: float

    # Batch size and a flag that presumably toggles half-precision runs.
    batch_size: int
    fp16_run: bool

    # Presumably learning-rate schedule / cropping parameters.
    # NOTE(review): ``init_lr_ratio`` is declared as int, not float --
    # confirm that is intended.
    lr_decay: float
    segment_size: int
    init_lr_ratio: int
    warmup_epochs: int

    # Presumably loss weighting coefficients (``c_mel`` is an int while
    # ``c_kl`` is a float) -- TODO confirm.
    c_mel: int
    c_kl: float
|
|
|
|
|
|
|
|
|
|
|
|
class TrainConfigData(BaseModel):
    """Schema for the ``data`` section of a ``TrainConfig``.

    The fields look like audio / spectrogram settings; that reading is
    based on the names alone, so verify units and meaning against the
    feature-extraction code before relying on it.
    """

    # Presumably waveform scaling and sample-rate settings -- TODO confirm.
    max_wav_value: float
    sampling_rate: int

    # Presumably STFT framing parameters (FFT size, hop, window).
    filter_length: int
    hop_length: int
    win_length: int

    # Presumably mel filter-bank settings.
    n_mel_channels: int
    mel_fmin: float
    # Annotated as ``Any``, so a value of any type is accepted here.
    mel_fmax: Any
|
|
|
|
|
|
|
|
|
|
|
|
class TrainConfigModel(BaseModel):
    """Schema for the ``model`` section of a ``TrainConfig``.

    No field declares a default value.  What each number controls inside
    the network is not visible from this file; the comments below are
    name-based guesses to be confirmed against the model definition.
    """

    # Presumably channel widths and depth of the network -- TODO confirm.
    emb_channels: int
    inter_channels: int
    n_layers: int

    # A list of ints; presumably one upsampling factor per stage.
    upsample_rates: List[int]

    # Boolean switch; presumably selects spectral normalisation somewhere
    # in the model (verify against the caller).
    use_spectral_norm: bool

    # Presumably speaker-conditioning sizes -- TODO confirm.
    gin_channels: int
    spk_embed_dim: int
|
|
|
|
|
|
|
|
|
|
|
|
class TrainConfig(BaseModel):
    """Top-level training configuration made of three nested sections.

    ``version`` is constrained to the single literal ``"voras"`` and
    defaults to that value when omitted.  ``train``, ``data`` and
    ``model`` have no defaults and are validated as their respective
    nested pydantic models.
    """

    # Format tag: only the literal "voras" is accepted.
    version: Literal["voras"] = "voras"

    # Nested sections, each validated by its own model class.
    train: TrainConfigTrain
    data: TrainConfigData
    model: TrainConfigModel
|
|
|
|
|
|
|
|
|
|
|
|
class DatasetMetaItem(BaseModel):
    """Metadata record describing a single dataset entry.

    The string fields are presumably file paths to per-utterance
    artefacts -- this is inferred from the names; verify against the
    code that builds the dataset metadata.
    """

    # String references; presumably the ground-truth waveform and a
    # 256-dimensional content feature file (TODO confirm).
    gt_wav: str
    co256: str

    # Both pitch-related entries are typed Optional[str], i.e. they may
    # be None.
    f0: Optional[str]
    f0nsf: Optional[str]

    # Integer speaker identifier.
    speaker_id: int
|
|
|
|
|
|
|
|
|
|
|
|
class DatasetMetadata(BaseModel):
    """Container mapping string keys to ``DatasetMetaItem`` records.

    What the keys represent (file name, id, ...) is not visible from
    this file -- verify against the code that populates ``files``.
    """

    # String key -> metadata record for that entry.
    files: Dict[str, DatasetMetaItem]

    # Disabled field, kept for reference:
    # mute: DatasetMetaItem
|