72 lines
1.4 KiB
Python
72 lines
1.4 KiB
Python
from typing import *
|
|
|
|
from pydantic import BaseModel
|
|
|
|
|
|
class TrainConfigTrain(BaseModel):
    # Schema for the `train` section of a training configuration; this model
    # is the declared type of `TrainConfig.train`.
    #
    # NOTE(review): the groupings below are inferred from field names only;
    # confirm the exact meaning of each value against the code that reads it.

    # Run bookkeeping.
    log_interval: int
    seed: int
    epochs: int

    # Optimizer-style hyperparameters (presumably Adam-like: lr/betas/eps).
    learning_rate: float
    betas: List[float]
    eps: float

    # Batching and precision.
    batch_size: int
    fp16_run: bool

    # Schedule and segmenting.
    lr_decay: float
    segment_size: int
    init_lr_ratio: int
    warmup_epochs: int

    # Coefficients; presumably loss weights (mel / KL) — TODO confirm.
    c_mel: int
    c_kl: float
|
|
|
|
|
|
class TrainConfigData(BaseModel):
    # Schema for the `data` section of a training configuration; this model
    # is the declared type of `TrainConfig.data`.
    #
    # NOTE(review): units and semantics (audio / STFT / mel parameters) are
    # inferred from the field names only — confirm with the consuming code.
    max_wav_value: float
    sampling_rate: int
    filter_length: int
    hop_length: int
    win_length: int
    n_mel_channels: int
    mel_fmin: float
    # Explicit `None` default: pydantic v1 already treats a bare `Any` field
    # as optional-with-None, whereas pydantic v2 would make it required.
    # Spelling the default out keeps the accepted input identical on both.
    mel_fmax: Any = None
|
|
|
|
|
|
class TrainConfigModel(BaseModel):
    # Schema for the `model` section of a training configuration; this model
    # is the declared type of `TrainConfig.model`.
    #
    # NOTE(review): the meaning of each hyperparameter is inferred from its
    # name only — confirm against the network code that consumes this config.
    inter_channels: int
    hidden_channels: int
    filter_channels: int
    n_heads: int
    n_layers: int
    kernel_size: int
    # Typed `float` rather than `int`: presumably a dropout rate, which is
    # fractional. With `int`, a value such as 0.1 is truncated to 0 by
    # pydantic v1 and rejected by pydantic v2. Integer inputs (e.g. 0) are
    # still accepted and are coerced to float.
    p_dropout: float
    resblock: str
    resblock_kernel_sizes: List[int]
    resblock_dilation_sizes: List[List[int]]
    upsample_rates: List[int]
    upsample_initial_channel: int
    upsample_kernel_sizes: List[int]
    use_spectral_norm: bool
    gin_channels: int
    emb_channels: int
    spk_embed_dim: int
|
|
|
|
|
|
class TrainConfig(BaseModel):
    # Top-level training configuration: a version tag plus three nested
    # sections, each validated by its own model.

    # Only "v1" and "v2" are accepted; "v2" is used when the key is omitted.
    version: Literal["v1", "v2"] = "v2"

    # Nested sections (all required).
    train: TrainConfigTrain
    data: TrainConfigData
    model: TrainConfigModel
|
|
|
|
|
|
class DatasetMetaItem(BaseModel):
    # One dataset entry; instances are the values of `DatasetMetadata.files`.
    #
    # NOTE(review): the string fields look like file paths to per-sample
    # artifacts (ground-truth wav, features, pitch) — inferred from names
    # only; confirm with the code that builds the metadata.
    gt_wav: str
    co256: str
    # Explicit `None` defaults: pydantic v1 implicitly defaults an
    # `Optional[...]` field to None, while pydantic v2 treats it as required
    # (but nullable). Declaring the default keeps entries that omit these
    # keys valid on both versions.
    f0: Optional[str] = None
    f0nsf: Optional[str] = None
    speaker_id: int
|
|
|
|
|
|
class DatasetMetadata(BaseModel):
    # Container for dataset metadata: a mapping from a string key to the
    # `DatasetMetaItem` describing that entry.
    files: Dict[str, DatasetMetaItem]

    # Disabled field, kept for reference:
    # mute: DatasetMetaItem
|