Skip to content

Commit 753cfa8

Browse files
committed
add muon and mrd
1 parent bec245d commit 753cfa8

File tree

7 files changed

+1430
-80
lines changed

7 files changed

+1430
-80
lines changed

configs/nsf_hifigan_mrd.yaml

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
# NOTE(review): reconstructed from a diff rendering that stripped nesting
# indentation — mapping structure inferred from the DiffSinger config schema;
# confirm against the original configs/nsf_hifigan_mrd.yaml.

# preprocessing
base_config:
  - configs/base_hifi.yaml

data_input_path: []
data_out_path: []
val_num: 5

pe: 'parselmouth'  # 'parselmouth' or 'harvest'
f0_min: 65
f0_max: 1100

aug_min: 0.9
aug_max: 1.4
aug_num: 1
key_aug: false
key_aug_prob: 0.5

pc_aug: false  # pc-nsf training method
pc_aug_rate: 0.4
pc_aug_key: 12

# multi-resolution STFT auxiliary loss (one entry per resolution)
use_stftloss: true
loss_fft_sizes: [2048, 2048, 4096, 1024, 512, 256, 128, 1024, 2048, 512]
loss_hop_sizes: [512, 240, 480, 100, 50, 25, 12, 120, 240, 50]
loss_win_lengths: [2048, 1200, 2400, 480, 240, 120, 60, 600, 1200, 240]
lab_aux_melloss: 45
lab_aux_stftloss: 2.5

raw_data_dir: []
binary_data_dir: null
binarization_args:
  num_workers: 8
  shuffle: true

DataIndexPath: data
valid_set_name: valid
train_set_name: train

volume_aug: true
volume_aug_prob: 0.5

mel_vmin: -6.0  # -6.
mel_vmax: 1.5

audio_sample_rate: 44100
audio_num_mel_bins: 128
hop_size: 512  # Hop size.
fft_size: 2048  # FFT size.
win_size: 2048  # FFT size.
fmin: 40
fmax: 16000
fmax_for_loss: null
crop_mel_frames: 32

# global constants

# neural networks
#model_cls: training.nsf_HiFigan_task.nsf_HiFigan
model_args:
  mini_nsf: true
  noise_sigma: 0.0
  upsample_rates: [8, 8, 2, 2, 2]
  upsample_kernel_sizes: [16, 16, 4, 4, 4]
  upsample_initial_channel: 512
  resblock_kernel_sizes: [3, 7, 11]
  resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
  discriminator_periods: [2, 3, 5, 7, 11]
  resblock: "1"

# training
task_cls: training.nsf_HiFigan_mrd_task.nsf_HiFigan

#sort_by_len: true
#optimizer_args:
#  optimizer_cls: torch.optim.AdamW
#  lr: 0.0001
#  beta1: 0.9
#  beta2: 0.98
#  weight_decay: 0
#lab_aux_loss: 0.5
discriminate_optimizer_args:
  optimizer_cls: modules.optimizer.muon.Muon_AdamW
  lr: 0.0002
  weight_decay: 0
  verbose: false

# NOTE(review): "generater" spelling presumably matches the key read by the
# task code — do not rename without changing the consumer.
generater_optimizer_args:
  optimizer_cls: modules.optimizer.muon.Muon_AdamW
  lr: 0.0002
  weight_decay: 0
  verbose: false

lr_scheduler_args:
  scheduler_cls: lr_scheduler.scheduler.WarmupLR
  warmup_steps: 5000
  min_lr: 0.00001

clip_grad_norm: 1
accumulate_grad_batches: 1
sampler_frame_count_grid: 6
ds_workers: 4
dataloader_prefetch_factor: 2

batch_size: 16

num_valid_plots: 100
log_interval: 100
num_sanity_val_steps: 2  # steps of validation at the beginning
val_check_interval: 2000
num_ckpt_keep: 5
max_updates: 1000000
permanent_ckpt_start: 200000
permanent_ckpt_interval: 40000

###########
# pytorch lightning
# Read https://lightning.ai/docs/pytorch/stable/common/trainer.html#trainer-class-api for possible values
###########
pl_trainer_accelerator: 'auto'
pl_trainer_devices: 'auto'
pl_trainer_precision: '32-true'
#pl_trainer_precision: 'bf16' #please do not use bf 16
pl_trainer_num_nodes: 1
pl_trainer_strategy:
  name: auto
  process_group_backend: nccl
  find_unused_parameters: true
# NOTE(review): nccl_p2p is top-level (not under pl_trainer_strategy) in this
# config family — TODO confirm against base config.
nccl_p2p: true
seed: 114514

###########
# finetune
###########
finetune_enabled: false
finetune_ckpt_path: ''
finetune_ignored_params: []
finetune_strict_shapes: true

freezing_enabled: false
frozen_params: []

0 commit comments

Comments
 (0)