verstar committed on
Commit
4bafd53
·
verified ·
1 Parent(s): e23c915

Delete bilingual

Browse files
bilingual/stage1/model_ckpt_steps_200000.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd5e5f2410c4b0bfab2ac8593faeb9e28337d318ba6c75e33fc4c8d3f0f0773
3
- size 304957924
 
 
 
 
bilingual/stage2/config.yaml DELETED
@@ -1,398 +0,0 @@
1
- K_step: 100
2
- accumulate_grad_batches: 1
3
- amp: false
4
- audio_num_mel_bins: 80
5
- audio_sample_rate: 48000
6
- base_config:
7
- - egs/stage1.yaml
8
- binarization_args:
9
- min_sil_duration: 0.1
10
- reset_phone_dict: true
11
- reset_word_dict: true
12
- shuffle: false
13
- test_range:
14
- - 0
15
- - 100
16
- train_range:
17
- - 200
18
- - -1
19
- trim_eos_bos: false
20
- trim_sil: false
21
- valid_range:
22
- - 100
23
- - 200
24
- with_align: true
25
- with_f0: true
26
- with_f0cwt: false
27
- with_linear: false
28
- with_spk_embed: false
29
- with_spk_id: true
30
- with_txt: true
31
- with_wav: true
32
- with_word: true
33
- binarizer_cls: data_gen.tech_binarizer.TechBinarizer
34
- binary_data_dir: data/binary/bilingual
35
- cfg_scale: 1.0
36
- check_val_every_n_epoch: 10
37
- clip_grad_norm: 1
38
- clip_grad_value: 0
39
- conv_use_pos: false
40
- debug: false
41
- dec_dilations:
42
- - 1
43
- - 1
44
- - 1
45
- - 1
46
- dec_ffn_kernel_size: 9
47
- dec_inp_add_noise: false
48
- dec_kernel_size: 5
49
- dec_layers: 4
50
- dec_post_net_kernel: 3
51
- decay_steps: 50000
52
- decoder_rnn_dim: 0
53
- decoder_type: conv
54
- diff_decoder_type: wavenet
55
- diff_loss_type: l1
56
- dilation_cycle_length: 4
57
- drop_tech_prob: 0.2
58
- dropout: 0.0
59
- ds_workers: 4
60
- dur_level: ph
61
- dur_loss: mse
62
- dur_predictor_kernel: 3
63
- dur_predictor_layers: 2
64
- enc_dec_norm: ln
65
- enc_dilations:
66
- - 1
67
- - 1
68
- - 1
69
- - 1
70
- enc_ffn_kernel_size: 9
71
- enc_kernel_size: 5
72
- enc_layers: 4
73
- enc_post_net_kernel: 3
74
- enc_pre_ln: true
75
- enc_prenet: true
76
- encoder_K: 8
77
- encoder_type: rel_fft
78
- endless_ds: true
79
- eval_max_batches: -1
80
- f0_K_step: 100
81
- f0_dilation_cycle_length: 4
82
- f0_gen: flow
83
- f0_infer_with_ref: false
84
- f0_max: 1000
85
- f0_max_beta: 0.06
86
- f0_min: 50
87
- f0_residual_channels: 192
88
- f0_residual_layers: 10
89
- f0_sample_clip: true
90
- f0_timesteps: 1000
91
- ffn_act: gelu
92
- ffn_hidden_size: 1024
93
- fft_size: 1024
94
- flow_decoder_type: wavenet
95
- flow_loss_type: l1
96
- flow_qsample: direct
97
- fmax: 24000
98
- fmin: 20
99
- frames_multiple: 1
100
- fs2_ckpt_dir: checkpoints/stage1
101
- gaussian_start: false
102
- gen_dir_name: ''
103
- griffin_lim_iters: 30
104
- hidden_size: 256
105
- hop_size: 256
106
- infer: false
107
- keep_bins: 80
108
- lambda_commit: 0.25
109
- lambda_energy: 0.1
110
- lambda_f0: 1.0
111
- lambda_ph_dur: 1.0
112
- lambda_sent_dur: 0.0
113
- lambda_uv: 1.0
114
- lambda_word_dur: 0.0
115
- layers_in_block: 2
116
- load_ckpt: ''
117
- loud_norm: false
118
- lr: 0.001
119
- max_beta: 0.06
120
- max_epochs: 1000
121
- max_frames: 3000
122
- max_input_tokens: 1550
123
- max_sentences: 16
124
- max_tokens: 24000
125
- max_updates: 160000
126
- max_valid_sentences: 1
127
- max_valid_tokens: 60000
128
- mel_loss_scale: direct
129
- mel_losses: l1:0.5|ssim:0.5
130
- mel_vmax: 1.5
131
- mel_vmin: -6
132
- min_frames: 0
133
- num_ckpt_keep: 3
134
- num_heads: 2
135
- num_sanity_val_steps: 0
136
- num_spk: 160
137
- num_valid_plots: 10
138
- optimizer_adam_beta1: 0.9
139
- optimizer_adam_beta2: 0.98
140
- out_wav_norm: false
141
- pitch_extractor: parselmouth
142
- pitch_key: pitch
143
- pitch_type: frame
144
- predictor_dropout: 0.0
145
- predictor_grad: 1.0
146
- predictor_hidden: -1
147
- predictor_kernel: 5
148
- predictor_layers: 5
149
- preprocess_args:
150
- add_eos_bos: true
151
- mfa_group_shuffle: false
152
- mfa_offset: 0.02
153
- nsample_per_mfa_group: 1000
154
- reset_phone_dict: true
155
- reset_word_dict: true
156
- save_sil_mask: true
157
- txt_processor: en
158
- use_mfa: true
159
- vad_max_silence_length: 12
160
- wav_processors: []
161
- with_phsep: true
162
- preprocess_cls: ''
163
- print_nan_grads: false
164
- processed_data_dir: data/processed/bilingual
165
- profile_infer: false
166
- raw_data_dir: ''
167
- ref_norm_layer: bn
168
- rename_tmux: true
169
- residual_channels: 256
170
- residual_layers: 20
171
- resume_from_checkpoint: 0
172
- save_best: false
173
- save_codes:
174
- - tasks
175
- - modules
176
- - egs
177
- save_f0: false
178
- save_gt: true
179
- schedule_type: linear
180
- scheduler: warmup
181
- seed: 1234
182
- sort_by_len: true
183
- spec_max:
184
- - 0.03640973940491676
185
- - 0.039425432682037354
186
- - 0.29524752497673035
187
- - 0.45784831047058105
188
- - 0.48333120346069336
189
- - 0.5335848927497864
190
- - 0.6071611046791077
191
- - 0.5474293828010559
192
- - 0.6076506972312927
193
- - 0.5390501022338867
194
- - 0.5743886232376099
195
- - 0.485751211643219
196
- - 0.4248744249343872
197
- - 0.4843744933605194
198
- - 0.43331536650657654
199
- - 0.5356124639511108
200
- - 0.4875929355621338
201
- - 0.48614853620529175
202
- - 0.44228559732437134
203
- - 0.5027499198913574
204
- - 0.6554337739944458
205
- - 0.3469322919845581
206
- - 0.33981558680534363
207
- - 0.37933868169784546
208
- - 0.34751009941101074
209
- - 0.22094282507896423
210
- - 0.252963662147522
211
- - 0.18274202942848206
212
- - 0.1976650059223175
213
- - 0.1770155429840088
214
- - 0.18206502497196198
215
- - 0.1002601608633995
216
- - 0.18640224635601044
217
- - 0.27240633964538574
218
- - 0.04153885692358017
219
- - -0.010289354249835014
220
- - -0.012929759919643402
221
- - 0.035185474902391434
222
- - 0.18124309182167053
223
- - -0.14512233436107635
224
- - -0.1778590828180313
225
- - -0.20491982996463776
226
- - -0.30119436979293823
227
- - -0.1735714226961136
228
- - -0.1039585992693901
229
- - -0.177497997879982
230
- - -0.28803232312202454
231
- - -0.24049188196659088
232
- - -0.4682924747467041
233
- - -0.5791841745376587
234
- - -0.5170156955718994
235
- - -0.6380605697631836
236
- - -0.7147259712219238
237
- - -0.6607836484909058
238
- - -0.7288452982902527
239
- - -0.6338580250740051
240
- - -0.7092624306678772
241
- - -0.8101216554641724
242
- - -0.7633087038993835
243
- - -0.8251329660415649
244
- - -0.6936700940132141
245
- - -0.5180960297584534
246
- - -0.7972619533538818
247
- - -0.807314932346344
248
- - -0.7151175737380981
249
- - -0.7785399556159973
250
- - -0.8709449768066406
251
- - -0.8360402584075928
252
- - -0.8253681659698486
253
- - -0.9778416156768799
254
- - -1.12929368019104
255
- - -1.3274869918823242
256
- - -1.3071579933166504
257
- - -1.5234452486038208
258
- - -1.6191706657409668
259
- - -1.708594799041748
260
- - -1.8246771097183228
261
- - -1.9193823337554932
262
- - -2.1361801624298096
263
- - -2.3829283714294434
264
- spec_min:
265
- - -6.0
266
- - -6.0
267
- - -6.0
268
- - -6.0
269
- - -6.0
270
- - -6.0
271
- - -6.0
272
- - -6.0
273
- - -6.0
274
- - -6.0
275
- - -6.0
276
- - -6.0
277
- - -6.0
278
- - -6.0
279
- - -6.0
280
- - -6.0
281
- - -6.0
282
- - -6.0
283
- - -6.0
284
- - -6.0
285
- - -6.0
286
- - -6.0
287
- - -6.0
288
- - -6.0
289
- - -6.0
290
- - -6.0
291
- - -6.0
292
- - -6.0
293
- - -6.0
294
- - -6.0
295
- - -6.0
296
- - -6.0
297
- - -6.0
298
- - -6.0
299
- - -6.0
300
- - -6.0
301
- - -6.0
302
- - -6.0
303
- - -6.0
304
- - -6.0
305
- - -6.0
306
- - -6.0
307
- - -6.0
308
- - -6.0
309
- - -6.0
310
- - -6.0
311
- - -6.0
312
- - -6.0
313
- - -6.0
314
- - -6.0
315
- - -6.0
316
- - -6.0
317
- - -6.0
318
- - -6.0
319
- - -6.0
320
- - -6.0
321
- - -6.0
322
- - -6.0
323
- - -6.0
324
- - -6.0
325
- - -6.0
326
- - -6.0
327
- - -6.0
328
- - -6.0
329
- - -6.0
330
- - -6.0
331
- - -6.0
332
- - -6.0
333
- - -6.0
334
- - -6.0
335
- - -6.0
336
- - -6.0
337
- - -6.0
338
- - -6.0
339
- - -6.0
340
- - -6.0
341
- - -6.0
342
- - -6.0
343
- - -6.0
344
- - -6.0
345
- task_cls: tasks.TechSinger.techsinger.RFPostnetTask
346
- tb_log_interval: 100
347
- test_ids: []
348
- test_input_yaml: ''
349
- test_num: 100
350
- test_prefixes:
351
- - "Chinese#ZH-Alto-1#Breathy#\u4E0D\u518D\u89C1"
352
- - "Chinese#ZH-Tenor-1#Pharyngeal#\u4E0D\u4E3A\u8C01\u800C\u4F5C\u7684\u6B4C"
353
- - "Chinese#ZH-Alto-1#Vibrato#\u4E0D\u518D\u89C1"
354
- - "Chinese#ZH-Tenor-1#Glissando#\u4E0D\u67D3"
355
- - "Chinese#ZH-Alto-1#Mixed_Voice_and_Falsetto#\u4E00\u6B21\u5C31\u597D"
356
- - English#EN-Alto-1#Breathy#all is found
357
- - English#EN-Alto-1#Pharyngeal#beauty and the beast
358
- - English#EN-Alto-2#Vibrato#A Thousand Years
359
- - English#EN-Alto-2#Glissando#A Thousand Years
360
- - English#EN-Alto-2#Mixed_Voice_and_Falsetto#All of Me
361
- test_set_name: test
362
- timesteps: 1000
363
- train_set_name: train
364
- train_sets: ''
365
- use_gt_dur: false
366
- use_gt_f0: false
367
- use_nsf: true
368
- use_ph_postnet: true
369
- use_pitch_embed: true
370
- use_pos_embed: true
371
- use_spk_embed: false
372
- use_spk_id: true
373
- use_spk_prompt: false
374
- use_uv: true
375
- use_word_input: false
376
- val_check_interval: 10000
377
- valid_infer_interval: 10000
378
- valid_monitor_key: val_loss
379
- valid_monitor_mode: min
380
- valid_prefixes:
381
- - "Chinese#ZH-Alto-1#Breathy#\u4E0D\u518D\u89C1"
382
- - "Chinese#ZH-Tenor-1#Pharyngeal#\u4E0D\u4E3A\u8C01\u800C\u4F5C\u7684\u6B4C"
383
- - "Chinese#ZH-Alto-1#Vibrato#\u4E0D\u518D\u89C1"
384
- - "Chinese#ZH-Tenor-1#Glissando#\u4E0D\u67D3"
385
- - "Chinese#ZH-Alto-1#Mixed_Voice_and_Falsetto#\u4E00\u6B21\u5C31\u597D"
386
- - English#EN-Alto-1#Breathy#all is found
387
- - English#EN-Alto-1#Pharyngeal#beauty and the beast
388
- - English#EN-Alto-2#Vibrato#A Thousand Years
389
- - English#EN-Alto-2#Glissando#A Thousand Years
390
- - English#EN-Alto-2#Mixed_Voice_and_Falsetto#All of Me
391
- valid_set_name: valid
392
- vocoder: HifiGAN_NSF
393
- vocoder_ckpt: checkpoints/hifigan
394
- warmup_updates: 4000
395
- weight_decay: 0
396
- win_size: 1024
397
- word_dict_size: 10000
398
- work_dir: checkpoints/stage2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bilingual/stage2/model_ckpt_steps_160000.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0613bd31b2234d8dca83ae4a48a4e3141931cf328cb21e33cfdba7c4dad8bfc7
3
- size 288237596