amupd's picture
SpeechT5 upload
62e9ca6
raw
history blame contribute delete
991 Bytes
# @package _group_
# Hydra defaults list. `model: null` selects no option from the `model`
# config group, so nothing from that group is composed into this config.
defaults:
  - model: null
# Hydra output locations. Every directory name is assembled by interpolation
# from the `common_eval.results_path` and `decoding.*` values in this file,
# so each decoding hyper-parameter combination gets its own directory.
hydra:
  # Output directory for a single run.
  run:
    dir: '${common_eval.results_path}/beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight}'
  # Multirun (sweep): shared parent directory plus one subdirectory per job,
  # named with the same pattern as the single-run directory.
  sweep:
    dir: '${common_eval.results_path}'
    subdir: 'beam${decoding.beam}_th${decoding.beamthreshold}_lmw${decoding.lmweight}_wrd${decoding.wordscore}_sil${decoding.silweight}'
# Task settings for the `joint_sc2t_pretraining` task.
# `???` is the OmegaConf marker for a mandatory value: it must be supplied
# (e.g. as a command-line override) before the config can be used.
task:
  _name: joint_sc2t_pretraining
  data: ???
  label_dir: ???
  labels: ["ltr"]
  store_labels: true
  single_target: true
  fine_tuning: true
  normalize: ???  # must be consistent with pre-training
  add_decoder_target: false
  pad_audio: false
  random_crop: true
  hubert_tokenizer: "none"
  # Explicit YAML null. The Python-style literal `None` is NOT null in YAML;
  # it loads as the string "None", which would then be treated as a path.
  # NOTE(review): presumably only read when `hubert_tokenizer` is not "none" —
  # confirm against the task implementation.
  sp_path: null
# Decoder settings (decoder type `kenlm`).
decoding:
  type: kenlm

  # Mandatory inputs (`???` = must be supplied by the caller).
  lmpath: ???
  lexicon: ???

  # Search hyper-parameters. These five values are also interpolated into the
  # Hydra output directory names, so changing any of them changes where
  # results are written.
  beam: 500
  beamthreshold: 100
  lmweight: 2
  wordscore: -1
  silweight: 0

  unique_wer_file: true
# Evaluation settings; both paths are mandatory (`???`).
common_eval:
  # NOTE(review): presumably the checkpoint to evaluate — confirm with the
  # consuming script.
  path: ???
  # Root under which the Hydra run/sweep output directories are created.
  results_path: ???
  post_process: letter
# Dataset settings.
dataset:
  # Mandatory (`???`): which data subset to decode.
  gen_subset: ???
  max_tokens: 1100000