This is not praise. It means four things:
1. There is no documentation! None of the framework code carries any comments — not even function docstrings.
2. It has not been effectively tested — presumably they were racing the clock! Even the example in the official README does not run!
3. It is a framework, and a very un-Pythonic one at that, riddled with inline hacks, wrappers, and baffling syntax.
4. It leans heavily on class static methods and global functions, a pattern that breaks object orientation and is genuinely hard to follow.
Although these four points mean it really does not live up to Facebook's golden brand, as a learner you still have to get it running — so let the repair job on this "brand-new jalopy" from the Facebook garage begin!
Debugging Fairseq was genuinely painful: jumps appear out of nowhere, and you never know when or where control will transfer next.
If you want to build a pipeline wrapper on top of Fairseq's mechanisms, you are in for a lot of work!
The code quality is very low: the model, the model's config, and the model pipeline's parameters are all held in variables named model; the task, the task name, and the task's parameter set are all named task. You have to debug the surrounding context to learn what each one means, and there is no guarantee that two entries into the same function carry the same meaning.
The above criticism applies to version 1.0, as of 2020-11-23.
Skipped... there are plenty of tutorials online, and you have to do this part yourself anyway:
Fairseq
apex
Download the pretrained model
Download the training data (Librispeech)
First, run the example from the official README and see what happens:
import torch
from fairseq.models.wav2vec import Wav2VecModel

# load the downloaded checkpoint (a dict holding 'args', 'model', etc.)
cp = torch.load('/home/**/Documents/Research/fairseq/model/wav2vec_vox_960h_pl.pt.zip')
model = Wav2VecModel.build_model(cp['args'], task=None)
model.load_state_dict(cp['model'])
model.eval()

# push a random 16 kHz waveform through the feature extractor and aggregator
wav_input_16khz = torch.randn(1, 10000)
z = model.feature_extractor(wav_input_16khz)
c = model.feature_aggregator(z)
Guess what happens?
>>> Exception "AttributeError"
'Namespace' object has no attribute 'prediction_steps'
File: /home/**/Documents/workspace/fairseq/fairseq/models/wav2vec/wav2vec.py, Line: 175
Exception "unhandled AttributeError"
'Namespace' object has no attribute 'prediction_steps'
File: /home/**/Documents/workspace/fairseq/fairseq/models/wav2vec/wav2vec.py, Line: 175
Ha — surprised? There is nothing magical here, just plain bad code. The model class looks like this:
@register_model("wav2vec", dataclass=Wav2VecConfig)
class Wav2VecModel(BaseFairseqModel):
    @classmethod
    def build_model(cls, cfg: Wav2VecConfig, task: FairseqTask):
        """Build a new model instance."""
        model = Wav2VecModel(cfg)
        logger.info(model)
        return model

    def __init__(self, cfg: Wav2VecConfig):
        super().__init__()

        self.prediction_steps = cfg.prediction_steps
        offset = cfg.offset

        if cfg.activation == "relu":
            activation = nn.ReLU()
        elif cfg.activation == "gelu":
            activation = nn.GELU()
        else:
            raise Exception("unknown activation " + cfg.activation)
Debugging shows that this cfg argument is not a Wav2VecConfig at all: what actually arrives (cp['args']) has type <class 'argparse.Namespace'>, with the following contents:
Namespace(activation_dropout=0.1, adam_betas='(0.9, 0.98)', adam_eps=1e-08, all_gather_list_size=16384,
apply_mask=True, arch='wav2vec_ctc', attention_dropout=0.0, best_checkpoint_metric='wer',
bf16=False, bpe=None, broadcast_buffers=False, bucket_cap_mb=25, checkpoint_suffix='', clip_norm=0.0,
cpu=False, criterion='wav2vec', curriculum=0, data='/private/home/qiantong/w2v/data/train_36s_short_960h_rescore',
data_buffer_size=10, dataset_impl=None, ddp_backend='no_c10d', decay_steps=250000, device_id=0, disable_validation=False,
distributed_backend='nccl', distributed_init_method='tcp://learnfair1679:56443', distributed_no_spawn=True,
distributed_num_procs=None, distributed_port=56443, distributed_rank=0, distributed_world_size=24,
distributed_wrapper='DDP',
dropout=0.0, dropout_input=0,
empty_cache_freq=0, enable_padding=False, fast_stat_sync=False, feature_grad_mult=0.0, final_dropout=0.0, final_lr_scale=0.05, find_unused_parameters=False,
finetune_from_model=None, fix_batches_to_gpus=False,
fixed_validation_seed=None, fp16=True, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, freeze_finetune_updates=10000,
hold_steps=210000, init_lr_scale=0.01, keep_best_checkpoints=-1, keep_interval_updates=-1,
keep_last_epochs=-1, labels='ltr', layerdrop=0.1, localsgd_frequency=3, log_format='json', log_interval=500, lr=[3e-05], lr_scheduler='tri_stage',
mask_channel_length=64, mask_channel_other=0.0, mask_channel_prob=0.1, mask_channel_selection='static',
mask_length=10, mask_other=0.0, mask_prob=0.1, mask_selection='static', max_epoch=0, max_sample_size=None,
max_sentences=None, max_sentences_valid=None, max_tokens=1280000, max_tokens_valid=1280000, max_update=500000, maximize_best_checkpoint_metric=False,
memory_efficient_bf16=False, memory_efficient_fp16=False, min_loss_scale=0.0001,
min_lr=-1, min_sample_size=None, model_parallel_size=1, no_epoch_checkpoints=True, no_last_checkpoints=False,
no_mask_channel_overlap=False, no_mask_overlap=False, no_pretrained_weights=False, no_progress_bar=False, no_save=False,
no_save_optimizer_state=False, no_seed_provided=False, normalize=True, nprocs_per_node=8, num_workers=4,
optimizer='adam', optimizer_overrides='{}', patience=-1, pipeline_balance=None, pipeline_checkpoint='never',
pipeline_chunks=None, pipeline_devices=None, pipeline_model_parallel=False, profile=False, quantization_config_path=None,
remove_bpe='letter', required_batch_size_multiple=8, required_seq_len_multiple=1, reset_dataloader=False,
reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, restore_file='checkpoint_last.pt',
sample_rate=16000, save_dir='/checkpoint/michaelauli/asr/pseudolbl/960h_vox.fp16.u500000.savg.nrm.ltr.m_static.mstd0.mask10.mprob0.1.ld0.1.mc_static.mcstd0.maskc64.mcprob0.1.fgm0.0.ffu10000.lr3e-05.warmup40000.hld210000.dec250000.frs0.05.fd0.0.drop0.0.ad0.1.atd0.0.ms1280000.sd1337.uf1.ngpu24',
save_interval=1, save_interval_updates=0, scoring='bleu', seed=1337, sentence_avg=True, skip_invalid_size_inputs_valid_test=False, slowmo_algorithm='LocalSGD',
slowmo_momentum=None, stop_time_hours=0, task='audio_pretraining', tensorboard_logdir='',
threshold_loss_scale=None, tokenizer=None, tpu=False, train_subset='train', update_freq=[1], use_bmuf=False,
use_old_adam=False, user_dir=None, valid_subset='dev_other', validate_after_updates=10000,
validate_interval=1, validate_interval_updates=0, w2v_args=Namespace(activation_dropout=0.0, activation_fn='gelu', adam_betas='(0.9,0.98)',
adam_eps=1e-06, arch='wav2vec2', attention_dropout=0.1, attention_type='default', augment=False,
best_checkpoint_metric='loss', bpe=None, bucket_cap_mb=25, centroids=None, clip_norm=25, codebook_negatives=0, combine_banks=False, combine_dataset=False, conv_bias=True,
conv_feature_layers='[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] * 2', conv_logs=True, conv_pos=128, conv_pos_groups=16, conv_pos_layers=1, cpu=False, criterion='wav2vec',
cross_sample_negatives=0, curriculum=0, data='/private/home/abaevski/data/librivox/no_silence', dataset_impl=None, ddp_backend='c10d', debug=False, device_id=0, disable_validation=False,
distributed_backend='nccl', distributed_init_method='tcp://learnfair1331:55498', distributed_no_spawn=True, distributed_port=55498, distributed_rank=0, distributed_world_size=128,
div_drop_percent=0, div_pen_threshold=None, dropout=0.0, dropout_features=0.1, dropout_input=0.1, duplicate_negatives=0, empty_cache_freq=0, enable_padding=False,
encode_padded_indiv=False, encoder_attention_heads=16, encoder_embed_dim=1024, encoder_ffn_embed_dim=4096, encoder_layerdrop=0.0, encoder_layers=24, encoder_normalize_before=True,
encoder_optimizer_params=None, encoder_schedule=0, end_learning_rate=0.0, extractor_mode='layer_norm', extractor_model=None, extractor_norm_location='default',
fast_stat_sync=False, feature_glu=False, feature_grad_mult=1.0, feature_noise=0, feature_noise_last=0, features_pen=True, final_dim=768, find_unused_parameters=True, finetune_extractor=True, fix_batches_to_gpus=False, fixed_validation_seed=None, force_anneal=None, fp16=True, fp16_init_scale=128, fp16_no_flatten_grads=False, fp16_scale_tolerance=0.0, fp16_scale_window=None, group_norm_features=False, group_norm_groups=512,
gumbel_noise_gain=1, infomax=True, input_noise=0.0, keep_interval_updates=1, keep_last_epochs=-1, label_smoothing=0.0, labels=None, latent_groups=2, latent_temp='(2.0,0.1,0.999995)',
latent_var_banks=2, latent_vars=320, layer_norm_after=-1, layer_norm_before=0, layer_norm_features=True, layer_norm_first=True, layer_norm_repr=True, lazy_load_labels=False,
log_format='json', log_interval=100, logit_temp=0.1, loss_weights=None, lr=[0.005], lr_scheduler='polynomial_decay', mask_min_space=1, mask_multiple_length=10,
mask_prob=0.65, mask_same_channels=False, mask_same_timesteps=False, mask_selection='static',
mask_stdev=0.0, masking_schedule=0, max_epoch=0, max_positions=8000, max_pred_length=0, max_sample_size=320000, max_sentences=None, max_sentences_valid=None, max_tokens=1200000, max_tokens_valid=1200000, max_update=1000000,
maximize_best_checkpoint_metric=False, memory_efficient_fp16=False, min_loss_scale=0.0001, min_lr=-1, min_sample_size=32000, mlp_mi=768, negatives_from_everywhere=False, new_emb_pen=True, new_logit_pen=False, no_bert_init=False,
no_epoch_checkpoints=True, no_last_checkpoints=False, no_mask_channel_overlap=False, no_mask_overlap=False, no_norm_after=None, no_progress_bar=False, no_save=False, no_save_optimizer_state=False, no_token_positional_embeddings=True, noise_type='gaussian', norm_init_weight=1.0, normalize=True, num_negatives=100, num_workers=6, optimizer='adam', optimizer_overrides='{}',
penalize_transformer=False, penalty_coeff='[0,0,0.1,0]', penalty_temp=1.0, pooler_activation_fn='tanh', pooler_dropout=0.0, power=1.0, pre_norm=False, predict_everything=False, predictor_grad_mult=1.0, preemp=False, project_quantized=True,
quantize_input=False, quantize_targets=True, quantized=False, quantizer_chance=0.0, quantizer_grad_mult=1.0, quantizer_init=True, quantizer_init_gain=1.0, quantizer_init_normal=True, relative_positional_embeddings=0, required_batch_size_multiple=8, rescale_sample_size=False,
reset_dataloader=False, reset_lr_scheduler=False, reset_meters=False, reset_optimizer=False, restore_file='checkpoint_last.pt', same_quantizer=False, sample_rate=16000, save_dir='/checkpoint/abaevski/asr/speechbert_raw_big_dbg/prenorm_ln_stable_repr_lr.qtz.lnfrst.lnr.cb.nrm.mlp768.pq.lv320.lvb2.ab0.9_0.98.lr0.005.wu32000.mask10.mprob0.65.mstd0.drp_i0.1.drp_f0.1.in0.0.nt_gaus.lnf-1.ng512.fgm1.0.nep.qini.qini1.pen0_0_0.1_0.cpl1.ld0.0.wd0.01.uf1.mu1000000.s5.ngpu128',
save_interval=1, save_interval_updates=25000, scale_input=0, scp=False, seed=5, sentence_avg=False, siamese_extractor=False, siamese_feature_layers=None, skip_connections=False,
skip_invalid_size_inputs_valid_test=True, skip_main_loss_prob=0, soft=False, squeeze_constant=20, squeeze_logits='norm_temp', squeeze_pos_emb='add', squeeze_quantizer_logits=False, static_preemp=False, tanh_after_norm=0, target_labels=None,
task='audio_pretraining', tensorboard_logdir='', threshold_loss_scale=None, tokenizer=None, total_num_update=1000000, train_on_full=False, train_subset='train',
unprojected_feats=False, update_freq=[1], use_aggregator_feats=False, use_bmuf=False, use_old_adam=False, user_dir=None, valid_subset='valid', validate_after_updates=0,
validate_interval=1, validate_interval_updates=0, warmup_updates=32000, weight_decay=0.01, weight_norm=False), w2v_path='/private/home/abaevski/models/wav2vec2/wav2vec_vox_new.pt',
warmup_steps=40000, weight_decay=0.0, wer_args="('/checkpoint/abaevski/data/speech/libri/4-gram.bin','/checkpoint/abaevski/data/speech/libri/10h/wav2vec/raw/lexicon_ltr2.lst',1.57,-0.64)", zero_infinity=True, zero_sharding='none')
In one word: damn!
After writing all this, do we know how to fix it? Obviously not.
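Before going further, here is a minimal sketch (reusing the checkpoint path from the example above) for inspecting what the checkpoint file actually holds:

import torch

# Load the checkpoint onto the CPU; the path is the one from the README example.
cp = torch.load(
    "/home/**/Documents/Research/fairseq/model/wav2vec_vox_960h_pl.pt.zip",
    map_location="cpu",
)

# The keys vary between Fairseq versions: newer checkpoints carry a 'cfg'
# section, older ones only an 'args' Namespace.
print(list(cp.keys()))
print(type(cp.get("args")))  # <class 'argparse.Namespace'>, not Wav2VecConfig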
Tip 1: absolute config paths cannot be passed. Strictly speaking you cannot call this a bug, because Hydra is simply designed this way.
Where is the problem? In hydra/_internal/utils.py: only relative config paths can be passed, and passing an absolute one derails the computation.
def compute_search_path_dir(
    calling_file: Optional[str],
    calling_module: Optional[str],
    config_path: Optional[str],
) -> str:
    if calling_file is not None:
        abs_base_dir = realpath(dirname(calling_file))

        if config_path is not None:
            search_path_dir = join(abs_base_dir, config_path)
        else:
            search_path_dir = abs_base_dir

        search_path_dir = normpath(search_path_dir)
    elif calling_module is not None:
        last_dot = calling_module.rfind(".")
        if last_dot != -1:
            calling_module = calling_module[0:last_dot]
        else:
            calling_module = ""
But did Facebook never think about deploying code and data separately? If the code sits in an install directory — say, inside an anaconda environment — while the config files sit in a user directory, computing that relative path becomes mission impossible.
Aside: Facebook's Hydra architect must have had water on the brain!
Do it yourself and want for nothing — patch this block to support global (absolute) config paths:
if calling_file is not None:
    abs_base_dir = realpath(dirname(calling_file))

    if config_path is not None:
        # linger fix: let absolute config paths pass through unchanged
        # (note: make sure `os` is imported at the top of utils.py)
        if not os.path.isabs(config_path):
            search_path_dir = join(abs_base_dir, config_path)
        else:
            search_path_dir = config_path
        # linger fix end
    else:
        search_path_dir = abs_base_dir

    search_path_dir = normpath(search_path_dir)
Simple, right? Now configuration and code can live in separate places!
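To see what the patch buys you, here is a standalone sketch of the resolution logic (a hypothetical helper for illustration, not Hydra's API):

import os
from os.path import dirname, join, normpath, realpath

def resolve_config_dir(calling_file: str, config_path: str) -> str:
    # Patched behavior: absolute config paths pass through untouched,
    # relative paths are still resolved next to the calling file.
    abs_base_dir = realpath(dirname(calling_file))
    if os.path.isabs(config_path):
        return normpath(config_path)
    return normpath(join(abs_base_dir, config_path))

# Relative: resolved inside the install tree (the only case stock Hydra handles).
print(resolve_config_dir("/opt/conda/lib/fairseq_cli/hydra_train.py", "config"))
# Absolute: a user directory is used as-is, so config and code can be separated.
print(resolve_config_dir("/opt/conda/lib/fairseq_cli/hydra_train.py",
                         "/home/linger/Documents/Research/fairseq/config"))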
Tip 2: command-line overrides cannot be passed
To be fair, I simply could not figure out how to pass them: overrides written as data.key kept erroring out, and a long chase turned up no method or workaround. So what now?
You keep getting error messages like this one:
The debugged program raised the exception Exception
" -- Process 0 terminated with the following error: Traceback (most recent call last): File "/home/***/anaconda3/envs/lSrv08/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 19,
in _wrap fn(i, *args) File "/home/***/Documents/workspace/fairseq/fairseq/distributed_utils.py", line 300,
in distributed_main main(cfg, **kwargs) File "/home/***/Documents/workspace/fairseq/fairseq_cli/train.py", line 51,
in main assert ( AssertionError: Must specify batch size either with --max-tokens or --batch-size "
File: /home/***/anaconda3/envs/lSrv08/lib/python3.8/site-packages/torch/multiprocessing/spawn.py, Line: 118
Break here?
Since we can now pass a global config file, let's just copy a config file and write the parameters into it directly.
But how do you know what can be configured?
$ python fairseq_cli/hydra_train.py --help
hydra_train is powered by Hydra.
== Configuration groups ==
Compose your configuration from those groups (group=option)
bpe: bert, byte_bpe, fastbpe, gpt2, hf_byte_bpe, sentencepiece, subword_nmt
criterion: adaptive_loss, cross_entropy, ctc, wav2vec
lr_scheduler: cosine, fixed, inverse_sqrt, polynomial_decay, reduce_lr_on_plateau, tri_stage, triangular
model: transformer_lm, wav2vec, wav2vec2, wav2vec_ctc, wav2vec_seq2seq
model/transformer_lm: transformer_lm_baevski_gbw, transformer_lm_baevski_wiki103, transformer_lm_big, transformer_lm_gbw, transformer_lm_gpt, transformer_lm_gpt2_big, transformer_lm_gpt2_medium, transformer_lm_gpt2_small, transformer_lm_wiki103
model/wav2vec: vq_wav2vec_gumbel
model/wav2vec2: wav2vec2_base, wav2vec2_large
optimizer: adam, nag
scoring: bleu, sacrebleu, wer
task: audio_pretraining, dummy_lm, language_modeling
tokenizer: moses
== Config ==
Override anything in the config (foo.bar=value)
_name: null
common:
_name: null
no_progress_bar: false
log_interval: 100
log_format: null
tensorboard_logdir: null
wandb_project: null
seed: 1
cpu: false
tpu: false
bf16: false
memory_efficient_bf16: false
fp16: false
memory_efficient_fp16: false
fp16_no_flatten_grads: false
fp16_init_scale: 128
fp16_scale_window: null
fp16_scale_tolerance: 0.0
min_loss_scale: 0.0001
threshold_loss_scale: null
user_dir: null
empty_cache_freq: 0
all_gather_list_size: 16384
model_parallel_size: 1
quantization_config_path: null
profile: false
reset_logging: true
common_eval:
_name: null
path: null
post_process: null
quiet: false
model_overrides: '{}'
results_path: null
distributed_training:
_name: null
distributed_world_size: 3
distributed_rank: 0
distributed_backend: nccl
distributed_init_method: null
distributed_port: -1
device_id: 0
distributed_no_spawn: false
ddp_backend: c10d
bucket_cap_mb: 25
fix_batches_to_gpus: false
find_unused_parameters: false
fast_stat_sync: false
broadcast_buffers: false
distributed_wrapper: DDP
slowmo_momentum: null
slowmo_algorithm: LocalSGD
localsgd_frequency: 3
nprocs_per_node: 3
pipeline_model_parallel: false
pipeline_balance: null
pipeline_devices: null
pipeline_chunks: 0
pipeline_encoder_balance: null
pipeline_encoder_devices: null
pipeline_decoder_balance: null
pipeline_decoder_devices: null
pipeline_checkpoint: never
zero_sharding: none
tpu: ${common.tpu}
dataset:
_name: null
num_workers: 1
skip_invalid_size_inputs_valid_test: false
max_tokens: null
batch_size: null
required_batch_size_multiple: 8
required_seq_len_multiple: 1
dataset_impl: null
data_buffer_size: 10
train_subset: train
valid_subset: valid
validate_interval: 1
validate_interval_updates: 0
validate_after_updates: 0
fixed_validation_seed: null
disable_validation: false
max_tokens_valid: null
batch_size_valid: null
curriculum: 0
gen_subset: test
num_shards: 1
shard_id: 0
optimization:
_name: null
max_epoch: 0
max_update: 0
stop_time_hours: 0.0
clip_norm: 0.0
sentence_avg: false
update_freq:
- 1
lr:
- 0.25
min_lr: -1.0
use_bmuf: false
checkpoint:
_name: null
save_dir: checkpoints
restore_file: checkpoint_last.pt
finetune_from_model: null
reset_dataloader: false
reset_lr_scheduler: false
reset_meters: false
reset_optimizer: false
optimizer_overrides: '{}'
save_interval: 1
save_interval_updates: 0
keep_interval_updates: -1
keep_last_epochs: -1
keep_best_checkpoints: -1
no_save: false
no_epoch_checkpoints: false
no_last_checkpoints: false
no_save_optimizer_state: false
best_checkpoint_metric: loss
maximize_best_checkpoint_metric: false
patience: -1
checkpoint_suffix: ''
checkpoint_shard_count: 1
model_parallel_size: ${common.model_parallel_size}
distributed_rank: ${distributed_training.distributed_rank}
bmuf:
_name: null
block_lr: 1.0
block_momentum: 0.875
global_sync_iter: 50
warmup_iterations: 500
use_nbm: false
average_sync: false
distributed_world_size: ${distributed_training.distributed_world_size}
generation:
_name: null
beam: 5
nbest: 1
max_len_a: 0.0
max_len_b: 200
min_len: 1
match_source_len: false
unnormalized: false
no_early_stop: false
no_beamable_mm: false
lenpen: 1.0
unkpen: 0.0
replace_unk: null
sacrebleu: false
score_reference: false
prefix_size: 0
no_repeat_ngram_size: 0
sampling: false
sampling_topk: -1
sampling_topp: -1.0
constraints: null
temperature: 1.0
diverse_beam_groups: -1
diverse_beam_strength: 0.5
diversity_rate: -1.0
print_alignment: false
print_step: false
lm_path: null
lm_weight: 0.0
iter_decode_eos_penalty: 0.0
iter_decode_max_iter: 10
iter_decode_force_max_iter: false
iter_decode_with_beam: 1
iter_decode_with_external_reranker: false
retain_iter_history: false
retain_dropout: false
retain_dropout_modules: null
decoding_format: null
no_seed_provided: false
eval_lm:
_name: null
output_word_probs: false
output_word_stats: false
context_window: 0
softmax_batch: 9223372036854775807
interactive:
_name: null
buffer_size: 0
input: '-'
model: ???
task: null
criterion:
_name: cross_entropy
sentence_avg: ${optimization.sentence_avg}
optimizer: null
lr_scheduler:
_name: fixed
force_anneal: null
lr_shrink: 0.1
warmup_updates: 0
lr: ${optimization.lr}
scoring: null
bpe: null
tokenizer: null
Powered by Hydra (https://hydra.cc)
Use --hydra-help to view Hydra specific help
Open the config file and add or modify the corresponding sections — and remember, nothing may be defined twice!
Tip 3: Fairseq's data paths default to train.tsv and valid.tsv
For example, to switch to your own dataset, create train.tsv and valid.tsv in a directory and point the task at it:
task:
  _name: audio_pretraining
  data: /data/Cache/SLR18/
  max_sample_size: 250000
  min_sample_size: 32000
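A quick sanity check — a sketch assuming the task.data directory above — that the files Fairseq expects are actually in place:

import os

data_dir = "/data/Cache/SLR18/"  # the task.data value from the config above
for name in ("train.tsv", "valid.tsv"):
    path = os.path.join(data_dir, name)
    print(path, "ok" if os.path.exists(path) else "MISSING")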
Tip 4: out of memory — even the 24 GB of VRAM on a 3090 is not enough. Take three seconds to envy that deep-pocketed 64 × V100 rig.
OOM: Ran out of memory with exception: CUDA out of memory. Tried to allocate 20.83 GiB (GPU 1; 23.70 GiB total capacity; 1.99 GiB already allocated; 20.20 GiB free; 2.24 GiB reserved in total by PyTorch)
Cutting batch_size to 10 was no damn use: still 20.83 GiB.
Setting max_tokens: 1400000 -> 800000 did nothing either.
What is going on? It turns out Fairseq also sizes the samples for the validation phase, and tracing shows it defaults to roughly 1300 samples per validation batch, versus only 3-4 during training.
The knobs are batch_size_valid and max_tokens_valid; the exact values you will have to find for yourself. Change
max_tokens_valid: null
batch_size_valid: null
to
max_tokens_valid: 200000
or
batch_size_valid: 20
The standard training command:
python fairseq_cli/hydra_train.py \
distributed_training.distributed_world_size=2 +optimization.update_freq='[12]' \
--config-path /home/linger/Documents/Research/fairseq/config/finetuning \
--config-name vox_960h_2
Possible problems
A typical error:
FileNotFoundError: [Errno 2] No such file or directory: '~/Documents/Research/fairseq/cache/dev_other.ltr'
Fix:
python examples/wav2vec/wav2vec_manifest.py /data/Cache/SLR12/dev-other --dest ~/Documents/Research/fairseq/cache --ext flac --valid-percent 0
split=dev_other
python examples/wav2vec/libri_labels.py ~/Documents/Research/fairseq/cache/dev_other.tsv --output-dir ~/Documents/Research/fairseq/cache/ --output-name $split
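For reference, the manifest that wav2vec_manifest.py writes is a plain TSV: the first line is the audio root directory, and every following line is a file path relative to that root plus its sample count. A sketch for reading one back (the file contents in the comment are illustrative):

# dev_other.tsv looks roughly like:
#   /data/Cache/SLR12/dev-other
#   116/288045/116-288045-0000.flac	106160
#   ...
with open("dev_other.tsv") as f:
    root = f.readline().strip()  # first line: the audio root directory
    for line in f:
        rel_path, n_samples = line.rstrip("\n").split("\t")
        print(root, rel_path, int(n_samples))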
You can also prepare a smaller dataset and YAML config just for debugging:
dataset:
  num_workers: 0 # bug: multi-process data loading cannot be debugged
  max_tokens: 1280000
  skip_invalid_size_inputs_valid_test: true
  valid_subset: dev_other
  max_tokens_valid: 1280000
For example: distributed_training.distributed_world_size=1 --config-path /home/linger/Documents/Research/fairseq/config/finetuning --config-name demo
Also note: in the pretrained model, 'latent_temp' is stored as the string '(2.0,0.1,0.999995)', but the parameter system needs it to be the tuple (2.0, 0.1, 0.999995).
Call-stack analysis:
hydra_train -> cli_main -> hydra_main -> _run_hydra ->
hydra = run_and_report(
    lambda: Hydra.create_main_hydra2(
        task_name=task_name, config_search_path=search_path, strict=strict
    )
)
if args.run:
    run_and_report(
        lambda: hydra.run(
            config_name=config_name,
            task_function=task_function,
            overrides=args.overrides,
        )
    )
elif args.multirun:
    run_and_report(
        lambda: hydra.multirun(
            config_name=config_name,
            task_function=task_function,
            overrides=args.overrides,
        )
    )
From here we land in a peculiar call site: hydra/_internal/hydra.py

class Hydra:
    @classmethod
    def create_main_hydra2(...)
And this line is truly a WTF piece of writing: a variable named hydra, defined inside the implementation of the Hydra class itself????????
hydra = cls(task_name=task_name, config_loader=config_loader)
from hydra.core.global_hydra import GlobalHydra
hydra/_internal/config_loader_impl.py

class ConfigLoaderImpl(ConfigLoader):
    """
    Configuration loader
    """
/home/linger/Documents/workspace/fairseq/fairseq/tasks/audio_pretraining.py
    build_model
/home/linger/Documents/workspace/fairseq/fairseq/tasks/fairseq_task.py
    model = models.build_model(cfg, self)
/home/linger/Documents/workspace/fairseq/fairseq/models/__init__.py
    def build_model(cfg: FairseqDataclass, task):
/home/linger/Documents/workspace/fairseq/fairseq/models/wav2vec/wav2vec2_asr.py
    @classmethod
    def build_model(cls, cfg: Wav2Vec2CtcConfig, task: FairseqTask):
        """Build a new model instance."""
        w2v_encoder = Wav2VecEncoder(cfg, task.target_dictionary)
        return cls(cfg, w2v_encoder)
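The dispatch along this chain boils down to a registry lookup keyed by the config's _name; a simplified sketch of the mechanism (not Fairseq's exact code):

MODEL_REGISTRY = {}

def register_model(name, dataclass=None):
    # Decorator that records a model class under its string name.
    def wrapper(cls):
        MODEL_REGISTRY[name] = cls
        return cls
    return wrapper

def build_model(cfg, task):
    # cfg may be a dataclass, an OmegaConf node, or an argparse.Namespace --
    # which is exactly why the same variable name keeps changing meaning.
    name = getattr(cfg, "_name", None) or getattr(cfg, "arch", None)
    return MODEL_REGISTRY[name].build_model(cfg, task)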
The core loading mechanism:
if cfg.w2v_args is None:
    state = checkpoint_utils.load_checkpoint_to_cpu(cfg.w2v_path, arg_overrides)
    # `state` is the loaded checkpoint with arg_overrides applied on top of
    # its 'args' or 'cfg' section
    w2v_args = state.get("cfg", None)
    if w2v_args is None:
        # fall back to the legacy 'args' Namespace when 'cfg' is absent
        w2v_args = convert_namespace_to_omegaconf(state["args"])

    # linger: coerce the latent_temp string back into a tuple
    try:
        w2v_args.model.w2v_args.model.latent_temp = eval(
            w2v_args.model.w2v_args.model.latent_temp
        )
    except:
        pass
    # linger

    # store the state's cfg into cfg.w2v_args
    cfg.w2v_args = w2v_args
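As an aside, ast.literal_eval would be a safer way than eval to coerce that stored string back into a tuple (a sketch):

import ast

latent_temp = "(2.0,0.1,0.999995)"           # as stored in the checkpoint
latent_temp = ast.literal_eval(latent_temp)  # -> (2.0, 0.1, 0.999995), no code execution

Dumping the cfg carried by the loaded state shows the full nested structure: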
>>> print(json.dumps(eval(str(state.get("cfg", None) )), indent=4))
{
"_name": null,
"common": {
"_name": null,
"no_progress_bar": false,
"log_interval": 500,
"log_format": "json",
"tensorboard_logdir": "",
"wandb_project": null,
"seed": 1337,
"cpu": false,
"tpu": false,
"bf16": false,
"memory_efficient_bf16": false,
"fp16": true,
"memory_efficient_fp16": false,
"fp16_no_flatten_grads": false,
"fp16_init_scale": 128,
"fp16_scale_window": null,
"fp16_scale_tolerance": 0.0,
"min_loss_scale": 0.0001,
"threshold_loss_scale": null,
"user_dir": null,
"empty_cache_freq": 0,
"all_gather_list_size": 16384,
"model_parallel_size": 1,
"quantization_config_path": null,
"profile": false,
"reset_logging": true
},
"common_eval": {
"_name": null,
"path": null,
"post_process": "letter",
"quiet": false,
"model_overrides": "{}",
"results_path": null
},
"distributed_training": {
"_name": null,
"distributed_world_size": 24,
"distributed_rank": 0,
"distributed_backend": "nccl",
"distributed_init_method": "tcp://learnfair1679:56443",
"distributed_port": 56443,
"device_id": 0,
"distributed_no_spawn": true,
"ddp_backend": "no_c10d",
"bucket_cap_mb": 25,
"fix_batches_to_gpus": false,
"find_unused_parameters": false,
"fast_stat_sync": false,
"broadcast_buffers": false,
"distributed_wrapper": "DDP",
"slowmo_momentum": null,
"slowmo_algorithm": "LocalSGD",
"localsgd_frequency": 3,
"nprocs_per_node": 8,
"pipeline_model_parallel": false,
"pipeline_balance": null,
"pipeline_devices": null,
"pipeline_chunks": null,
"pipeline_encoder_balance": null,
"pipeline_encoder_devices": null,
"pipeline_decoder_balance": null,
"pipeline_decoder_devices": null,
"pipeline_checkpoint": "never",
"zero_sharding": "none",
"tpu": false
},
"dataset": {
"_name": null,
"num_workers": 4,
"skip_invalid_size_inputs_valid_test": false,
"max_tokens": 1280000,
"batch_size": null,
"required_batch_size_multiple": 8,
"required_seq_len_multiple": 1,
"dataset_impl": null,
"data_buffer_size": 10,
"train_subset": "train",
"valid_subset": "dev_other",
"validate_interval": 1,
"validate_interval_updates": 0,
"validate_after_updates": 10000,
"fixed_validation_seed": null,
"disable_validation": false,
"max_tokens_valid": 1280000,
"batch_size_valid": null,
"curriculum": 0,
"gen_subset": "test",
"num_shards": 1,
"shard_id": 0
},
"optimization": {
"_name": null,
"max_epoch": 0,
"max_update": 500000,
"stop_time_hours": 0.0,
"clip_norm": 0.0,
"sentence_avg": true,
"update_freq": [
1
],
"lr": [
3e-05
],
"min_lr": -1.0,
"use_bmuf": false
},
"checkpoint": {
"_name": null,
"save_dir": "/checkpoint/michaelauli/asr/pseudolbl/960h_vox.fp16.u500000.savg.nrm.ltr.m_static.mstd0.mask10.mprob0.1.ld0.1.mc_static.mcstd0.maskc64.mcprob0.1.fgm0.0.ffu10000.lr3e-05.warmup40000.hld210000.dec250000.frs0.05.fd0.0.drop0.0.ad0.1.atd0.0.ms1280000.sd1337.uf1.ngpu24",
"restore_file": "checkpoint_last.pt",
"finetune_from_model": null,
"reset_dataloader": false,
"reset_lr_scheduler": false,
"reset_meters": false,
"reset_optimizer": false,
"optimizer_overrides": "{}",
"save_interval": 1,
"save_interval_updates": 0,
"keep_interval_updates": -1,
"keep_last_epochs": -1,
"keep_best_checkpoints": -1,
"no_save": false,
"no_epoch_checkpoints": true,
"no_last_checkpoints": false,
"no_save_optimizer_state": false,
"best_checkpoint_metric": "wer",
"maximize_best_checkpoint_metric": false,
"patience": -1,
"checkpoint_suffix": "",
"checkpoint_shard_count": 1,
"model_parallel_size": 1,
"distributed_rank": 0
},
"bmuf": {
"_name": null,
"block_lr": 1.0,
"block_momentum": 0.875,
"global_sync_iter": 50,
"warmup_iterations": 500,
"use_nbm": false,
"average_sync": false,
"distributed_world_size": 24
},
"generation": {
"_name": null,
"beam": 5,
"nbest": 1,
"max_len_a": 0.0,
"max_len_b": 200,
"min_len": 1,
"match_source_len": false,
"unnormalized": false,
"no_early_stop": false,
"no_beamable_mm": false,
"lenpen": 1.0,
"unkpen": 0.0,
"replace_unk": null,
"sacrebleu": false,
"score_reference": false,
"prefix_size": 0,
"no_repeat_ngram_size": 0,
"sampling": false,
"sampling_topk": -1,
"sampling_topp": -1.0,
"constraints": null,
"temperature": 1.0,
"diverse_beam_groups": -1,
"diverse_beam_strength": 0.5,
"diversity_rate": -1.0,
"print_alignment": false,
"print_step": false,
"lm_path": null,
"lm_weight": 0.0,
"iter_decode_eos_penalty": 0.0,
"iter_decode_max_iter": 10,
"iter_decode_force_max_iter": false,
"iter_decode_with_beam": 1,
"iter_decode_with_external_reranker": false,
"retain_iter_history": false,
"retain_dropout": false,
"retain_dropout_modules": null,
"decoding_format": null,
"no_seed_provided": false
},
"eval_lm": {
"_name": null,
"output_word_probs": false,
"output_word_stats": false,
"context_window": 0,
"softmax_batch": 9223372036854775807
},
"interactive": {
"_name": null,
"buffer_size": 0,
"input": "-"
},
"model": {
"_name": "wav2vec_ctc",
"w2v_path": "/private/home/abaevski/models/wav2vec2/wav2vec_vox_new.pt",
"no_pretrained_weights": false,
"dropout_input": 0.0,
"final_dropout": 0.0,
"dropout": 0.0,
"attention_dropout": 0.0,
"activation_dropout": 0.1,
"apply_mask": true,
"mask_length": 10,
"mask_prob": 0.5,
"mask_selection": "static",
"mask_other": 0.0,
"no_mask_overlap": false,
"mask_channel_length": 64,
"mask_channel_prob": 0.25,
"mask_channel_selection": "static",
"mask_channel_other": 0.0,
"no_mask_channel_overlap": false,
"freeze_finetune_updates": 10000,
"feature_grad_mult": 0.0,
"layerdrop": 0.1,
"normalize": true,
"data": "/private/home/qiantong/w2v/data/train_36s_short_960h_rescore",
"w2v_args": {
"common": {
"no_progress_bar": false,
"log_interval": 100,
"log_format": "json",
"tensorboard_logdir": "",
"wandb_project": null,
"seed": 5,
"cpu": false,
"tpu": false,
"bf16": false,
"memory_efficient_bf16": false,
"fp16": true,
"memory_efficient_fp16": false,
"fp16_no_flatten_grads": false,
"fp16_init_scale": 128,
"fp16_scale_window": null,
"fp16_scale_tolerance": 0.0,
"min_loss_scale": 0.0001,
"threshold_loss_scale": null,
"user_dir": null,
"empty_cache_freq": 0,
"all_gather_list_size": 16384,
"model_parallel_size": 1,
"quantization_config_path": null,
"profile": false,
"reset_logging": true
},
"common_eval": {
"path": null,
"post_process": null,
"quiet": false,
"model_overrides": "{}",
"results_path": null
},
"distributed_training": {
"distributed_world_size": 128,
"distributed_rank": 0,
"distributed_backend": "nccl",
"distributed_init_method": "tcp://learnfair1331:55498",
"distributed_port": 55498,
"device_id": 0,
"distributed_no_spawn": true,
"ddp_backend": "c10d",
"bucket_cap_mb": 25,
"fix_batches_to_gpus": false,
"find_unused_parameters": true,
"fast_stat_sync": false,
"broadcast_buffers": false,
"distributed_wrapper": "DDP",
"slowmo_momentum": null,
"slowmo_algorithm": "LocalSGD",
"localsgd_frequency": 3,
"nprocs_per_node": 3,
"pipeline_model_parallel": false,
"pipeline_balance": null,
"pipeline_devices": null,
"pipeline_chunks": 0,
"pipeline_encoder_balance": null,
"pipeline_encoder_devices": null,
"pipeline_decoder_balance": null,
"pipeline_decoder_devices": null,
"pipeline_checkpoint": "never",
"zero_sharding": "none",
"tpu": false
},
"dataset": {
"num_workers": 6,
"skip_invalid_size_inputs_valid_test": true,
"max_tokens": 1200000,
"batch_size": null,
"required_batch_size_multiple": 8,
"required_seq_len_multiple": 1,
"dataset_impl": null,
"data_buffer_size": 10,
"train_subset": "train",
"valid_subset": "valid",
"validate_interval": 1,
"validate_interval_updates": 0,
"validate_after_updates": 0,
"fixed_validation_seed": null,
"disable_validation": false,
"max_tokens_valid": 1200000,
"batch_size_valid": null,
"curriculum": 0,
"gen_subset": "test",
"num_shards": 1,
"shard_id": 0
},
"optimization": {
"max_epoch": 0,
"max_update": 1000000,
"stop_time_hours": 0,
"clip_norm": 25,
"sentence_avg": false,
"update_freq": [
1
],
"lr": [
0.005
],
"min_lr": -1,
"use_bmuf": false
},
"checkpoint": {
"save_dir": "/checkpoint/abaevski/asr/speechbert_raw_big_dbg/prenorm_ln_stable_repr_lr.qtz.lnfrst.lnr.cb.nrm.mlp768.pq.lv320.lvb2.ab0.9_0.98.lr0.005.wu32000.mask10.mprob0.65.mstd0.drp_i0.1.drp_f0.1.in0.0.nt_gaus.lnf-1.ng512.fgm1.0.nep.qini.qini1.pen0_0_0.1_0.cpl1.ld0.0.wd0.01.uf1.mu1000000.s5.ngpu128",
"restore_file": "checkpoint_last.pt",
"finetune_from_model": null,
"reset_dataloader": false,
"reset_lr_scheduler": false,
"reset_meters": false,
"reset_optimizer": false,
"optimizer_overrides": "{}",
"save_interval": 1,
"save_interval_updates": 25000,
"keep_interval_updates": 1,
"keep_last_epochs": -1,
"keep_best_checkpoints": -1,
"no_save": false,
"no_epoch_checkpoints": true,
"no_last_checkpoints": false,
"no_save_optimizer_state": false,
"best_checkpoint_metric": "loss",
"maximize_best_checkpoint_metric": false,
"patience": -1,
"checkpoint_suffix": "",
"checkpoint_shard_count": 1,
"model_parallel_size": 1,
"distributed_rank": 0
},
"bmuf": {
"block_lr": 1,
"block_momentum": 0.875,
"global_sync_iter": 50,
"warmup_iterations": 500,
"use_nbm": false,
"average_sync": false,
"distributed_world_size": 128
},
"generation": {
"beam": 5,
"nbest": 1,
"max_len_a": 0,
"max_len_b": 200,
"min_len": 1,
"match_source_len": false,
"unnormalized": false,
"no_early_stop": false,
"no_beamable_mm": false,
"lenpen": 1,
"unkpen": 0,
"replace_unk": null,
"sacrebleu": false,
"score_reference": false,
"prefix_size": 0,
"no_repeat_ngram_size": 0,
"sampling": false,
"sampling_topk": -1,
"sampling_topp": -1.0,
"constraints": null,
"temperature": 1.0,
"diverse_beam_groups": -1,
"diverse_beam_strength": 0.5,
"diversity_rate": -1.0,
"print_alignment": false,
"print_step": false,
"lm_path": null,
"lm_weight": 0.0,
"iter_decode_eos_penalty": 0.0,
"iter_decode_max_iter": 10,
"iter_decode_force_max_iter": false,
"iter_decode_with_beam": 1,
"iter_decode_with_external_reranker": false,
"retain_iter_history": false,
"retain_dropout": false,
"retain_dropout_modules": null,
"decoding_format": null,
"no_seed_provided": false
},
"eval_lm": {
"output_word_probs": false,
"output_word_stats": false,
"context_window": 0,
"softmax_batch": 9223372036854775807
},
"interactive": {
"buffer_size": 0,
"input": "-"
},
"task": {
"_name": "audio_pretraining",
"data": "/private/home/abaevski/data/librivox/no_silence",
"labels": null,
"sample_rate": 16000,
"normalize": true,
"enable_padding": false,
"max_sample_size": 320000,
"min_sample_size": 32000,
"eval_wer": false,
"eval_wer_config": {
"beam": 5,
"nbest": 1,
"max_len_a": 0,
"max_len_b": 200,
"min_len": 1,
"match_source_len": false,
"unnormalized": false,
"no_early_stop": false,
"no_beamable_mm": false,
"lenpen": 1,
"unkpen": 0,
"replace_unk": null,
"sacrebleu": false,
"score_reference": false,
"prefix_size": 0,
"no_repeat_ngram_size": 0,
"sampling": false,
"sampling_topk": -1,
"sampling_topp": -1.0,
"constraints": null,
"temperature": 1.0,
"diverse_beam_groups": -1,
"diverse_beam_strength": 0.5,
"diversity_rate": -1.0,
"print_alignment": false,
"print_step": false,
"lm_path": null,
"lm_weight": 0.0,
"iter_decode_eos_penalty": 0.0,
"iter_decode_max_iter": 10,
"iter_decode_force_max_iter": false,
"iter_decode_with_beam": 1,
"iter_decode_with_external_reranker": false,
"retain_iter_history": false,
"retain_dropout": false,
"retain_dropout_modules": null,
"decoding_format": null,
"no_seed_provided": false
},
"eval_wer_tokenizer": null,
"eval_wer_post_process": "letter",
"autoregressive": false
},
"criterion": {
"_name": "wav2vec",
"infonce": false,
"loss_weights": null,
"log_keys": []
},
"optimizer": {
"_name": "adam",
"adam_betas": "(0.9,0.98)",
"adam_eps": 1e-06,
"weight_decay": 0.01,
"use_old_adam": false,
"tpu": false,
"lr": [
0.005
]
},
"lr_scheduler": {
"_name": "polynomial_decay",
"warmup_updates": 32000,
"force_anneal": null,
"end_learning_rate": 0.0,
"power": 1.0,
"total_num_update": 1000000,
"lr": [
0.005
]
},
"model": {
"_name": "wav2vec2",
"extractor_mode": "layer_norm",
"encoder_layers": 24,
"encoder_embed_dim": 1024,
"encoder_ffn_embed_dim": 4096,
"encoder_attention_heads": 16,
"activation_fn": "gelu",
"dropout": 0.0,
"attention_dropout": 0.1,
"activation_dropout": 0.0,
"encoder_layerdrop": 0.0,
"dropout_input": 0.1,
"dropout_features": 0.1,
"final_dim": 768,
"layer_norm_first": true,
"conv_feature_layers": "[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] * 2",
"conv_bias": true,
"logit_temp": 0.1,
"quantize_targets": true,
"quantize_input": false,
"same_quantizer": false,
"target_glu": false,
"feature_grad_mult": 1.0,
"latent_vars": 320,
"latent_groups": 2,
"latent_dim": 0,
"mask_length": 10,
"mask_prob": 0.65,
"mask_selection": "static",
"mask_other": 0,
"no_mask_overlap": false,
"mask_min_space": 1,
"mask_channel_length": 10,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_channel_other": 0,
"no_mask_channel_overlap": false,
"mask_channel_min_space": 1,
"num_negatives": 100,
"negatives_from_everywhere": false,
"cross_sample_negatives": 0,
"codebook_negatives": 0,
"conv_pos": 128,
"conv_pos_groups": 16,
"latent_temp": "(2.0,0.1,0.999995)"
}
}
},
"task": {
"_name": "audio_pretraining",
"data": "/private/home/qiantong/w2v/data/train_36s_short_960h_rescore",
"labels": "ltr",
"sample_rate": 16000,
"normalize": true,
"enable_padding": false,
"max_sample_size": null,
"min_sample_size": null,
"eval_wer": false,
"eval_wer_config": {
"_name": null,
"beam": 5,
"nbest": 1,
"max_len_a": 0.0,
"max_len_b": 200,
"min_len": 1,
"match_source_len": false,
"unnormalized": false,
"no_early_stop": false,
"no_beamable_mm": false,
"lenpen": 1.0,
"unkpen": 0.0,
"replace_unk": null,
"sacrebleu": false,
"score_reference": false,
"prefix_size": 0,
"no_repeat_ngram_size": 0,
"sampling": false,
"sampling_topk": -1,
"sampling_topp": -1.0,
"constraints": null,
"temperature": 1.0,
"diverse_beam_groups": -1,
"diverse_beam_strength": 0.5,
"diversity_rate": -1.0,
"print_alignment": false,
"print_step": false,
"lm_path": null,
"lm_weight": 0.0,
"iter_decode_eos_penalty": 0.0,
"iter_decode_max_iter": 10,
"iter_decode_force_max_iter": false,
"iter_decode_with_beam": 1,
"iter_decode_with_external_reranker": false,
"retain_iter_history": false,
"retain_dropout": false,
"retain_dropout_modules": null,
"decoding_format": null,
"no_seed_provided": false
},
"eval_wer_tokenizer": null,
"eval_wer_post_process": "letter",
"autoregressive": false
},
"criterion": {
"_name": "wav2vec",
"infonce": false,
"loss_weights": null,
"log_keys": []
},
"optimizer": {
"_name": "adam",
"adam_betas": "(0.9, 0.98)",
"adam_eps": 1e-08,
"weight_decay": 0.0,
"use_old_adam": false,
"tpu": false,
"lr": [
3e-05
]
},
"lr_scheduler": {
"_name": "tri_stage",
"warmup_steps": 40000,
"hold_steps": 210000,
"decay_steps": 250000,
"phase_ratio": null,
"init_lr_scale": 0.01,
"final_lr_scale": 0.05,
"max_update": 500000.0,
"lr": [
3e-05
]
},
"scoring": {
"_name": "bleu",
"pad": 1,
"eos": 2,
"unk": 3
},
"bpe": null,
"tokenizer": null
}
From this dump you can see the model has an obvious nested, repeated-loading problem. The top-level keys:
>>> print(json.dumps(list(eval(str(state.get("cfg", None) )).keys()), indent=4))
[
"_name",
"common",
"common_eval",
"distributed_training",
"dataset",
"optimization",
"checkpoint",
"bmuf",
"generation",
"eval_lm",
"interactive",
"model",
"task",
"criterion",
"optimizer",
"lr_scheduler",
"scoring",
"bpe",
"tokenizer"
]
Among them, checkpoint, model, and task all store similar data, which is probably why things get loaded again later: cfg -> w2v_args -> task -> model.
This repeated loading likely produces both redundant loads and inconsistencies!
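A sketch (reusing the state object loaded earlier) that makes the duplication visible by intersecting the top-level sections with those nested under model.w2v_args:

# Convert the OmegaConf tree into a plain dict, the same trick as the dumps above.
cfg_dict = eval(str(state.get("cfg", None)))

top_level = set(cfg_dict.keys())
nested = set(cfg_dict["model"]["w2v_args"].keys())

# Sections stored both at the top level and again inside the nested w2v_args:
print(sorted(top_level & nested))
# e.g. ['bmuf', 'checkpoint', 'common', ..., 'model', 'optimizer', 'task']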
>>> print(json.dumps(list(eval(str(state.get("cfg", None)['model'] )).keys()), indent=4))
[
"_name",
"w2v_path",
"no_pretrained_weights",
"dropout_input",
"final_dropout",
"dropout",
"attention_dropout",
"activation_dropout",
"apply_mask",
"mask_length",
"mask_prob",
"mask_selection",
"mask_other",
"no_mask_overlap",
"mask_channel_length",
"mask_channel_prob",
"mask_channel_selection",
"mask_channel_other",
"no_mask_channel_overlap",
"freeze_finetune_updates",
"feature_grad_mult",
"layerdrop",
"normalize",
"data",
"w2v_args"
]
>>> print(json.dumps(list(eval(str(state.get("cfg", None)['task'] )).keys()), indent=4))
[
"_name",
"data",
"labels",
"sample_rate",
"normalize",
"enable_padding",
"max_sample_size",
"min_sample_size",
"eval_wer",
"eval_wer_config",
"eval_wer_tokenizer",
"eval_wer_post_process",
"autoregressive"
]
And this w2v_args.task is that same table of settings you are thoroughly sick of seeing by now:
print(json.dumps(eval(str(w2v_args.task)), indent=4))
@register_task("audio_pretraining", dataclass=AudioPretrainingConfig)
class AudioPretrainingTask(FairseqTask):
    """"""

    cfg: AudioPretrainingConfig

    def __init__(
        self,
        cfg: AudioPretrainingConfig,
        source_dictionary=None,
        target_dictionary=None,
    ):
        super().__init__(cfg)
        self._target_dictionary = target_dictionary
        self._source_dictionary = source_dictionary
        if cfg.eval_wer:
            assert cfg.labels is not None, "eval_wer can only be set during fine-tuning"

    @classmethod
    def setup_task(cls, cfg: AudioPretrainingConfig, **kwargs):
        """Setup the task (e.g., load dictionaries).

        Args:
            cfg (AudioPretrainingConfig): configuration of this task
        """
        if cfg.labels:
            dict_path = os.path.join(cfg.data, f"dict.{cfg.labels}.txt")
            target_dictionary = Dictionary.load(dict_path)
        else:
            target_dictionary = None

        return cls(cfg, target_dictionary=target_dictionary)
# In effect this is a constructor wrapped in a classmethod factory — why not just construct the object directly?
# A strange wrapper that adds no value; wouldn't overriding __init__ and calling super() be sweeter?
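For contrast, a sketch of the direct style those comments are asking for (illustrative only, reusing the names from the snippet above; not a drop-in replacement):

class AudioPretrainingTask(FairseqTask):
    # Same behavior, but the dictionary is loaded in the constructor
    # instead of behind a classmethod factory.
    def __init__(self, cfg: AudioPretrainingConfig, **kwargs):
        super().__init__(cfg)
        self._source_dictionary = None
        if cfg.labels:
            dict_path = os.path.join(cfg.data, f"dict.{cfg.labels}.txt")
            self._target_dictionary = Dictionary.load(dict_path)
        else:
            self._target_dictionary = None
        if cfg.eval_wer:
            assert cfg.labels is not None, "eval_wer can only be set during fine-tuning"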