[2023-11-01 17:34:51,799][fairseq_cli.train][INFO] - {'_name': None, 'common': {'_name': None, 'no_progress_bar': False, 'log_interval': 200, 'log_format': 'json', 'log_file': None, 'aim_repo': None, 'aim_run_hash': None, 'tensorboard_logdir': None, 'wandb_project': 'VLSP_2023_WAV2VEC2_FAIRSEQ', 'azureml_logging': False, 'seed': 1, 'cpu': False, 'tpu': False, 'bf16': False, 'memory_efficient_bf16': False, 'fp16': True, 'memory_efficient_fp16': False, 'fp16_no_flatten_grads': False, 'fp16_init_scale': 128, 'fp16_scale_window': None, 'fp16_scale_tolerance': 0.0, 'on_cpu_convert_precision': False, 'min_loss_scale': 0.0001, 'threshold_loss_scale': None, 'amp': False, 'amp_batch_retries': 2, 'amp_init_scale': 128, 'amp_scale_window': None, 'user_dir': None, 'empty_cache_freq': 0, 'all_gather_list_size': 16384, 'model_parallel_size': 1, 'quantization_config_path': None, 'profile': False, 'reset_logging': False, 'suppress_crashes': False, 'use_plasma_view': False, 'plasma_path': '/tmp/plasma'}, 'common_eval': {'_name': None, 'path': None, 'post_process': None, 'quiet': False, 'model_overrides': '{}', 'results_path': None}, 'distributed_training': {'_name': None, 'distributed_world_size': 1, 'distributed_num_procs': 1, 'distributed_rank': 0, 'distributed_backend': 'nccl', 'distributed_init_method': None, 'distributed_port': -1, 'device_id': 0, 'distributed_no_spawn': False, 'ddp_backend': 'legacy_ddp', 'ddp_comm_hook': 'none', 'bucket_cap_mb': 25, 'fix_batches_to_gpus': False, 'find_unused_parameters': False, 'gradient_as_bucket_view': False, 'fast_stat_sync': False, 'heartbeat_timeout': -1, 'broadcast_buffers': False, 'slowmo_momentum': None, 'slowmo_base_algorithm': 'localsgd', 'localsgd_frequency': 3, 'nprocs_per_node': 1, 'pipeline_model_parallel': False, 'pipeline_balance': None, 'pipeline_devices': None, 'pipeline_chunks': 0, 'pipeline_encoder_balance': None, 'pipeline_encoder_devices': None, 'pipeline_decoder_balance': None, 'pipeline_decoder_devices': None, 'pipeline_checkpoint': 'never', 'zero_sharding': 'none', 'fp16': True, 'memory_efficient_fp16': False, 'tpu': False, 'no_reshard_after_forward': False, 'fp32_reduce_scatter': False, 'cpu_offload': False, 'use_sharded_state': False, 'not_fsdp_flatten_parameters': False}, 'dataset': {'_name': None, 'num_workers': 6, 'skip_invalid_size_inputs_valid_test': True, 'max_tokens': 3000000, 'batch_size': None, 'required_batch_size_multiple': 8, 'required_seq_len_multiple': 1, 'dataset_impl': None, 'data_buffer_size': 10, 'train_subset': 'train', 'valid_subset': 'valid', 'combine_valid_subsets': None, 'ignore_unused_valid_subsets': False, 'validate_interval': 1, 'validate_interval_updates': 0, 'validate_after_updates': 0, 'fixed_validation_seed': None, 'disable_validation': False, 'max_tokens_valid': 3000000, 'batch_size_valid': None, 'max_valid_steps': None, 'curriculum': 0, 'gen_subset': 'test', 'num_shards': 1, 'shard_id': 0, 'grouped_shuffling': False, 'update_epoch_batch_itr': False, 'update_ordered_indices_seed': False}, 'optimization': {'_name': None, 'max_epoch': 0, 'max_update': 400000, 'stop_time_hours': 0.0, 'clip_norm': 0.0, 'sentence_avg': False, 'update_freq': [1], 'lr': [0.0001], 'stop_min_lr': -1.0, 'use_bmuf': False, 'skip_remainder_batch': False, 'debug_param_names': False}, 'checkpoint': {'_name': None, 'save_dir': 'checkpoints', 'restore_file': 'checkpoint_last.pt', 'continue_once': None, 'finetune_from_model': None, 'reset_dataloader': False, 'reset_lr_scheduler': False, 'reset_meters': False, 'reset_optimizer': False, 'optimizer_overrides': '{}', 'save_interval': 1, 'save_interval_updates': 25000, 'keep_interval_updates': 1, 'keep_interval_updates_pattern': -1, 'keep_last_epochs': -1, 'keep_best_checkpoints': -1, 'no_save': False, 'no_epoch_checkpoints': True, 'no_last_checkpoints': False, 'no_save_optimizer_state': False, 'best_checkpoint_metric': 'loss', 'maximize_best_checkpoint_metric': False, 'patience': -1, 'checkpoint_suffix': '', 'checkpoint_shard_count': 1, 'load_checkpoint_on_all_dp_ranks': False, 'write_checkpoints_asynchronously': False, 'model_parallel_size': 1}, 'bmuf': {'_name': None, 'block_lr': 1.0, 'block_momentum': 0.875, 'global_sync_iter': 50, 'warmup_iterations': 500, 'use_nbm': False, 'average_sync': False, 'distributed_world_size': 1}, 'generation': {'_name': None, 'beam': 5, 'beam_mt': 0, 'nbest': 1, 'max_len_a': 0.0, 'max_len_b': 200, 'max_len_a_mt': 0.0, 'max_len_b_mt': 200, 'min_len': 1, 'match_source_len': False, 'unnormalized': False, 'no_early_stop': False, 'no_beamable_mm': False, 'lenpen': 1.0, 'lenpen_mt': 1.0, 'unkpen': 0.0, 'replace_unk': None, 'sacrebleu': False, 'score_reference': False, 'prefix_size': 0, 'no_repeat_ngram_size': 0, 'sampling': False, 'sampling_topk': -1, 'sampling_topp': -1.0, 'constraints': None, 'temperature': 1.0, 'diverse_beam_groups': -1, 'diverse_beam_strength': 0.5, 'diversity_rate': -1.0, 'print_alignment': None, 'print_step': False, 'lm_path': None, 'lm_weight': 0.0, 'iter_decode_eos_penalty': 0.0, 'iter_decode_max_iter': 10, 'iter_decode_force_max_iter': False, 'iter_decode_with_beam': 1, 'iter_decode_with_external_reranker': False, 'retain_iter_history': False, 'retain_dropout': False, 'retain_dropout_modules': None, 'decoding_format': None, 'no_seed_provided': False, 'eos_token': None}, 'eval_lm': {'_name': None, 'output_word_probs': False, 'output_word_stats': False, 'context_window': 0, 'softmax_batch': 9223372036854775807}, 'interactive': {'_name': None, 'buffer_size': 0, 'input': '-'}, 'model': {'_name': 'wav2vec2', 'extractor_mode': default, 'encoder_layers': 12, 'encoder_embed_dim': 768, 'encoder_ffn_embed_dim': 3072, 'encoder_attention_heads': 12, 'activation_fn': gelu, 'layer_type': transformer, 'dropout': 0.1, 'attention_dropout': 0.1, 'activation_dropout': 0.0, 'encoder_layerdrop': 0.05, 'dropout_input': 0.1, 'dropout_features': 0.1, 'final_dim': 256, 'layer_norm_first': False, 'conv_feature_layers': '[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]', 'conv_bias': False, 'logit_temp': 0.1, 'quantize_targets': True, 'quantize_input': False, 'same_quantizer': False, 'target_glu': False, 'feature_grad_mult': 0.1, 'quantizer_depth': 1, 'quantizer_factor': 3, 'latent_vars': 320, 'latent_groups': 2, 'latent_dim': 0, 'mask_length': 10, 'mask_prob': 0.65, 'mask_selection': static, 'mask_other': 0.0, 'no_mask_overlap': False, 'mask_min_space': 1, 'require_same_masks': True, 'mask_dropout': 0.0, 'mask_channel_length': 10, 'mask_channel_prob': 0.0, 'mask_channel_before': False, 'mask_channel_selection': static, 'mask_channel_other': 0.0, 'no_mask_channel_overlap': False, 'mask_channel_min_space': 1, 'num_negatives': 100, 'negatives_from_everywhere': False, 'cross_sample_negatives': 0, 'codebook_negatives': 0, 'conv_pos': 128, 'conv_pos_groups': 16, 'pos_conv_depth': 1, 'latent_temp': [2.0, 0.5, 0.999995], 'max_positions': 100000, 'checkpoint_activations': False, 'required_seq_len_multiple': 2, 'crop_seq_to_multiple': 1, 'depthwise_conv_kernel_size': 31, 'attn_type': '', 'pos_enc_type': 'abs', 'fp16': False, 'adp_num': -1, 'adp_dim': 64, 'adp_act_fn': 'relu', 'adp_trf_idx': 'all'}, 'task': {'_name': 'audio_pretraining', 'data': '/root/vlsp-2023-asr-ser/fairseq/data', 'labels': None, 'multi_corpus_keys': None, 'multi_corpus_sampling_weights': None, 'binarized_dataset': False, 'sample_rate': 16000, 'normalize': False, 'enable_padding': False, 'max_sample_size': 250000, 'min_sample_size': 32000, 'num_batch_buckets': 0, 'tpu': False, 'text_compression_level': none, 'rebuild_batches': True, 'precompute_mask_config': None, 'post_save_script': None, 'subsample': 1.0, 'seed': 1}, 'criterion': {'_name': 'wav2vec', 'infonce': True, 'loss_weights': [0.1, 10.0], 'log_keys': ['prob_perplexity', 'code_perplexity', 'temp']}, 'optimizer': {'_name': 'adam', 'adam_betas': '(0.9,0.98)', 'adam_eps': 1e-06, 'weight_decay': 0.01, 'use_old_adam': False, 'fp16_adam_stats': False, 'tpu': False, 'lr': [0.0001]}, 'lr_scheduler': {'_name': 'polynomial_decay', 'warmup_updates': 5000, 'force_anneal': None, 'end_learning_rate': 0.0, 'power': 1.0, 'total_num_update': 400000.0, 'lr': [0.0001]}, 'scoring': None, 'bpe': None, 'tokenizer': None, 'ema': {'_name': None, 'store_ema': False, 'ema_decay': 0.9999, 'ema_start_update': 0, 'ema_seed_model': None, 'ema_update_freq': 1, 'ema_fp32': False}, 'job_logging_cfg': {'version': 1, 'formatters': {'simple': {'format': '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'}}, 'handlers': {'console': {'class': 'logging.StreamHandler', 'formatter': 'simple', 'stream': 'ext://sys.stdout'}, 'file': {'class': 'logging.FileHandler', 'formatter': 'simple', 'filename': 'hydra_train.log'}}, 'root': {'level': 'INFO', 'handlers': ['console', 'file']}, 'disable_existing_loggers': False}} [2023-11-01 17:34:53,485][fairseq_cli.train][INFO] - Wav2Vec2Model( (feature_extractor): ConvFeatureExtractionModel( (conv_layers): ModuleList( (0): Sequential( (0): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False) (1): Dropout(p=0.0, inplace=False) (2): Fp32GroupNorm(512, 512, eps=1e-05, affine=True) (3): GELU(approximate='none') ) (1-4): 4 x Sequential( (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False) (1): Dropout(p=0.0, inplace=False) (2): GELU(approximate='none') ) (5-6): 2 x Sequential( (0): Conv1d(512, 512, kernel_size=(2,), stride=(2,), bias=False) (1): Dropout(p=0.0, inplace=False) (2): GELU(approximate='none') ) ) ) (post_extract_proj): Linear(in_features=512, out_features=768, bias=True) (dropout_input): Dropout(p=0.1, inplace=False) (dropout_features): Dropout(p=0.1, inplace=False) (quantizer): GumbelVectorQuantizer( (weight_proj): Linear(in_features=512, out_features=640, bias=True) ) (project_q): Linear(in_features=256, out_features=256, bias=True) (encoder): TransformerEncoder( (pos_conv): Sequential( (0): Conv1d(768, 768, kernel_size=(128,), stride=(1,), padding=(64,), groups=16) (1): SamePad() (2): GELU(approximate='none') ) (layers): ModuleList( (0-11): 12 x TransformerSentenceEncoderLayer( (self_attn): MultiheadAttention( (dropout_module): FairseqDropout() (k_proj): Linear(in_features=768, out_features=768, bias=True) (v_proj): Linear(in_features=768, out_features=768, bias=True) (q_proj): Linear(in_features=768, out_features=768, bias=True) (out_proj): Linear(in_features=768, out_features=768, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (dropout2): Dropout(p=0.0, inplace=False) (dropout3): Dropout(p=0.1, inplace=False) (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=768, out_features=3072, bias=True) (fc2): Linear(in_features=3072, out_features=768, bias=True) (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True) ) ) (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True) ) (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (final_proj): Linear(in_features=768, out_features=256, bias=True) ) [2023-11-01 17:34:53,488][fairseq_cli.train][INFO] - task: AudioPretrainingTask [2023-11-01 17:34:53,488][fairseq_cli.train][INFO] - model: Wav2Vec2Model [2023-11-01 17:34:53,488][fairseq_cli.train][INFO] - criterion: Wav2vecCriterion [2023-11-01 17:34:53,490][fairseq_cli.train][INFO] - num. shared model params: 95,044,608 (num. trained: 95,044,608) [2023-11-01 17:34:53,492][fairseq_cli.train][INFO] - num. expert model params: 0 (num. trained: 0) [2023-11-01 17:34:53,531][fairseq.data.audio.raw_audio_dataset][INFO] - loaded 19923, skipped 1006 samples [2023-11-01 17:34:53,765][fairseq.trainer][INFO] - detected shared parameter: feature_extractor.conv_layers.0.0.bias <- feature_extractor.conv_layers.1.0.bias [2023-11-01 17:34:53,765][fairseq.trainer][INFO] - detected shared parameter: feature_extractor.conv_layers.0.0.bias <- feature_extractor.conv_layers.2.0.bias [2023-11-01 17:34:53,765][fairseq.trainer][INFO] - detected shared parameter: feature_extractor.conv_layers.0.0.bias <- feature_extractor.conv_layers.3.0.bias [2023-11-01 17:34:53,765][fairseq.trainer][INFO] - detected shared parameter: feature_extractor.conv_layers.0.0.bias <- feature_extractor.conv_layers.4.0.bias [2023-11-01 17:34:53,765][fairseq.trainer][INFO] - detected shared parameter: feature_extractor.conv_layers.0.0.bias <- feature_extractor.conv_layers.5.0.bias [2023-11-01 17:34:53,765][fairseq.trainer][INFO] - detected shared parameter: feature_extractor.conv_layers.0.0.bias <- feature_extractor.conv_layers.6.0.bias [2023-11-01 17:34:53,766][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2023-11-01 17:34:53,766][fairseq.utils][INFO] - rank 0: capabilities = 8.9 ; total memory = 23.649 GB ; name = NVIDIA GeForce RTX 4090 [2023-11-01 17:34:53,766][fairseq.utils][INFO] - ***********************CUDA enviroments for all 1 workers*********************** [2023-11-01 17:34:53,766][fairseq_cli.train][INFO] - training on 1 devices (GPUs/TPUs) [2023-11-01 17:34:53,766][fairseq_cli.train][INFO] - max tokens per device = 3000000 and max sentences per device = None [2023-11-01 17:34:53,768][fairseq.trainer][INFO] - Preparing to load checkpoint checkpoints/checkpoint_last.pt [2023-11-01 17:34:53,768][fairseq.trainer][INFO] - No existing checkpoint found checkpoints/checkpoint_last.pt [2023-11-01 17:34:53,768][fairseq.trainer][INFO] - loading train data for epoch 1 [2023-11-01 17:34:54,108][fairseq.data.audio.raw_audio_dataset][INFO] - loaded 179557, skipped 8702 samples [2023-11-01 17:34:54,122][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 17:34:54,122][fairseq.tasks.fairseq_task][INFO] - reuse_dataloader = True [2023-11-01 17:34:54,122][fairseq.tasks.fairseq_task][INFO] - rebuild_batches = True [2023-11-01 17:34:54,123][fairseq.tasks.fairseq_task][INFO] - batches will be rebuilt for each epoch [2023-11-01 17:34:54,123][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 1 [2023-11-01 17:34:54,592][fairseq_cli.train][INFO] - begin dry-run validation on "valid" subset [2023-11-01 17:34:54,593][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 17:34:54,594][fairseq.tasks.fairseq_task][INFO] - reuse_dataloader = True [2023-11-01 17:34:54,594][fairseq.tasks.fairseq_task][INFO] - rebuild_batches = True [2023-11-01 17:34:54,594][fairseq.tasks.fairseq_task][INFO] - batches will be rebuilt for each epoch [2023-11-01 17:34:54,594][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 1 [2023-11-01 17:34:58,298][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 17:35:07,819][fairseq.trainer][INFO] - begin training epoch 1 [2023-11-01 17:35:07,820][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 17:35:47,891][train_inner][INFO] - {"epoch": 1, "update": 0.049, "loss": "8.808", "ntokens": "3170.32", "nsentences": "44.16", "prob_perplexity": "355.944", "code_perplexity": "350.103", "temp": "1.999", "loss_0": "6.683", "loss_1": "0.064", "loss_2": "2.061", "accuracy": "0.01926", "wps": "16447.2", "ups": "5.18", "wpb": "3170.3", "bsz": "44.2", "num_updates": "200", "lr": "4e-06", "gnorm": "1.242", "loss_scale": "128", "train_wall": "39", "gb_free": "15.2", "wall": "54"} [2023-11-01 17:36:25,738][train_inner][INFO] - {"epoch": 1, "update": 0.099, "loss": "6.887", "ntokens": "3173.6", "nsentences": "44.36", "prob_perplexity": "550.144", "code_perplexity": "544.357", "temp": "1.997", "loss_0": "6.661", "loss_1": "0.02", "loss_2": "0.205", "accuracy": "0.01877", "wps": "16771.8", "ups": "5.28", "wpb": "3173.6", "bsz": "44.4", "num_updates": "400", "lr": "8e-06", "gnorm": "0.158", "loss_scale": "128", "train_wall": "37", "gb_free": "13.5", "wall": "92"} [2023-11-01 17:37:02,857][train_inner][INFO] - {"epoch": 1, "update": 0.148, "loss": "6.711", "ntokens": "3176.6", "nsentences": "42.36", "prob_perplexity": "601.043", "code_perplexity": "595.616", "temp": "1.995", "loss_0": "6.659", "loss_1": "0.009", "loss_2": "0.043", "accuracy": "0.01861", "wps": "17116.7", "ups": "5.39", "wpb": "3176.6", "bsz": "42.4", "num_updates": "600", "lr": "1.2e-05", "gnorm": "0.078", "loss_scale": "128", "train_wall": "36", "gb_free": "13.8", "wall": "129"} [2023-11-01 17:37:39,707][train_inner][INFO] - {"epoch": 1, "update": 0.197, "loss": "6.685", "ntokens": "3205.76", "nsentences": "44.4", "prob_perplexity": "608.632", "code_perplexity": "602.282", "temp": "1.993", "loss_0": "6.659", "loss_1": "0.007", "loss_2": "0.019", "accuracy": "0.01894", "wps": "17399.6", "ups": "5.43", "wpb": "3205.8", "bsz": "44.4", "num_updates": "800", "lr": "1.6e-05", "gnorm": "0.06", "loss_scale": "128", "train_wall": "36", "gb_free": "15.2", "wall": "166"} [2023-11-01 17:38:16,626][train_inner][INFO] - {"epoch": 1, "update": 0.247, "loss": "6.678", "ntokens": "3177.48", "nsentences": "44.28", "prob_perplexity": "608.005", "code_perplexity": "601.493", "temp": "1.991", "loss_0": "6.659", "loss_1": "0.007", "loss_2": "0.012", "accuracy": "0.01897", "wps": "17214.3", "ups": "5.42", "wpb": "3177.5", "bsz": "44.3", "num_updates": "1000", "lr": "2e-05", "gnorm": "0.052", "loss_scale": "128", "train_wall": "36", "gb_free": "14.4", "wall": "203"} [2023-11-01 17:38:52,874][train_inner][INFO] - {"epoch": 1, "update": 0.296, "loss": "6.675", "ntokens": "3172.72", "nsentences": "47.44", "prob_perplexity": "606.4", "code_perplexity": "600.049", "temp": "1.989", "loss_0": "6.658", "loss_1": "0.008", "loss_2": "0.009", "accuracy": "0.02006", "wps": "17506.6", "ups": "5.52", "wpb": "3172.7", "bsz": "47.4", "num_updates": "1200", "lr": "2.4e-05", "gnorm": "0.053", "loss_scale": "128", "train_wall": "36", "gb_free": "13.4", "wall": "239"} [2023-11-01 17:39:28,952][train_inner][INFO] - {"epoch": 1, "update": 0.345, "loss": "6.532", "ntokens": "3210.08", "nsentences": "43.64", "prob_perplexity": "452.773", "code_perplexity": "446.693", "temp": "1.987", "loss_0": "6.477", "loss_1": "0.042", "loss_2": "0.012", "accuracy": "0.03114", "wps": "17796.2", "ups": "5.54", "wpb": "3210.1", "bsz": "43.6", "num_updates": "1400", "lr": "2.8e-05", "gnorm": "0.614", "loss_scale": "128", "train_wall": "35", "gb_free": "13.5", "wall": "275"} [2023-11-01 17:40:04,779][train_inner][INFO] - {"epoch": 1, "update": 0.394, "loss": "6.124", "ntokens": "3193.32", "nsentences": "45.44", "prob_perplexity": "220.971", "code_perplexity": "218.03", "temp": "1.985", "loss_0": "6.009", "loss_1": "0.095", "loss_2": "0.02", "accuracy": "0.06907", "wps": "17827.4", "ups": "5.58", "wpb": "3193.3", "bsz": "45.4", "num_updates": "1600", "lr": "3.2e-05", "gnorm": "1.422", "loss_scale": "128", "train_wall": "35", "gb_free": "12.5", "wall": "311"} [2023-11-01 17:40:21,870][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 64.0 [2023-11-01 17:40:41,432][train_inner][INFO] - {"epoch": 1, "update": 0.444, "loss": "5.671", "ntokens": "3197.84", "nsentences": "45", "prob_perplexity": "78.439", "code_perplexity": "77.247", "temp": "1.983", "loss_0": "5.521", "loss_1": "0.126", "loss_2": "0.024", "accuracy": "0.16493", "wps": "17450.4", "ups": "5.46", "wpb": "3197.8", "bsz": "45", "num_updates": "1800", "lr": "3.6e-05", "gnorm": "1.981", "loss_scale": "64", "train_wall": "36", "gb_free": "13.1", "wall": "348"} [2023-11-01 17:41:17,174][train_inner][INFO] - {"epoch": 1, "update": 0.493, "loss": "5.311", "ntokens": "3198.2", "nsentences": "42.12", "prob_perplexity": "27.107", "code_perplexity": "26.92", "temp": "1.981", "loss_0": "5.147", "loss_1": "0.138", "loss_2": "0.025", "accuracy": "0.24659", "wps": "17897", "ups": "5.6", "wpb": "3198.2", "bsz": "42.1", "num_updates": "2000", "lr": "4e-05", "gnorm": "2.41", "loss_scale": "64", "train_wall": "35", "gb_free": "14.3", "wall": "383"} [2023-11-01 17:41:52,506][train_inner][INFO] - {"epoch": 1, "update": 0.543, "loss": "5.097", "ntokens": "3204.48", "nsentences": "42.64", "prob_perplexity": "22.843", "code_perplexity": "22.773", "temp": "1.979", "loss_0": "4.934", "loss_1": "0.139", "loss_2": "0.024", "accuracy": "0.28191", "wps": "18140.3", "ups": "5.66", "wpb": "3204.5", "bsz": "42.6", "num_updates": "2200", "lr": "4.4e-05", "gnorm": "2.379", "loss_scale": "64", "train_wall": "35", "gb_free": "13.6", "wall": "419"} [2023-11-01 17:42:28,501][train_inner][INFO] - {"epoch": 1, "update": 0.592, "loss": "4.956", "ntokens": "3159.88", "nsentences": "43.16", "prob_perplexity": "22.453", "code_perplexity": "22.416", "temp": "1.977", "loss_0": "4.793", "loss_1": "0.139", "loss_2": "0.024", "accuracy": "0.30069", "wps": "17558.6", "ups": "5.56", "wpb": "3159.9", "bsz": "43.2", "num_updates": "2400", "lr": "4.8e-05", "gnorm": "2.146", "loss_scale": "64", "train_wall": "35", "gb_free": "14.3", "wall": "455"} [2023-11-01 17:43:04,342][train_inner][INFO] - {"epoch": 1, "update": 0.641, "loss": "4.883", "ntokens": "3163.6", "nsentences": "42", "prob_perplexity": "22.402", "code_perplexity": "22.378", "temp": "1.975", "loss_0": "4.719", "loss_1": "0.139", "loss_2": "0.024", "accuracy": "0.3095", "wps": "17654.6", "ups": "5.58", "wpb": "3163.6", "bsz": "42", "num_updates": "2600", "lr": "5.2e-05", "gnorm": "2.091", "loss_scale": "64", "train_wall": "35", "gb_free": "12.8", "wall": "491"} [2023-11-01 17:43:40,719][train_inner][INFO] - {"epoch": 1, "update": 0.691, "loss": "4.736", "ntokens": "3200.48", "nsentences": "46.56", "prob_perplexity": "22.629", "code_perplexity": "22.609", "temp": "1.973", "loss_0": "4.573", "loss_1": "0.139", "loss_2": "0.024", "accuracy": "0.32784", "wps": "17597.2", "ups": "5.5", "wpb": "3200.5", "bsz": "46.6", "num_updates": "2800", "lr": "5.6e-05", "gnorm": "1.858", "loss_scale": "64", "train_wall": "36", "gb_free": "13.8", "wall": "527"} [2023-11-01 17:44:16,723][train_inner][INFO] - {"epoch": 1, "update": 0.74, "loss": "4.673", "ntokens": "3160.76", "nsentences": "46", "prob_perplexity": "22.727", "code_perplexity": "22.713", "temp": "1.971", "loss_0": "4.51", "loss_1": "0.139", "loss_2": "0.024", "accuracy": "0.33505", "wps": "17558.5", "ups": "5.56", "wpb": "3160.8", "bsz": "46", "num_updates": "3000", "lr": "6e-05", "gnorm": "1.775", "loss_scale": "64", "train_wall": "35", "gb_free": "12.9", "wall": "563"} [2023-11-01 17:44:52,673][train_inner][INFO] - {"epoch": 1, "update": 0.789, "loss": "4.703", "ntokens": "3206.56", "nsentences": "44.4", "prob_perplexity": "22.806", "code_perplexity": "22.797", "temp": "1.969", "loss_0": "4.539", "loss_1": "0.139", "loss_2": "0.025", "accuracy": "0.32873", "wps": "17840.4", "ups": "5.56", "wpb": "3206.6", "bsz": "44.4", "num_updates": "3200", "lr": "6.4e-05", "gnorm": "1.684", "loss_scale": "64", "train_wall": "35", "gb_free": "14.4", "wall": "599"} [2023-11-01 17:45:28,680][train_inner][INFO] - {"epoch": 1, "update": 0.839, "loss": "4.65", "ntokens": "3180.36", "nsentences": "43.36", "prob_perplexity": "22.834", "code_perplexity": "22.824", "temp": "1.967", "loss_0": "4.485", "loss_1": "0.139", "loss_2": "0.026", "accuracy": "0.33528", "wps": "17666", "ups": "5.55", "wpb": "3180.4", "bsz": "43.4", "num_updates": "3400", "lr": "6.8e-05", "gnorm": "1.597", "loss_scale": "64", "train_wall": "35", "gb_free": "12.8", "wall": "635"} [2023-11-01 17:46:04,263][train_inner][INFO] - {"epoch": 1, "update": 0.888, "loss": "4.632", "ntokens": "3190.6", "nsentences": "43.64", "prob_perplexity": "22.833", "code_perplexity": "22.824", "temp": "1.965", "loss_0": "4.466", "loss_1": "0.139", "loss_2": "0.027", "accuracy": "0.33958", "wps": "17934.6", "ups": "5.62", "wpb": "3190.6", "bsz": "43.6", "num_updates": "3600", "lr": "7.2e-05", "gnorm": "1.499", "loss_scale": "64", "train_wall": "35", "gb_free": "13.1", "wall": "670"} [2023-11-01 17:46:40,158][train_inner][INFO] - {"epoch": 1, "update": 0.937, "loss": "4.692", "ntokens": "3250.76", "nsentences": "42.28", "prob_perplexity": "22.874", "code_perplexity": "22.865", "temp": "1.963", "loss_0": "4.526", "loss_1": "0.139", "loss_2": "0.027", "accuracy": "0.32826", "wps": "18113.5", "ups": "5.57", "wpb": "3250.8", "bsz": "42.3", "num_updates": "3800", "lr": "7.6e-05", "gnorm": "1.409", "loss_scale": "64", "train_wall": "35", "gb_free": "12.9", "wall": "706"} [2023-11-01 17:47:15,387][train_inner][INFO] - {"epoch": 1, "update": 0.986, "loss": "4.512", "ntokens": "3164.88", "nsentences": "47", "prob_perplexity": "22.887", "code_perplexity": "22.879", "temp": "1.961", "loss_0": "4.345", "loss_1": "0.139", "loss_2": "0.028", "accuracy": "0.35836", "wps": "17969.1", "ups": "5.68", "wpb": "3164.9", "bsz": "47", "num_updates": "4000", "lr": "8e-05", "gnorm": "1.405", "loss_scale": "64", "train_wall": "35", "gb_free": "13.5", "wall": "742"} [2023-11-01 17:47:24,914][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 17:47:24,915][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 17:47:24,934][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 2 [2023-11-01 17:47:50,717][valid][INFO] - {"epoch": 1, "valid_loss": "4.436", "valid_ntokens": "3157.11", "valid_nsentences": "44.1685", "valid_prob_perplexity": "23.05", "valid_code_perplexity": "23.044", "valid_temp": "1.96", "valid_loss_0": "4.268", "valid_loss_1": "0.139", "valid_loss_2": "0.029", "valid_accuracy": "0.37156", "valid_wps": "55464.6", "valid_wpb": "3157.1", "valid_bsz": "44.2", "valid_num_updates": "4055"} [2023-11-01 17:47:50,720][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 1 @ 4055 updates [2023-11-01 17:47:50,721][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 17:47:52,099][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 17:47:52,649][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 1 @ 4055 updates, score 4.436) (writing took 1.9294737577438354 seconds) [2023-11-01 17:47:52,650][fairseq_cli.train][INFO] - end of epoch 1 (average epoch stats below) [2023-11-01 17:47:52,658][train][INFO] - {"epoch": 1, "train_loss": "5.662", "train_ntokens": "3187.04", "train_nsentences": "44.2614", "train_prob_perplexity": "214.205", "train_code_perplexity": "211.893", "train_temp": "1.98", "train_loss_0": "5.434", "train_loss_1": "0.096", "train_loss_2": "0.132", "train_accuracy": "0.19615", "train_wps": "16930.8", "train_ups": "5.31", "train_wpb": "3187", "train_bsz": "44.3", "train_num_updates": "4055", "train_lr": "8.11e-05", "train_gnorm": "1.296", "train_loss_scale": "64", "train_train_wall": "724", "train_gb_free": "13.2", "train_wall": "779"} [2023-11-01 17:47:52,661][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 17:47:52,685][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 2 [2023-11-01 17:47:52,857][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 17:47:52,877][fairseq.trainer][INFO] - begin training epoch 2 [2023-11-01 17:47:52,877][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 17:48:19,070][train_inner][INFO] - {"epoch": 2, "update": 1.036, "loss": "4.535", "ntokens": "3175", "nsentences": "43.4", "prob_perplexity": "22.997", "code_perplexity": "22.988", "temp": "1.959", "loss_0": "4.368", "loss_1": "0.139", "loss_2": "0.028", "accuracy": "0.35353", "wps": "9971.6", "ups": "3.14", "wpb": "3175", "bsz": "43.4", "num_updates": "4200", "lr": "8.4e-05", "gnorm": "1.324", "loss_scale": "64", "train_wall": "35", "gb_free": "13.9", "wall": "805"} [2023-11-01 17:48:54,735][train_inner][INFO] - {"epoch": 2, "update": 1.085, "loss": "4.528", "ntokens": "3192.6", "nsentences": "44", "prob_perplexity": "22.83", "code_perplexity": "22.82", "temp": "1.957", "loss_0": "4.36", "loss_1": "0.139", "loss_2": "0.028", "accuracy": "0.35695", "wps": "17904.2", "ups": "5.61", "wpb": "3192.6", "bsz": "44", "num_updates": "4400", "lr": "8.8e-05", "gnorm": "1.288", "loss_scale": "64", "train_wall": "35", "gb_free": "14.1", "wall": "841"} [2023-11-01 17:49:30,283][train_inner][INFO] - {"epoch": 2, "update": 1.134, "loss": "4.496", "ntokens": "3198.12", "nsentences": "44.8", "prob_perplexity": "23.342", "code_perplexity": "23.329", "temp": "1.956", "loss_0": "4.328", "loss_1": "0.139", "loss_2": "0.029", "accuracy": "0.36086", "wps": "17994.1", "ups": "5.63", "wpb": "3198.1", "bsz": "44.8", "num_updates": "4600", "lr": "9.2e-05", "gnorm": "1.217", "loss_scale": "64", "train_wall": "35", "gb_free": "14.2", "wall": "877"} [2023-11-01 17:50:05,842][train_inner][INFO] - {"epoch": 2, "update": 1.184, "loss": "4.481", "ntokens": "3212.52", "nsentences": "44.12", "prob_perplexity": "23.235", "code_perplexity": "23.226", "temp": "1.954", "loss_0": "4.312", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.36344", "wps": "18069.9", "ups": "5.62", "wpb": "3212.5", "bsz": "44.1", "num_updates": "4800", "lr": "9.6e-05", "gnorm": "1.203", "loss_scale": "64", "train_wall": "35", "gb_free": "13.1", "wall": "912"} [2023-11-01 17:50:41,527][train_inner][INFO] - {"epoch": 2, "update": 1.233, "loss": "4.351", "ntokens": "3183.88", "nsentences": "48.24", "prob_perplexity": "23.635", "code_perplexity": "23.622", "temp": "1.952", "loss_0": "4.181", "loss_1": "0.139", "loss_2": "0.031", "accuracy": "0.38173", "wps": "17845.6", "ups": "5.6", "wpb": "3183.9", "bsz": "48.2", "num_updates": "5000", "lr": "0.0001", "gnorm": "1.134", "loss_scale": "64", "train_wall": "35", "gb_free": "13.5", "wall": "948"} [2023-11-01 17:51:17,290][train_inner][INFO] - {"epoch": 2, "update": 1.282, "loss": "4.4", "ntokens": "3182.76", "nsentences": "44.64", "prob_perplexity": "23.977", "code_perplexity": "23.964", "temp": "1.95", "loss_0": "4.231", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.37046", "wps": "17800.3", "ups": "5.59", "wpb": "3182.8", "bsz": "44.6", "num_updates": "5200", "lr": "9.99494e-05", "gnorm": "1.084", "loss_scale": "64", "train_wall": "35", "gb_free": "14.5", "wall": "984"} [2023-11-01 17:51:52,296][train_inner][INFO] - {"epoch": 2, "update": 1.332, "loss": "4.396", "ntokens": "3202.8", "nsentences": "43.12", "prob_perplexity": "23.918", "code_perplexity": "23.908", "temp": "1.948", "loss_0": "4.227", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.37428", "wps": "18299.7", "ups": "5.71", "wpb": "3202.8", "bsz": "43.1", "num_updates": "5400", "lr": "9.98987e-05", "gnorm": "1.067", "loss_scale": "64", "train_wall": "34", "gb_free": "13.7", "wall": "1019"} [2023-11-01 17:52:28,016][train_inner][INFO] - {"epoch": 2, "update": 1.381, "loss": "4.353", "ntokens": "3200.32", "nsentences": "44.8", "prob_perplexity": "24.457", "code_perplexity": "24.446", "temp": "1.946", "loss_0": "4.184", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.37671", "wps": "17919.9", "ups": "5.6", "wpb": "3200.3", "bsz": "44.8", "num_updates": "5600", "lr": "9.98481e-05", "gnorm": "1.014", "loss_scale": "64", "train_wall": "35", "gb_free": "14.2", "wall": "1054"} [2023-11-01 17:53:03,835][train_inner][INFO] - {"epoch": 2, "update": 1.43, "loss": "4.421", "ntokens": "3205.24", "nsentences": "42.64", "prob_perplexity": "24.288", "code_perplexity": "24.278", "temp": "1.944", "loss_0": "4.252", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.3664", "wps": "17898.1", "ups": "5.58", "wpb": "3205.2", "bsz": "42.6", "num_updates": "5800", "lr": "9.97975e-05", "gnorm": "1.034", "loss_scale": "64", "train_wall": "35", "gb_free": "13.7", "wall": "1090"} [2023-11-01 17:53:39,267][train_inner][INFO] - {"epoch": 2, "update": 1.48, "loss": "4.326", "ntokens": "3171.4", "nsentences": "43.04", "prob_perplexity": "24.185", "code_perplexity": "24.176", "temp": "1.942", "loss_0": "4.157", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.38049", "wps": "17902.3", "ups": "5.64", "wpb": "3171.4", "bsz": "43", "num_updates": "6000", "lr": "9.97468e-05", "gnorm": "0.997", "loss_scale": "64", "train_wall": "35", "gb_free": "13", "wall": "1125"} [2023-11-01 17:54:14,423][train_inner][INFO] - {"epoch": 2, "update": 1.529, "loss": "4.281", "ntokens": "3168.24", "nsentences": "45.72", "prob_perplexity": "24.186", "code_perplexity": "24.176", "temp": "1.94", "loss_0": "4.113", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.38967", "wps": "18025.5", "ups": "5.69", "wpb": "3168.2", "bsz": "45.7", "num_updates": "6200", "lr": "9.96962e-05", "gnorm": "0.984", "loss_scale": "64", "train_wall": "35", "gb_free": "14.1", "wall": "1161"} [2023-11-01 17:54:50,159][train_inner][INFO] - {"epoch": 2, "update": 1.578, "loss": "4.273", "ntokens": "3214.76", "nsentences": "44.44", "prob_perplexity": "24.478", "code_perplexity": "24.47", "temp": "1.938", "loss_0": "4.105", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.38727", "wps": "17992.5", "ups": "5.6", "wpb": "3214.8", "bsz": "44.4", "num_updates": "6400", "lr": "9.96456e-05", "gnorm": "0.933", "loss_scale": "64", "train_wall": "35", "gb_free": "12.9", "wall": "1196"} [2023-11-01 17:55:25,731][train_inner][INFO] - {"epoch": 2, "update": 1.627, "loss": "4.242", "ntokens": "3211.52", "nsentences": "46.8", "prob_perplexity": "24.556", "code_perplexity": "24.545", "temp": "1.936", "loss_0": "4.074", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.39172", "wps": "18057.8", "ups": "5.62", "wpb": "3211.5", "bsz": "46.8", "num_updates": "6600", "lr": "9.95949e-05", "gnorm": "0.911", "loss_scale": "64", "train_wall": "35", "gb_free": "13.6", "wall": "1232"} [2023-11-01 17:56:01,287][train_inner][INFO] - {"epoch": 2, "update": 1.677, "loss": "4.287", "ntokens": "3210.36", "nsentences": "41.8", "prob_perplexity": "24.464", "code_perplexity": "24.453", "temp": "1.934", "loss_0": "4.118", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.38267", "wps": "18059", "ups": "5.63", "wpb": "3210.4", "bsz": "41.8", "num_updates": "6800", "lr": "9.95443e-05", "gnorm": "0.91", "loss_scale": "64", "train_wall": "35", "gb_free": "14.5", "wall": "1268"} [2023-11-01 17:56:36,793][train_inner][INFO] - {"epoch": 2, "update": 1.726, "loss": "4.196", "ntokens": "3203.6", "nsentences": "45.28", "prob_perplexity": "24.634", "code_perplexity": "24.623", "temp": "1.932", "loss_0": "4.027", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.3987", "wps": "18046.3", "ups": "5.63", "wpb": "3203.6", "bsz": "45.3", "num_updates": "7000", "lr": "9.94937e-05", "gnorm": "0.881", "loss_scale": "64", "train_wall": "35", "gb_free": "13.4", "wall": "1303"} [2023-11-01 17:57:12,036][train_inner][INFO] - {"epoch": 2, "update": 1.775, "loss": "4.203", "ntokens": "3163.24", "nsentences": "43.68", "prob_perplexity": "24.736", "code_perplexity": "24.724", "temp": "1.93", "loss_0": "4.035", "loss_1": "0.139", "loss_2": "0.029", "accuracy": "0.3953", "wps": "17952.2", "ups": "5.68", "wpb": "3163.2", "bsz": "43.7", "num_updates": "7200", "lr": "9.9443e-05", "gnorm": "0.874", "loss_scale": "64", "train_wall": "35", "gb_free": "15.8", "wall": "1338"} [2023-11-01 17:57:47,925][train_inner][INFO] - {"epoch": 2, "update": 1.825, "loss": "4.232", "ntokens": "3188.48", "nsentences": "42.76", "prob_perplexity": "25.013", "code_perplexity": "25.003", "temp": "1.928", "loss_0": "4.063", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.3908", "wps": "17769.7", "ups": "5.57", "wpb": "3188.5", "bsz": "42.8", "num_updates": "7400", "lr": "9.93924e-05", "gnorm": "0.854", "loss_scale": "64", "train_wall": "35", "gb_free": "14.8", "wall": "1374"} [2023-11-01 17:58:23,819][train_inner][INFO] - {"epoch": 2, "update": 1.874, "loss": "4.189", "ntokens": "3186.48", "nsentences": "44.44", "prob_perplexity": "25.191", "code_perplexity": "25.18", "temp": "1.926", "loss_0": "4.02", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.3982", "wps": "17756.1", "ups": "5.57", "wpb": "3186.5", "bsz": "44.4", "num_updates": "7600", "lr": "9.93418e-05", "gnorm": "0.859", "loss_scale": "64", "train_wall": "35", "gb_free": "14", "wall": "1410"} [2023-11-01 17:58:59,633][train_inner][INFO] - {"epoch": 2, "update": 1.923, "loss": "4.186", "ntokens": "3173.32", "nsentences": "43.68", "prob_perplexity": "25.214", "code_perplexity": "25.202", "temp": "1.924", "loss_0": "4.018", "loss_1": "0.139", "loss_2": "0.03", "accuracy": "0.39723", "wps": "17722.4", "ups": "5.58", "wpb": "3173.3", "bsz": "43.7", "num_updates": "7800", "lr": "9.92911e-05", "gnorm": "0.848", "loss_scale": "64", "train_wall": "35", "gb_free": "13.4", "wall": "1446"} [2023-11-01 17:59:35,120][train_inner][INFO] - {"epoch": 2, "update": 1.973, "loss": "4.233", "ntokens": "3201.28", "nsentences": "43.08", "prob_perplexity": "25.779", "code_perplexity": "25.767", "temp": "1.923", "loss_0": "4.065", "loss_1": "0.138", "loss_2": "0.03", "accuracy": "0.3891", "wps": "18042.8", "ups": "5.64", "wpb": "3201.3", "bsz": "43.1", "num_updates": "8000", "lr": "9.92405e-05", "gnorm": "0.838", "loss_scale": "64", "train_wall": "35", "gb_free": "14.9", "wall": "1481"} [2023-11-01 17:59:54,627][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 17:59:54,629][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 17:59:54,646][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 3 [2023-11-01 18:00:20,225][valid][INFO] - {"epoch": 2, "valid_loss": "4.06", "valid_ntokens": "3149.53", "valid_nsentences": "44.1685", "valid_prob_perplexity": "25.959", "valid_code_perplexity": "25.948", "valid_temp": "1.921", "valid_loss_0": "3.893", "valid_loss_1": "0.138", "valid_loss_2": "0.029", "valid_accuracy": "0.41701", "valid_wps": "55806.8", "valid_wpb": "3149.5", "valid_bsz": "44.2", "valid_num_updates": "8111", "valid_best_loss": "4.06"} [2023-11-01 18:00:20,227][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 2 @ 8111 updates [2023-11-01 18:00:20,229][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:00:21,748][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:00:22,736][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 2 @ 8111 updates, score 4.06) (writing took 2.5089683937840164 seconds) [2023-11-01 18:00:22,737][fairseq_cli.train][INFO] - end of epoch 2 (average epoch stats below) [2023-11-01 18:00:22,739][train][INFO] - {"epoch": 2, "train_loss": "4.324", "train_ntokens": "3193.48", "train_nsentences": "44.2682", "train_prob_perplexity": "24.331", "train_code_perplexity": "24.321", "train_temp": "1.94", "train_loss_0": "4.155", "train_loss_1": "0.139", "train_loss_2": "0.03", "train_accuracy": "0.38091", "train_wps": "17268.5", "train_ups": "5.41", "train_wpb": "3193.5", "train_bsz": "44.3", "train_num_updates": "8111", "train_lr": "9.92124e-05", "train_gnorm": "1.003", "train_loss_scale": "64", "train_train_wall": "709", "train_gb_free": "13.7", "train_wall": "1529"} [2023-11-01 18:00:22,742][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:00:22,765][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 3 [2023-11-01 18:00:22,963][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 18:00:22,991][fairseq.trainer][INFO] - begin training epoch 3 [2023-11-01 18:00:22,992][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 18:00:38,886][train_inner][INFO] - {"epoch": 3, "update": 2.022, "loss": "4.164", "ntokens": "3212.88", "nsentences": "46.4", "prob_perplexity": "26.195", "code_perplexity": "26.183", "temp": "1.921", "loss_0": "3.996", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.39837", "wps": "10077.5", "ups": "3.14", "wpb": "3212.9", "bsz": "46.4", "num_updates": "8200", "lr": "9.91899e-05", "gnorm": "0.813", "loss_scale": "64", "train_wall": "35", "gb_free": "13.6", "wall": "1545"} [2023-11-01 18:01:14,175][train_inner][INFO] - {"epoch": 3, "update": 2.071, "loss": "4.199", "ntokens": "3237.8", "nsentences": "45.48", "prob_perplexity": "26.221", "code_perplexity": "26.21", "temp": "1.919", "loss_0": "4.032", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.3908", "wps": "18351.2", "ups": "5.67", "wpb": "3237.8", "bsz": "45.5", "num_updates": "8400", "lr": "9.91392e-05", "gnorm": "0.807", "loss_scale": "64", "train_wall": "35", "gb_free": "13.5", "wall": "1580"} [2023-11-01 18:01:49,444][train_inner][INFO] - {"epoch": 3, "update": 2.121, "loss": "4.168", "ntokens": "3198.04", "nsentences": "45.76", "prob_perplexity": "26.721", "code_perplexity": "26.711", "temp": "1.917", "loss_0": "4.002", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39375", "wps": "18136.3", "ups": "5.67", "wpb": "3198", "bsz": "45.8", "num_updates": "8600", "lr": "9.90886e-05", "gnorm": "0.801", "loss_scale": "64", "train_wall": "35", "gb_free": "14.6", "wall": "1616"} [2023-11-01 18:02:24,858][train_inner][INFO] - {"epoch": 3, "update": 2.17, "loss": "4.201", "ntokens": "3221.52", "nsentences": "44.12", "prob_perplexity": "26.693", "code_perplexity": "26.683", "temp": "1.915", "loss_0": "4.034", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.38834", "wps": "18194.8", "ups": "5.65", "wpb": "3221.5", "bsz": "44.1", "num_updates": "8800", "lr": "9.9038e-05", "gnorm": "0.789", "loss_scale": "64", "train_wall": "35", "gb_free": "13.4", "wall": "1651"} [2023-11-01 18:03:00,153][train_inner][INFO] - {"epoch": 3, "update": 2.219, "loss": "4.128", "ntokens": "3179.2", "nsentences": "44.56", "prob_perplexity": "26.681", "code_perplexity": "26.667", "temp": "1.913", "loss_0": "3.961", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.3999", "wps": "18016.4", "ups": "5.67", "wpb": "3179.2", "bsz": "44.6", "num_updates": "9000", "lr": "9.89873e-05", "gnorm": "0.791", "loss_scale": "64", "train_wall": "35", "gb_free": "12.9", "wall": "1686"} [2023-11-01 18:03:35,719][train_inner][INFO] - {"epoch": 3, "update": 2.268, "loss": "4.149", "ntokens": "3195.44", "nsentences": "44.44", "prob_perplexity": "26.917", "code_perplexity": "26.906", "temp": "1.911", "loss_0": "3.983", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39597", "wps": "17969.8", "ups": "5.62", "wpb": "3195.4", "bsz": "44.4", "num_updates": "9200", "lr": "9.89367e-05", "gnorm": "0.781", "loss_scale": "64", "train_wall": "35", "gb_free": "14.5", "wall": "1722"} [2023-11-01 18:04:10,996][train_inner][INFO] - {"epoch": 3, "update": 2.318, "loss": "4.239", "ntokens": "3166", "nsentences": "41", "prob_perplexity": "27.186", "code_perplexity": "27.174", "temp": "1.909", "loss_0": "4.073", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.38175", "wps": "17950.4", "ups": "5.67", "wpb": "3166", "bsz": "41", "num_updates": "9400", "lr": "9.88861e-05", "gnorm": "0.808", "loss_scale": "64", "train_wall": "35", "gb_free": "14.1", "wall": "1757"} [2023-11-01 18:04:46,397][train_inner][INFO] - {"epoch": 3, "update": 2.367, "loss": "4.151", "ntokens": "3213.64", "nsentences": "45.12", "prob_perplexity": "27.129", "code_perplexity": "27.116", "temp": "1.907", "loss_0": "3.985", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39457", "wps": "18156.8", "ups": "5.65", "wpb": "3213.6", "bsz": "45.1", "num_updates": "9600", "lr": "9.88354e-05", "gnorm": "0.769", "loss_scale": "64", "train_wall": "35", "gb_free": "13.7", "wall": "1793"} [2023-11-01 18:05:22,125][train_inner][INFO] - {"epoch": 3, "update": 2.416, "loss": "4.172", "ntokens": "3199.64", "nsentences": "41.92", "prob_perplexity": "27.654", "code_perplexity": "27.641", "temp": "1.905", "loss_0": "4.006", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.38836", "wps": "17912.1", "ups": "5.6", "wpb": "3199.6", "bsz": "41.9", "num_updates": "9800", "lr": "9.87848e-05", "gnorm": "0.76", "loss_scale": "64", "train_wall": "35", "gb_free": "13.2", "wall": "1828"} [2023-11-01 18:05:58,210][train_inner][INFO] - {"epoch": 3, "update": 2.466, "loss": "4.11", "ntokens": "3161.64", "nsentences": "43.88", "prob_perplexity": "27.806", "code_perplexity": "27.793", "temp": "1.903", "loss_0": "3.944", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39876", "wps": "17524.6", "ups": "5.54", "wpb": "3161.6", "bsz": "43.9", "num_updates": "10000", "lr": "9.87342e-05", "gnorm": "0.769", "loss_scale": "64", "train_wall": "35", "gb_free": "13.6", "wall": "1864"} [2023-11-01 18:06:34,022][train_inner][INFO] - {"epoch": 3, "update": 2.515, "loss": "4.044", "ntokens": "3170.84", "nsentences": "47.28", "prob_perplexity": "27.876", "code_perplexity": "27.864", "temp": "1.902", "loss_0": "3.878", "loss_1": "0.138", "loss_2": "0.027", "accuracy": "0.41124", "wps": "17709.4", "ups": "5.59", "wpb": "3170.8", "bsz": "47.3", "num_updates": "10200", "lr": "9.86835e-05", "gnorm": "0.757", "loss_scale": "64", "train_wall": "35", "gb_free": "13.6", "wall": "1900"} [2023-11-01 18:07:09,613][train_inner][INFO] - {"epoch": 3, "update": 2.564, "loss": "4.061", "ntokens": "3190.52", "nsentences": "45.84", "prob_perplexity": "28.045", "code_perplexity": "28.031", "temp": "1.9", "loss_0": "3.895", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.407", "wps": "17930.2", "ups": "5.62", "wpb": "3190.5", "bsz": "45.8", "num_updates": "10400", "lr": "9.86329e-05", "gnorm": "0.765", "loss_scale": "64", "train_wall": "35", "gb_free": "13.9", "wall": "1936"} [2023-11-01 18:07:45,309][train_inner][INFO] - {"epoch": 3, "update": 2.614, "loss": "4.11", "ntokens": "3165.64", "nsentences": "43.48", "prob_perplexity": "27.832", "code_perplexity": "27.823", "temp": "1.898", "loss_0": "3.944", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39989", "wps": "17737.4", "ups": "5.6", "wpb": "3165.6", "bsz": "43.5", "num_updates": "10600", "lr": "9.85823e-05", "gnorm": "0.759", "loss_scale": "64", "train_wall": "35", "gb_free": "13.3", "wall": "1972"} [2023-11-01 18:08:21,081][train_inner][INFO] - {"epoch": 3, "update": 2.663, "loss": "4.113", "ntokens": "3223.48", "nsentences": "43.76", "prob_perplexity": "27.967", "code_perplexity": "27.956", "temp": "1.896", "loss_0": "3.946", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.39821", "wps": "18023.5", "ups": "5.59", "wpb": "3223.5", "bsz": "43.8", "num_updates": "10800", "lr": "9.85316e-05", "gnorm": "0.75", "loss_scale": "64", "train_wall": "35", "gb_free": "13.7", "wall": "2007"} [2023-11-01 18:08:56,757][train_inner][INFO] - {"epoch": 3, "update": 2.712, "loss": "4.11", "ntokens": "3173.24", "nsentences": "41.88", "prob_perplexity": "28.014", "code_perplexity": "28.002", "temp": "1.894", "loss_0": "3.943", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39913", "wps": "17790.4", "ups": "5.61", "wpb": "3173.2", "bsz": "41.9", "num_updates": "11000", "lr": "9.8481e-05", "gnorm": "0.758", "loss_scale": "64", "train_wall": "35", "gb_free": "14.3", "wall": "2043"} [2023-11-01 18:09:32,461][train_inner][INFO] - {"epoch": 3, "update": 2.762, "loss": "4.052", "ntokens": "3192", "nsentences": "44.88", "prob_perplexity": "28.268", "code_perplexity": "28.259", "temp": "1.892", "loss_0": "3.886", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.40828", "wps": "17881.3", "ups": "5.6", "wpb": "3192", "bsz": "44.9", "num_updates": "11200", "lr": "9.84304e-05", "gnorm": "0.754", "loss_scale": "64", "train_wall": "35", "gb_free": "14.1", "wall": "2079"} [2023-11-01 18:10:07,860][train_inner][INFO] - {"epoch": 3, "update": 2.811, "loss": "4.109", "ntokens": "3211.68", "nsentences": "43.08", "prob_perplexity": "28.399", "code_perplexity": "28.388", "temp": "1.89", "loss_0": "3.943", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.39786", "wps": "18146.7", "ups": "5.65", "wpb": "3211.7", "bsz": "43.1", "num_updates": "11400", "lr": "9.83797e-05", "gnorm": "0.742", "loss_scale": "64", "train_wall": "35", "gb_free": "14.3", "wall": "2114"} [2023-11-01 18:10:43,903][train_inner][INFO] - {"epoch": 3, "update": 2.86, "loss": "4.068", "ntokens": "3211.68", "nsentences": "43.44", "prob_perplexity": "28.499", "code_perplexity": "28.488", "temp": "1.888", "loss_0": "3.902", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.40332", "wps": "17822.1", "ups": "5.55", "wpb": "3211.7", "bsz": "43.4", "num_updates": "11600", "lr": "9.83291e-05", "gnorm": "0.723", "loss_scale": "64", "train_wall": "35", "gb_free": "13.4", "wall": "2150"} [2023-11-01 18:11:19,409][train_inner][INFO] - {"epoch": 3, "update": 2.91, "loss": "4.016", "ntokens": "3137.52", "nsentences": "45.8", "prob_perplexity": "28.542", "code_perplexity": "28.531", "temp": "1.886", "loss_0": "3.851", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.41456", "wps": "17674.3", "ups": "5.63", "wpb": "3137.5", "bsz": "45.8", "num_updates": "11800", "lr": "9.82785e-05", "gnorm": "0.755", "loss_scale": "64", "train_wall": "35", "gb_free": "12.8", "wall": "2186"} [2023-11-01 18:11:55,413][train_inner][INFO] - {"epoch": 3, "update": 2.959, "loss": "4.036", "ntokens": "3213.56", "nsentences": "45.76", "prob_perplexity": "28.489", "code_perplexity": "28.475", "temp": "1.884", "loss_0": "3.871", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.41042", "wps": "17852.6", "ups": "5.56", "wpb": "3213.6", "bsz": "45.8", "num_updates": "12000", "lr": "9.82278e-05", "gnorm": "0.72", "loss_scale": "64", "train_wall": "35", "gb_free": "13.7", "wall": "2222"} [2023-11-01 18:12:25,226][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 18:12:25,227][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:12:25,246][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 4 [2023-11-01 18:12:50,699][valid][INFO] - {"epoch": 3, "valid_loss": "3.974", "valid_ntokens": "3170.06", "valid_nsentences": "44.1685", "valid_prob_perplexity": "28.372", "valid_code_perplexity": "28.367", "valid_temp": "1.882", "valid_loss_0": "3.809", "valid_loss_1": "0.138", "valid_loss_2": "0.027", "valid_accuracy": "0.42048", "valid_wps": "56459.2", "valid_wpb": "3170.1", "valid_bsz": "44.2", "valid_num_updates": "12167", "valid_best_loss": "3.974"} [2023-11-01 18:12:50,701][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 3 @ 12167 updates [2023-11-01 18:12:50,703][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:12:52,130][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:12:53,099][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 3 @ 12167 updates, score 3.974) (writing took 2.397829449735582 seconds) [2023-11-01 18:12:53,100][fairseq_cli.train][INFO] - end of epoch 3 (average epoch stats below) [2023-11-01 18:12:53,102][train][INFO] - {"epoch": 3, "train_loss": "4.119", "train_ntokens": "3193.14", "train_nsentences": "44.2682", "train_prob_perplexity": "27.637", "train_code_perplexity": "27.626", "train_temp": "1.901", "train_loss_0": "3.952", "train_loss_1": "0.138", "train_loss_2": "0.028", "train_accuracy": "0.39894", "train_wps": "17260.2", "train_ups": "5.41", "train_wpb": "3193.1", "train_bsz": "44.3", "train_num_updates": "12167", "train_lr": "9.81856e-05", "train_gnorm": "0.766", "train_loss_scale": "64", "train_train_wall": "709", "train_gb_free": "14.3", "train_wall": "2279"} [2023-11-01 18:12:53,104][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:12:53,124][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 4 [2023-11-01 18:12:53,319][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 18:12:53,337][fairseq.trainer][INFO] - begin training epoch 4 [2023-11-01 18:12:53,338][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 18:12:59,364][train_inner][INFO] - {"epoch": 4, "update": 3.008, "loss": "4.066", "ntokens": "3179.84", "nsentences": "44.16", "prob_perplexity": "28.681", "code_perplexity": "28.674", "temp": "1.883", "loss_0": "3.901", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.40379", "wps": "9947.2", "ups": "3.13", "wpb": "3179.8", "bsz": "44.2", "num_updates": "12200", "lr": "9.81772e-05", "gnorm": "0.735", "loss_scale": "64", "train_wall": "35", "gb_free": "13.8", "wall": "2286"} [2023-11-01 18:13:34,925][train_inner][INFO] - {"epoch": 4, "update": 3.057, "loss": "3.966", "ntokens": "3151.44", "nsentences": "47.4", "prob_perplexity": "28.715", "code_perplexity": "28.702", "temp": "1.881", "loss_0": "3.8", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.42129", "wps": "17725.4", "ups": "5.62", "wpb": "3151.4", "bsz": "47.4", "num_updates": "12400", "lr": "9.81266e-05", "gnorm": "0.73", "loss_scale": "64", "train_wall": "35", "gb_free": "13.9", "wall": "2321"} [2023-11-01 18:13:35,795][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 32.0 [2023-11-01 18:14:11,311][train_inner][INFO] - {"epoch": 4, "update": 3.107, "loss": "4.045", "ntokens": "3193.16", "nsentences": "42.88", "prob_perplexity": "28.743", "code_perplexity": "28.733", "temp": "1.879", "loss_0": "3.878", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.40667", "wps": "17552.7", "ups": "5.5", "wpb": "3193.2", "bsz": "42.9", "num_updates": "12600", "lr": "9.80759e-05", "gnorm": "0.725", "loss_scale": "32", "train_wall": "36", "gb_free": "14.2", "wall": "2358"} [2023-11-01 18:14:46,888][train_inner][INFO] - {"epoch": 4, "update": 3.156, "loss": "4.045", "ntokens": "3201.32", "nsentences": "43.76", "prob_perplexity": "28.945", "code_perplexity": "28.929", "temp": "1.877", "loss_0": "3.88", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.406", "wps": "17997.8", "ups": "5.62", "wpb": "3201.3", "bsz": "43.8", "num_updates": "12800", "lr": "9.80253e-05", "gnorm": "0.717", "loss_scale": "32", "train_wall": "35", "gb_free": "12.5", "wall": "2393"} [2023-11-01 18:15:22,517][train_inner][INFO] - {"epoch": 4, "update": 3.206, "loss": "4.007", "ntokens": "3191.28", "nsentences": "44.8", "prob_perplexity": "28.855", "code_perplexity": "28.841", "temp": "1.875", "loss_0": "3.842", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.41308", "wps": "17914.6", "ups": "5.61", "wpb": "3191.3", "bsz": "44.8", "num_updates": "13000", "lr": "9.79747e-05", "gnorm": "0.724", "loss_scale": "32", "train_wall": "35", "gb_free": "13.3", "wall": "2429"} [2023-11-01 18:15:57,973][train_inner][INFO] - {"epoch": 4, "update": 3.255, "loss": "4.033", "ntokens": "3195.24", "nsentences": "44.64", "prob_perplexity": "28.889", "code_perplexity": "28.879", "temp": "1.873", "loss_0": "3.868", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.40867", "wps": "18024.9", "ups": "5.64", "wpb": "3195.2", "bsz": "44.6", "num_updates": "13200", "lr": "9.79241e-05", "gnorm": "0.712", "loss_scale": "32", "train_wall": "35", "gb_free": "14.1", "wall": "2464"} [2023-11-01 18:16:33,193][train_inner][INFO] - {"epoch": 4, "update": 3.304, "loss": "4.035", "ntokens": "3178.36", "nsentences": "43", "prob_perplexity": "29.045", "code_perplexity": "29.031", "temp": "1.871", "loss_0": "3.87", "loss_1": "0.138", "loss_2": "0.027", "accuracy": "0.40792", "wps": "18049.7", "ups": "5.68", "wpb": "3178.4", "bsz": "43", "num_updates": "13400", "lr": "9.78734e-05", "gnorm": "0.712", "loss_scale": "32", "train_wall": "35", "gb_free": "14.4", "wall": "2499"} [2023-11-01 18:17:08,790][train_inner][INFO] - {"epoch": 4, "update": 3.354, "loss": "3.982", "ntokens": "3177.32", "nsentences": "45.04", "prob_perplexity": "29.368", "code_perplexity": "29.355", "temp": "1.869", "loss_0": "3.817", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.4161", "wps": "17853", "ups": "5.62", "wpb": "3177.3", "bsz": "45", "num_updates": "13600", "lr": "9.78228e-05", "gnorm": "0.711", "loss_scale": "32", "train_wall": "35", "gb_free": "13.9", "wall": "2535"} [2023-11-01 18:17:44,789][train_inner][INFO] - {"epoch": 4, "update": 3.403, "loss": "4.03", "ntokens": "3175.64", "nsentences": "42.96", "prob_perplexity": "29.191", "code_perplexity": "29.182", "temp": "1.868", "loss_0": "3.865", "loss_1": "0.138", "loss_2": "0.027", "accuracy": "0.40797", "wps": "17643.6", "ups": "5.56", "wpb": "3175.6", "bsz": "43", "num_updates": "13800", "lr": "9.77722e-05", "gnorm": "0.717", "loss_scale": "32", "train_wall": "35", "gb_free": "14.1", "wall": "2571"} [2023-11-01 18:18:20,558][train_inner][INFO] - {"epoch": 4, "update": 3.452, "loss": "3.937", "ntokens": "3163.16", "nsentences": "46.4", "prob_perplexity": "29.115", "code_perplexity": "29.104", "temp": "1.866", "loss_0": "3.772", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.42591", "wps": "17688", "ups": "5.59", "wpb": "3163.2", "bsz": "46.4", "num_updates": "14000", "lr": "9.77215e-05", "gnorm": "0.723", "loss_scale": "32", "train_wall": "35", "gb_free": "12.2", "wall": "2607"} [2023-11-01 18:18:56,934][train_inner][INFO] - {"epoch": 4, "update": 3.501, "loss": "3.951", "ntokens": "3182.72", "nsentences": "44.36", "prob_perplexity": "29.244", "code_perplexity": "29.231", "temp": "1.864", "loss_0": "3.786", "loss_1": "0.138", "loss_2": "0.027", "accuracy": "0.42113", "wps": "17499.7", "ups": "5.5", "wpb": "3182.7", "bsz": "44.4", "num_updates": "14200", "lr": "9.76709e-05", "gnorm": "0.719", "loss_scale": "32", "train_wall": "36", "gb_free": "13.7", "wall": "2643"} [2023-11-01 18:19:32,973][train_inner][INFO] - {"epoch": 4, "update": 3.551, "loss": "4.007", "ntokens": "3158.56", "nsentences": "42.24", "prob_perplexity": "29.098", "code_perplexity": "29.084", "temp": "1.862", "loss_0": "3.842", "loss_1": "0.138", "loss_2": "0.028", "accuracy": "0.41154", "wps": "17529.7", "ups": "5.55", "wpb": "3158.6", "bsz": "42.2", "num_updates": "14400", "lr": "9.76203e-05", "gnorm": "0.71", "loss_scale": "32", "train_wall": "35", "gb_free": "13.6", "wall": "2679"} [2023-11-01 18:20:08,700][train_inner][INFO] - {"epoch": 4, "update": 3.6, "loss": "3.971", "ntokens": "3216.4", "nsentences": "44.88", "prob_perplexity": "29.777", "code_perplexity": "29.763", "temp": "1.86", "loss_0": "3.805", "loss_1": "0.138", "loss_2": "0.029", "accuracy": "0.41666", "wps": "18006.4", "ups": "5.6", "wpb": "3216.4", "bsz": "44.9", "num_updates": "14600", "lr": "9.75696e-05", "gnorm": "0.7", "loss_scale": "32", "train_wall": "35", "gb_free": "13", "wall": "2715"} [2023-11-01 18:20:44,164][train_inner][INFO] - {"epoch": 4, "update": 3.649, "loss": "4.006", "ntokens": "3170.24", "nsentences": "42.2", "prob_perplexity": "29.657", "code_perplexity": "29.644", "temp": "1.858", "loss_0": "3.842", "loss_1": "0.138", "loss_2": "0.027", "accuracy": "0.41054", "wps": "17879.8", "ups": "5.64", "wpb": "3170.2", "bsz": "42.2", "num_updates": "14800", "lr": "9.7519e-05", "gnorm": "0.706", "loss_scale": "32", "train_wall": "35", "gb_free": "13.7", "wall": "2750"} [2023-11-01 18:21:20,345][train_inner][INFO] - {"epoch": 4, "update": 3.699, "loss": "4.031", "ntokens": "3189.36", "nsentences": "44.04", "prob_perplexity": "30.014", "code_perplexity": "29.998", "temp": "1.856", "loss_0": "3.866", "loss_1": "0.138", "loss_2": "0.027", "accuracy": "0.4084", "wps": "17631.1", "ups": "5.53", "wpb": "3189.4", "bsz": "44", "num_updates": "15000", "lr": "9.74684e-05", "gnorm": "0.701", "loss_scale": "32", "train_wall": "36", "gb_free": "13.3", "wall": "2787"} [2023-11-01 18:21:55,773][train_inner][INFO] - {"epoch": 4, "update": 3.748, "loss": "3.977", "ntokens": "3183", "nsentences": "44.96", "prob_perplexity": "30.919", "code_perplexity": "30.907", "temp": "1.855", "loss_0": "3.813", "loss_1": "0.137", "loss_2": "0.027", "accuracy": "0.41348", "wps": "17969.8", "ups": "5.65", "wpb": "3183", "bsz": "45", "num_updates": "15200", "lr": "9.74177e-05", "gnorm": "0.688", "loss_scale": "32", "train_wall": "35", "gb_free": "13.9", "wall": "2822"} [2023-11-01 18:22:31,821][train_inner][INFO] - {"epoch": 4, "update": 3.797, "loss": "3.95", "ntokens": "3181.96", "nsentences": "45.76", "prob_perplexity": "31.211", "code_perplexity": "31.197", "temp": "1.853", "loss_0": "3.785", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41776", "wps": "17655.1", "ups": "5.55", "wpb": "3182", "bsz": "45.8", "num_updates": "15400", "lr": "9.73671e-05", "gnorm": "0.705", "loss_scale": "32", "train_wall": "35", "gb_free": "16.2", "wall": "2858"} [2023-11-01 18:23:07,922][train_inner][INFO] - {"epoch": 4, "update": 3.847, "loss": "3.978", "ntokens": "3195.92", "nsentences": "43.8", "prob_perplexity": "31.971", "code_perplexity": "31.957", "temp": "1.851", "loss_0": "3.813", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41103", "wps": "17706.2", "ups": "5.54", "wpb": "3195.9", "bsz": "43.8", "num_updates": "15600", "lr": "9.73165e-05", "gnorm": "0.691", "loss_scale": "32", "train_wall": "35", "gb_free": "14.2", "wall": "2894"} [2023-11-01 18:23:43,568][train_inner][INFO] - {"epoch": 4, "update": 3.896, "loss": "4.004", "ntokens": "3214", "nsentences": "42.52", "prob_perplexity": "32.635", "code_perplexity": "32.619", "temp": "1.849", "loss_0": "3.84", "loss_1": "0.137", "loss_2": "0.027", "accuracy": "0.40575", "wps": "18034", "ups": "5.61", "wpb": "3214", "bsz": "42.5", "num_updates": "15800", "lr": "9.72658e-05", "gnorm": "0.69", "loss_scale": "32", "train_wall": "35", "gb_free": "13.7", "wall": "2930"} [2023-11-01 18:24:19,564][train_inner][INFO] - {"epoch": 4, "update": 3.945, "loss": "3.965", "ntokens": "3196.96", "nsentences": "44.44", "prob_perplexity": "32.743", "code_perplexity": "32.731", "temp": "1.847", "loss_0": "3.801", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41394", "wps": "17764.2", "ups": "5.56", "wpb": "3197", "bsz": "44.4", "num_updates": "16000", "lr": "9.72152e-05", "gnorm": "0.7", "loss_scale": "32", "train_wall": "35", "gb_free": "13.7", "wall": "2966"} [2023-11-01 18:24:55,513][train_inner][INFO] - {"epoch": 4, "update": 3.995, "loss": "3.978", "ntokens": "3194.12", "nsentences": "43.76", "prob_perplexity": "33.046", "code_perplexity": "33.031", "temp": "1.845", "loss_0": "3.814", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41199", "wps": "17771", "ups": "5.56", "wpb": "3194.1", "bsz": "43.8", "num_updates": "16200", "lr": "9.71646e-05", "gnorm": "0.694", "loss_scale": "32", "train_wall": "35", "gb_free": "14", "wall": "3002"} [2023-11-01 18:24:59,363][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 18:24:59,364][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:24:59,384][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 5 [2023-11-01 18:25:25,187][valid][INFO] - {"epoch": 4, "valid_loss": "3.857", "valid_ntokens": "3155.65", "valid_nsentences": "44.1685", "valid_prob_perplexity": "32.54", "valid_code_perplexity": "32.531", "valid_temp": "1.844", "valid_loss_0": "3.691", "valid_loss_1": "0.137", "valid_loss_2": "0.029", "valid_accuracy": "0.43197", "valid_wps": "55405.4", "valid_wpb": "3155.7", "valid_bsz": "44.2", "valid_num_updates": "16222", "valid_best_loss": "3.857"} [2023-11-01 18:25:25,189][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 4 @ 16222 updates [2023-11-01 18:25:25,191][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:25:26,617][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:25:27,593][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 4 @ 16222 updates, score 3.857) (writing took 2.4044785238802433 seconds) [2023-11-01 18:25:27,594][fairseq_cli.train][INFO] - end of epoch 4 (average epoch stats below) [2023-11-01 18:25:27,597][train][INFO] - {"epoch": 4, "train_loss": "3.994", "train_ntokens": "3185.7", "train_nsentences": "44.2594", "train_prob_perplexity": "30.063", "train_code_perplexity": "30.05", "train_temp": "1.863", "train_loss_0": "3.829", "train_loss_1": "0.137", "train_loss_2": "0.028", "train_accuracy": "0.41294", "train_wps": "17121.5", "train_ups": "5.37", "train_wpb": "3185.7", "train_bsz": "44.3", "train_num_updates": "16222", "train_lr": "9.7159e-05", "train_gnorm": "0.709", "train_loss_scale": "32", "train_train_wall": "713", "train_gb_free": "13.7", "train_wall": "3034"} [2023-11-01 18:25:27,599][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:25:27,618][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 5 [2023-11-01 18:25:27,788][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 18:25:27,807][fairseq.trainer][INFO] - begin training epoch 5 [2023-11-01 18:25:27,808][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 18:25:36,884][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2023-11-01 18:26:00,150][train_inner][INFO] - {"epoch": 5, "update": 4.044, "loss": "3.91", "ntokens": "3194.52", "nsentences": "46.64", "prob_perplexity": "33.284", "code_perplexity": "33.265", "temp": "1.843", "loss_0": "3.745", "loss_1": "0.137", "loss_2": "0.027", "accuracy": "0.42372", "wps": "9884.9", "ups": "3.09", "wpb": "3194.5", "bsz": "46.6", "num_updates": "16400", "lr": "9.71139e-05", "gnorm": "0.704", "loss_scale": "16", "train_wall": "35", "gb_free": "14.3", "wall": "3066"} [2023-11-01 18:26:35,755][train_inner][INFO] - {"epoch": 5, "update": 4.093, "loss": "3.927", "ntokens": "3173", "nsentences": "45.08", "prob_perplexity": "33.756", "code_perplexity": "33.741", "temp": "1.842", "loss_0": "3.762", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41938", "wps": "17824.3", "ups": "5.62", "wpb": "3173", "bsz": "45.1", "num_updates": "16600", "lr": "9.70633e-05", "gnorm": "0.691", "loss_scale": "16", "train_wall": "35", "gb_free": "16.4", "wall": "3102"} [2023-11-01 18:27:10,929][train_inner][INFO] - {"epoch": 5, "update": 4.143, "loss": "3.893", "ntokens": "3185.72", "nsentences": "46.08", "prob_perplexity": "33.734", "code_perplexity": "33.72", "temp": "1.84", "loss_0": "3.728", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.42459", "wps": "18115.3", "ups": "5.69", "wpb": "3185.7", "bsz": "46.1", "num_updates": "16800", "lr": "9.70127e-05", "gnorm": "0.691", "loss_scale": "16", "train_wall": "35", "gb_free": "12.9", "wall": "3137"} [2023-11-01 18:27:46,682][train_inner][INFO] - {"epoch": 5, "update": 4.192, "loss": "3.955", "ntokens": "3196.16", "nsentences": "42.28", "prob_perplexity": "33.785", "code_perplexity": "33.768", "temp": "1.838", "loss_0": "3.79", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41327", "wps": "17880.4", "ups": "5.59", "wpb": "3196.2", "bsz": "42.3", "num_updates": "17000", "lr": "9.6962e-05", "gnorm": "0.691", "loss_scale": "16", "train_wall": "35", "gb_free": "14.2", "wall": "3173"} [2023-11-01 18:28:22,545][train_inner][INFO] - {"epoch": 5, "update": 4.241, "loss": "3.94", "ntokens": "3205.72", "nsentences": "42.84", "prob_perplexity": "34.017", "code_perplexity": "34.001", "temp": "1.836", "loss_0": "3.775", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.4146", "wps": "17878.8", "ups": "5.58", "wpb": "3205.7", "bsz": "42.8", "num_updates": "17200", "lr": "9.69114e-05", "gnorm": "0.686", "loss_scale": "16", "train_wall": "35", "gb_free": "17.4", "wall": "3209"} [2023-11-01 18:28:58,519][train_inner][INFO] - {"epoch": 5, "update": 4.291, "loss": "3.894", "ntokens": "3188.96", "nsentences": "44.76", "prob_perplexity": "34.142", "code_perplexity": "34.127", "temp": "1.834", "loss_0": "3.729", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.42292", "wps": "17730.2", "ups": "5.56", "wpb": "3189", "bsz": "44.8", "num_updates": "17400", "lr": "9.68608e-05", "gnorm": "0.696", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "3245"} [2023-11-01 18:29:34,338][train_inner][INFO] - {"epoch": 5, "update": 4.34, "loss": "3.872", "ntokens": "3180.52", "nsentences": "44.92", "prob_perplexity": "34.465", "code_perplexity": "34.448", "temp": "1.832", "loss_0": "3.707", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.42638", "wps": "17759.8", "ups": "5.58", "wpb": "3180.5", "bsz": "44.9", "num_updates": "17600", "lr": "9.68101e-05", "gnorm": "0.683", "loss_scale": "16", "train_wall": "35", "gb_free": "13.5", "wall": "3281"} [2023-11-01 18:30:10,458][train_inner][INFO] - {"epoch": 5, "update": 4.389, "loss": "3.963", "ntokens": "3179.16", "nsentences": "41.24", "prob_perplexity": "34.186", "code_perplexity": "34.173", "temp": "1.831", "loss_0": "3.799", "loss_1": "0.137", "loss_2": "0.028", "accuracy": "0.41112", "wps": "17604.2", "ups": "5.54", "wpb": "3179.2", "bsz": "41.2", "num_updates": "17800", "lr": "9.67595e-05", "gnorm": "0.691", "loss_scale": "16", "train_wall": "35", "gb_free": "13.4", "wall": "3317"} [2023-11-01 18:30:46,234][train_inner][INFO] - {"epoch": 5, "update": 4.439, "loss": "3.832", "ntokens": "3189.84", "nsentences": "46.6", "prob_perplexity": "34.477", "code_perplexity": "34.459", "temp": "1.829", "loss_0": "3.668", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43249", "wps": "17833.6", "ups": "5.59", "wpb": "3189.8", "bsz": "46.6", "num_updates": "18000", "lr": "9.67089e-05", "gnorm": "0.681", "loss_scale": "16", "train_wall": "35", "gb_free": "14.1", "wall": "3352"} [2023-11-01 18:31:22,290][train_inner][INFO] - {"epoch": 5, "update": 4.488, "loss": "3.926", "ntokens": "3224.04", "nsentences": "43.48", "prob_perplexity": "34.575", "code_perplexity": "34.559", "temp": "1.827", "loss_0": "3.761", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.41576", "wps": "17884.6", "ups": "5.55", "wpb": "3224", "bsz": "43.5", "num_updates": "18200", "lr": "9.66582e-05", "gnorm": "0.669", "loss_scale": "16", "train_wall": "35", "gb_free": "13.5", "wall": "3389"} [2023-11-01 18:31:58,505][train_inner][INFO] - {"epoch": 5, "update": 4.537, "loss": "3.922", "ntokens": "3251.32", "nsentences": "44.72", "prob_perplexity": "34.716", "code_perplexity": "34.698", "temp": "1.825", "loss_0": "3.757", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.41675", "wps": "17956.6", "ups": "5.52", "wpb": "3251.3", "bsz": "44.7", "num_updates": "18400", "lr": "9.66076e-05", "gnorm": "0.675", "loss_scale": "16", "train_wall": "36", "gb_free": "12.4", "wall": "3425"} [2023-11-01 18:32:34,643][train_inner][INFO] - {"epoch": 5, "update": 4.587, "loss": "3.893", "ntokens": "3214.28", "nsentences": "43.96", "prob_perplexity": "34.77", "code_perplexity": "34.751", "temp": "1.823", "loss_0": "3.729", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.4216", "wps": "17789.6", "ups": "5.53", "wpb": "3214.3", "bsz": "44", "num_updates": "18600", "lr": "9.6557e-05", "gnorm": "0.674", "loss_scale": "16", "train_wall": "35", "gb_free": "14.7", "wall": "3461"} [2023-11-01 18:33:10,475][train_inner][INFO] - {"epoch": 5, "update": 4.636, "loss": "3.82", "ntokens": "3159.52", "nsentences": "46.52", "prob_perplexity": "34.779", "code_perplexity": "34.76", "temp": "1.821", "loss_0": "3.656", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43421", "wps": "17636.6", "ups": "5.58", "wpb": "3159.5", "bsz": "46.5", "num_updates": "18800", "lr": "9.65063e-05", "gnorm": "0.691", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "3497"} [2023-11-01 18:33:46,326][train_inner][INFO] - {"epoch": 5, "update": 4.685, "loss": "3.908", "ntokens": "3228.16", "nsentences": "42.4", "prob_perplexity": "34.922", "code_perplexity": "34.903", "temp": "1.82", "loss_0": "3.744", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.41631", "wps": "18010", "ups": "5.58", "wpb": "3228.2", "bsz": "42.4", "num_updates": "19000", "lr": "9.64557e-05", "gnorm": "0.675", "loss_scale": "16", "train_wall": "35", "gb_free": "13.1", "wall": "3533"} [2023-11-01 18:34:22,287][train_inner][INFO] - {"epoch": 5, "update": 4.734, "loss": "3.862", "ntokens": "3194.64", "nsentences": "44.24", "prob_perplexity": "34.893", "code_perplexity": "34.878", "temp": "1.818", "loss_0": "3.698", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.42496", "wps": "17768.6", "ups": "5.56", "wpb": "3194.6", "bsz": "44.2", "num_updates": "19200", "lr": "9.64051e-05", "gnorm": "0.682", "loss_scale": "16", "train_wall": "35", "gb_free": "13.5", "wall": "3569"} [2023-11-01 18:34:58,107][train_inner][INFO] - {"epoch": 5, "update": 4.784, "loss": "3.743", "ntokens": "3149.32", "nsentences": "46.84", "prob_perplexity": "34.988", "code_perplexity": "34.97", "temp": "1.816", "loss_0": "3.579", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.4457", "wps": "17585.1", "ups": "5.58", "wpb": "3149.3", "bsz": "46.8", "num_updates": "19400", "lr": "9.63544e-05", "gnorm": "0.688", "loss_scale": "16", "train_wall": "35", "gb_free": "13.3", "wall": "3604"} [2023-11-01 18:35:33,843][train_inner][INFO] - {"epoch": 5, "update": 4.833, "loss": "3.906", "ntokens": "3187.44", "nsentences": "43.92", "prob_perplexity": "35.121", "code_perplexity": "35.104", "temp": "1.814", "loss_0": "3.742", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.41923", "wps": "17839.8", "ups": "5.6", "wpb": "3187.4", "bsz": "43.9", "num_updates": "19600", "lr": "9.63038e-05", "gnorm": "0.68", "loss_scale": "16", "train_wall": "35", "gb_free": "13.2", "wall": "3640"} [2023-11-01 18:36:09,575][train_inner][INFO] - {"epoch": 5, "update": 4.882, "loss": "3.861", "ntokens": "3155.08", "nsentences": "43.16", "prob_perplexity": "35.049", "code_perplexity": "35.034", "temp": "1.812", "loss_0": "3.697", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.42665", "wps": "17660.7", "ups": "5.6", "wpb": "3155.1", "bsz": "43.2", "num_updates": "19800", "lr": "9.62532e-05", "gnorm": "0.694", "loss_scale": "16", "train_wall": "35", "gb_free": "15.8", "wall": "3676"} [2023-11-01 18:36:45,738][train_inner][INFO] - {"epoch": 5, "update": 4.932, "loss": "3.9", "ntokens": "3183.6", "nsentences": "42.4", "prob_perplexity": "35.276", "code_perplexity": "35.258", "temp": "1.811", "loss_0": "3.736", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.41938", "wps": "17608.2", "ups": "5.53", "wpb": "3183.6", "bsz": "42.4", "num_updates": "20000", "lr": "9.62025e-05", "gnorm": "0.687", "loss_scale": "16", "train_wall": "36", "gb_free": "13.5", "wall": "3712"} [2023-11-01 18:37:22,530][train_inner][INFO] - {"epoch": 5, "update": 4.981, "loss": "3.863", "ntokens": "3225.08", "nsentences": "42.6", "prob_perplexity": "35.178", "code_perplexity": "35.159", "temp": "1.809", "loss_0": "3.698", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.42325", "wps": "17532.6", "ups": "5.44", "wpb": "3225.1", "bsz": "42.6", "num_updates": "20200", "lr": "9.61519e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "36", "gb_free": "14.2", "wall": "3749"} [2023-11-01 18:37:36,114][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 18:37:36,116][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:37:36,136][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 6 [2023-11-01 18:38:01,591][valid][INFO] - {"epoch": 5, "valid_loss": "3.792", "valid_ntokens": "3169.7", "valid_nsentences": "44.1685", "valid_prob_perplexity": "35.205", "valid_code_perplexity": "35.191", "valid_temp": "1.807", "valid_loss_0": "3.627", "valid_loss_1": "0.136", "valid_loss_2": "0.029", "valid_accuracy": "0.43564", "valid_wps": "56462.3", "valid_wpb": "3169.7", "valid_bsz": "44.2", "valid_num_updates": "20277", "valid_best_loss": "3.792"} [2023-11-01 18:38:01,592][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 5 @ 20277 updates [2023-11-01 18:38:01,594][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:38:03,014][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:38:03,986][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 5 @ 20277 updates, score 3.792) (writing took 2.3937717489898205 seconds) [2023-11-01 18:38:03,987][fairseq_cli.train][INFO] - end of epoch 5 (average epoch stats below) [2023-11-01 18:38:03,989][train][INFO] - {"epoch": 5, "train_loss": "3.887", "train_ntokens": "3192.62", "train_nsentences": "44.2575", "train_prob_perplexity": "34.527", "train_code_perplexity": "34.51", "train_temp": "1.826", "train_loss_0": "3.723", "train_loss_1": "0.136", "train_loss_2": "0.028", "train_accuracy": "0.42292", "train_wps": "17115.6", "train_ups": "5.36", "train_wpb": "3192.6", "train_bsz": "44.3", "train_num_updates": "20277", "train_lr": "9.61324e-05", "train_gnorm": "0.685", "train_loss_scale": "16", "train_train_wall": "715", "train_gb_free": "14.1", "train_wall": "3790"} [2023-11-01 18:38:03,992][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:38:04,014][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 6 [2023-11-01 18:38:04,209][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 18:38:04,228][fairseq.trainer][INFO] - begin training epoch 6 [2023-11-01 18:38:04,229][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 18:38:26,272][train_inner][INFO] - {"epoch": 6, "update": 5.03, "loss": "3.779", "ntokens": "3172.8", "nsentences": "44.84", "prob_perplexity": "35.272", "code_perplexity": "35.254", "temp": "1.807", "loss_0": "3.615", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43721", "wps": "9957.5", "ups": "3.14", "wpb": "3172.8", "bsz": "44.8", "num_updates": "20400", "lr": "9.61013e-05", "gnorm": "0.682", "loss_scale": "16", "train_wall": "35", "gb_free": "13.5", "wall": "3812"} [2023-11-01 18:39:01,375][train_inner][INFO] - {"epoch": 6, "update": 5.08, "loss": "3.8", "ntokens": "3161.24", "nsentences": "44.16", "prob_perplexity": "35.727", "code_perplexity": "35.71", "temp": "1.805", "loss_0": "3.636", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43286", "wps": "18012.6", "ups": "5.7", "wpb": "3161.2", "bsz": "44.2", "num_updates": "20600", "lr": "9.60506e-05", "gnorm": "0.675", "loss_scale": "16", "train_wall": "34", "gb_free": "13.8", "wall": "3848"} [2023-11-01 18:39:36,879][train_inner][INFO] - {"epoch": 6, "update": 5.129, "loss": "3.842", "ntokens": "3167.6", "nsentences": "42.68", "prob_perplexity": "35.642", "code_perplexity": "35.623", "temp": "1.803", "loss_0": "3.678", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.42688", "wps": "17844.5", "ups": "5.63", "wpb": "3167.6", "bsz": "42.7", "num_updates": "20800", "lr": "9.6e-05", "gnorm": "0.68", "loss_scale": "16", "train_wall": "35", "gb_free": "13.3", "wall": "3883"} [2023-11-01 18:40:12,505][train_inner][INFO] - {"epoch": 6, "update": 5.178, "loss": "3.877", "ntokens": "3192.08", "nsentences": "43.56", "prob_perplexity": "35.787", "code_perplexity": "35.768", "temp": "1.802", "loss_0": "3.713", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.42274", "wps": "17921.2", "ups": "5.61", "wpb": "3192.1", "bsz": "43.6", "num_updates": "21000", "lr": "9.59494e-05", "gnorm": "0.67", "loss_scale": "16", "train_wall": "35", "gb_free": "12.7", "wall": "3919"} [2023-11-01 18:40:48,073][train_inner][INFO] - {"epoch": 6, "update": 5.228, "loss": "3.829", "ntokens": "3212.64", "nsentences": "42.88", "prob_perplexity": "36.038", "code_perplexity": "36.024", "temp": "1.8", "loss_0": "3.665", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.42794", "wps": "18066.1", "ups": "5.62", "wpb": "3212.6", "bsz": "42.9", "num_updates": "21200", "lr": "9.58987e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "35", "gb_free": "12.8", "wall": "3954"} [2023-11-01 18:41:24,193][train_inner][INFO] - {"epoch": 6, "update": 5.277, "loss": "3.82", "ntokens": "3235.16", "nsentences": "44.44", "prob_perplexity": "35.916", "code_perplexity": "35.902", "temp": "1.798", "loss_0": "3.655", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43115", "wps": "17914.5", "ups": "5.54", "wpb": "3235.2", "bsz": "44.4", "num_updates": "21400", "lr": "9.58481e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "35", "gb_free": "16.9", "wall": "3990"} [2023-11-01 18:41:59,763][train_inner][INFO] - {"epoch": 6, "update": 5.326, "loss": "3.815", "ntokens": "3198.08", "nsentences": "44.6", "prob_perplexity": "35.962", "code_perplexity": "35.947", "temp": "1.796", "loss_0": "3.65", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43294", "wps": "17982.9", "ups": "5.62", "wpb": "3198.1", "bsz": "44.6", "num_updates": "21600", "lr": "9.57975e-05", "gnorm": "0.67", "loss_scale": "16", "train_wall": "35", "gb_free": "14.5", "wall": "4026"} [2023-11-01 18:42:35,818][train_inner][INFO] - {"epoch": 6, "update": 5.375, "loss": "3.747", "ntokens": "3184.4", "nsentences": "46.76", "prob_perplexity": "36.032", "code_perplexity": "36.015", "temp": "1.794", "loss_0": "3.583", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.44236", "wps": "17664.9", "ups": "5.55", "wpb": "3184.4", "bsz": "46.8", "num_updates": "21800", "lr": "9.57468e-05", "gnorm": "0.675", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "4062"} [2023-11-01 18:43:11,748][train_inner][INFO] - {"epoch": 6, "update": 5.425, "loss": "3.795", "ntokens": "3179.6", "nsentences": "44.28", "prob_perplexity": "36.424", "code_perplexity": "36.41", "temp": "1.793", "loss_0": "3.632", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43358", "wps": "17700.3", "ups": "5.57", "wpb": "3179.6", "bsz": "44.3", "num_updates": "22000", "lr": "9.56962e-05", "gnorm": "0.663", "loss_scale": "16", "train_wall": "35", "gb_free": "13.8", "wall": "4098"} [2023-11-01 18:43:47,535][train_inner][INFO] - {"epoch": 6, "update": 5.474, "loss": "3.755", "ntokens": "3177.44", "nsentences": "46.48", "prob_perplexity": "36.318", "code_perplexity": "36.3", "temp": "1.791", "loss_0": "3.592", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.44266", "wps": "17758.5", "ups": "5.59", "wpb": "3177.4", "bsz": "46.5", "num_updates": "22200", "lr": "9.56456e-05", "gnorm": "0.675", "loss_scale": "16", "train_wall": "35", "gb_free": "13.8", "wall": "4134"} [2023-11-01 18:44:23,754][train_inner][INFO] - {"epoch": 6, "update": 5.523, "loss": "3.827", "ntokens": "3180.04", "nsentences": "42.96", "prob_perplexity": "36.3", "code_perplexity": "36.282", "temp": "1.789", "loss_0": "3.664", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.42808", "wps": "17561.1", "ups": "5.52", "wpb": "3180", "bsz": "43", "num_updates": "22400", "lr": "9.55949e-05", "gnorm": "0.667", "loss_scale": "16", "train_wall": "36", "gb_free": "15.3", "wall": "4170"} [2023-11-01 18:44:59,517][train_inner][INFO] - {"epoch": 6, "update": 5.573, "loss": "3.831", "ntokens": "3187.92", "nsentences": "42.84", "prob_perplexity": "36.486", "code_perplexity": "36.473", "temp": "1.787", "loss_0": "3.668", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.42718", "wps": "17828.9", "ups": "5.59", "wpb": "3187.9", "bsz": "42.8", "num_updates": "22600", "lr": "9.55443e-05", "gnorm": "0.67", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "4206"} [2023-11-01 18:45:35,802][train_inner][INFO] - {"epoch": 6, "update": 5.622, "loss": "3.861", "ntokens": "3219.12", "nsentences": "42.36", "prob_perplexity": "36.541", "code_perplexity": "36.522", "temp": "1.785", "loss_0": "3.697", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.42304", "wps": "17744.5", "ups": "5.51", "wpb": "3219.1", "bsz": "42.4", "num_updates": "22800", "lr": "9.54937e-05", "gnorm": "0.663", "loss_scale": "16", "train_wall": "36", "gb_free": "13.3", "wall": "4242"} [2023-11-01 18:46:11,646][train_inner][INFO] - {"epoch": 6, "update": 5.671, "loss": "3.796", "ntokens": "3196.96", "nsentences": "44", "prob_perplexity": "36.781", "code_perplexity": "36.766", "temp": "1.784", "loss_0": "3.633", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.4324", "wps": "17839.7", "ups": "5.58", "wpb": "3197", "bsz": "44", "num_updates": "23000", "lr": "9.5443e-05", "gnorm": "0.665", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "4278"} [2023-11-01 18:46:47,041][train_inner][INFO] - {"epoch": 6, "update": 5.721, "loss": "3.798", "ntokens": "3189.44", "nsentences": "43.72", "prob_perplexity": "36.884", "code_perplexity": "36.867", "temp": "1.782", "loss_0": "3.635", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43243", "wps": "18022.9", "ups": "5.65", "wpb": "3189.4", "bsz": "43.7", "num_updates": "23200", "lr": "9.53924e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "35", "gb_free": "13.1", "wall": "4313"} [2023-11-01 18:47:22,652][train_inner][INFO] - {"epoch": 6, "update": 5.77, "loss": "3.726", "ntokens": "3166.08", "nsentences": "45.24", "prob_perplexity": "36.916", "code_perplexity": "36.896", "temp": "1.78", "loss_0": "3.563", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44358", "wps": "17782.4", "ups": "5.62", "wpb": "3166.1", "bsz": "45.2", "num_updates": "23400", "lr": "9.53418e-05", "gnorm": "0.665", "loss_scale": "16", "train_wall": "35", "gb_free": "14.7", "wall": "4349"} [2023-11-01 18:47:59,032][train_inner][INFO] - {"epoch": 6, "update": 5.819, "loss": "3.726", "ntokens": "3172.8", "nsentences": "47.16", "prob_perplexity": "36.871", "code_perplexity": "36.855", "temp": "1.778", "loss_0": "3.563", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44576", "wps": "17443.6", "ups": "5.5", "wpb": "3172.8", "bsz": "47.2", "num_updates": "23600", "lr": "9.52911e-05", "gnorm": "0.67", "loss_scale": "16", "train_wall": "36", "gb_free": "12.1", "wall": "4385"} [2023-11-01 18:48:35,261][train_inner][INFO] - {"epoch": 6, "update": 5.869, "loss": "3.735", "ntokens": "3166.24", "nsentences": "45.76", "prob_perplexity": "36.967", "code_perplexity": "36.95", "temp": "1.777", "loss_0": "3.573", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.4432", "wps": "17480.3", "ups": "5.52", "wpb": "3166.2", "bsz": "45.8", "num_updates": "23800", "lr": "9.52405e-05", "gnorm": "0.671", "loss_scale": "16", "train_wall": "36", "gb_free": "13.8", "wall": "4421"} [2023-11-01 18:49:11,731][train_inner][INFO] - {"epoch": 6, "update": 5.918, "loss": "3.773", "ntokens": "3234.32", "nsentences": "43.68", "prob_perplexity": "37.092", "code_perplexity": "37.08", "temp": "1.775", "loss_0": "3.611", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43424", "wps": "17737.9", "ups": "5.48", "wpb": "3234.3", "bsz": "43.7", "num_updates": "24000", "lr": "9.51899e-05", "gnorm": "0.655", "loss_scale": "16", "train_wall": "36", "gb_free": "12.7", "wall": "4458"} [2023-11-01 18:49:47,812][train_inner][INFO] - {"epoch": 6, "update": 5.967, "loss": "3.785", "ntokens": "3225.92", "nsentences": "45.4", "prob_perplexity": "37.223", "code_perplexity": "37.21", "temp": "1.773", "loss_0": "3.622", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43582", "wps": "17882.2", "ups": "5.54", "wpb": "3225.9", "bsz": "45.4", "num_updates": "24200", "lr": "9.51392e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "35", "gb_free": "13.4", "wall": "4494"} [2023-11-01 18:50:11,699][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 18:50:11,700][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:50:11,718][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 7 [2023-11-01 18:50:37,393][valid][INFO] - {"epoch": 6, "valid_loss": "3.687", "valid_ntokens": "3160.66", "valid_nsentences": "44.1685", "valid_prob_perplexity": "37.361", "valid_code_perplexity": "37.353", "valid_temp": "1.771", "valid_loss_0": "3.524", "valid_loss_1": "0.136", "valid_loss_2": "0.027", "valid_accuracy": "0.45148", "valid_wps": "55725.7", "valid_wpb": "3160.7", "valid_bsz": "44.2", "valid_num_updates": "24333", "valid_best_loss": "3.687"} [2023-11-01 18:50:37,395][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 6 @ 24333 updates [2023-11-01 18:50:37,397][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:50:38,839][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 18:50:39,785][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 6 @ 24333 updates, score 3.687) (writing took 2.390093854162842 seconds) [2023-11-01 18:50:39,786][fairseq_cli.train][INFO] - end of epoch 6 (average epoch stats below) [2023-11-01 18:50:39,788][train][INFO] - {"epoch": 6, "train_loss": "3.798", "train_ntokens": "3192.39", "train_nsentences": "44.2682", "train_prob_perplexity": "36.415", "train_code_perplexity": "36.399", "train_temp": "1.789", "train_loss_0": "3.634", "train_loss_1": "0.136", "train_loss_2": "0.027", "train_accuracy": "0.43335", "train_wps": "17132", "train_ups": "5.37", "train_wpb": "3192.4", "train_bsz": "44.3", "train_num_updates": "24333", "train_lr": "9.51056e-05", "train_gnorm": "0.668", "train_loss_scale": "16", "train_train_wall": "714", "train_gb_free": "13", "train_wall": "4546"} [2023-11-01 18:50:39,791][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:50:39,819][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 7 [2023-11-01 18:50:40,018][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 18:50:40,038][fairseq.trainer][INFO] - begin training epoch 7 [2023-11-01 18:50:40,039][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 18:50:52,211][train_inner][INFO] - {"epoch": 7, "update": 6.017, "loss": "3.781", "ntokens": "3201.76", "nsentences": "44.24", "prob_perplexity": "37.379", "code_perplexity": "37.363", "temp": "1.771", "loss_0": "3.617", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43472", "wps": "9943.9", "ups": "3.11", "wpb": "3201.8", "bsz": "44.2", "num_updates": "24400", "lr": "9.50886e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "35", "gb_free": "13.1", "wall": "4558"} [2023-11-01 18:51:27,879][train_inner][INFO] - {"epoch": 7, "update": 6.066, "loss": "3.76", "ntokens": "3175.6", "nsentences": "43.44", "prob_perplexity": "37.573", "code_perplexity": "37.562", "temp": "1.769", "loss_0": "3.596", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.43713", "wps": "17807.4", "ups": "5.61", "wpb": "3175.6", "bsz": "43.4", "num_updates": "24600", "lr": "9.5038e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "35", "gb_free": "15.1", "wall": "4594"} [2023-11-01 18:52:03,549][train_inner][INFO] - {"epoch": 7, "update": 6.115, "loss": "3.742", "ntokens": "3186", "nsentences": "43.44", "prob_perplexity": "38.02", "code_perplexity": "38.005", "temp": "1.768", "loss_0": "3.579", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.4394", "wps": "17865.1", "ups": "5.61", "wpb": "3186", "bsz": "43.4", "num_updates": "24800", "lr": "9.49873e-05", "gnorm": "0.669", "loss_scale": "16", "train_wall": "35", "gb_free": "13.1", "wall": "4630"} [2023-11-01 18:52:39,311][train_inner][INFO] - {"epoch": 7, "update": 6.164, "loss": "3.821", "ntokens": "3215", "nsentences": "42.12", "prob_perplexity": "37.887", "code_perplexity": "37.874", "temp": "1.766", "loss_0": "3.658", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.42584", "wps": "17981.2", "ups": "5.59", "wpb": "3215", "bsz": "42.1", "num_updates": "25000", "lr": "9.49367e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "35", "gb_free": "13.5", "wall": "4666"} [2023-11-01 18:52:39,313][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 18:52:39,314][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 18:52:39,334][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 8 [2023-11-01 18:53:05,392][valid][INFO] - {"epoch": 7, "valid_loss": "3.654", "valid_ntokens": "3155.46", "valid_nsentences": "44.1685", "valid_prob_perplexity": "37.222", "valid_code_perplexity": "37.209", "valid_temp": "1.765", "valid_loss_0": "3.49", "valid_loss_1": "0.136", "valid_loss_2": "0.028", "valid_accuracy": "0.45454", "valid_wps": "54847.3", "valid_wpb": "3155.5", "valid_bsz": "44.2", "valid_num_updates": "25000", "valid_best_loss": "3.654"} [2023-11-01 18:53:05,394][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 7 @ 25000 updates [2023-11-01 18:53:05,396][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_7_25000.pt [2023-11-01 18:53:06,735][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_7_25000.pt [2023-11-01 18:53:08,593][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_7_25000.pt (epoch 7 @ 25000 updates, score 3.654) (writing took 3.1992476009763777 seconds) [2023-11-01 18:53:44,500][train_inner][INFO] - {"epoch": 7, "update": 6.214, "loss": "3.722", "ntokens": "3224.4", "nsentences": "46.16", "prob_perplexity": "37.905", "code_perplexity": "37.885", "temp": "1.764", "loss_0": "3.559", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44366", "wps": "9892.8", "ups": "3.07", "wpb": "3224.4", "bsz": "46.2", "num_updates": "25200", "lr": "9.48861e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "4731"} [2023-11-01 18:54:20,131][train_inner][INFO] - {"epoch": 7, "update": 6.263, "loss": "3.762", "ntokens": "3203.24", "nsentences": "43.44", "prob_perplexity": "38.066", "code_perplexity": "38.05", "temp": "1.762", "loss_0": "3.599", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43545", "wps": "17981.4", "ups": "5.61", "wpb": "3203.2", "bsz": "43.4", "num_updates": "25400", "lr": "9.48354e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "35", "gb_free": "13.2", "wall": "4766"} [2023-11-01 18:54:55,784][train_inner][INFO] - {"epoch": 7, "update": 6.312, "loss": "3.737", "ntokens": "3197.96", "nsentences": "45.4", "prob_perplexity": "38.07", "code_perplexity": "38.052", "temp": "1.761", "loss_0": "3.574", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44122", "wps": "17940.8", "ups": "5.61", "wpb": "3198", "bsz": "45.4", "num_updates": "25600", "lr": "9.47848e-05", "gnorm": "0.669", "loss_scale": "16", "train_wall": "35", "gb_free": "12.7", "wall": "4802"} [2023-11-01 18:55:32,086][train_inner][INFO] - {"epoch": 7, "update": 6.362, "loss": "3.762", "ntokens": "3213.12", "nsentences": "42.88", "prob_perplexity": "38.085", "code_perplexity": "38.07", "temp": "1.759", "loss_0": "3.599", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43557", "wps": "17702.9", "ups": "5.51", "wpb": "3213.1", "bsz": "42.9", "num_updates": "25800", "lr": "9.47342e-05", "gnorm": "0.663", "loss_scale": "16", "train_wall": "36", "gb_free": "14", "wall": "4838"} [2023-11-01 18:56:08,082][train_inner][INFO] - {"epoch": 7, "update": 6.411, "loss": "3.795", "ntokens": "3176.84", "nsentences": "41.88", "prob_perplexity": "38.255", "code_perplexity": "38.237", "temp": "1.757", "loss_0": "3.632", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.4299", "wps": "17652.5", "ups": "5.56", "wpb": "3176.8", "bsz": "41.9", "num_updates": "26000", "lr": "9.46835e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "35", "gb_free": "12.6", "wall": "4874"} [2023-11-01 18:56:44,202][train_inner][INFO] - {"epoch": 7, "update": 6.46, "loss": "3.666", "ntokens": "3169.52", "nsentences": "46.44", "prob_perplexity": "38.294", "code_perplexity": "38.275", "temp": "1.755", "loss_0": "3.503", "loss_1": "0.136", "loss_2": "0.028", "accuracy": "0.45307", "wps": "17550.8", "ups": "5.54", "wpb": "3169.5", "bsz": "46.4", "num_updates": "26200", "lr": "9.46329e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "4910"} [2023-11-01 18:57:20,166][train_inner][INFO] - {"epoch": 7, "update": 6.51, "loss": "3.742", "ntokens": "3189.28", "nsentences": "44.2", "prob_perplexity": "38.483", "code_perplexity": "38.465", "temp": "1.754", "loss_0": "3.579", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43997", "wps": "17736.9", "ups": "5.56", "wpb": "3189.3", "bsz": "44.2", "num_updates": "26400", "lr": "9.45823e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "4946"} [2023-11-01 18:57:56,387][train_inner][INFO] - {"epoch": 7, "update": 6.559, "loss": "3.758", "ntokens": "3239.92", "nsentences": "43.08", "prob_perplexity": "38.474", "code_perplexity": "38.455", "temp": "1.752", "loss_0": "3.596", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43622", "wps": "17890.5", "ups": "5.52", "wpb": "3239.9", "bsz": "43.1", "num_updates": "26600", "lr": "9.45316e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "36", "gb_free": "14.1", "wall": "4983"} [2023-11-01 18:58:31,558][train_inner][INFO] - {"epoch": 7, "update": 6.608, "loss": "3.708", "ntokens": "3115.92", "nsentences": "42.88", "prob_perplexity": "38.21", "code_perplexity": "38.189", "temp": "1.75", "loss_0": "3.545", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44384", "wps": "17720.1", "ups": "5.69", "wpb": "3115.9", "bsz": "42.9", "num_updates": "26800", "lr": "9.4481e-05", "gnorm": "0.671", "loss_scale": "16", "train_wall": "35", "gb_free": "12.7", "wall": "5018"} [2023-11-01 18:59:07,540][train_inner][INFO] - {"epoch": 7, "update": 6.658, "loss": "3.663", "ntokens": "3147.64", "nsentences": "45.8", "prob_perplexity": "38.606", "code_perplexity": "38.588", "temp": "1.748", "loss_0": "3.5", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.45279", "wps": "17496.7", "ups": "5.56", "wpb": "3147.6", "bsz": "45.8", "num_updates": "27000", "lr": "9.44304e-05", "gnorm": "0.665", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "5054"} [2023-11-01 18:59:43,178][train_inner][INFO] - {"epoch": 7, "update": 6.707, "loss": "3.736", "ntokens": "3160.84", "nsentences": "42.64", "prob_perplexity": "38.436", "code_perplexity": "38.422", "temp": "1.747", "loss_0": "3.573", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43911", "wps": "17739.5", "ups": "5.61", "wpb": "3160.8", "bsz": "42.6", "num_updates": "27200", "lr": "9.43797e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "35", "gb_free": "13.5", "wall": "5089"} [2023-11-01 19:00:18,738][train_inner][INFO] - {"epoch": 7, "update": 6.756, "loss": "3.692", "ntokens": "3183", "nsentences": "46.4", "prob_perplexity": "38.658", "code_perplexity": "38.64", "temp": "1.745", "loss_0": "3.53", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44798", "wps": "17903", "ups": "5.62", "wpb": "3183", "bsz": "46.4", "num_updates": "27400", "lr": "9.43291e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "5125"} [2023-11-01 19:00:54,571][train_inner][INFO] - {"epoch": 7, "update": 6.805, "loss": "3.674", "ntokens": "3189.76", "nsentences": "45.36", "prob_perplexity": "38.806", "code_perplexity": "38.789", "temp": "1.743", "loss_0": "3.512", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.45037", "wps": "17804.6", "ups": "5.58", "wpb": "3189.8", "bsz": "45.4", "num_updates": "27600", "lr": "9.42785e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "35", "gb_free": "13.8", "wall": "5161"} [2023-11-01 19:01:30,673][train_inner][INFO] - {"epoch": 7, "update": 6.855, "loss": "3.745", "ntokens": "3221.24", "nsentences": "45.96", "prob_perplexity": "38.607", "code_perplexity": "38.591", "temp": "1.741", "loss_0": "3.583", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44066", "wps": "17846.1", "ups": "5.54", "wpb": "3221.2", "bsz": "46", "num_updates": "27800", "lr": "9.42278e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "35", "gb_free": "12.8", "wall": "5197"} [2023-11-01 19:02:06,965][train_inner][INFO] - {"epoch": 7, "update": 6.904, "loss": "3.763", "ntokens": "3203.76", "nsentences": "43.4", "prob_perplexity": "38.574", "code_perplexity": "38.557", "temp": "1.74", "loss_0": "3.6", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43537", "wps": "17656.5", "ups": "5.51", "wpb": "3203.8", "bsz": "43.4", "num_updates": "28000", "lr": "9.41772e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "36", "gb_free": "13", "wall": "5233"} [2023-11-01 19:02:43,166][train_inner][INFO] - {"epoch": 7, "update": 6.953, "loss": "3.689", "ntokens": "3210.92", "nsentences": "45.84", "prob_perplexity": "38.743", "code_perplexity": "38.726", "temp": "1.738", "loss_0": "3.526", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44826", "wps": "17740.4", "ups": "5.53", "wpb": "3210.9", "bsz": "45.8", "num_updates": "28200", "lr": "9.41266e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "36", "gb_free": "12.8", "wall": "5269"} [2023-11-01 19:03:17,324][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 19:03:17,325][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:03:17,347][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 9 [2023-11-01 19:03:43,096][valid][INFO] - {"epoch": 7, "valid_loss": "3.674", "valid_ntokens": "3179.19", "valid_nsentences": "44.1685", "valid_prob_perplexity": "39.095", "valid_code_perplexity": "39.083", "valid_temp": "1.735", "valid_loss_0": "3.512", "valid_loss_1": "0.135", "valid_loss_2": "0.027", "valid_accuracy": "0.44964", "valid_wps": "55972.8", "valid_wpb": "3179.2", "valid_bsz": "44.2", "valid_num_updates": "28389", "valid_best_loss": "3.654"} [2023-11-01 19:03:43,098][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 7 @ 28389 updates [2023-11-01 19:03:43,099][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 19:03:44,540][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 19:03:44,588][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 7 @ 28389 updates, score 3.674) (writing took 1.4906941591762006 seconds) [2023-11-01 19:03:44,589][fairseq_cli.train][INFO] - end of epoch 7 (average epoch stats below) [2023-11-01 19:03:44,591][train][INFO] - {"epoch": 7, "train_loss": "3.733", "train_ntokens": "3190.47", "train_nsentences": "44.2682", "train_prob_perplexity": "38.315", "train_code_perplexity": "38.298", "train_temp": "1.753", "train_loss_0": "3.57", "train_loss_1": "0.136", "train_loss_2": "0.027", "train_accuracy": "0.44089", "train_wps": "16488.9", "train_ups": "5.17", "train_wpb": "3190.5", "train_bsz": "44.3", "train_num_updates": "28389", "train_lr": "9.40787e-05", "train_gnorm": "0.661", "train_loss_scale": "16", "train_train_wall": "715", "train_gb_free": "14.8", "train_wall": "5331"} [2023-11-01 19:03:44,593][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:03:44,611][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 8 [2023-11-01 19:03:44,779][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 19:03:44,799][fairseq.trainer][INFO] - begin training epoch 8 [2023-11-01 19:03:44,800][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 19:03:46,861][train_inner][INFO] - {"epoch": 8, "update": 7.003, "loss": "3.744", "ntokens": "3190.48", "nsentences": "42.64", "prob_perplexity": "38.872", "code_perplexity": "38.855", "temp": "1.736", "loss_0": "3.582", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.43725", "wps": "10020.4", "ups": "3.14", "wpb": "3190.5", "bsz": "42.6", "num_updates": "28400", "lr": "9.40759e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "36", "gb_free": "14.3", "wall": "5333"} [2023-11-01 19:04:22,837][train_inner][INFO] - {"epoch": 8, "update": 7.052, "loss": "3.701", "ntokens": "3183.36", "nsentences": "44.4", "prob_perplexity": "38.756", "code_perplexity": "38.736", "temp": "1.734", "loss_0": "3.538", "loss_1": "0.136", "loss_2": "0.027", "accuracy": "0.44474", "wps": "17698.1", "ups": "5.56", "wpb": "3183.4", "bsz": "44.4", "num_updates": "28600", "lr": "9.40253e-05", "gnorm": "0.66", "loss_scale": "16", "train_wall": "35", "gb_free": "15.3", "wall": "5369"} [2023-11-01 19:04:57,814][train_inner][INFO] - {"epoch": 8, "update": 7.101, "loss": "3.712", "ntokens": "3180.84", "nsentences": "43.44", "prob_perplexity": "38.807", "code_perplexity": "38.789", "temp": "1.733", "loss_0": "3.55", "loss_1": "0.136", "loss_2": "0.026", "accuracy": "0.4438", "wps": "18189.5", "ups": "5.72", "wpb": "3180.8", "bsz": "43.4", "num_updates": "28800", "lr": "9.39747e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "34", "gb_free": "13.8", "wall": "5404"} [2023-11-01 19:05:33,816][train_inner][INFO] - {"epoch": 8, "update": 7.151, "loss": "3.699", "ntokens": "3175", "nsentences": "44.32", "prob_perplexity": "39.082", "code_perplexity": "39.063", "temp": "1.731", "loss_0": "3.536", "loss_1": "0.135", "loss_2": "0.027", "accuracy": "0.44491", "wps": "17639.2", "ups": "5.56", "wpb": "3175", "bsz": "44.3", "num_updates": "29000", "lr": "9.39241e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "5440"} [2023-11-01 19:06:09,774][train_inner][INFO] - {"epoch": 8, "update": 7.2, "loss": "3.683", "ntokens": "3184.72", "nsentences": "45", "prob_perplexity": "39.051", "code_perplexity": "39.035", "temp": "1.729", "loss_0": "3.521", "loss_1": "0.135", "loss_2": "0.027", "accuracy": "0.44805", "wps": "17714.3", "ups": "5.56", "wpb": "3184.7", "bsz": "45", "num_updates": "29200", "lr": "9.38734e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "5476"} [2023-11-01 19:06:45,922][train_inner][INFO] - {"epoch": 8, "update": 7.249, "loss": "3.736", "ntokens": "3237.48", "nsentences": "44.08", "prob_perplexity": "39.084", "code_perplexity": "39.065", "temp": "1.727", "loss_0": "3.575", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.43949", "wps": "17913.6", "ups": "5.53", "wpb": "3237.5", "bsz": "44.1", "num_updates": "29400", "lr": "9.38228e-05", "gnorm": "0.654", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "5512"} [2023-11-01 19:07:21,947][train_inner][INFO] - {"epoch": 8, "update": 7.299, "loss": "3.635", "ntokens": "3164.92", "nsentences": "45.88", "prob_perplexity": "39.308", "code_perplexity": "39.29", "temp": "1.726", "loss_0": "3.473", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45517", "wps": "17571.7", "ups": "5.55", "wpb": "3164.9", "bsz": "45.9", "num_updates": "29600", "lr": "9.37722e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "5548"} [2023-11-01 19:07:58,395][train_inner][INFO] - {"epoch": 8, "update": 7.348, "loss": "3.7", "ntokens": "3212.84", "nsentences": "43.76", "prob_perplexity": "39.446", "code_perplexity": "39.421", "temp": "1.724", "loss_0": "3.538", "loss_1": "0.135", "loss_2": "0.027", "accuracy": "0.44253", "wps": "17630.6", "ups": "5.49", "wpb": "3212.8", "bsz": "43.8", "num_updates": "29800", "lr": "9.37215e-05", "gnorm": "0.648", "loss_scale": "16", "train_wall": "36", "gb_free": "14.4", "wall": "5585"} [2023-11-01 19:08:34,925][train_inner][INFO] - {"epoch": 8, "update": 7.397, "loss": "3.72", "ntokens": "3166.84", "nsentences": "42.08", "prob_perplexity": "39.448", "code_perplexity": "39.431", "temp": "1.722", "loss_0": "3.559", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.4385", "wps": "17339.5", "ups": "5.48", "wpb": "3166.8", "bsz": "42.1", "num_updates": "30000", "lr": "9.36709e-05", "gnorm": "0.66", "loss_scale": "16", "train_wall": "36", "gb_free": "13.6", "wall": "5621"} [2023-11-01 19:09:10,660][train_inner][INFO] - {"epoch": 8, "update": 7.446, "loss": "3.63", "ntokens": "3127.32", "nsentences": "42.76", "prob_perplexity": "39.336", "code_perplexity": "39.312", "temp": "1.721", "loss_0": "3.468", "loss_1": "0.135", "loss_2": "0.027", "accuracy": "0.45275", "wps": "17503.8", "ups": "5.6", "wpb": "3127.3", "bsz": "42.8", "num_updates": "30200", "lr": "9.36203e-05", "gnorm": "0.666", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "5657"} [2023-11-01 19:09:47,450][train_inner][INFO] - {"epoch": 8, "update": 7.496, "loss": "3.595", "ntokens": "3230.6", "nsentences": "47.32", "prob_perplexity": "39.677", "code_perplexity": "39.658", "temp": "1.719", "loss_0": "3.434", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46164", "wps": "17563.5", "ups": "5.44", "wpb": "3230.6", "bsz": "47.3", "num_updates": "30400", "lr": "9.35696e-05", "gnorm": "0.65", "loss_scale": "16", "train_wall": "36", "gb_free": "13.3", "wall": "5694"} [2023-11-01 19:10:23,829][train_inner][INFO] - {"epoch": 8, "update": 7.545, "loss": "3.75", "ntokens": "3190.08", "nsentences": "41.24", "prob_perplexity": "39.525", "code_perplexity": "39.506", "temp": "1.717", "loss_0": "3.589", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.43375", "wps": "17538.9", "ups": "5.5", "wpb": "3190.1", "bsz": "41.2", "num_updates": "30600", "lr": "9.3519e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "36", "gb_free": "13.4", "wall": "5730"} [2023-11-01 19:11:00,071][train_inner][INFO] - {"epoch": 8, "update": 7.594, "loss": "3.686", "ntokens": "3204.48", "nsentences": "43.48", "prob_perplexity": "39.735", "code_perplexity": "39.714", "temp": "1.715", "loss_0": "3.525", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.44553", "wps": "17684.9", "ups": "5.52", "wpb": "3204.5", "bsz": "43.5", "num_updates": "30800", "lr": "9.34684e-05", "gnorm": "0.654", "loss_scale": "16", "train_wall": "36", "gb_free": "13.5", "wall": "5766"} [2023-11-01 19:11:36,716][train_inner][INFO] - {"epoch": 8, "update": 7.644, "loss": "3.656", "ntokens": "3218.8", "nsentences": "45.56", "prob_perplexity": "39.558", "code_perplexity": "39.544", "temp": "1.714", "loss_0": "3.494", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.4518", "wps": "17568.4", "ups": "5.46", "wpb": "3218.8", "bsz": "45.6", "num_updates": "31000", "lr": "9.34177e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "36", "gb_free": "14.5", "wall": "5803"} [2023-11-01 19:12:12,861][train_inner][INFO] - {"epoch": 8, "update": 7.693, "loss": "3.597", "ntokens": "3181.76", "nsentences": "46.76", "prob_perplexity": "39.944", "code_perplexity": "39.923", "temp": "1.712", "loss_0": "3.436", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46109", "wps": "17606.9", "ups": "5.53", "wpb": "3181.8", "bsz": "46.8", "num_updates": "31200", "lr": "9.33671e-05", "gnorm": "0.664", "loss_scale": "16", "train_wall": "35", "gb_free": "14.2", "wall": "5839"} [2023-11-01 19:12:48,861][train_inner][INFO] - {"epoch": 8, "update": 7.742, "loss": "3.677", "ntokens": "3222.68", "nsentences": "45.32", "prob_perplexity": "39.885", "code_perplexity": "39.87", "temp": "1.71", "loss_0": "3.516", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.44666", "wps": "17904.9", "ups": "5.56", "wpb": "3222.7", "bsz": "45.3", "num_updates": "31400", "lr": "9.33165e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "5875"} [2023-11-01 19:13:24,920][train_inner][INFO] - {"epoch": 8, "update": 7.792, "loss": "3.658", "ntokens": "3132.52", "nsentences": "43.16", "prob_perplexity": "39.801", "code_perplexity": "39.783", "temp": "1.709", "loss_0": "3.497", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45013", "wps": "17375.2", "ups": "5.55", "wpb": "3132.5", "bsz": "43.2", "num_updates": "31600", "lr": "9.32658e-05", "gnorm": "0.666", "loss_scale": "16", "train_wall": "35", "gb_free": "13.1", "wall": "5911"} [2023-11-01 19:14:00,853][train_inner][INFO] - {"epoch": 8, "update": 7.841, "loss": "3.683", "ntokens": "3193.6", "nsentences": "42.92", "prob_perplexity": "39.821", "code_perplexity": "39.799", "temp": "1.707", "loss_0": "3.521", "loss_1": "0.135", "loss_2": "0.027", "accuracy": "0.44531", "wps": "17776.6", "ups": "5.57", "wpb": "3193.6", "bsz": "42.9", "num_updates": "31800", "lr": "9.32152e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "35", "gb_free": "15.5", "wall": "5947"} [2023-11-01 19:14:36,503][train_inner][INFO] - {"epoch": 8, "update": 7.89, "loss": "3.722", "ntokens": "3224.48", "nsentences": "43.84", "prob_perplexity": "39.99", "code_perplexity": "39.971", "temp": "1.705", "loss_0": "3.562", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.43966", "wps": "18090.4", "ups": "5.61", "wpb": "3224.5", "bsz": "43.8", "num_updates": "32000", "lr": "9.31646e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "35", "gb_free": "13.2", "wall": "5983"} [2023-11-01 19:15:12,782][train_inner][INFO] - {"epoch": 8, "update": 7.94, "loss": "3.677", "ntokens": "3219.44", "nsentences": "43.48", "prob_perplexity": "39.929", "code_perplexity": "39.909", "temp": "1.703", "loss_0": "3.515", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.44564", "wps": "17749.6", "ups": "5.51", "wpb": "3219.4", "bsz": "43.5", "num_updates": "32200", "lr": "9.31139e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "36", "gb_free": "15.8", "wall": "6019"} [2023-11-01 19:15:48,730][train_inner][INFO] - {"epoch": 8, "update": 7.989, "loss": "3.632", "ntokens": "3205.04", "nsentences": "46.8", "prob_perplexity": "39.845", "code_perplexity": "39.823", "temp": "1.702", "loss_0": "3.471", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.4561", "wps": "17832.2", "ups": "5.56", "wpb": "3205", "bsz": "46.8", "num_updates": "32400", "lr": "9.30633e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "6055"} [2023-11-01 19:15:56,679][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 19:15:56,680][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:15:56,700][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 10 [2023-11-01 19:16:22,069][valid][INFO] - {"epoch": 8, "valid_loss": "3.58", "valid_ntokens": "3160.8", "valid_nsentences": "44.1685", "valid_prob_perplexity": "39.929", "valid_code_perplexity": "39.92", "valid_temp": "1.701", "valid_loss_0": "3.418", "valid_loss_1": "0.135", "valid_loss_2": "0.026", "valid_accuracy": "0.46249", "valid_wps": "56473.5", "valid_wpb": "3160.8", "valid_bsz": "44.2", "valid_num_updates": "32445", "valid_best_loss": "3.58"} [2023-11-01 19:16:22,071][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 8 @ 32445 updates [2023-11-01 19:16:22,072][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:16:23,417][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:16:24,384][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 8 @ 32445 updates, score 3.58) (writing took 2.3138983799144626 seconds) [2023-11-01 19:16:24,385][fairseq_cli.train][INFO] - end of epoch 8 (average epoch stats below) [2023-11-01 19:16:24,387][train][INFO] - {"epoch": 8, "train_loss": "3.678", "train_ntokens": "3192.42", "train_nsentences": "44.2682", "train_prob_perplexity": "39.504", "train_code_perplexity": "39.485", "train_temp": "1.718", "train_loss_0": "3.516", "train_loss_1": "0.135", "train_loss_2": "0.026", "train_accuracy": "0.44734", "train_wps": "17042", "train_ups": "5.34", "train_wpb": "3192.4", "train_bsz": "44.3", "train_num_updates": "32445", "train_lr": "9.30519e-05", "train_gnorm": "0.657", "train_loss_scale": "16", "train_train_wall": "719", "train_gb_free": "14.8", "train_wall": "6091"} [2023-11-01 19:16:24,390][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:16:24,417][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 9 [2023-11-01 19:16:24,641][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 19:16:24,661][fairseq.trainer][INFO] - begin training epoch 9 [2023-11-01 19:16:24,662][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 19:16:52,035][train_inner][INFO] - {"epoch": 9, "update": 8.038, "loss": "3.595", "ntokens": "3173.84", "nsentences": "46.44", "prob_perplexity": "39.807", "code_perplexity": "39.79", "temp": "1.7", "loss_0": "3.434", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46019", "wps": "10030", "ups": "3.16", "wpb": "3173.8", "bsz": "46.4", "num_updates": "32600", "lr": "9.30127e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "35", "gb_free": "13.3", "wall": "6118"} [2023-11-01 19:17:27,920][train_inner][INFO] - {"epoch": 9, "update": 8.088, "loss": "3.639", "ntokens": "3203.76", "nsentences": "44.4", "prob_perplexity": "39.96", "code_perplexity": "39.943", "temp": "1.698", "loss_0": "3.477", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.4526", "wps": "17856.7", "ups": "5.57", "wpb": "3203.8", "bsz": "44.4", "num_updates": "32800", "lr": "9.2962e-05", "gnorm": "0.658", "loss_scale": "32", "train_wall": "35", "gb_free": "12.9", "wall": "6154"} [2023-11-01 19:17:37,844][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 16.0 [2023-11-01 19:18:04,492][train_inner][INFO] - {"epoch": 9, "update": 8.137, "loss": "3.666", "ntokens": "3200.88", "nsentences": "44.24", "prob_perplexity": "39.923", "code_perplexity": "39.898", "temp": "1.697", "loss_0": "3.505", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.4482", "wps": "17505.7", "ups": "5.47", "wpb": "3200.9", "bsz": "44.2", "num_updates": "33000", "lr": "9.29114e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "36", "gb_free": "14.6", "wall": "6191"} [2023-11-01 19:18:40,375][train_inner][INFO] - {"epoch": 9, "update": 8.186, "loss": "3.619", "ntokens": "3192.08", "nsentences": "46.8", "prob_perplexity": "40.192", "code_perplexity": "40.166", "temp": "1.695", "loss_0": "3.458", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45688", "wps": "17792.7", "ups": "5.57", "wpb": "3192.1", "bsz": "46.8", "num_updates": "33200", "lr": "9.28608e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "35", "gb_free": "13.4", "wall": "6227"} [2023-11-01 19:19:16,651][train_inner][INFO] - {"epoch": 9, "update": 8.236, "loss": "3.676", "ntokens": "3185.88", "nsentences": "41.96", "prob_perplexity": "40.206", "code_perplexity": "40.186", "temp": "1.693", "loss_0": "3.515", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.445", "wps": "17565.9", "ups": "5.51", "wpb": "3185.9", "bsz": "42", "num_updates": "33400", "lr": "9.28101e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "36", "gb_free": "15.1", "wall": "6263"} [2023-11-01 19:19:52,791][train_inner][INFO] - {"epoch": 9, "update": 8.285, "loss": "3.722", "ntokens": "3207.76", "nsentences": "40.6", "prob_perplexity": "40.009", "code_perplexity": "39.987", "temp": "1.692", "loss_0": "3.561", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.43782", "wps": "17752.8", "ups": "5.53", "wpb": "3207.8", "bsz": "40.6", "num_updates": "33600", "lr": "9.27595e-05", "gnorm": "0.655", "loss_scale": "16", "train_wall": "35", "gb_free": "14.1", "wall": "6299"} [2023-11-01 19:20:28,691][train_inner][INFO] - {"epoch": 9, "update": 8.334, "loss": "3.659", "ntokens": "3167.04", "nsentences": "45.16", "prob_perplexity": "40.353", "code_perplexity": "40.328", "temp": "1.69", "loss_0": "3.498", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45112", "wps": "17644.8", "ups": "5.57", "wpb": "3167", "bsz": "45.2", "num_updates": "33800", "lr": "9.27089e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "35", "gb_free": "14.9", "wall": "6335"} [2023-11-01 19:21:04,783][train_inner][INFO] - {"epoch": 9, "update": 8.384, "loss": "3.641", "ntokens": "3197.56", "nsentences": "45.12", "prob_perplexity": "40.393", "code_perplexity": "40.375", "temp": "1.688", "loss_0": "3.48", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45201", "wps": "17720.3", "ups": "5.54", "wpb": "3197.6", "bsz": "45.1", "num_updates": "34000", "lr": "9.26582e-05", "gnorm": "0.66", "loss_scale": "16", "train_wall": "35", "gb_free": "12.9", "wall": "6371"} [2023-11-01 19:21:40,780][train_inner][INFO] - {"epoch": 9, "update": 8.433, "loss": "3.673", "ntokens": "3230.64", "nsentences": "43.56", "prob_perplexity": "40.213", "code_perplexity": "40.196", "temp": "1.686", "loss_0": "3.512", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.44602", "wps": "17951", "ups": "5.56", "wpb": "3230.6", "bsz": "43.6", "num_updates": "34200", "lr": "9.26076e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "35", "gb_free": "13.8", "wall": "6407"} [2023-11-01 19:22:17,269][train_inner][INFO] - {"epoch": 9, "update": 8.482, "loss": "3.598", "ntokens": "3240.64", "nsentences": "44.96", "prob_perplexity": "40.45", "code_perplexity": "40.431", "temp": "1.685", "loss_0": "3.437", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45823", "wps": "17763.3", "ups": "5.48", "wpb": "3240.6", "bsz": "45", "num_updates": "34400", "lr": "9.2557e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "36", "gb_free": "13.6", "wall": "6444"} [2023-11-01 19:22:54,008][train_inner][INFO] - {"epoch": 9, "update": 8.532, "loss": "3.648", "ntokens": "3169.2", "nsentences": "43.4", "prob_perplexity": "40.398", "code_perplexity": "40.377", "temp": "1.683", "loss_0": "3.487", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45122", "wps": "17253.4", "ups": "5.44", "wpb": "3169.2", "bsz": "43.4", "num_updates": "34600", "lr": "9.25063e-05", "gnorm": "0.663", "loss_scale": "16", "train_wall": "36", "gb_free": "13.3", "wall": "6480"} [2023-11-01 19:23:31,050][train_inner][INFO] - {"epoch": 9, "update": 8.581, "loss": "3.66", "ntokens": "3231.16", "nsentences": "44.04", "prob_perplexity": "40.467", "code_perplexity": "40.446", "temp": "1.681", "loss_0": "3.5", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.44866", "wps": "17447.4", "ups": "5.4", "wpb": "3231.2", "bsz": "44", "num_updates": "34800", "lr": "9.24557e-05", "gnorm": "0.65", "loss_scale": "16", "train_wall": "36", "gb_free": "13.7", "wall": "6517"} [2023-11-01 19:24:07,274][train_inner][INFO] - {"epoch": 9, "update": 8.63, "loss": "3.587", "ntokens": "3203.24", "nsentences": "45.88", "prob_perplexity": "40.581", "code_perplexity": "40.558", "temp": "1.68", "loss_0": "3.426", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46121", "wps": "17686.6", "ups": "5.52", "wpb": "3203.2", "bsz": "45.9", "num_updates": "35000", "lr": "9.24051e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "36", "gb_free": "13.5", "wall": "6554"} [2023-11-01 19:24:43,507][train_inner][INFO] - {"epoch": 9, "update": 8.679, "loss": "3.644", "ntokens": "3203.52", "nsentences": "43.96", "prob_perplexity": "40.549", "code_perplexity": "40.527", "temp": "1.678", "loss_0": "3.483", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45175", "wps": "17684", "ups": "5.52", "wpb": "3203.5", "bsz": "44", "num_updates": "35200", "lr": "9.23544e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "36", "gb_free": "15.4", "wall": "6590"} [2023-11-01 19:25:19,608][train_inner][INFO] - {"epoch": 9, "update": 8.729, "loss": "3.564", "ntokens": "3153.84", "nsentences": "45.16", "prob_perplexity": "40.532", "code_perplexity": "40.512", "temp": "1.676", "loss_0": "3.403", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46401", "wps": "17473.3", "ups": "5.54", "wpb": "3153.8", "bsz": "45.2", "num_updates": "35400", "lr": "9.23038e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "35", "gb_free": "14.3", "wall": "6626"} [2023-11-01 19:25:55,272][train_inner][INFO] - {"epoch": 9, "update": 8.778, "loss": "3.575", "ntokens": "3148.24", "nsentences": "44.88", "prob_perplexity": "40.607", "code_perplexity": "40.58", "temp": "1.675", "loss_0": "3.414", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46276", "wps": "17656.3", "ups": "5.61", "wpb": "3148.2", "bsz": "44.9", "num_updates": "35600", "lr": "9.22532e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "6662"} [2023-11-01 19:26:31,534][train_inner][INFO] - {"epoch": 9, "update": 8.827, "loss": "3.587", "ntokens": "3189.64", "nsentences": "44.88", "prob_perplexity": "40.743", "code_perplexity": "40.724", "temp": "1.673", "loss_0": "3.427", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45851", "wps": "17593.2", "ups": "5.52", "wpb": "3189.6", "bsz": "44.9", "num_updates": "35800", "lr": "9.22025e-05", "gnorm": "0.647", "loss_scale": "16", "train_wall": "36", "gb_free": "13.7", "wall": "6698"} [2023-11-01 19:27:07,934][train_inner][INFO] - {"epoch": 9, "update": 8.877, "loss": "3.647", "ntokens": "3171", "nsentences": "41.96", "prob_perplexity": "40.374", "code_perplexity": "40.35", "temp": "1.671", "loss_0": "3.487", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.44871", "wps": "17424.4", "ups": "5.49", "wpb": "3171", "bsz": "42", "num_updates": "36000", "lr": "9.21519e-05", "gnorm": "0.664", "loss_scale": "16", "train_wall": "36", "gb_free": "13.4", "wall": "6734"} [2023-11-01 19:27:44,008][train_inner][INFO] - {"epoch": 9, "update": 8.926, "loss": "3.625", "ntokens": "3163.12", "nsentences": "43.32", "prob_perplexity": "40.651", "code_perplexity": "40.63", "temp": "1.67", "loss_0": "3.464", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45347", "wps": "17537.7", "ups": "5.54", "wpb": "3163.1", "bsz": "43.3", "num_updates": "36200", "lr": "9.21013e-05", "gnorm": "0.654", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "6770"} [2023-11-01 19:28:20,301][train_inner][INFO] - {"epoch": 9, "update": 8.975, "loss": "3.623", "ntokens": "3196.4", "nsentences": "43.96", "prob_perplexity": "40.796", "code_perplexity": "40.773", "temp": "1.668", "loss_0": "3.462", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45326", "wps": "17615.5", "ups": "5.51", "wpb": "3196.4", "bsz": "44", "num_updates": "36400", "lr": "9.20506e-05", "gnorm": "0.654", "loss_scale": "16", "train_wall": "36", "gb_free": "13.2", "wall": "6807"} [2023-11-01 19:28:38,191][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 19:28:38,193][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:28:38,213][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 11 [2023-11-01 19:29:03,935][valid][INFO] - {"epoch": 9, "valid_loss": "3.539", "valid_ntokens": "3156.35", "valid_nsentences": "44.1685", "valid_prob_perplexity": "40.964", "valid_code_perplexity": "40.947", "valid_temp": "1.666", "valid_loss_0": "3.379", "valid_loss_1": "0.135", "valid_loss_2": "0.025", "valid_accuracy": "0.46811", "valid_wps": "55581.2", "valid_wpb": "3156.3", "valid_bsz": "44.2", "valid_num_updates": "36500", "valid_best_loss": "3.539"} [2023-11-01 19:29:03,937][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 9 @ 36500 updates [2023-11-01 19:29:03,939][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:29:05,394][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:29:06,368][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 9 @ 36500 updates, score 3.539) (writing took 2.4305443009361625 seconds) [2023-11-01 19:29:06,368][fairseq_cli.train][INFO] - end of epoch 9 (average epoch stats below) [2023-11-01 19:29:06,371][train][INFO] - {"epoch": 9, "train_loss": "3.631", "train_ntokens": "3190.99", "train_nsentences": "44.2594", "train_prob_perplexity": "40.377", "train_code_perplexity": "40.355", "train_temp": "1.683", "train_loss_0": "3.47", "train_loss_1": "0.135", "train_loss_2": "0.026", "train_accuracy": "0.45333", "train_wps": "16981.4", "train_ups": "5.32", "train_wpb": "3191", "train_bsz": "44.3", "train_num_updates": "36500", "train_lr": "9.20253e-05", "train_gnorm": "0.657", "train_loss_scale": "16", "train_train_wall": "720", "train_gb_free": "12.5", "train_wall": "6853"} [2023-11-01 19:29:06,373][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:29:06,395][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 10 [2023-11-01 19:29:06,585][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 19:29:06,607][fairseq.trainer][INFO] - begin training epoch 10 [2023-11-01 19:29:06,608][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 19:29:24,762][train_inner][INFO] - {"epoch": 10, "update": 9.025, "loss": "3.613", "ntokens": "3186.72", "nsentences": "44.16", "prob_perplexity": "40.894", "code_perplexity": "40.872", "temp": "1.666", "loss_0": "3.452", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45446", "wps": "9889.8", "ups": "3.1", "wpb": "3186.7", "bsz": "44.2", "num_updates": "36600", "lr": "9.2e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "35", "gb_free": "13.4", "wall": "6871"} [2023-11-01 19:30:00,555][train_inner][INFO] - {"epoch": 10, "update": 9.074, "loss": "3.644", "ntokens": "3218.24", "nsentences": "42.6", "prob_perplexity": "40.932", "code_perplexity": "40.911", "temp": "1.665", "loss_0": "3.484", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.44939", "wps": "17983.8", "ups": "5.59", "wpb": "3218.2", "bsz": "42.6", "num_updates": "36800", "lr": "9.19494e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "6907"} [2023-11-01 19:30:36,557][train_inner][INFO] - {"epoch": 10, "update": 9.123, "loss": "3.569", "ntokens": "3189.64", "nsentences": "46.28", "prob_perplexity": "40.989", "code_perplexity": "40.966", "temp": "1.663", "loss_0": "3.408", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46372", "wps": "17720", "ups": "5.56", "wpb": "3189.6", "bsz": "46.3", "num_updates": "37000", "lr": "9.18987e-05", "gnorm": "0.666", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "6943"} [2023-11-01 19:31:12,424][train_inner][INFO] - {"epoch": 10, "update": 9.173, "loss": "3.603", "ntokens": "3211.52", "nsentences": "44.48", "prob_perplexity": "41.038", "code_perplexity": "41.014", "temp": "1.661", "loss_0": "3.442", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45629", "wps": "17909.1", "ups": "5.58", "wpb": "3211.5", "bsz": "44.5", "num_updates": "37200", "lr": "9.18481e-05", "gnorm": "0.655", "loss_scale": "16", "train_wall": "35", "gb_free": "16.5", "wall": "6979"} [2023-11-01 19:31:48,256][train_inner][INFO] - {"epoch": 10, "update": 9.222, "loss": "3.561", "ntokens": "3173.56", "nsentences": "43.84", "prob_perplexity": "41.089", "code_perplexity": "41.065", "temp": "1.66", "loss_0": "3.4", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46295", "wps": "17714.6", "ups": "5.58", "wpb": "3173.6", "bsz": "43.8", "num_updates": "37400", "lr": "9.17975e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "7014"} [2023-11-01 19:32:23,925][train_inner][INFO] - {"epoch": 10, "update": 9.271, "loss": "3.577", "ntokens": "3184.52", "nsentences": "45.32", "prob_perplexity": "41.02", "code_perplexity": "40.995", "temp": "1.658", "loss_0": "3.417", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46127", "wps": "17857", "ups": "5.61", "wpb": "3184.5", "bsz": "45.3", "num_updates": "37600", "lr": "9.17468e-05", "gnorm": "0.665", "loss_scale": "16", "train_wall": "35", "gb_free": "13.6", "wall": "7050"} [2023-11-01 19:33:00,435][train_inner][INFO] - {"epoch": 10, "update": 9.321, "loss": "3.59", "ntokens": "3197.6", "nsentences": "44.04", "prob_perplexity": "41.072", "code_perplexity": "41.043", "temp": "1.656", "loss_0": "3.429", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45796", "wps": "17517.5", "ups": "5.48", "wpb": "3197.6", "bsz": "44", "num_updates": "37800", "lr": "9.16962e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "36", "gb_free": "14.5", "wall": "7087"} [2023-11-01 19:33:36,384][train_inner][INFO] - {"epoch": 10, "update": 9.37, "loss": "3.587", "ntokens": "3185.88", "nsentences": "44.16", "prob_perplexity": "41.339", "code_perplexity": "41.314", "temp": "1.655", "loss_0": "3.427", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.4589", "wps": "17725.3", "ups": "5.56", "wpb": "3185.9", "bsz": "44.2", "num_updates": "38000", "lr": "9.16456e-05", "gnorm": "0.66", "loss_scale": "16", "train_wall": "35", "gb_free": "14.7", "wall": "7123"} [2023-11-01 19:34:12,310][train_inner][INFO] - {"epoch": 10, "update": 9.419, "loss": "3.606", "ntokens": "3178.64", "nsentences": "44.08", "prob_perplexity": "41.281", "code_perplexity": "41.26", "temp": "1.653", "loss_0": "3.445", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45603", "wps": "17696.7", "ups": "5.57", "wpb": "3178.6", "bsz": "44.1", "num_updates": "38200", "lr": "9.15949e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "35", "gb_free": "14.1", "wall": "7159"} [2023-11-01 19:34:48,545][train_inner][INFO] - {"epoch": 10, "update": 9.468, "loss": "3.584", "ntokens": "3198.56", "nsentences": "42.88", "prob_perplexity": "41.465", "code_perplexity": "41.444", "temp": "1.651", "loss_0": "3.423", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45805", "wps": "17655.5", "ups": "5.52", "wpb": "3198.6", "bsz": "42.9", "num_updates": "38400", "lr": "9.15443e-05", "gnorm": "0.664", "loss_scale": "16", "train_wall": "36", "gb_free": "12.9", "wall": "7195"} [2023-11-01 19:35:24,856][train_inner][INFO] - {"epoch": 10, "update": 9.518, "loss": "3.652", "ntokens": "3172.48", "nsentences": "41.52", "prob_perplexity": "41.589", "code_perplexity": "41.562", "temp": "1.65", "loss_0": "3.492", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.44644", "wps": "17475.1", "ups": "5.51", "wpb": "3172.5", "bsz": "41.5", "num_updates": "38600", "lr": "9.14937e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "36", "gb_free": "15.3", "wall": "7231"} [2023-11-01 19:36:01,245][train_inner][INFO] - {"epoch": 10, "update": 9.567, "loss": "3.595", "ntokens": "3211.28", "nsentences": "44.04", "prob_perplexity": "41.727", "code_perplexity": "41.698", "temp": "1.648", "loss_0": "3.435", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45704", "wps": "17650.7", "ups": "5.5", "wpb": "3211.3", "bsz": "44", "num_updates": "38800", "lr": "9.1443e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "36", "gb_free": "15.6", "wall": "7267"} [2023-11-01 19:36:37,387][train_inner][INFO] - {"epoch": 10, "update": 9.616, "loss": "3.562", "ntokens": "3164.12", "nsentences": "43.92", "prob_perplexity": "41.665", "code_perplexity": "41.632", "temp": "1.646", "loss_0": "3.402", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46264", "wps": "17510.5", "ups": "5.53", "wpb": "3164.1", "bsz": "43.9", "num_updates": "39000", "lr": "9.13924e-05", "gnorm": "0.667", "loss_scale": "16", "train_wall": "35", "gb_free": "13.3", "wall": "7304"} [2023-11-01 19:37:13,257][train_inner][INFO] - {"epoch": 10, "update": 9.666, "loss": "3.61", "ntokens": "3193.64", "nsentences": "43.08", "prob_perplexity": "41.749", "code_perplexity": "41.717", "temp": "1.645", "loss_0": "3.449", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45465", "wps": "17807.6", "ups": "5.58", "wpb": "3193.6", "bsz": "43.1", "num_updates": "39200", "lr": "9.13418e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "7339"} [2023-11-01 19:37:49,193][train_inner][INFO] - {"epoch": 10, "update": 9.715, "loss": "3.551", "ntokens": "3180.32", "nsentences": "46.16", "prob_perplexity": "41.6", "code_perplexity": "41.573", "temp": "1.643", "loss_0": "3.391", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46571", "wps": "17700.9", "ups": "5.57", "wpb": "3180.3", "bsz": "46.2", "num_updates": "39400", "lr": "9.12911e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "7375"} [2023-11-01 19:38:25,301][train_inner][INFO] - {"epoch": 10, "update": 9.764, "loss": "3.586", "ntokens": "3148.12", "nsentences": "43.76", "prob_perplexity": "41.563", "code_perplexity": "41.539", "temp": "1.642", "loss_0": "3.426", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45869", "wps": "17438.5", "ups": "5.54", "wpb": "3148.1", "bsz": "43.8", "num_updates": "39600", "lr": "9.12405e-05", "gnorm": "0.669", "loss_scale": "16", "train_wall": "35", "gb_free": "13.3", "wall": "7412"} [2023-11-01 19:39:01,063][train_inner][INFO] - {"epoch": 10, "update": 9.814, "loss": "3.526", "ntokens": "3210.08", "nsentences": "47.52", "prob_perplexity": "41.779", "code_perplexity": "41.751", "temp": "1.64", "loss_0": "3.366", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.46913", "wps": "17953.3", "ups": "5.59", "wpb": "3210.1", "bsz": "47.5", "num_updates": "39800", "lr": "9.11899e-05", "gnorm": "0.661", "loss_scale": "16", "train_wall": "35", "gb_free": "13.4", "wall": "7447"} [2023-11-01 19:39:36,759][train_inner][INFO] - {"epoch": 10, "update": 9.863, "loss": "3.565", "ntokens": "3175.56", "nsentences": "44.08", "prob_perplexity": "42", "code_perplexity": "41.968", "temp": "1.638", "loss_0": "3.405", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46123", "wps": "17793.3", "ups": "5.6", "wpb": "3175.6", "bsz": "44.1", "num_updates": "40000", "lr": "9.11392e-05", "gnorm": "0.665", "loss_scale": "16", "train_wall": "35", "gb_free": "14.3", "wall": "7483"} [2023-11-01 19:40:13,144][train_inner][INFO] - {"epoch": 10, "update": 9.912, "loss": "3.579", "ntokens": "3193.4", "nsentences": "44.44", "prob_perplexity": "41.946", "code_perplexity": "41.915", "temp": "1.637", "loss_0": "3.419", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45951", "wps": "17554.8", "ups": "5.5", "wpb": "3193.4", "bsz": "44.4", "num_updates": "40200", "lr": "9.10886e-05", "gnorm": "0.674", "loss_scale": "16", "train_wall": "36", "gb_free": "15.5", "wall": "7519"} [2023-11-01 19:40:49,134][train_inner][INFO] - {"epoch": 10, "update": 9.962, "loss": "3.55", "ntokens": "3222.12", "nsentences": "46.44", "prob_perplexity": "42.248", "code_perplexity": "42.217", "temp": "1.635", "loss_0": "3.39", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46522", "wps": "17906.6", "ups": "5.56", "wpb": "3222.1", "bsz": "46.4", "num_updates": "40400", "lr": "9.1038e-05", "gnorm": "0.658", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "7555"} [2023-11-01 19:41:16,977][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 19:41:16,978][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:41:16,996][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 12 [2023-11-01 19:41:42,319][valid][INFO] - {"epoch": 10, "valid_loss": "3.502", "valid_ntokens": "3164.77", "valid_nsentences": "44.1685", "valid_prob_perplexity": "42.479", "valid_code_perplexity": "42.454", "valid_temp": "1.633", "valid_loss_0": "3.342", "valid_loss_1": "0.135", "valid_loss_2": "0.025", "valid_accuracy": "0.4719", "valid_wps": "56630.7", "valid_wpb": "3164.8", "valid_bsz": "44.2", "valid_num_updates": "40556", "valid_best_loss": "3.502"} [2023-11-01 19:41:42,321][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 10 @ 40556 updates [2023-11-01 19:41:42,323][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:41:43,728][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:41:44,696][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 10 @ 40556 updates, score 3.502) (writing took 2.3755785701796412 seconds) [2023-11-01 19:41:44,697][fairseq_cli.train][INFO] - end of epoch 10 (average epoch stats below) [2023-11-01 19:41:44,699][train][INFO] - {"epoch": 10, "train_loss": "3.587", "train_ntokens": "3190.09", "train_nsentences": "44.2682", "train_prob_perplexity": "41.49", "train_code_perplexity": "41.463", "train_temp": "1.65", "train_loss_0": "3.427", "train_loss_1": "0.135", "train_loss_2": "0.025", "train_accuracy": "0.45873", "train_wps": "17062.6", "train_ups": "5.35", "train_wpb": "3190.1", "train_bsz": "44.3", "train_num_updates": "40556", "train_lr": "9.09985e-05", "train_gnorm": "0.662", "train_loss_scale": "16", "train_train_wall": "717", "train_gb_free": "13.9", "train_wall": "7611"} [2023-11-01 19:41:44,702][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:41:44,721][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 11 [2023-11-01 19:41:44,910][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 19:41:44,931][fairseq.trainer][INFO] - begin training epoch 11 [2023-11-01 19:41:44,932][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 19:41:53,117][train_inner][INFO] - {"epoch": 11, "update": 10.011, "loss": "3.562", "ntokens": "3168.72", "nsentences": "44.6", "prob_perplexity": "42.148", "code_perplexity": "42.12", "temp": "1.633", "loss_0": "3.403", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46274", "wps": "9905.3", "ups": "3.13", "wpb": "3168.7", "bsz": "44.6", "num_updates": "40600", "lr": "9.09873e-05", "gnorm": "0.663", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "7619"} [2023-11-01 19:42:29,794][train_inner][INFO] - {"epoch": 11, "update": 10.06, "loss": "3.585", "ntokens": "3192", "nsentences": "43.56", "prob_perplexity": "42.162", "code_perplexity": "42.131", "temp": "1.632", "loss_0": "3.425", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45763", "wps": "17413.9", "ups": "5.46", "wpb": "3192", "bsz": "43.6", "num_updates": "40800", "lr": "9.09367e-05", "gnorm": "0.666", "loss_scale": "16", "train_wall": "36", "gb_free": "13.2", "wall": "7656"} [2023-11-01 19:43:05,587][train_inner][INFO] - {"epoch": 11, "update": 10.109, "loss": "3.582", "ntokens": "3184.16", "nsentences": "43.52", "prob_perplexity": "42.23", "code_perplexity": "42.201", "temp": "1.63", "loss_0": "3.422", "loss_1": "0.135", "loss_2": "0.026", "accuracy": "0.45764", "wps": "17793.3", "ups": "5.59", "wpb": "3184.2", "bsz": "43.5", "num_updates": "41000", "lr": "9.08861e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "35", "gb_free": "14.3", "wall": "7692"} [2023-11-01 19:43:41,626][train_inner][INFO] - {"epoch": 11, "update": 10.159, "loss": "3.614", "ntokens": "3197.16", "nsentences": "41.88", "prob_perplexity": "42.375", "code_perplexity": "42.345", "temp": "1.628", "loss_0": "3.454", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.4512", "wps": "17743.8", "ups": "5.55", "wpb": "3197.2", "bsz": "41.9", "num_updates": "41200", "lr": "9.08354e-05", "gnorm": "0.66", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "7728"} [2023-11-01 19:44:18,088][train_inner][INFO] - {"epoch": 11, "update": 10.208, "loss": "3.584", "ntokens": "3215.76", "nsentences": "45.36", "prob_perplexity": "42.588", "code_perplexity": "42.557", "temp": "1.627", "loss_0": "3.424", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45792", "wps": "17639.5", "ups": "5.49", "wpb": "3215.8", "bsz": "45.4", "num_updates": "41400", "lr": "9.07848e-05", "gnorm": "0.656", "loss_scale": "16", "train_wall": "36", "gb_free": "12.7", "wall": "7764"} [2023-11-01 19:44:54,687][train_inner][INFO] - {"epoch": 11, "update": 10.257, "loss": "3.559", "ntokens": "3200.64", "nsentences": "43.6", "prob_perplexity": "42.414", "code_perplexity": "42.386", "temp": "1.625", "loss_0": "3.399", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46073", "wps": "17491.5", "ups": "5.47", "wpb": "3200.6", "bsz": "43.6", "num_updates": "41600", "lr": "9.07342e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "36", "gb_free": "13.7", "wall": "7801"} [2023-11-01 19:45:30,873][train_inner][INFO] - {"epoch": 11, "update": 10.307, "loss": "3.531", "ntokens": "3208.8", "nsentences": "45.56", "prob_perplexity": "42.625", "code_perplexity": "42.596", "temp": "1.624", "loss_0": "3.372", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46542", "wps": "17736.1", "ups": "5.53", "wpb": "3208.8", "bsz": "45.6", "num_updates": "41800", "lr": "9.06835e-05", "gnorm": "0.653", "loss_scale": "16", "train_wall": "36", "gb_free": "13.5", "wall": "7837"} [2023-11-01 19:46:07,215][train_inner][INFO] - {"epoch": 11, "update": 10.356, "loss": "3.547", "ntokens": "3217.48", "nsentences": "46.32", "prob_perplexity": "42.803", "code_perplexity": "42.777", "temp": "1.622", "loss_0": "3.388", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46355", "wps": "17707.8", "ups": "5.5", "wpb": "3217.5", "bsz": "46.3", "num_updates": "42000", "lr": "9.06329e-05", "gnorm": "0.66", "loss_scale": "16", "train_wall": "36", "gb_free": "13", "wall": "7873"} [2023-11-01 19:46:43,436][train_inner][INFO] - {"epoch": 11, "update": 10.405, "loss": "3.522", "ntokens": "3156.64", "nsentences": "45.24", "prob_perplexity": "42.801", "code_perplexity": "42.771", "temp": "1.62", "loss_0": "3.363", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46684", "wps": "17431.6", "ups": "5.52", "wpb": "3156.6", "bsz": "45.2", "num_updates": "42200", "lr": "9.05823e-05", "gnorm": "0.665", "loss_scale": "16", "train_wall": "36", "gb_free": "12.6", "wall": "7910"} [2023-11-01 19:47:19,831][train_inner][INFO] - {"epoch": 11, "update": 10.455, "loss": "3.567", "ntokens": "3187.04", "nsentences": "43.04", "prob_perplexity": "42.934", "code_perplexity": "42.908", "temp": "1.619", "loss_0": "3.408", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45766", "wps": "17514.6", "ups": "5.5", "wpb": "3187", "bsz": "43", "num_updates": "42400", "lr": "9.05316e-05", "gnorm": "0.659", "loss_scale": "16", "train_wall": "36", "gb_free": "13.9", "wall": "7946"} [2023-11-01 19:47:56,551][train_inner][INFO] - {"epoch": 11, "update": 10.504, "loss": "3.521", "ntokens": "3200.28", "nsentences": "45.52", "prob_perplexity": "42.974", "code_perplexity": "42.941", "temp": "1.617", "loss_0": "3.361", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.46735", "wps": "17431.5", "ups": "5.45", "wpb": "3200.3", "bsz": "45.5", "num_updates": "42600", "lr": "9.0481e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "36", "gb_free": "13.9", "wall": "7983"} [2023-11-01 19:48:33,292][train_inner][INFO] - {"epoch": 11, "update": 10.553, "loss": "3.569", "ntokens": "3208.8", "nsentences": "44", "prob_perplexity": "43.555", "code_perplexity": "43.522", "temp": "1.616", "loss_0": "3.41", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45674", "wps": "17468.5", "ups": "5.44", "wpb": "3208.8", "bsz": "44", "num_updates": "42800", "lr": "9.04304e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "36", "gb_free": "13.9", "wall": "8020"} [2023-11-01 19:49:09,665][train_inner][INFO] - {"epoch": 11, "update": 10.603, "loss": "3.578", "ntokens": "3123.12", "nsentences": "40.4", "prob_perplexity": "43.229", "code_perplexity": "43.199", "temp": "1.614", "loss_0": "3.418", "loss_1": "0.135", "loss_2": "0.025", "accuracy": "0.45372", "wps": "17173.8", "ups": "5.5", "wpb": "3123.1", "bsz": "40.4", "num_updates": "43000", "lr": "9.03797e-05", "gnorm": "0.669", "loss_scale": "16", "train_wall": "36", "gb_free": "13.3", "wall": "8056"} [2023-11-01 19:49:45,723][train_inner][INFO] - {"epoch": 11, "update": 10.652, "loss": "3.566", "ntokens": "3148.44", "nsentences": "42.88", "prob_perplexity": "43.562", "code_perplexity": "43.531", "temp": "1.612", "loss_0": "3.407", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45875", "wps": "17464.1", "ups": "5.55", "wpb": "3148.4", "bsz": "42.9", "num_updates": "43200", "lr": "9.03291e-05", "gnorm": "0.667", "loss_scale": "16", "train_wall": "35", "gb_free": "14", "wall": "8092"} [2023-11-01 19:50:21,723][train_inner][INFO] - {"epoch": 11, "update": 10.701, "loss": "3.586", "ntokens": "3185.6", "nsentences": "44.12", "prob_perplexity": "43.479", "code_perplexity": "43.452", "temp": "1.611", "loss_0": "3.426", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45677", "wps": "17698.8", "ups": "5.56", "wpb": "3185.6", "bsz": "44.1", "num_updates": "43400", "lr": "9.02785e-05", "gnorm": "0.666", "loss_scale": "16", "train_wall": "35", "gb_free": "12.3", "wall": "8128"} [2023-11-01 19:50:57,713][train_inner][INFO] - {"epoch": 11, "update": 10.75, "loss": "3.503", "ntokens": "3190.88", "nsentences": "47.24", "prob_perplexity": "43.535", "code_perplexity": "43.503", "temp": "1.609", "loss_0": "3.344", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.47031", "wps": "17733.1", "ups": "5.56", "wpb": "3190.9", "bsz": "47.2", "num_updates": "43600", "lr": "9.02278e-05", "gnorm": "0.663", "loss_scale": "16", "train_wall": "35", "gb_free": "12.5", "wall": "8164"} [2023-11-01 19:51:34,192][train_inner][INFO] - {"epoch": 11, "update": 10.8, "loss": "3.459", "ntokens": "3185.32", "nsentences": "46.2", "prob_perplexity": "43.799", "code_perplexity": "43.766", "temp": "1.607", "loss_0": "3.3", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.47624", "wps": "17464.7", "ups": "5.48", "wpb": "3185.3", "bsz": "46.2", "num_updates": "43800", "lr": "9.01772e-05", "gnorm": "0.657", "loss_scale": "16", "train_wall": "36", "gb_free": "13.8", "wall": "8200"} [2023-11-01 19:52:10,603][train_inner][INFO] - {"epoch": 11, "update": 10.849, "loss": "3.552", "ntokens": "3201.84", "nsentences": "43.8", "prob_perplexity": "43.487", "code_perplexity": "43.451", "temp": "1.606", "loss_0": "3.393", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46058", "wps": "17588.5", "ups": "5.49", "wpb": "3201.8", "bsz": "43.8", "num_updates": "44000", "lr": "9.01266e-05", "gnorm": "0.664", "loss_scale": "16", "train_wall": "36", "gb_free": "13.4", "wall": "8237"} [2023-11-01 19:52:27,998][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2023-11-01 19:52:47,082][train_inner][INFO] - {"epoch": 11, "update": 10.899, "loss": "3.553", "ntokens": "3193.6", "nsentences": "43.36", "prob_perplexity": "43.782", "code_perplexity": "43.75", "temp": "1.604", "loss_0": "3.393", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45967", "wps": "17510.1", "ups": "5.48", "wpb": "3193.6", "bsz": "43.4", "num_updates": "44200", "lr": "9.00759e-05", "gnorm": "0.662", "loss_scale": "8", "train_wall": "36", "gb_free": "12.6", "wall": "8273"} [2023-11-01 19:53:23,854][train_inner][INFO] - {"epoch": 11, "update": 10.948, "loss": "3.525", "ntokens": "3191.24", "nsentences": "45.2", "prob_perplexity": "43.843", "code_perplexity": "43.811", "temp": "1.603", "loss_0": "3.366", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46644", "wps": "17357.9", "ups": "5.44", "wpb": "3191.2", "bsz": "45.2", "num_updates": "44400", "lr": "9.00253e-05", "gnorm": "0.658", "loss_scale": "8", "train_wall": "36", "gb_free": "13.6", "wall": "8310"} [2023-11-01 19:54:00,015][train_inner][INFO] - {"epoch": 11, "update": 10.997, "loss": "3.546", "ntokens": "3169.04", "nsentences": "43.6", "prob_perplexity": "43.847", "code_perplexity": "43.815", "temp": "1.601", "loss_0": "3.387", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.4611", "wps": "17528.5", "ups": "5.53", "wpb": "3169", "bsz": "43.6", "num_updates": "44600", "lr": "8.99747e-05", "gnorm": "0.666", "loss_scale": "8", "train_wall": "36", "gb_free": "14.6", "wall": "8346"} [2023-11-01 19:54:02,014][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 19:54:02,016][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:54:02,034][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 13 [2023-11-01 19:54:27,538][valid][INFO] - {"epoch": 11, "valid_loss": "3.456", "valid_ntokens": "3163.14", "valid_nsentences": "44.1685", "valid_prob_perplexity": "44.332", "valid_code_perplexity": "44.304", "valid_temp": "1.6", "valid_loss_0": "3.297", "valid_loss_1": "0.134", "valid_loss_2": "0.025", "valid_accuracy": "0.476", "valid_wps": "56220.5", "valid_wpb": "3163.1", "valid_bsz": "44.2", "valid_num_updates": "44611", "valid_best_loss": "3.456"} [2023-11-01 19:54:27,540][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 11 @ 44611 updates [2023-11-01 19:54:27,542][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:54:28,940][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 19:54:29,912][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 11 @ 44611 updates, score 3.456) (writing took 2.3725196421146393 seconds) [2023-11-01 19:54:29,913][fairseq_cli.train][INFO] - end of epoch 11 (average epoch stats below) [2023-11-01 19:54:29,915][train][INFO] - {"epoch": 11, "train_loss": "3.551", "train_ntokens": "3187.29", "train_nsentences": "44.2614", "train_prob_perplexity": "43.095", "train_code_perplexity": "43.064", "train_temp": "1.616", "train_loss_0": "3.391", "train_loss_1": "0.135", "train_loss_2": "0.025", "train_accuracy": "0.4616", "train_wps": "16890", "train_ups": "5.3", "train_wpb": "3187.3", "train_bsz": "44.3", "train_num_updates": "44611", "train_lr": "8.99719e-05", "train_gnorm": "0.661", "train_loss_scale": "8", "train_train_wall": "724", "train_gb_free": "13.7", "train_wall": "8376"} [2023-11-01 19:54:29,917][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 19:54:29,937][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 12 [2023-11-01 19:54:30,131][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 19:54:30,153][fairseq.trainer][INFO] - begin training epoch 12 [2023-11-01 19:54:30,154][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 19:55:04,222][train_inner][INFO] - {"epoch": 12, "update": 11.047, "loss": "3.541", "ntokens": "3171.4", "nsentences": "44.44", "prob_perplexity": "44.042", "code_perplexity": "44.015", "temp": "1.599", "loss_0": "3.382", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46285", "wps": "9881.4", "ups": "3.12", "wpb": "3171.4", "bsz": "44.4", "num_updates": "44800", "lr": "8.99241e-05", "gnorm": "0.659", "loss_scale": "8", "train_wall": "35", "gb_free": "13.8", "wall": "8410"} [2023-11-01 19:55:39,650][train_inner][INFO] - {"epoch": 12, "update": 11.096, "loss": "3.555", "ntokens": "3216.8", "nsentences": "42.88", "prob_perplexity": "43.87", "code_perplexity": "43.837", "temp": "1.598", "loss_0": "3.396", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45915", "wps": "18160.8", "ups": "5.65", "wpb": "3216.8", "bsz": "42.9", "num_updates": "45000", "lr": "8.98734e-05", "gnorm": "0.66", "loss_scale": "8", "train_wall": "35", "gb_free": "14.3", "wall": "8446"} [2023-11-01 19:56:15,609][train_inner][INFO] - {"epoch": 12, "update": 11.145, "loss": "3.545", "ntokens": "3186.56", "nsentences": "41.08", "prob_perplexity": "44.268", "code_perplexity": "44.23", "temp": "1.596", "loss_0": "3.386", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45764", "wps": "17724.5", "ups": "5.56", "wpb": "3186.6", "bsz": "41.1", "num_updates": "45200", "lr": "8.98228e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "35", "gb_free": "13.6", "wall": "8482"} [2023-11-01 19:56:51,330][train_inner][INFO] - {"epoch": 12, "update": 11.195, "loss": "3.525", "ntokens": "3168.76", "nsentences": "44.12", "prob_perplexity": "44.107", "code_perplexity": "44.079", "temp": "1.595", "loss_0": "3.366", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46412", "wps": "17742.7", "ups": "5.6", "wpb": "3168.8", "bsz": "44.1", "num_updates": "45400", "lr": "8.97722e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "13", "wall": "8518"} [2023-11-01 19:57:27,224][train_inner][INFO] - {"epoch": 12, "update": 11.244, "loss": "3.543", "ntokens": "3194.6", "nsentences": "43.8", "prob_perplexity": "44.154", "code_perplexity": "44.122", "temp": "1.593", "loss_0": "3.384", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46088", "wps": "17801.1", "ups": "5.57", "wpb": "3194.6", "bsz": "43.8", "num_updates": "45600", "lr": "8.97215e-05", "gnorm": "0.659", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "8553"} [2023-11-01 19:58:03,197][train_inner][INFO] - {"epoch": 12, "update": 11.293, "loss": "3.549", "ntokens": "3222.44", "nsentences": "43.04", "prob_perplexity": "44.494", "code_perplexity": "44.461", "temp": "1.591", "loss_0": "3.39", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45808", "wps": "17917", "ups": "5.56", "wpb": "3222.4", "bsz": "43", "num_updates": "45800", "lr": "8.96709e-05", "gnorm": "0.655", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "8589"} [2023-11-01 19:58:38,948][train_inner][INFO] - {"epoch": 12, "update": 11.342, "loss": "3.503", "ntokens": "3179.16", "nsentences": "44.84", "prob_perplexity": "44.392", "code_perplexity": "44.359", "temp": "1.59", "loss_0": "3.344", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46752", "wps": "17786.3", "ups": "5.59", "wpb": "3179.2", "bsz": "44.8", "num_updates": "46000", "lr": "8.96203e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "15.8", "wall": "8625"} [2023-11-01 19:59:15,046][train_inner][INFO] - {"epoch": 12, "update": 11.392, "loss": "3.578", "ntokens": "3177.96", "nsentences": "41.08", "prob_perplexity": "44.485", "code_perplexity": "44.455", "temp": "1.588", "loss_0": "3.419", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.45362", "wps": "17608.7", "ups": "5.54", "wpb": "3178", "bsz": "41.1", "num_updates": "46200", "lr": "8.95696e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "13.1", "wall": "8661"} [2023-11-01 19:59:50,958][train_inner][INFO] - {"epoch": 12, "update": 11.441, "loss": "3.541", "ntokens": "3164.92", "nsentences": "43.4", "prob_perplexity": "44.815", "code_perplexity": "44.779", "temp": "1.587", "loss_0": "3.382", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46014", "wps": "17626.9", "ups": "5.57", "wpb": "3164.9", "bsz": "43.4", "num_updates": "46400", "lr": "8.9519e-05", "gnorm": "0.658", "loss_scale": "8", "train_wall": "35", "gb_free": "14.8", "wall": "8697"} [2023-11-01 20:00:27,190][train_inner][INFO] - {"epoch": 12, "update": 11.49, "loss": "3.525", "ntokens": "3240.28", "nsentences": "45.24", "prob_perplexity": "44.865", "code_perplexity": "44.829", "temp": "1.585", "loss_0": "3.367", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46397", "wps": "17887.3", "ups": "5.52", "wpb": "3240.3", "bsz": "45.2", "num_updates": "46600", "lr": "8.94684e-05", "gnorm": "0.656", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "8733"} [2023-11-01 20:01:03,442][train_inner][INFO] - {"epoch": 12, "update": 11.54, "loss": "3.433", "ntokens": "3183.08", "nsentences": "48", "prob_perplexity": "45.017", "code_perplexity": "44.983", "temp": "1.584", "loss_0": "3.275", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.48024", "wps": "17562.1", "ups": "5.52", "wpb": "3183.1", "bsz": "48", "num_updates": "46800", "lr": "8.94177e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "36", "gb_free": "13.8", "wall": "8770"} [2023-11-01 20:01:39,613][train_inner][INFO] - {"epoch": 12, "update": 11.589, "loss": "3.568", "ntokens": "3211.04", "nsentences": "44.08", "prob_perplexity": "45.208", "code_perplexity": "45.173", "temp": "1.582", "loss_0": "3.41", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.45621", "wps": "17755.7", "ups": "5.53", "wpb": "3211", "bsz": "44.1", "num_updates": "47000", "lr": "8.93671e-05", "gnorm": "0.66", "loss_scale": "8", "train_wall": "36", "gb_free": "14", "wall": "8806"} [2023-11-01 20:02:15,749][train_inner][INFO] - {"epoch": 12, "update": 11.638, "loss": "3.504", "ntokens": "3201.28", "nsentences": "45.6", "prob_perplexity": "45.168", "code_perplexity": "45.131", "temp": "1.58", "loss_0": "3.345", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46694", "wps": "17719", "ups": "5.53", "wpb": "3201.3", "bsz": "45.6", "num_updates": "47200", "lr": "8.93165e-05", "gnorm": "0.66", "loss_scale": "8", "train_wall": "36", "gb_free": "14.4", "wall": "8842"} [2023-11-01 20:02:52,171][train_inner][INFO] - {"epoch": 12, "update": 11.688, "loss": "3.48", "ntokens": "3171.56", "nsentences": "45.36", "prob_perplexity": "45.297", "code_perplexity": "45.262", "temp": "1.579", "loss_0": "3.321", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.4711", "wps": "17416.8", "ups": "5.49", "wpb": "3171.6", "bsz": "45.4", "num_updates": "47400", "lr": "8.92658e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "36", "gb_free": "14.5", "wall": "8878"} [2023-11-01 20:03:28,474][train_inner][INFO] - {"epoch": 12, "update": 11.737, "loss": "3.583", "ntokens": "3192.44", "nsentences": "41.96", "prob_perplexity": "45.453", "code_perplexity": "45.419", "temp": "1.577", "loss_0": "3.425", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.45171", "wps": "17588.8", "ups": "5.51", "wpb": "3192.4", "bsz": "42", "num_updates": "47600", "lr": "8.92152e-05", "gnorm": "0.666", "loss_scale": "8", "train_wall": "36", "gb_free": "14.7", "wall": "8915"} [2023-11-01 20:04:04,839][train_inner][INFO] - {"epoch": 12, "update": 11.786, "loss": "3.494", "ntokens": "3217.52", "nsentences": "45.72", "prob_perplexity": "45.761", "code_perplexity": "45.722", "temp": "1.576", "loss_0": "3.336", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46836", "wps": "17696.8", "ups": "5.5", "wpb": "3217.5", "bsz": "45.7", "num_updates": "47800", "lr": "8.91646e-05", "gnorm": "0.703", "loss_scale": "8", "train_wall": "36", "gb_free": "13", "wall": "8951"} [2023-11-01 20:04:40,402][train_inner][INFO] - {"epoch": 12, "update": 11.836, "loss": "3.521", "ntokens": "3155.92", "nsentences": "46.08", "prob_perplexity": "45.494", "code_perplexity": "45.46", "temp": "1.574", "loss_0": "3.362", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46644", "wps": "17749.1", "ups": "5.62", "wpb": "3155.9", "bsz": "46.1", "num_updates": "48000", "lr": "8.91139e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "13.5", "wall": "8987"} [2023-11-01 20:05:16,943][train_inner][INFO] - {"epoch": 12, "update": 11.885, "loss": "3.556", "ntokens": "3198.96", "nsentences": "43.04", "prob_perplexity": "45.303", "code_perplexity": "45.27", "temp": "1.572", "loss_0": "3.398", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.45732", "wps": "17510.3", "ups": "5.47", "wpb": "3199", "bsz": "43", "num_updates": "48200", "lr": "8.90633e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "36", "gb_free": "14.3", "wall": "9023"} [2023-11-01 20:05:53,229][train_inner][INFO] - {"epoch": 12, "update": 11.934, "loss": "3.518", "ntokens": "3177.12", "nsentences": "43.96", "prob_perplexity": "45.808", "code_perplexity": "45.771", "temp": "1.571", "loss_0": "3.36", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46438", "wps": "17512.2", "ups": "5.51", "wpb": "3177.1", "bsz": "44", "num_updates": "48400", "lr": "8.90127e-05", "gnorm": "0.671", "loss_scale": "8", "train_wall": "36", "gb_free": "14.3", "wall": "9059"} [2023-11-01 20:06:29,411][train_inner][INFO] - {"epoch": 12, "update": 11.983, "loss": "3.417", "ntokens": "3171.2", "nsentences": "48.04", "prob_perplexity": "45.794", "code_perplexity": "45.755", "temp": "1.569", "loss_0": "3.258", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.48175", "wps": "17530.8", "ups": "5.53", "wpb": "3171.2", "bsz": "48", "num_updates": "48600", "lr": "8.8962e-05", "gnorm": "0.662", "loss_scale": "8", "train_wall": "36", "gb_free": "13.4", "wall": "9096"} [2023-11-01 20:06:41,372][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 20:06:41,373][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:06:41,392][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 14 [2023-11-01 20:07:07,328][valid][INFO] - {"epoch": 12, "valid_loss": "3.434", "valid_ntokens": "3167.56", "valid_nsentences": "44.1685", "valid_prob_perplexity": "45.709", "valid_code_perplexity": "45.686", "valid_temp": "1.568", "valid_loss_0": "3.276", "valid_loss_1": "0.134", "valid_loss_2": "0.024", "valid_accuracy": "0.47889", "valid_wps": "55366.5", "valid_wpb": "3167.6", "valid_bsz": "44.2", "valid_num_updates": "48667", "valid_best_loss": "3.434"} [2023-11-01 20:07:07,329][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 12 @ 48667 updates [2023-11-01 20:07:07,331][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:07:08,745][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:07:09,720][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 12 @ 48667 updates, score 3.434) (writing took 2.3905333182774484 seconds) [2023-11-01 20:07:09,721][fairseq_cli.train][INFO] - end of epoch 12 (average epoch stats below) [2023-11-01 20:07:09,723][train][INFO] - {"epoch": 12, "train_loss": "3.525", "train_ntokens": "3190.4", "train_nsentences": "44.2682", "train_prob_perplexity": "44.899", "train_code_perplexity": "44.864", "train_temp": "1.584", "train_loss_0": "3.366", "train_loss_1": "0.134", "train_loss_2": "0.024", "train_accuracy": "0.46343", "train_wps": "17031", "train_ups": "5.34", "train_wpb": "3190.4", "train_bsz": "44.3", "train_num_updates": "48667", "train_lr": "8.89451e-05", "train_gnorm": "0.664", "train_loss_scale": "8", "train_train_wall": "718", "train_gb_free": "13.6", "train_wall": "9136"} [2023-11-01 20:07:09,725][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:07:09,745][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 13 [2023-11-01 20:07:09,931][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 20:07:09,954][fairseq.trainer][INFO] - begin training epoch 13 [2023-11-01 20:07:09,955][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 20:07:33,752][train_inner][INFO] - {"epoch": 13, "update": 12.033, "loss": "3.459", "ntokens": "3207.44", "nsentences": "47.6", "prob_perplexity": "45.946", "code_perplexity": "45.909", "temp": "1.568", "loss_0": "3.301", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.47512", "wps": "9970.4", "ups": "3.11", "wpb": "3207.4", "bsz": "47.6", "num_updates": "48800", "lr": "8.89114e-05", "gnorm": "0.658", "loss_scale": "8", "train_wall": "35", "gb_free": "12.9", "wall": "9160"} [2023-11-01 20:08:09,286][train_inner][INFO] - {"epoch": 13, "update": 12.082, "loss": "3.557", "ntokens": "3182.88", "nsentences": "42.52", "prob_perplexity": "45.94", "code_perplexity": "45.897", "temp": "1.566", "loss_0": "3.399", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.45658", "wps": "17915.7", "ups": "5.63", "wpb": "3182.9", "bsz": "42.5", "num_updates": "49000", "lr": "8.88608e-05", "gnorm": "0.671", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "9196"} [2023-11-01 20:08:45,114][train_inner][INFO] - {"epoch": 13, "update": 12.131, "loss": "3.439", "ntokens": "3161.2", "nsentences": "46.12", "prob_perplexity": "46.158", "code_perplexity": "46.117", "temp": "1.565", "loss_0": "3.28", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.47613", "wps": "17647.6", "ups": "5.58", "wpb": "3161.2", "bsz": "46.1", "num_updates": "49200", "lr": "8.88101e-05", "gnorm": "0.665", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "9231"} [2023-11-01 20:09:20,957][train_inner][INFO] - {"epoch": 13, "update": 12.181, "loss": "3.612", "ntokens": "3200.96", "nsentences": "40.88", "prob_perplexity": "46.235", "code_perplexity": "46.189", "temp": "1.563", "loss_0": "3.454", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.44579", "wps": "17862.4", "ups": "5.58", "wpb": "3201", "bsz": "40.9", "num_updates": "49400", "lr": "8.87595e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "9267"} [2023-11-01 20:09:57,000][train_inner][INFO] - {"epoch": 13, "update": 12.23, "loss": "3.496", "ntokens": "3175.72", "nsentences": "45.08", "prob_perplexity": "46.336", "code_perplexity": "46.305", "temp": "1.562", "loss_0": "3.338", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46816", "wps": "17622.6", "ups": "5.55", "wpb": "3175.7", "bsz": "45.1", "num_updates": "49600", "lr": "8.87089e-05", "gnorm": "0.672", "loss_scale": "8", "train_wall": "35", "gb_free": "14.7", "wall": "9303"} [2023-11-01 20:10:32,701][train_inner][INFO] - {"epoch": 13, "update": 12.279, "loss": "3.53", "ntokens": "3178.12", "nsentences": "42.96", "prob_perplexity": "46.288", "code_perplexity": "46.256", "temp": "1.56", "loss_0": "3.372", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.4612", "wps": "17805.4", "ups": "5.6", "wpb": "3178.1", "bsz": "43", "num_updates": "49800", "lr": "8.86582e-05", "gnorm": "0.675", "loss_scale": "8", "train_wall": "35", "gb_free": "15.5", "wall": "9339"} [2023-11-01 20:11:08,349][train_inner][INFO] - {"epoch": 13, "update": 12.329, "loss": "3.498", "ntokens": "3210.84", "nsentences": "45.32", "prob_perplexity": "47.086", "code_perplexity": "47.047", "temp": "1.558", "loss_0": "3.341", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46611", "wps": "18015.3", "ups": "5.61", "wpb": "3210.8", "bsz": "45.3", "num_updates": "50000", "lr": "8.86076e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "9375"} [2023-11-01 20:11:08,350][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 20:11:08,351][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:11:08,372][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 15 [2023-11-01 20:11:33,768][valid][INFO] - {"epoch": 13, "valid_loss": "3.424", "valid_ntokens": "3154.34", "valid_nsentences": "44.1685", "valid_prob_perplexity": "47.244", "valid_code_perplexity": "47.217", "valid_temp": "1.558", "valid_loss_0": "3.267", "valid_loss_1": "0.134", "valid_loss_2": "0.023", "valid_accuracy": "0.47896", "valid_wps": "56247.6", "valid_wpb": "3154.3", "valid_bsz": "44.2", "valid_num_updates": "50000", "valid_best_loss": "3.424"} [2023-11-01 20:11:33,769][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 13 @ 50000 updates [2023-11-01 20:11:33,771][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_13_50000.pt [2023-11-01 20:11:35,111][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_13_50000.pt [2023-11-01 20:11:36,982][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_13_50000.pt (epoch 13 @ 50000 updates, score 3.424) (writing took 3.2127992808818817 seconds) [2023-11-01 20:12:12,822][train_inner][INFO] - {"epoch": 13, "update": 12.378, "loss": "3.492", "ntokens": "3172", "nsentences": "44.36", "prob_perplexity": "46.956", "code_perplexity": "46.921", "temp": "1.557", "loss_0": "3.334", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.46701", "wps": "9840.1", "ups": "3.1", "wpb": "3172", "bsz": "44.4", "num_updates": "50200", "lr": "8.8557e-05", "gnorm": "0.665", "loss_scale": "8", "train_wall": "35", "gb_free": "14.6", "wall": "9439"} [2023-11-01 20:12:49,335][train_inner][INFO] - {"epoch": 13, "update": 12.427, "loss": "3.488", "ntokens": "3191.04", "nsentences": "44", "prob_perplexity": "47.228", "code_perplexity": "47.187", "temp": "1.555", "loss_0": "3.33", "loss_1": "0.134", "loss_2": "0.025", "accuracy": "0.46753", "wps": "17480.1", "ups": "5.48", "wpb": "3191", "bsz": "44", "num_updates": "50400", "lr": "8.85063e-05", "gnorm": "0.666", "loss_scale": "8", "train_wall": "36", "gb_free": "13.6", "wall": "9476"} [2023-11-01 20:13:25,601][train_inner][INFO] - {"epoch": 13, "update": 12.477, "loss": "3.539", "ntokens": "3220.96", "nsentences": "43.48", "prob_perplexity": "47.563", "code_perplexity": "47.521", "temp": "1.554", "loss_0": "3.381", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.45814", "wps": "17764", "ups": "5.52", "wpb": "3221", "bsz": "43.5", "num_updates": "50600", "lr": "8.84557e-05", "gnorm": "0.662", "loss_scale": "8", "train_wall": "36", "gb_free": "13.4", "wall": "9512"} [2023-11-01 20:14:01,557][train_inner][INFO] - {"epoch": 13, "update": 12.526, "loss": "3.547", "ntokens": "3191.32", "nsentences": "43.92", "prob_perplexity": "47.656", "code_perplexity": "47.619", "temp": "1.552", "loss_0": "3.389", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.4584", "wps": "17752.7", "ups": "5.56", "wpb": "3191.3", "bsz": "43.9", "num_updates": "50800", "lr": "8.84051e-05", "gnorm": "0.671", "loss_scale": "8", "train_wall": "35", "gb_free": "13.5", "wall": "9548"} [2023-11-01 20:14:37,432][train_inner][INFO] - {"epoch": 13, "update": 12.575, "loss": "3.521", "ntokens": "3184.4", "nsentences": "43.92", "prob_perplexity": "48.081", "code_perplexity": "48.037", "temp": "1.551", "loss_0": "3.363", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46186", "wps": "17753.7", "ups": "5.58", "wpb": "3184.4", "bsz": "43.9", "num_updates": "51000", "lr": "8.83544e-05", "gnorm": "0.66", "loss_scale": "8", "train_wall": "35", "gb_free": "14.5", "wall": "9584"} [2023-11-01 20:15:13,215][train_inner][INFO] - {"epoch": 13, "update": 12.625, "loss": "3.458", "ntokens": "3167.4", "nsentences": "46.16", "prob_perplexity": "47.745", "code_perplexity": "47.706", "temp": "1.549", "loss_0": "3.301", "loss_1": "0.134", "loss_2": "0.024", "accuracy": "0.47323", "wps": "17704", "ups": "5.59", "wpb": "3167.4", "bsz": "46.2", "num_updates": "51200", "lr": "8.83038e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "13.8", "wall": "9619"} [2023-11-01 20:15:49,088][train_inner][INFO] - {"epoch": 13, "update": 12.674, "loss": "3.458", "ntokens": "3157.52", "nsentences": "45.12", "prob_perplexity": "48.122", "code_perplexity": "48.083", "temp": "1.548", "loss_0": "3.3", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47197", "wps": "17605", "ups": "5.58", "wpb": "3157.5", "bsz": "45.1", "num_updates": "51400", "lr": "8.82532e-05", "gnorm": "0.67", "loss_scale": "8", "train_wall": "35", "gb_free": "12.7", "wall": "9655"} [2023-11-01 20:16:25,054][train_inner][INFO] - {"epoch": 13, "update": 12.723, "loss": "3.531", "ntokens": "3221.44", "nsentences": "43.44", "prob_perplexity": "48.125", "code_perplexity": "48.091", "temp": "1.546", "loss_0": "3.374", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.4585", "wps": "17915.3", "ups": "5.56", "wpb": "3221.4", "bsz": "43.4", "num_updates": "51600", "lr": "8.82025e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "35", "gb_free": "13.8", "wall": "9691"} [2023-11-01 20:17:00,993][train_inner][INFO] - {"epoch": 13, "update": 12.772, "loss": "3.515", "ntokens": "3183.32", "nsentences": "44.2", "prob_perplexity": "48.496", "code_perplexity": "48.457", "temp": "1.544", "loss_0": "3.358", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46213", "wps": "17716.2", "ups": "5.57", "wpb": "3183.3", "bsz": "44.2", "num_updates": "51800", "lr": "8.81519e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "14.4", "wall": "9727"} [2023-11-01 20:17:36,890][train_inner][INFO] - {"epoch": 13, "update": 12.822, "loss": "3.499", "ntokens": "3200.56", "nsentences": "42.96", "prob_perplexity": "48.634", "code_perplexity": "48.59", "temp": "1.543", "loss_0": "3.342", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.4633", "wps": "17832.8", "ups": "5.57", "wpb": "3200.6", "bsz": "43", "num_updates": "52000", "lr": "8.81013e-05", "gnorm": "0.67", "loss_scale": "8", "train_wall": "35", "gb_free": "14.4", "wall": "9763"} [2023-11-01 20:18:12,903][train_inner][INFO] - {"epoch": 13, "update": 12.871, "loss": "3.455", "ntokens": "3185.48", "nsentences": "43.72", "prob_perplexity": "48.739", "code_perplexity": "48.703", "temp": "1.541", "loss_0": "3.298", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47006", "wps": "17692.1", "ups": "5.55", "wpb": "3185.5", "bsz": "43.7", "num_updates": "52200", "lr": "8.80506e-05", "gnorm": "0.661", "loss_scale": "8", "train_wall": "35", "gb_free": "14.3", "wall": "9799"} [2023-11-01 20:18:49,384][train_inner][INFO] - {"epoch": 13, "update": 12.92, "loss": "3.52", "ntokens": "3208.72", "nsentences": "42.96", "prob_perplexity": "48.585", "code_perplexity": "48.543", "temp": "1.54", "loss_0": "3.363", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.45931", "wps": "17591.7", "ups": "5.48", "wpb": "3208.7", "bsz": "43", "num_updates": "52400", "lr": "8.8e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "36", "gb_free": "13.5", "wall": "9836"} [2023-11-01 20:19:25,418][train_inner][INFO] - {"epoch": 13, "update": 12.97, "loss": "3.471", "ntokens": "3215.12", "nsentences": "45.96", "prob_perplexity": "48.79", "code_perplexity": "48.748", "temp": "1.538", "loss_0": "3.314", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47001", "wps": "17846.3", "ups": "5.55", "wpb": "3215.1", "bsz": "46", "num_updates": "52600", "lr": "8.79494e-05", "gnorm": "0.665", "loss_scale": "8", "train_wall": "35", "gb_free": "13.6", "wall": "9872"} [2023-11-01 20:19:47,749][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 20:19:47,751][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:19:47,769][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 16 [2023-11-01 20:20:13,762][valid][INFO] - {"epoch": 13, "valid_loss": "3.41", "valid_ntokens": "3172.88", "valid_nsentences": "44.1685", "valid_prob_perplexity": "48.601", "valid_code_perplexity": "48.574", "valid_temp": "1.537", "valid_loss_0": "3.253", "valid_loss_1": "0.133", "valid_loss_2": "0.024", "valid_accuracy": "0.47926", "valid_wps": "55290.7", "valid_wpb": "3172.9", "valid_bsz": "44.2", "valid_num_updates": "52723", "valid_best_loss": "3.41"} [2023-11-01 20:20:13,764][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 13 @ 52723 updates [2023-11-01 20:20:13,766][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:20:15,208][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:20:16,184][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 13 @ 52723 updates, score 3.41) (writing took 2.4204194769263268 seconds) [2023-11-01 20:20:16,185][fairseq_cli.train][INFO] - end of epoch 13 (average epoch stats below) [2023-11-01 20:20:16,187][train][INFO] - {"epoch": 13, "train_loss": "3.503", "train_ntokens": "3191.37", "train_nsentences": "44.2682", "train_prob_perplexity": "47.519", "train_code_perplexity": "47.479", "train_temp": "1.552", "train_loss_0": "3.345", "train_loss_1": "0.134", "train_loss_2": "0.024", "train_accuracy": "0.46477", "train_wps": "16458.8", "train_ups": "5.16", "train_wpb": "3191.4", "train_bsz": "44.3", "train_num_updates": "52723", "train_lr": "8.79182e-05", "train_gnorm": "0.666", "train_loss_scale": "8", "train_train_wall": "716", "train_gb_free": "12.7", "train_wall": "9922"} [2023-11-01 20:20:16,190][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:20:16,215][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 14 [2023-11-01 20:20:16,431][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 20:20:16,456][fairseq.trainer][INFO] - begin training epoch 14 [2023-11-01 20:20:16,456][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 20:20:30,406][train_inner][INFO] - {"epoch": 14, "update": 13.019, "loss": "3.467", "ntokens": "3208.4", "nsentences": "45.36", "prob_perplexity": "48.879", "code_perplexity": "48.834", "temp": "1.537", "loss_0": "3.309", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.4703", "wps": "9876.5", "ups": "3.08", "wpb": "3208.4", "bsz": "45.4", "num_updates": "52800", "lr": "8.78987e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "36", "gb_free": "15.6", "wall": "9937"} [2023-11-01 20:21:06,315][train_inner][INFO] - {"epoch": 14, "update": 13.068, "loss": "3.459", "ntokens": "3210.92", "nsentences": "46.16", "prob_perplexity": "48.856", "code_perplexity": "48.815", "temp": "1.535", "loss_0": "3.301", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47141", "wps": "17884.4", "ups": "5.57", "wpb": "3210.9", "bsz": "46.2", "num_updates": "53000", "lr": "8.78481e-05", "gnorm": "0.657", "loss_scale": "8", "train_wall": "35", "gb_free": "14.1", "wall": "9973"} [2023-11-01 20:21:42,091][train_inner][INFO] - {"epoch": 14, "update": 13.118, "loss": "3.532", "ntokens": "3191.36", "nsentences": "42.36", "prob_perplexity": "49.012", "code_perplexity": "48.964", "temp": "1.534", "loss_0": "3.375", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.45703", "wps": "17842.2", "ups": "5.59", "wpb": "3191.4", "bsz": "42.4", "num_updates": "53200", "lr": "8.77975e-05", "gnorm": "0.667", "loss_scale": "8", "train_wall": "35", "gb_free": "12.9", "wall": "10008"} [2023-11-01 20:22:17,905][train_inner][INFO] - {"epoch": 14, "update": 13.167, "loss": "3.534", "ntokens": "3199.6", "nsentences": "44.84", "prob_perplexity": "49.381", "code_perplexity": "49.335", "temp": "1.532", "loss_0": "3.377", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46021", "wps": "17869.2", "ups": "5.58", "wpb": "3199.6", "bsz": "44.8", "num_updates": "53400", "lr": "8.77468e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "10044"} [2023-11-01 20:22:53,536][train_inner][INFO] - {"epoch": 14, "update": 13.216, "loss": "3.587", "ntokens": "3182.08", "nsentences": "39.68", "prob_perplexity": "48.987", "code_perplexity": "48.942", "temp": "1.531", "loss_0": "3.43", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.44763", "wps": "17862.1", "ups": "5.61", "wpb": "3182.1", "bsz": "39.7", "num_updates": "53600", "lr": "8.76962e-05", "gnorm": "0.67", "loss_scale": "8", "train_wall": "35", "gb_free": "14.2", "wall": "10080"} [2023-11-01 20:23:29,564][train_inner][INFO] - {"epoch": 14, "update": 13.266, "loss": "3.419", "ntokens": "3211.68", "nsentences": "47.28", "prob_perplexity": "49.314", "code_perplexity": "49.271", "temp": "1.529", "loss_0": "3.262", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47748", "wps": "17830.1", "ups": "5.55", "wpb": "3211.7", "bsz": "47.3", "num_updates": "53800", "lr": "8.76456e-05", "gnorm": "0.662", "loss_scale": "8", "train_wall": "35", "gb_free": "13", "wall": "10116"} [2023-11-01 20:24:05,566][train_inner][INFO] - {"epoch": 14, "update": 13.315, "loss": "3.438", "ntokens": "3172.64", "nsentences": "45.24", "prob_perplexity": "49.442", "code_perplexity": "49.391", "temp": "1.528", "loss_0": "3.281", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47452", "wps": "17625.7", "ups": "5.56", "wpb": "3172.6", "bsz": "45.2", "num_updates": "54000", "lr": "8.75949e-05", "gnorm": "0.673", "loss_scale": "8", "train_wall": "35", "gb_free": "14.1", "wall": "10152"} [2023-11-01 20:24:41,839][train_inner][INFO] - {"epoch": 14, "update": 13.364, "loss": "3.521", "ntokens": "3185.68", "nsentences": "41.44", "prob_perplexity": "49.165", "code_perplexity": "49.114", "temp": "1.526", "loss_0": "3.364", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.45867", "wps": "17566.1", "ups": "5.51", "wpb": "3185.7", "bsz": "41.4", "num_updates": "54200", "lr": "8.75443e-05", "gnorm": "0.674", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "10188"} [2023-11-01 20:25:17,921][train_inner][INFO] - {"epoch": 14, "update": 13.413, "loss": "3.456", "ntokens": "3210.4", "nsentences": "47.44", "prob_perplexity": "49.684", "code_perplexity": "49.624", "temp": "1.524", "loss_0": "3.299", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47328", "wps": "17796.1", "ups": "5.54", "wpb": "3210.4", "bsz": "47.4", "num_updates": "54400", "lr": "8.74937e-05", "gnorm": "0.67", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "10224"} [2023-11-01 20:25:53,415][train_inner][INFO] - {"epoch": 14, "update": 13.463, "loss": "3.433", "ntokens": "3192.28", "nsentences": "46.32", "prob_perplexity": "50.116", "code_perplexity": "50.067", "temp": "1.523", "loss_0": "3.276", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.4745", "wps": "17988.7", "ups": "5.64", "wpb": "3192.3", "bsz": "46.3", "num_updates": "54600", "lr": "8.7443e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "10260"} [2023-11-01 20:26:29,445][train_inner][INFO] - {"epoch": 14, "update": 13.512, "loss": "3.52", "ntokens": "3222.88", "nsentences": "41.84", "prob_perplexity": "49.772", "code_perplexity": "49.716", "temp": "1.521", "loss_0": "3.363", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.45787", "wps": "17890.9", "ups": "5.55", "wpb": "3222.9", "bsz": "41.8", "num_updates": "54800", "lr": "8.73924e-05", "gnorm": "0.661", "loss_scale": "8", "train_wall": "35", "gb_free": "12.9", "wall": "10296"} [2023-11-01 20:27:05,483][train_inner][INFO] - {"epoch": 14, "update": 13.561, "loss": "3.483", "ntokens": "3201.44", "nsentences": "44.88", "prob_perplexity": "49.712", "code_perplexity": "49.662", "temp": "1.52", "loss_0": "3.326", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46731", "wps": "17768.6", "ups": "5.55", "wpb": "3201.4", "bsz": "44.9", "num_updates": "55000", "lr": "8.73418e-05", "gnorm": "0.669", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "10332"} [2023-11-01 20:27:41,467][train_inner][INFO] - {"epoch": 14, "update": 13.611, "loss": "3.449", "ntokens": "3127.56", "nsentences": "44.28", "prob_perplexity": "49.711", "code_perplexity": "49.656", "temp": "1.518", "loss_0": "3.292", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.4737", "wps": "17384.2", "ups": "5.56", "wpb": "3127.6", "bsz": "44.3", "num_updates": "55200", "lr": "8.72911e-05", "gnorm": "0.681", "loss_scale": "8", "train_wall": "35", "gb_free": "12.7", "wall": "10368"} [2023-11-01 20:28:17,520][train_inner][INFO] - {"epoch": 14, "update": 13.66, "loss": "3.478", "ntokens": "3175.24", "nsentences": "43.4", "prob_perplexity": "50.242", "code_perplexity": "50.189", "temp": "1.517", "loss_0": "3.32", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46562", "wps": "17615.1", "ups": "5.55", "wpb": "3175.2", "bsz": "43.4", "num_updates": "55400", "lr": "8.72405e-05", "gnorm": "0.67", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "10404"} [2023-11-01 20:28:53,603][train_inner][INFO] - {"epoch": 14, "update": 13.709, "loss": "3.444", "ntokens": "3164.12", "nsentences": "45.04", "prob_perplexity": "50.28", "code_perplexity": "50.219", "temp": "1.515", "loss_0": "3.287", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47212", "wps": "17539", "ups": "5.54", "wpb": "3164.1", "bsz": "45", "num_updates": "55600", "lr": "8.71899e-05", "gnorm": "0.671", "loss_scale": "8", "train_wall": "35", "gb_free": "13.6", "wall": "10440"} [2023-11-01 20:29:29,546][train_inner][INFO] - {"epoch": 14, "update": 13.759, "loss": "3.55", "ntokens": "3223.16", "nsentences": "43.4", "prob_perplexity": "50.325", "code_perplexity": "50.268", "temp": "1.514", "loss_0": "3.393", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.45426", "wps": "17936", "ups": "5.56", "wpb": "3223.2", "bsz": "43.4", "num_updates": "55800", "lr": "8.71392e-05", "gnorm": "0.665", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "10476"} [2023-11-01 20:30:05,380][train_inner][INFO] - {"epoch": 14, "update": 13.808, "loss": "3.454", "ntokens": "3185.44", "nsentences": "44.12", "prob_perplexity": "50.507", "code_perplexity": "50.445", "temp": "1.512", "loss_0": "3.297", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47008", "wps": "17780.3", "ups": "5.58", "wpb": "3185.4", "bsz": "44.1", "num_updates": "56000", "lr": "8.70886e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "10512"} [2023-11-01 20:30:41,583][train_inner][INFO] - {"epoch": 14, "update": 13.857, "loss": "3.422", "ntokens": "3184.24", "nsentences": "46.08", "prob_perplexity": "50.434", "code_perplexity": "50.38", "temp": "1.511", "loss_0": "3.266", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47556", "wps": "17592.4", "ups": "5.52", "wpb": "3184.2", "bsz": "46.1", "num_updates": "56200", "lr": "8.7038e-05", "gnorm": "0.673", "loss_scale": "8", "train_wall": "36", "gb_free": "16.5", "wall": "10548"} [2023-11-01 20:31:17,789][train_inner][INFO] - {"epoch": 14, "update": 13.907, "loss": "3.499", "ntokens": "3215.88", "nsentences": "42.68", "prob_perplexity": "50.672", "code_perplexity": "50.621", "temp": "1.509", "loss_0": "3.343", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46097", "wps": "17765.4", "ups": "5.52", "wpb": "3215.9", "bsz": "42.7", "num_updates": "56400", "lr": "8.69873e-05", "gnorm": "0.659", "loss_scale": "8", "train_wall": "36", "gb_free": "14", "wall": "10584"} [2023-11-01 20:31:53,751][train_inner][INFO] - {"epoch": 14, "update": 13.956, "loss": "3.509", "ntokens": "3225.84", "nsentences": "44.08", "prob_perplexity": "50.889", "code_perplexity": "50.834", "temp": "1.508", "loss_0": "3.352", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46021", "wps": "17941.2", "ups": "5.56", "wpb": "3225.8", "bsz": "44.1", "num_updates": "56600", "lr": "8.69367e-05", "gnorm": "0.656", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "10620"} [2023-11-01 20:32:26,288][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 20:32:26,290][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:32:26,313][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 17 [2023-11-01 20:32:51,950][valid][INFO] - {"epoch": 14, "valid_loss": "3.416", "valid_ntokens": "3182.23", "valid_nsentences": "44.1685", "valid_prob_perplexity": "51.046", "valid_code_perplexity": "51.008", "valid_temp": "1.506", "valid_loss_0": "3.26", "valid_loss_1": "0.133", "valid_loss_2": "0.024", "valid_accuracy": "0.47707", "valid_wps": "56280", "valid_wpb": "3182.2", "valid_bsz": "44.2", "valid_num_updates": "56779", "valid_best_loss": "3.41"} [2023-11-01 20:32:51,952][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 14 @ 56779 updates [2023-11-01 20:32:51,953][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 20:32:53,398][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 20:32:53,448][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 14 @ 56779 updates, score 3.416) (writing took 1.4962981748394668 seconds) [2023-11-01 20:32:53,449][fairseq_cli.train][INFO] - end of epoch 14 (average epoch stats below) [2023-11-01 20:32:53,451][train][INFO] - {"epoch": 14, "train_loss": "3.485", "train_ntokens": "3195.89", "train_nsentences": "44.2682", "train_prob_perplexity": "49.839", "train_code_perplexity": "49.787", "train_temp": "1.521", "train_loss_0": "3.328", "train_loss_1": "0.133", "train_loss_2": "0.024", "train_accuracy": "0.46574", "train_wps": "17117.6", "train_ups": "5.36", "train_wpb": "3195.9", "train_bsz": "44.3", "train_num_updates": "56779", "train_lr": "8.68914e-05", "train_gnorm": "0.667", "train_loss_scale": "8", "train_train_wall": "717", "train_gb_free": "13.9", "train_wall": "10680"} [2023-11-01 20:32:53,453][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:32:53,471][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 15 [2023-11-01 20:32:53,641][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 20:32:53,665][fairseq.trainer][INFO] - begin training epoch 15 [2023-11-01 20:32:53,666][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 20:32:57,679][train_inner][INFO] - {"epoch": 15, "update": 14.005, "loss": "3.527", "ntokens": "3244.6", "nsentences": "43.48", "prob_perplexity": "50.905", "code_perplexity": "50.851", "temp": "1.506", "loss_0": "3.37", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.4574", "wps": "10151.2", "ups": "3.13", "wpb": "3244.6", "bsz": "43.5", "num_updates": "56800", "lr": "8.68861e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "36", "gb_free": "13.5", "wall": "10684"} [2023-11-01 20:33:32,783][train_inner][INFO] - {"epoch": 15, "update": 14.054, "loss": "3.453", "ntokens": "3168.48", "nsentences": "44", "prob_perplexity": "51.063", "code_perplexity": "51.007", "temp": "1.505", "loss_0": "3.297", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46931", "wps": "18060.5", "ups": "5.7", "wpb": "3168.5", "bsz": "44", "num_updates": "57000", "lr": "8.68354e-05", "gnorm": "0.674", "loss_scale": "8", "train_wall": "34", "gb_free": "13.5", "wall": "10719"} [2023-11-01 20:34:08,663][train_inner][INFO] - {"epoch": 15, "update": 14.104, "loss": "3.513", "ntokens": "3210.76", "nsentences": "40.36", "prob_perplexity": "51.411", "code_perplexity": "51.353", "temp": "1.503", "loss_0": "3.356", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.45765", "wps": "17898.4", "ups": "5.57", "wpb": "3210.8", "bsz": "40.4", "num_updates": "57200", "lr": "8.67848e-05", "gnorm": "0.667", "loss_scale": "8", "train_wall": "35", "gb_free": "12.5", "wall": "10755"} [2023-11-01 20:34:44,377][train_inner][INFO] - {"epoch": 15, "update": 14.153, "loss": "3.405", "ntokens": "3197.52", "nsentences": "46.52", "prob_perplexity": "51.243", "code_perplexity": "51.19", "temp": "1.502", "loss_0": "3.248", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.47879", "wps": "17907.2", "ups": "5.6", "wpb": "3197.5", "bsz": "46.5", "num_updates": "57400", "lr": "8.67342e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "10791"} [2023-11-01 20:35:20,407][train_inner][INFO] - {"epoch": 15, "update": 14.202, "loss": "3.482", "ntokens": "3207.2", "nsentences": "43.16", "prob_perplexity": "51.9", "code_perplexity": "51.849", "temp": "1.5", "loss_0": "3.325", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46403", "wps": "17803.9", "ups": "5.55", "wpb": "3207.2", "bsz": "43.2", "num_updates": "57600", "lr": "8.66835e-05", "gnorm": "0.666", "loss_scale": "8", "train_wall": "35", "gb_free": "12.4", "wall": "10827"} [2023-11-01 20:35:56,160][train_inner][INFO] - {"epoch": 15, "update": 14.252, "loss": "3.49", "ntokens": "3197.96", "nsentences": "43.16", "prob_perplexity": "51.905", "code_perplexity": "51.84", "temp": "1.499", "loss_0": "3.334", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46311", "wps": "17890.6", "ups": "5.59", "wpb": "3198", "bsz": "43.2", "num_updates": "57800", "lr": "8.66329e-05", "gnorm": "0.672", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "10862"} [2023-11-01 20:36:32,400][train_inner][INFO] - {"epoch": 15, "update": 14.301, "loss": "3.452", "ntokens": "3176.28", "nsentences": "43.84", "prob_perplexity": "52.158", "code_perplexity": "52.094", "temp": "1.497", "loss_0": "3.296", "loss_1": "0.133", "loss_2": "0.024", "accuracy": "0.46953", "wps": "17530.4", "ups": "5.52", "wpb": "3176.3", "bsz": "43.8", "num_updates": "58000", "lr": "8.65823e-05", "gnorm": "0.669", "loss_scale": "8", "train_wall": "36", "gb_free": "12.4", "wall": "10899"} [2023-11-01 20:37:08,450][train_inner][INFO] - {"epoch": 15, "update": 14.35, "loss": "3.404", "ntokens": "3170.12", "nsentences": "46.2", "prob_perplexity": "52.373", "code_perplexity": "52.317", "temp": "1.496", "loss_0": "3.248", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.47769", "wps": "17588.3", "ups": "5.55", "wpb": "3170.1", "bsz": "46.2", "num_updates": "58200", "lr": "8.65316e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "10935"} [2023-11-01 20:37:44,343][train_inner][INFO] - {"epoch": 15, "update": 14.4, "loss": "3.45", "ntokens": "3197.44", "nsentences": "44.36", "prob_perplexity": "52.991", "code_perplexity": "52.938", "temp": "1.494", "loss_0": "3.294", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46903", "wps": "17817.7", "ups": "5.57", "wpb": "3197.4", "bsz": "44.4", "num_updates": "58400", "lr": "8.6481e-05", "gnorm": "0.671", "loss_scale": "8", "train_wall": "35", "gb_free": "14.6", "wall": "10971"} [2023-11-01 20:38:20,944][train_inner][INFO] - {"epoch": 15, "update": 14.449, "loss": "3.495", "ntokens": "3190.24", "nsentences": "43.64", "prob_perplexity": "52.895", "code_perplexity": "52.825", "temp": "1.493", "loss_0": "3.339", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46277", "wps": "17433.6", "ups": "5.46", "wpb": "3190.2", "bsz": "43.6", "num_updates": "58600", "lr": "8.64304e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "36", "gb_free": "14.4", "wall": "11007"} [2023-11-01 20:38:57,230][train_inner][INFO] - {"epoch": 15, "update": 14.498, "loss": "3.481", "ntokens": "3211.36", "nsentences": "44.52", "prob_perplexity": "52.618", "code_perplexity": "52.566", "temp": "1.491", "loss_0": "3.325", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46449", "wps": "17701.3", "ups": "5.51", "wpb": "3211.4", "bsz": "44.5", "num_updates": "58800", "lr": "8.63797e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "36", "gb_free": "15.2", "wall": "11043"} [2023-11-01 20:39:33,161][train_inner][INFO] - {"epoch": 15, "update": 14.548, "loss": "3.423", "ntokens": "3166.52", "nsentences": "45.6", "prob_perplexity": "52.76", "code_perplexity": "52.697", "temp": "1.49", "loss_0": "3.266", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.475", "wps": "17626.8", "ups": "5.57", "wpb": "3166.5", "bsz": "45.6", "num_updates": "59000", "lr": "8.63291e-05", "gnorm": "0.673", "loss_scale": "8", "train_wall": "35", "gb_free": "15.3", "wall": "11079"} [2023-11-01 20:40:09,440][train_inner][INFO] - {"epoch": 15, "update": 14.597, "loss": "3.451", "ntokens": "3198.08", "nsentences": "45.72", "prob_perplexity": "53.218", "code_perplexity": "53.158", "temp": "1.488", "loss_0": "3.295", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46989", "wps": "17631.5", "ups": "5.51", "wpb": "3198.1", "bsz": "45.7", "num_updates": "59200", "lr": "8.62785e-05", "gnorm": "0.669", "loss_scale": "8", "train_wall": "36", "gb_free": "13.9", "wall": "11116"} [2023-11-01 20:40:45,975][train_inner][INFO] - {"epoch": 15, "update": 14.646, "loss": "3.448", "ntokens": "3215.32", "nsentences": "44.8", "prob_perplexity": "53.426", "code_perplexity": "53.362", "temp": "1.487", "loss_0": "3.292", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46987", "wps": "17602.5", "ups": "5.47", "wpb": "3215.3", "bsz": "44.8", "num_updates": "59400", "lr": "8.62278e-05", "gnorm": "0.665", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "11152"} [2023-11-01 20:41:22,100][train_inner][INFO] - {"epoch": 15, "update": 14.696, "loss": "3.463", "ntokens": "3203.32", "nsentences": "43.48", "prob_perplexity": "53.44", "code_perplexity": "53.376", "temp": "1.485", "loss_0": "3.307", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46625", "wps": "17735.6", "ups": "5.54", "wpb": "3203.3", "bsz": "43.5", "num_updates": "59600", "lr": "8.61772e-05", "gnorm": "0.668", "loss_scale": "8", "train_wall": "35", "gb_free": "14.5", "wall": "11188"} [2023-11-01 20:41:58,266][train_inner][INFO] - {"epoch": 15, "update": 14.745, "loss": "3.398", "ntokens": "3215.68", "nsentences": "48.36", "prob_perplexity": "53.88", "code_perplexity": "53.811", "temp": "1.484", "loss_0": "3.241", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.47992", "wps": "17783.7", "ups": "5.53", "wpb": "3215.7", "bsz": "48.4", "num_updates": "59800", "lr": "8.61266e-05", "gnorm": "0.669", "loss_scale": "8", "train_wall": "36", "gb_free": "13", "wall": "11224"} [2023-11-01 20:42:34,328][train_inner][INFO] - {"epoch": 15, "update": 14.794, "loss": "3.41", "ntokens": "3184.84", "nsentences": "44.76", "prob_perplexity": "53.931", "code_perplexity": "53.858", "temp": "1.482", "loss_0": "3.254", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.4748", "wps": "17664.2", "ups": "5.55", "wpb": "3184.8", "bsz": "44.8", "num_updates": "60000", "lr": "8.60759e-05", "gnorm": "0.663", "loss_scale": "8", "train_wall": "35", "gb_free": "11.8", "wall": "11261"} [2023-11-01 20:43:10,325][train_inner][INFO] - {"epoch": 15, "update": 14.843, "loss": "3.481", "ntokens": "3219.08", "nsentences": "42.92", "prob_perplexity": "53.772", "code_perplexity": "53.702", "temp": "1.481", "loss_0": "3.325", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46364", "wps": "17886.5", "ups": "5.56", "wpb": "3219.1", "bsz": "42.9", "num_updates": "60200", "lr": "8.60253e-05", "gnorm": "0.662", "loss_scale": "8", "train_wall": "35", "gb_free": "12.6", "wall": "11297"} [2023-11-01 20:43:46,431][train_inner][INFO] - {"epoch": 15, "update": 14.893, "loss": "3.489", "ntokens": "3229.84", "nsentences": "42.32", "prob_perplexity": "53.882", "code_perplexity": "53.809", "temp": "1.479", "loss_0": "3.333", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.45966", "wps": "17891.8", "ups": "5.54", "wpb": "3229.8", "bsz": "42.3", "num_updates": "60400", "lr": "8.59747e-05", "gnorm": "0.683", "loss_scale": "8", "train_wall": "35", "gb_free": "14.7", "wall": "11333"} [2023-11-01 20:44:22,268][train_inner][INFO] - {"epoch": 15, "update": 14.942, "loss": "3.422", "ntokens": "3198.52", "nsentences": "46.32", "prob_perplexity": "53.991", "code_perplexity": "53.92", "temp": "1.478", "loss_0": "3.266", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.47474", "wps": "17852", "ups": "5.58", "wpb": "3198.5", "bsz": "46.3", "num_updates": "60600", "lr": "8.59241e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "35", "gb_free": "12.8", "wall": "11368"} [2023-11-01 20:44:58,332][train_inner][INFO] - {"epoch": 15, "update": 14.991, "loss": "3.487", "ntokens": "3164.44", "nsentences": "42.28", "prob_perplexity": "53.91", "code_perplexity": "53.839", "temp": "1.476", "loss_0": "3.331", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46137", "wps": "17549.6", "ups": "5.55", "wpb": "3164.4", "bsz": "42.3", "num_updates": "60800", "lr": "8.58734e-05", "gnorm": "0.676", "loss_scale": "16", "train_wall": "35", "gb_free": "13.4", "wall": "11405"} [2023-11-01 20:45:04,482][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 20:45:04,483][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:45:04,502][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 18 [2023-11-01 20:45:29,994][valid][INFO] - {"epoch": 15, "valid_loss": "3.331", "valid_ntokens": "3159.65", "valid_nsentences": "44.1685", "valid_prob_perplexity": "53.54", "valid_code_perplexity": "53.5", "valid_temp": "1.475", "valid_loss_0": "3.175", "valid_loss_1": "0.132", "valid_loss_2": "0.024", "valid_accuracy": "0.49018", "valid_wps": "56199.4", "valid_wpb": "3159.6", "valid_bsz": "44.2", "valid_num_updates": "60835", "valid_best_loss": "3.331"} [2023-11-01 20:45:29,996][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 15 @ 60835 updates [2023-11-01 20:45:29,998][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:45:31,421][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:45:32,403][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 15 @ 60835 updates, score 3.331) (writing took 2.4076006789691746 seconds) [2023-11-01 20:45:32,404][fairseq_cli.train][INFO] - end of epoch 15 (average epoch stats below) [2023-11-01 20:45:32,406][train][INFO] - {"epoch": 15, "train_loss": "3.456", "train_ntokens": "3196.38", "train_nsentences": "44.2682", "train_prob_perplexity": "52.841", "train_code_perplexity": "52.778", "train_temp": "1.491", "train_loss_0": "3.3", "train_loss_1": "0.132", "train_loss_2": "0.024", "train_accuracy": "0.46834", "train_wps": "17082.1", "train_ups": "5.34", "train_wpb": "3196.4", "train_bsz": "44.3", "train_num_updates": "60835", "train_lr": "8.58646e-05", "train_gnorm": "0.669", "train_loss_scale": "16", "train_train_wall": "718", "train_gb_free": "13.9", "train_wall": "11439"} [2023-11-01 20:45:32,409][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:45:32,430][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 16 [2023-11-01 20:45:32,618][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 20:45:32,642][fairseq.trainer][INFO] - begin training epoch 16 [2023-11-01 20:45:32,643][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 20:46:02,615][train_inner][INFO] - {"epoch": 16, "update": 15.041, "loss": "3.48", "ntokens": "3264.96", "nsentences": "44.08", "prob_perplexity": "54.146", "code_perplexity": "54.072", "temp": "1.475", "loss_0": "3.324", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46271", "wps": "10160.9", "ups": "3.11", "wpb": "3265", "bsz": "44.1", "num_updates": "61000", "lr": "8.58228e-05", "gnorm": "0.662", "loss_scale": "16", "train_wall": "35", "gb_free": "12.2", "wall": "11469"} [2023-11-01 20:46:38,580][train_inner][INFO] - {"epoch": 16, "update": 15.09, "loss": "3.352", "ntokens": "3177.52", "nsentences": "48.32", "prob_perplexity": "54.201", "code_perplexity": "54.132", "temp": "1.474", "loss_0": "3.196", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.48808", "wps": "17671.2", "ups": "5.56", "wpb": "3177.5", "bsz": "48.3", "num_updates": "61200", "lr": "8.57722e-05", "gnorm": "0.678", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "11505"} [2023-11-01 20:47:14,545][train_inner][INFO] - {"epoch": 16, "update": 15.139, "loss": "3.441", "ntokens": "3161.4", "nsentences": "43.2", "prob_perplexity": "54.571", "code_perplexity": "54.48", "temp": "1.472", "loss_0": "3.285", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46892", "wps": "17581.3", "ups": "5.56", "wpb": "3161.4", "bsz": "43.2", "num_updates": "61400", "lr": "8.57215e-05", "gnorm": "0.702", "loss_scale": "16", "train_wall": "35", "gb_free": "14.5", "wall": "11541"} [2023-11-01 20:47:51,045][train_inner][INFO] - {"epoch": 16, "update": 15.189, "loss": "3.502", "ntokens": "3233.84", "nsentences": "43", "prob_perplexity": "54.735", "code_perplexity": "54.645", "temp": "1.471", "loss_0": "3.345", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.4584", "wps": "17720.5", "ups": "5.48", "wpb": "3233.8", "bsz": "43", "num_updates": "61600", "lr": "8.56709e-05", "gnorm": "0.666", "loss_scale": "16", "train_wall": "36", "gb_free": "13.4", "wall": "11577"} [2023-11-01 20:48:27,161][train_inner][INFO] - {"epoch": 16, "update": 15.238, "loss": "3.374", "ntokens": "3179.2", "nsentences": "46.88", "prob_perplexity": "54.871", "code_perplexity": "54.791", "temp": "1.469", "loss_0": "3.218", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.48186", "wps": "17606.8", "ups": "5.54", "wpb": "3179.2", "bsz": "46.9", "num_updates": "61800", "lr": "8.56203e-05", "gnorm": "0.701", "loss_scale": "16", "train_wall": "35", "gb_free": "13.1", "wall": "11613"} [2023-11-01 20:49:02,970][train_inner][INFO] - {"epoch": 16, "update": 15.287, "loss": "3.452", "ntokens": "3201.56", "nsentences": "44.04", "prob_perplexity": "54.939", "code_perplexity": "54.854", "temp": "1.468", "loss_0": "3.296", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.46788", "wps": "17882.3", "ups": "5.59", "wpb": "3201.6", "bsz": "44", "num_updates": "62000", "lr": "8.55696e-05", "gnorm": "0.687", "loss_scale": "16", "train_wall": "35", "gb_free": "14.2", "wall": "11649"} [2023-11-01 20:49:38,912][train_inner][INFO] - {"epoch": 16, "update": 15.337, "loss": "3.405", "ntokens": "3172.56", "nsentences": "44.32", "prob_perplexity": "55.264", "code_perplexity": "55.174", "temp": "1.466", "loss_0": "3.249", "loss_1": "0.132", "loss_2": "0.024", "accuracy": "0.47483", "wps": "17654.5", "ups": "5.56", "wpb": "3172.6", "bsz": "44.3", "num_updates": "62200", "lr": "8.5519e-05", "gnorm": "0.668", "loss_scale": "16", "train_wall": "35", "gb_free": "14.4", "wall": "11685"} [2023-11-01 20:50:15,310][train_inner][INFO] - {"epoch": 16, "update": 15.386, "loss": "3.473", "ntokens": "3210.56", "nsentences": "42.68", "prob_perplexity": "55.407", "code_perplexity": "55.314", "temp": "1.465", "loss_0": "3.316", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.46271", "wps": "17642.4", "ups": "5.5", "wpb": "3210.6", "bsz": "42.7", "num_updates": "62400", "lr": "8.54684e-05", "gnorm": "0.671", "loss_scale": "16", "train_wall": "36", "gb_free": "14.9", "wall": "11722"} [2023-11-01 20:50:51,601][train_inner][INFO] - {"epoch": 16, "update": 15.435, "loss": "3.482", "ntokens": "3189.04", "nsentences": "41.92", "prob_perplexity": "54.858", "code_perplexity": "54.78", "temp": "1.463", "loss_0": "3.326", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.46084", "wps": "17575.8", "ups": "5.51", "wpb": "3189", "bsz": "41.9", "num_updates": "62600", "lr": "8.54177e-05", "gnorm": "0.671", "loss_scale": "16", "train_wall": "36", "gb_free": "14", "wall": "11758"} [2023-11-01 20:51:27,721][train_inner][INFO] - {"epoch": 16, "update": 15.484, "loss": "3.499", "ntokens": "3189.84", "nsentences": "41.24", "prob_perplexity": "55.195", "code_perplexity": "55.103", "temp": "1.462", "loss_0": "3.342", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.45786", "wps": "17663.9", "ups": "5.54", "wpb": "3189.8", "bsz": "41.2", "num_updates": "62800", "lr": "8.53671e-05", "gnorm": "0.673", "loss_scale": "16", "train_wall": "35", "gb_free": "13.7", "wall": "11794"} [2023-11-01 20:52:03,454][train_inner][INFO] - {"epoch": 16, "update": 15.534, "loss": "3.404", "ntokens": "3191.2", "nsentences": "44.28", "prob_perplexity": "55.437", "code_perplexity": "55.352", "temp": "1.46", "loss_0": "3.247", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.47437", "wps": "17862.2", "ups": "5.6", "wpb": "3191.2", "bsz": "44.3", "num_updates": "63000", "lr": "8.53165e-05", "gnorm": "0.672", "loss_scale": "16", "train_wall": "35", "gb_free": "14.2", "wall": "11830"} [2023-11-01 20:52:39,389][train_inner][INFO] - {"epoch": 16, "update": 15.583, "loss": "3.466", "ntokens": "3171.84", "nsentences": "42.68", "prob_perplexity": "55.541", "code_perplexity": "55.438", "temp": "1.459", "loss_0": "3.31", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.46404", "wps": "17654.4", "ups": "5.57", "wpb": "3171.8", "bsz": "42.7", "num_updates": "63200", "lr": "8.52658e-05", "gnorm": "0.673", "loss_scale": "16", "train_wall": "35", "gb_free": "14.2", "wall": "11866"} [2023-11-01 20:53:09,224][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2023-11-01 20:53:15,480][train_inner][INFO] - {"epoch": 16, "update": 15.633, "loss": "3.446", "ntokens": "3206.24", "nsentences": "44.76", "prob_perplexity": "55.543", "code_perplexity": "55.442", "temp": "1.457", "loss_0": "3.29", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.46923", "wps": "17768.2", "ups": "5.54", "wpb": "3206.2", "bsz": "44.8", "num_updates": "63400", "lr": "8.52152e-05", "gnorm": "0.673", "loss_scale": "8", "train_wall": "35", "gb_free": "13", "wall": "11902"} [2023-11-01 20:53:51,875][train_inner][INFO] - {"epoch": 16, "update": 15.682, "loss": "3.378", "ntokens": "3210.76", "nsentences": "45.08", "prob_perplexity": "55.991", "code_perplexity": "55.897", "temp": "1.456", "loss_0": "3.221", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.47932", "wps": "17645", "ups": "5.5", "wpb": "3210.8", "bsz": "45.1", "num_updates": "63600", "lr": "8.51646e-05", "gnorm": "0.671", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "11938"} [2023-11-01 20:54:27,954][train_inner][INFO] - {"epoch": 16, "update": 15.731, "loss": "3.393", "ntokens": "3174.44", "nsentences": "44.96", "prob_perplexity": "55.735", "code_perplexity": "55.635", "temp": "1.454", "loss_0": "3.236", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.47799", "wps": "17598.6", "ups": "5.54", "wpb": "3174.4", "bsz": "45", "num_updates": "63800", "lr": "8.51139e-05", "gnorm": "0.68", "loss_scale": "8", "train_wall": "35", "gb_free": "14.6", "wall": "11974"} [2023-11-01 20:55:03,732][train_inner][INFO] - {"epoch": 16, "update": 15.781, "loss": "3.376", "ntokens": "3154.12", "nsentences": "45.08", "prob_perplexity": "56.133", "code_perplexity": "56.027", "temp": "1.453", "loss_0": "3.218", "loss_1": "0.132", "loss_2": "0.025", "accuracy": "0.48", "wps": "17632.5", "ups": "5.59", "wpb": "3154.1", "bsz": "45.1", "num_updates": "64000", "lr": "8.50633e-05", "gnorm": "0.681", "loss_scale": "8", "train_wall": "35", "gb_free": "14", "wall": "12010"} [2023-11-01 20:55:39,990][train_inner][INFO] - {"epoch": 16, "update": 15.83, "loss": "3.44", "ntokens": "3215.6", "nsentences": "43.92", "prob_perplexity": "56.235", "code_perplexity": "56.127", "temp": "1.452", "loss_0": "3.282", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.46844", "wps": "17738.2", "ups": "5.52", "wpb": "3215.6", "bsz": "43.9", "num_updates": "64200", "lr": "8.50127e-05", "gnorm": "0.664", "loss_scale": "8", "train_wall": "36", "gb_free": "13.9", "wall": "12046"} [2023-11-01 20:56:15,443][train_inner][INFO] - {"epoch": 16, "update": 15.879, "loss": "3.409", "ntokens": "3181.24", "nsentences": "44", "prob_perplexity": "56.268", "code_perplexity": "56.146", "temp": "1.45", "loss_0": "3.252", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.47433", "wps": "17947.6", "ups": "5.64", "wpb": "3181.2", "bsz": "44", "num_updates": "64400", "lr": "8.4962e-05", "gnorm": "0.675", "loss_scale": "8", "train_wall": "35", "gb_free": "13.1", "wall": "12082"} [2023-11-01 20:56:51,163][train_inner][INFO] - {"epoch": 16, "update": 15.929, "loss": "3.324", "ntokens": "3143.56", "nsentences": "46.48", "prob_perplexity": "55.762", "code_perplexity": "55.643", "temp": "1.449", "loss_0": "3.167", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.48915", "wps": "17602.2", "ups": "5.6", "wpb": "3143.6", "bsz": "46.5", "num_updates": "64600", "lr": "8.49114e-05", "gnorm": "0.686", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "12117"} [2023-11-01 20:57:27,221][train_inner][INFO] - {"epoch": 16, "update": 15.978, "loss": "3.397", "ntokens": "3217.4", "nsentences": "44.56", "prob_perplexity": "56.236", "code_perplexity": "56.105", "temp": "1.447", "loss_0": "3.24", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.47452", "wps": "17846.5", "ups": "5.55", "wpb": "3217.4", "bsz": "44.6", "num_updates": "64800", "lr": "8.48608e-05", "gnorm": "0.672", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "12153"} [2023-11-01 20:57:43,336][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 20:57:43,338][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:57:43,359][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 19 [2023-11-01 20:58:08,552][valid][INFO] - {"epoch": 16, "valid_loss": "3.317", "valid_ntokens": "3161.65", "valid_nsentences": "44.1685", "valid_prob_perplexity": "56.44", "valid_code_perplexity": "56.369", "valid_temp": "1.446", "valid_loss_0": "3.159", "valid_loss_1": "0.132", "valid_loss_2": "0.026", "valid_accuracy": "0.49111", "valid_wps": "56865.7", "valid_wpb": "3161.6", "valid_bsz": "44.2", "valid_num_updates": "64890", "valid_best_loss": "3.317"} [2023-11-01 20:58:08,554][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 16 @ 64890 updates [2023-11-01 20:58:08,556][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:58:09,969][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 20:58:10,945][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 16 @ 64890 updates, score 3.317) (writing took 2.390970362816006 seconds) [2023-11-01 20:58:10,946][fairseq_cli.train][INFO] - end of epoch 16 (average epoch stats below) [2023-11-01 20:58:10,948][train][INFO] - {"epoch": 16, "train_loss": "3.425", "train_ntokens": "3191.3", "train_nsentences": "44.2673", "train_prob_perplexity": "55.39", "train_code_perplexity": "55.293", "train_temp": "1.461", "train_loss_0": "3.268", "train_loss_1": "0.132", "train_loss_2": "0.025", "train_accuracy": "0.47173", "train_wps": "17060.1", "train_ups": "5.35", "train_wpb": "3191.3", "train_bsz": "44.3", "train_num_updates": "64890", "train_lr": "8.4838e-05", "train_gnorm": "0.676", "train_loss_scale": "8", "train_train_wall": "718", "train_gb_free": "13.1", "train_wall": "12197"} [2023-11-01 20:58:10,951][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 20:58:10,971][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 17 [2023-11-01 20:58:11,147][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 20:58:11,172][fairseq.trainer][INFO] - begin training epoch 17 [2023-11-01 20:58:11,173][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 20:58:30,988][train_inner][INFO] - {"epoch": 17, "update": 16.027, "loss": "3.427", "ntokens": "3179.52", "nsentences": "43.68", "prob_perplexity": "56.61", "code_perplexity": "56.464", "temp": "1.446", "loss_0": "3.269", "loss_1": "0.131", "loss_2": "0.026", "accuracy": "0.47104", "wps": "9972.7", "ups": "3.14", "wpb": "3179.5", "bsz": "43.7", "num_updates": "65000", "lr": "8.48101e-05", "gnorm": "0.679", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "12217"} [2023-11-01 20:59:06,123][train_inner][INFO] - {"epoch": 17, "update": 16.076, "loss": "3.478", "ntokens": "3192.32", "nsentences": "40.92", "prob_perplexity": "56.617", "code_perplexity": "56.461", "temp": "1.444", "loss_0": "3.321", "loss_1": "0.131", "loss_2": "0.026", "accuracy": "0.45936", "wps": "18180.6", "ups": "5.7", "wpb": "3192.3", "bsz": "40.9", "num_updates": "65200", "lr": "8.47595e-05", "gnorm": "0.678", "loss_scale": "8", "train_wall": "34", "gb_free": "14.5", "wall": "12252"} [2023-11-01 20:59:41,808][train_inner][INFO] - {"epoch": 17, "update": 16.126, "loss": "3.39", "ntokens": "3183.04", "nsentences": "46.2", "prob_perplexity": "56.357", "code_perplexity": "56.207", "temp": "1.443", "loss_0": "3.232", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.47786", "wps": "17840.8", "ups": "5.6", "wpb": "3183", "bsz": "46.2", "num_updates": "65400", "lr": "8.47089e-05", "gnorm": "0.681", "loss_scale": "8", "train_wall": "35", "gb_free": "13.8", "wall": "12288"} [2023-11-01 21:00:17,139][train_inner][INFO] - {"epoch": 17, "update": 16.175, "loss": "3.357", "ntokens": "3172.12", "nsentences": "45.76", "prob_perplexity": "56.55", "code_perplexity": "56.367", "temp": "1.441", "loss_0": "3.199", "loss_1": "0.132", "loss_2": "0.026", "accuracy": "0.48225", "wps": "17957.4", "ups": "5.66", "wpb": "3172.1", "bsz": "45.8", "num_updates": "65600", "lr": "8.46582e-05", "gnorm": "0.679", "loss_scale": "8", "train_wall": "35", "gb_free": "13.6", "wall": "12323"} [2023-11-01 21:00:53,161][train_inner][INFO] - {"epoch": 17, "update": 16.224, "loss": "3.482", "ntokens": "3226.88", "nsentences": "42.28", "prob_perplexity": "56.821", "code_perplexity": "56.632", "temp": "1.44", "loss_0": "3.324", "loss_1": "0.131", "loss_2": "0.027", "accuracy": "0.46002", "wps": "17917.2", "ups": "5.55", "wpb": "3226.9", "bsz": "42.3", "num_updates": "65800", "lr": "8.46076e-05", "gnorm": "0.669", "loss_scale": "8", "train_wall": "35", "gb_free": "13", "wall": "12359"} [2023-11-01 21:01:29,085][train_inner][INFO] - {"epoch": 17, "update": 16.274, "loss": "3.414", "ntokens": "3194.84", "nsentences": "44.28", "prob_perplexity": "56.899", "code_perplexity": "56.726", "temp": "1.439", "loss_0": "3.256", "loss_1": "0.131", "loss_2": "0.027", "accuracy": "0.47338", "wps": "17788.2", "ups": "5.57", "wpb": "3194.8", "bsz": "44.3", "num_updates": "66000", "lr": "8.4557e-05", "gnorm": "0.686", "loss_scale": "8", "train_wall": "35", "gb_free": "14.1", "wall": "12395"} [2023-11-01 21:02:05,145][train_inner][INFO] - {"epoch": 17, "update": 16.323, "loss": "3.412", "ntokens": "3206.56", "nsentences": "44.16", "prob_perplexity": "56.917", "code_perplexity": "56.733", "temp": "1.437", "loss_0": "3.253", "loss_1": "0.131", "loss_2": "0.027", "accuracy": "0.47226", "wps": "17785.6", "ups": "5.55", "wpb": "3206.6", "bsz": "44.2", "num_updates": "66200", "lr": "8.45063e-05", "gnorm": "0.68", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "12431"} [2023-11-01 21:02:41,473][train_inner][INFO] - {"epoch": 17, "update": 16.372, "loss": "3.386", "ntokens": "3229", "nsentences": "46.96", "prob_perplexity": "57.373", "code_perplexity": "57.168", "temp": "1.436", "loss_0": "3.227", "loss_1": "0.131", "loss_2": "0.027", "accuracy": "0.47903", "wps": "17778.5", "ups": "5.51", "wpb": "3229", "bsz": "47", "num_updates": "66400", "lr": "8.44557e-05", "gnorm": "0.674", "loss_scale": "8", "train_wall": "36", "gb_free": "13.4", "wall": "12468"} [2023-11-01 21:03:17,994][train_inner][INFO] - {"epoch": 17, "update": 16.422, "loss": "3.358", "ntokens": "3128.16", "nsentences": "44.64", "prob_perplexity": "57.106", "code_perplexity": "56.876", "temp": "1.434", "loss_0": "3.199", "loss_1": "0.131", "loss_2": "0.028", "accuracy": "0.48234", "wps": "17131.6", "ups": "5.48", "wpb": "3128.2", "bsz": "44.6", "num_updates": "66600", "lr": "8.44051e-05", "gnorm": "0.692", "loss_scale": "8", "train_wall": "36", "gb_free": "13.9", "wall": "12504"} [2023-11-01 21:03:54,366][train_inner][INFO] - {"epoch": 17, "update": 16.471, "loss": "3.4", "ntokens": "3156.84", "nsentences": "45.2", "prob_perplexity": "57.293", "code_perplexity": "57.052", "temp": "1.433", "loss_0": "3.24", "loss_1": "0.131", "loss_2": "0.028", "accuracy": "0.47596", "wps": "17359.8", "ups": "5.5", "wpb": "3156.8", "bsz": "45.2", "num_updates": "66800", "lr": "8.43544e-05", "gnorm": "0.692", "loss_scale": "8", "train_wall": "36", "gb_free": "13.1", "wall": "12541"} [2023-11-01 21:04:30,571][train_inner][INFO] - {"epoch": 17, "update": 16.52, "loss": "3.516", "ntokens": "3174.36", "nsentences": "40.04", "prob_perplexity": "57.636", "code_perplexity": "57.373", "temp": "1.431", "loss_0": "3.356", "loss_1": "0.131", "loss_2": "0.029", "accuracy": "0.4545", "wps": "17536.3", "ups": "5.52", "wpb": "3174.4", "bsz": "40", "num_updates": "67000", "lr": "8.43038e-05", "gnorm": "0.686", "loss_scale": "8", "train_wall": "36", "gb_free": "13.4", "wall": "12577"} [2023-11-01 21:05:06,013][train_inner][INFO] - {"epoch": 17, "update": 16.57, "loss": "3.46", "ntokens": "3157.6", "nsentences": "41.84", "prob_perplexity": "57.67", "code_perplexity": "57.384", "temp": "1.43", "loss_0": "3.3", "loss_1": "0.131", "loss_2": "0.029", "accuracy": "0.46284", "wps": "17819.6", "ups": "5.64", "wpb": "3157.6", "bsz": "41.8", "num_updates": "67200", "lr": "8.42532e-05", "gnorm": "0.683", "loss_scale": "8", "train_wall": "35", "gb_free": "14.4", "wall": "12612"} [2023-11-01 21:05:41,999][train_inner][INFO] - {"epoch": 17, "update": 16.619, "loss": "3.426", "ntokens": "3192.92", "nsentences": "45.24", "prob_perplexity": "58.081", "code_perplexity": "57.768", "temp": "1.429", "loss_0": "3.266", "loss_1": "0.131", "loss_2": "0.029", "accuracy": "0.47031", "wps": "17746.6", "ups": "5.56", "wpb": "3192.9", "bsz": "45.2", "num_updates": "67400", "lr": "8.42025e-05", "gnorm": "0.687", "loss_scale": "8", "train_wall": "35", "gb_free": "14.2", "wall": "12648"} [2023-11-01 21:06:18,305][train_inner][INFO] - {"epoch": 17, "update": 16.668, "loss": "3.423", "ntokens": "3218", "nsentences": "44.12", "prob_perplexity": "58.381", "code_perplexity": "58.055", "temp": "1.427", "loss_0": "3.263", "loss_1": "0.131", "loss_2": "0.029", "accuracy": "0.46963", "wps": "17728.5", "ups": "5.51", "wpb": "3218", "bsz": "44.1", "num_updates": "67600", "lr": "8.41519e-05", "gnorm": "0.683", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "12685"} [2023-11-01 21:06:54,593][train_inner][INFO] - {"epoch": 17, "update": 16.717, "loss": "3.457", "ntokens": "3202.28", "nsentences": "42.28", "prob_perplexity": "58.717", "code_perplexity": "58.359", "temp": "1.426", "loss_0": "3.295", "loss_1": "0.131", "loss_2": "0.03", "accuracy": "0.46388", "wps": "17650.4", "ups": "5.51", "wpb": "3202.3", "bsz": "42.3", "num_updates": "67800", "lr": "8.41013e-05", "gnorm": "0.684", "loss_scale": "8", "train_wall": "36", "gb_free": "14.2", "wall": "12721"} [2023-11-01 21:07:30,701][train_inner][INFO] - {"epoch": 17, "update": 16.767, "loss": "3.407", "ntokens": "3156.6", "nsentences": "45.4", "prob_perplexity": "58.768", "code_perplexity": "58.384", "temp": "1.424", "loss_0": "3.245", "loss_1": "0.131", "loss_2": "0.03", "accuracy": "0.47509", "wps": "17485.1", "ups": "5.54", "wpb": "3156.6", "bsz": "45.4", "num_updates": "68000", "lr": "8.40506e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "12757"} [2023-11-01 21:08:06,847][train_inner][INFO] - {"epoch": 17, "update": 16.816, "loss": "3.414", "ntokens": "3199.84", "nsentences": "44.92", "prob_perplexity": "59.182", "code_perplexity": "58.782", "temp": "1.423", "loss_0": "3.252", "loss_1": "0.131", "loss_2": "0.032", "accuracy": "0.47281", "wps": "17706.4", "ups": "5.53", "wpb": "3199.8", "bsz": "44.9", "num_updates": "68200", "lr": "8.4e-05", "gnorm": "0.684", "loss_scale": "8", "train_wall": "36", "gb_free": "12.8", "wall": "12793"} [2023-11-01 21:08:42,482][train_inner][INFO] - {"epoch": 17, "update": 16.865, "loss": "3.41", "ntokens": "3196.92", "nsentences": "44.12", "prob_perplexity": "59.633", "code_perplexity": "59.203", "temp": "1.421", "loss_0": "3.247", "loss_1": "0.131", "loss_2": "0.032", "accuracy": "0.47263", "wps": "17943.6", "ups": "5.61", "wpb": "3196.9", "bsz": "44.1", "num_updates": "68400", "lr": "8.39494e-05", "gnorm": "0.683", "loss_scale": "8", "train_wall": "35", "gb_free": "14.2", "wall": "12829"} [2023-11-01 21:09:18,545][train_inner][INFO] - {"epoch": 17, "update": 16.915, "loss": "3.448", "ntokens": "3217.28", "nsentences": "45.36", "prob_perplexity": "59.66", "code_perplexity": "59.202", "temp": "1.42", "loss_0": "3.286", "loss_1": "0.131", "loss_2": "0.032", "accuracy": "0.46736", "wps": "17843.7", "ups": "5.55", "wpb": "3217.3", "bsz": "45.4", "num_updates": "68600", "lr": "8.38987e-05", "gnorm": "0.688", "loss_scale": "8", "train_wall": "35", "gb_free": "13.6", "wall": "12865"} [2023-11-01 21:09:53,900][train_inner][INFO] - {"epoch": 17, "update": 16.964, "loss": "3.423", "ntokens": "3172.8", "nsentences": "45.04", "prob_perplexity": "59.9", "code_perplexity": "59.445", "temp": "1.419", "loss_0": "3.26", "loss_1": "0.131", "loss_2": "0.032", "accuracy": "0.47037", "wps": "17949.2", "ups": "5.66", "wpb": "3172.8", "bsz": "45", "num_updates": "68800", "lr": "8.38481e-05", "gnorm": "0.692", "loss_scale": "8", "train_wall": "35", "gb_free": "15.7", "wall": "12900"} [2023-11-01 21:10:19,737][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 21:10:19,739][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:10:19,758][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 20 [2023-11-01 21:10:45,494][valid][INFO] - {"epoch": 17, "valid_loss": "3.343", "valid_ntokens": "3176.18", "valid_nsentences": "44.1685", "valid_prob_perplexity": "58.718", "valid_code_perplexity": "58.372", "valid_temp": "1.417", "valid_loss_0": "3.179", "valid_loss_1": "0.131", "valid_loss_2": "0.033", "valid_accuracy": "0.48755", "valid_wps": "55905.3", "valid_wpb": "3176.2", "valid_bsz": "44.2", "valid_num_updates": "68946", "valid_best_loss": "3.317"} [2023-11-01 21:10:45,496][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 17 @ 68946 updates [2023-11-01 21:10:45,498][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 21:10:46,946][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 21:10:46,994][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 17 @ 68946 updates, score 3.343) (writing took 1.4983113608323038 seconds) [2023-11-01 21:10:46,995][fairseq_cli.train][INFO] - end of epoch 17 (average epoch stats below) [2023-11-01 21:10:46,997][train][INFO] - {"epoch": 17, "train_loss": "3.422", "train_ntokens": "3188.07", "train_nsentences": "44.2682", "train_prob_perplexity": "57.924", "train_code_perplexity": "57.637", "train_temp": "1.431", "train_loss_0": "3.262", "train_loss_1": "0.131", "train_loss_2": "0.029", "train_accuracy": "0.47113", "train_wps": "17103.2", "train_ups": "5.36", "train_wpb": "3188.1", "train_bsz": "44.3", "train_num_updates": "68946", "train_lr": "8.38111e-05", "train_gnorm": "0.685", "train_loss_scale": "8", "train_train_wall": "716", "train_gb_free": "13.1", "train_wall": "12953"} [2023-11-01 21:10:47,000][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:10:47,022][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 18 [2023-11-01 21:10:47,201][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 21:10:47,227][fairseq.trainer][INFO] - begin training epoch 18 [2023-11-01 21:10:47,227][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 21:10:56,968][train_inner][INFO] - {"epoch": 18, "update": 17.013, "loss": "3.388", "ntokens": "3179.28", "nsentences": "47.2", "prob_perplexity": "60.198", "code_perplexity": "59.719", "temp": "1.417", "loss_0": "3.225", "loss_1": "0.131", "loss_2": "0.032", "accuracy": "0.47847", "wps": "10082.5", "ups": "3.17", "wpb": "3179.3", "bsz": "47.2", "num_updates": "69000", "lr": "8.37975e-05", "gnorm": "0.695", "loss_scale": "8", "train_wall": "35", "gb_free": "15.1", "wall": "12963"} [2023-11-01 21:11:32,297][train_inner][INFO] - {"epoch": 18, "update": 17.063, "loss": "3.403", "ntokens": "3119.32", "nsentences": "46.28", "prob_perplexity": "60.133", "code_perplexity": "59.628", "temp": "1.416", "loss_0": "3.239", "loss_1": "0.131", "loss_2": "0.033", "accuracy": "0.4762", "wps": "17666.9", "ups": "5.66", "wpb": "3119.3", "bsz": "46.3", "num_updates": "69200", "lr": "8.37468e-05", "gnorm": "0.713", "loss_scale": "8", "train_wall": "35", "gb_free": "15.4", "wall": "12999"} [2023-11-01 21:12:08,489][train_inner][INFO] - {"epoch": 18, "update": 17.112, "loss": "3.452", "ntokens": "3199.96", "nsentences": "44.32", "prob_perplexity": "60.749", "code_perplexity": "60.213", "temp": "1.414", "loss_0": "3.288", "loss_1": "0.131", "loss_2": "0.033", "accuracy": "0.46564", "wps": "17684.8", "ups": "5.53", "wpb": "3200", "bsz": "44.3", "num_updates": "69400", "lr": "8.36962e-05", "gnorm": "0.692", "loss_scale": "8", "train_wall": "36", "gb_free": "14.9", "wall": "13035"} [2023-11-01 21:12:44,476][train_inner][INFO] - {"epoch": 18, "update": 17.161, "loss": "3.397", "ntokens": "3173.08", "nsentences": "47.16", "prob_perplexity": "60.972", "code_perplexity": "60.398", "temp": "1.413", "loss_0": "3.232", "loss_1": "0.131", "loss_2": "0.034", "accuracy": "0.47648", "wps": "17635.4", "ups": "5.56", "wpb": "3173.1", "bsz": "47.2", "num_updates": "69600", "lr": "8.36456e-05", "gnorm": "0.694", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "13071"} [2023-11-01 21:13:19,893][train_inner][INFO] - {"epoch": 18, "update": 17.211, "loss": "3.447", "ntokens": "3167.04", "nsentences": "42.56", "prob_perplexity": "61.211", "code_perplexity": "60.588", "temp": "1.411", "loss_0": "3.282", "loss_1": "0.13", "loss_2": "0.034", "accuracy": "0.46423", "wps": "17885.4", "ups": "5.65", "wpb": "3167", "bsz": "42.6", "num_updates": "69800", "lr": "8.35949e-05", "gnorm": "0.697", "loss_scale": "8", "train_wall": "35", "gb_free": "13.5", "wall": "13106"} [2023-11-01 21:13:55,662][train_inner][INFO] - {"epoch": 18, "update": 17.26, "loss": "3.454", "ntokens": "3168.32", "nsentences": "44", "prob_perplexity": "61.315", "code_perplexity": "60.675", "temp": "1.41", "loss_0": "3.288", "loss_1": "0.13", "loss_2": "0.035", "accuracy": "0.46416", "wps": "17716.5", "ups": "5.59", "wpb": "3168.3", "bsz": "44", "num_updates": "70000", "lr": "8.35443e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "35", "gb_free": "15.4", "wall": "13142"} [2023-11-01 21:14:31,323][train_inner][INFO] - {"epoch": 18, "update": 17.309, "loss": "3.511", "ntokens": "3170.56", "nsentences": "41.48", "prob_perplexity": "61.42", "code_perplexity": "60.749", "temp": "1.409", "loss_0": "3.346", "loss_1": "0.13", "loss_2": "0.035", "accuracy": "0.45307", "wps": "17782.9", "ups": "5.61", "wpb": "3170.6", "bsz": "41.5", "num_updates": "70200", "lr": "8.34937e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "35", "gb_free": "16", "wall": "13178"} [2023-11-01 21:15:06,957][train_inner][INFO] - {"epoch": 18, "update": 17.358, "loss": "3.435", "ntokens": "3184.36", "nsentences": "44.24", "prob_perplexity": "61.667", "code_perplexity": "60.952", "temp": "1.407", "loss_0": "3.27", "loss_1": "0.13", "loss_2": "0.035", "accuracy": "0.46641", "wps": "17873.8", "ups": "5.61", "wpb": "3184.4", "bsz": "44.2", "num_updates": "70400", "lr": "8.3443e-05", "gnorm": "0.717", "loss_scale": "8", "train_wall": "35", "gb_free": "14.1", "wall": "13213"} [2023-11-01 21:15:43,024][train_inner][INFO] - {"epoch": 18, "update": 17.408, "loss": "3.46", "ntokens": "3162.96", "nsentences": "43.4", "prob_perplexity": "62.145", "code_perplexity": "61.404", "temp": "1.406", "loss_0": "3.295", "loss_1": "0.13", "loss_2": "0.035", "accuracy": "0.46331", "wps": "17540.4", "ups": "5.55", "wpb": "3163", "bsz": "43.4", "num_updates": "70600", "lr": "8.33924e-05", "gnorm": "0.715", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "13249"} [2023-11-01 21:16:18,948][train_inner][INFO] - {"epoch": 18, "update": 17.457, "loss": "3.475", "ntokens": "3194.24", "nsentences": "43.52", "prob_perplexity": "62.332", "code_perplexity": "61.579", "temp": "1.404", "loss_0": "3.309", "loss_1": "0.13", "loss_2": "0.035", "accuracy": "0.46114", "wps": "17784.5", "ups": "5.57", "wpb": "3194.2", "bsz": "43.5", "num_updates": "70800", "lr": "8.33418e-05", "gnorm": "0.695", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "13285"} [2023-11-01 21:16:55,152][train_inner][INFO] - {"epoch": 18, "update": 17.506, "loss": "3.474", "ntokens": "3217.52", "nsentences": "44.4", "prob_perplexity": "62.462", "code_perplexity": "61.674", "temp": "1.403", "loss_0": "3.308", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.46088", "wps": "17775.5", "ups": "5.52", "wpb": "3217.5", "bsz": "44.4", "num_updates": "71000", "lr": "8.32911e-05", "gnorm": "0.725", "loss_scale": "8", "train_wall": "36", "gb_free": "13.7", "wall": "13321"} [2023-11-01 21:17:31,344][train_inner][INFO] - {"epoch": 18, "update": 17.556, "loss": "3.479", "ntokens": "3215.64", "nsentences": "43.88", "prob_perplexity": "62.798", "code_perplexity": "61.925", "temp": "1.402", "loss_0": "3.312", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.4592", "wps": "17771.3", "ups": "5.53", "wpb": "3215.6", "bsz": "43.9", "num_updates": "71200", "lr": "8.32405e-05", "gnorm": "0.695", "loss_scale": "8", "train_wall": "36", "gb_free": "12.5", "wall": "13358"} [2023-11-01 21:18:07,421][train_inner][INFO] - {"epoch": 18, "update": 17.605, "loss": "3.425", "ntokens": "3240.36", "nsentences": "45.28", "prob_perplexity": "63.494", "code_perplexity": "62.629", "temp": "1.4", "loss_0": "3.259", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.46698", "wps": "17964.9", "ups": "5.54", "wpb": "3240.4", "bsz": "45.3", "num_updates": "71400", "lr": "8.31899e-05", "gnorm": "0.688", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "13394"} [2023-11-01 21:18:43,636][train_inner][INFO] - {"epoch": 18, "update": 17.654, "loss": "3.455", "ntokens": "3161.52", "nsentences": "42.4", "prob_perplexity": "63.745", "code_perplexity": "62.838", "temp": "1.399", "loss_0": "3.29", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.46074", "wps": "17461.2", "ups": "5.52", "wpb": "3161.5", "bsz": "42.4", "num_updates": "71600", "lr": "8.31392e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "36", "gb_free": "14", "wall": "13430"} [2023-11-01 21:19:19,270][train_inner][INFO] - {"epoch": 18, "update": 17.704, "loss": "3.414", "ntokens": "3191.6", "nsentences": "46.4", "prob_perplexity": "64.256", "code_perplexity": "63.357", "temp": "1.397", "loss_0": "3.248", "loss_1": "0.13", "loss_2": "0.035", "accuracy": "0.47078", "wps": "17914.6", "ups": "5.61", "wpb": "3191.6", "bsz": "46.4", "num_updates": "71800", "lr": "8.30886e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "35", "gb_free": "12.7", "wall": "13466"} [2023-11-01 21:19:55,036][train_inner][INFO] - {"epoch": 18, "update": 17.753, "loss": "3.442", "ntokens": "3196.52", "nsentences": "45.92", "prob_perplexity": "64.04", "code_perplexity": "63.149", "temp": "1.396", "loss_0": "3.276", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.46601", "wps": "17875.8", "ups": "5.59", "wpb": "3196.5", "bsz": "45.9", "num_updates": "72000", "lr": "8.3038e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "15.5", "wall": "13501"} [2023-11-01 21:20:30,719][train_inner][INFO] - {"epoch": 18, "update": 17.802, "loss": "3.48", "ntokens": "3197.8", "nsentences": "44.2", "prob_perplexity": "64.711", "code_perplexity": "63.76", "temp": "1.395", "loss_0": "3.314", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.45871", "wps": "17924.1", "ups": "5.61", "wpb": "3197.8", "bsz": "44.2", "num_updates": "72200", "lr": "8.29873e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "35", "gb_free": "14", "wall": "13537"} [2023-11-01 21:21:06,913][train_inner][INFO] - {"epoch": 18, "update": 17.852, "loss": "3.478", "ntokens": "3200.08", "nsentences": "41.6", "prob_perplexity": "64.559", "code_perplexity": "63.592", "temp": "1.393", "loss_0": "3.312", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.45562", "wps": "17684", "ups": "5.53", "wpb": "3200.1", "bsz": "41.6", "num_updates": "72400", "lr": "8.29367e-05", "gnorm": "0.703", "loss_scale": "8", "train_wall": "36", "gb_free": "14.5", "wall": "13573"} [2023-11-01 21:21:43,327][train_inner][INFO] - {"epoch": 18, "update": 17.901, "loss": "3.447", "ntokens": "3209.16", "nsentences": "45.16", "prob_perplexity": "64.678", "code_perplexity": "63.727", "temp": "1.392", "loss_0": "3.281", "loss_1": "0.13", "loss_2": "0.036", "accuracy": "0.46472", "wps": "17627.2", "ups": "5.49", "wpb": "3209.2", "bsz": "45.2", "num_updates": "72600", "lr": "8.28861e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "36", "gb_free": "13.9", "wall": "13610"} [2023-11-01 21:22:19,663][train_inner][INFO] - {"epoch": 18, "update": 17.95, "loss": "3.431", "ntokens": "3151.68", "nsentences": "46.52", "prob_perplexity": "65.701", "code_perplexity": "64.654", "temp": "1.39", "loss_0": "3.265", "loss_1": "0.129", "loss_2": "0.036", "accuracy": "0.46868", "wps": "17348.5", "ups": "5.5", "wpb": "3151.7", "bsz": "46.5", "num_updates": "72800", "lr": "8.28354e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "36", "gb_free": "15.5", "wall": "13646"} [2023-11-01 21:22:55,892][train_inner][INFO] - {"epoch": 18, "update": 18.0, "loss": "3.494", "ntokens": "3187.4", "nsentences": "42.24", "prob_perplexity": "65.765", "code_perplexity": "64.722", "temp": "1.389", "loss_0": "3.329", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.45404", "wps": "17596.7", "ups": "5.52", "wpb": "3187.4", "bsz": "42.2", "num_updates": "73000", "lr": "8.27848e-05", "gnorm": "0.702", "loss_scale": "8", "train_wall": "36", "gb_free": "13.1", "wall": "13682"} [2023-11-01 21:22:56,239][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 21:22:56,241][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:22:56,259][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 21 [2023-11-01 21:23:21,835][valid][INFO] - {"epoch": 18, "valid_loss": "3.308", "valid_ntokens": "3164.56", "valid_nsentences": "44.1685", "valid_prob_perplexity": "64.828", "valid_code_perplexity": "64.039", "valid_temp": "1.388", "valid_loss_0": "3.142", "valid_loss_1": "0.13", "valid_loss_2": "0.037", "valid_accuracy": "0.49034", "valid_wps": "56075.6", "valid_wpb": "3164.6", "valid_bsz": "44.2", "valid_num_updates": "73002", "valid_best_loss": "3.308"} [2023-11-01 21:23:21,837][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 18 @ 73002 updates [2023-11-01 21:23:21,839][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 21:23:23,254][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 21:23:24,200][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 18 @ 73002 updates, score 3.308) (writing took 2.362589519005269 seconds) [2023-11-01 21:23:24,200][fairseq_cli.train][INFO] - end of epoch 18 (average epoch stats below) [2023-11-01 21:23:24,203][train][INFO] - {"epoch": 18, "train_loss": "3.452", "train_ntokens": "3185.93", "train_nsentences": "44.2682", "train_prob_perplexity": "62.873", "train_code_perplexity": "62.08", "train_temp": "1.403", "train_loss_0": "3.287", "train_loss_1": "0.13", "train_loss_2": "0.035", "train_accuracy": "0.4639", "train_wps": "17065.6", "train_ups": "5.36", "train_wpb": "3185.9", "train_bsz": "44.3", "train_num_updates": "73002", "train_lr": "8.27843e-05", "train_gnorm": "0.702", "train_loss_scale": "8", "train_train_wall": "716", "train_gb_free": "14.5", "train_wall": "13710"} [2023-11-01 21:23:24,206][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:23:24,225][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 19 [2023-11-01 21:23:24,397][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 21:23:24,424][fairseq.trainer][INFO] - begin training epoch 19 [2023-11-01 21:23:24,424][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 21:24:00,435][train_inner][INFO] - {"epoch": 19, "update": 18.049, "loss": "3.485", "ntokens": "3209.08", "nsentences": "44.56", "prob_perplexity": "65.926", "code_perplexity": "64.869", "temp": "1.388", "loss_0": "3.32", "loss_1": "0.129", "loss_2": "0.036", "accuracy": "0.45693", "wps": "9944.4", "ups": "3.1", "wpb": "3209.1", "bsz": "44.6", "num_updates": "73200", "lr": "8.27342e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "36", "gb_free": "13.7", "wall": "13747"} [2023-11-01 21:24:36,485][train_inner][INFO] - {"epoch": 19, "update": 18.098, "loss": "3.497", "ntokens": "3231.12", "nsentences": "44.04", "prob_perplexity": "66.489", "code_perplexity": "65.437", "temp": "1.386", "loss_0": "3.333", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.45381", "wps": "17934.4", "ups": "5.55", "wpb": "3231.1", "bsz": "44", "num_updates": "73400", "lr": "8.26835e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "35", "gb_free": "13.3", "wall": "13783"} [2023-11-01 21:25:11,735][train_inner][INFO] - {"epoch": 19, "update": 18.147, "loss": "3.504", "ntokens": "3130.64", "nsentences": "42.24", "prob_perplexity": "66.035", "code_perplexity": "64.951", "temp": "1.385", "loss_0": "3.34", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.45325", "wps": "17763.6", "ups": "5.67", "wpb": "3130.6", "bsz": "42.2", "num_updates": "73600", "lr": "8.26329e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "35", "gb_free": "13.4", "wall": "13818"} [2023-11-01 21:25:47,453][train_inner][INFO] - {"epoch": 19, "update": 18.197, "loss": "3.501", "ntokens": "3195.4", "nsentences": "43.76", "prob_perplexity": "66.405", "code_perplexity": "65.352", "temp": "1.384", "loss_0": "3.337", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.45392", "wps": "17893.3", "ups": "5.6", "wpb": "3195.4", "bsz": "43.8", "num_updates": "73800", "lr": "8.25823e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "13.9", "wall": "13854"} [2023-11-01 21:26:23,238][train_inner][INFO] - {"epoch": 19, "update": 18.246, "loss": "3.461", "ntokens": "3139.64", "nsentences": "43.36", "prob_perplexity": "66.827", "code_perplexity": "65.732", "temp": "1.382", "loss_0": "3.296", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.46022", "wps": "17548.2", "ups": "5.59", "wpb": "3139.6", "bsz": "43.4", "num_updates": "74000", "lr": "8.25316e-05", "gnorm": "0.712", "loss_scale": "8", "train_wall": "35", "gb_free": "13.5", "wall": "13889"} [2023-11-01 21:26:59,625][train_inner][INFO] - {"epoch": 19, "update": 18.295, "loss": "3.503", "ntokens": "3216.92", "nsentences": "43.2", "prob_perplexity": "67.074", "code_perplexity": "65.966", "temp": "1.381", "loss_0": "3.339", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.45239", "wps": "17682.9", "ups": "5.5", "wpb": "3216.9", "bsz": "43.2", "num_updates": "74200", "lr": "8.2481e-05", "gnorm": "0.693", "loss_scale": "8", "train_wall": "36", "gb_free": "13.5", "wall": "13926"} [2023-11-01 21:27:35,693][train_inner][INFO] - {"epoch": 19, "update": 18.345, "loss": "3.403", "ntokens": "3175.4", "nsentences": "46.2", "prob_perplexity": "67.393", "code_perplexity": "66.238", "temp": "1.379", "loss_0": "3.239", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.4712", "wps": "17608.9", "ups": "5.55", "wpb": "3175.4", "bsz": "46.2", "num_updates": "74400", "lr": "8.24304e-05", "gnorm": "0.703", "loss_scale": "8", "train_wall": "35", "gb_free": "14.5", "wall": "13962"} [2023-11-01 21:28:11,686][train_inner][INFO] - {"epoch": 19, "update": 18.394, "loss": "3.428", "ntokens": "3163.72", "nsentences": "45.16", "prob_perplexity": "67.573", "code_perplexity": "66.429", "temp": "1.378", "loss_0": "3.264", "loss_1": "0.129", "loss_2": "0.035", "accuracy": "0.46583", "wps": "17580.5", "ups": "5.56", "wpb": "3163.7", "bsz": "45.2", "num_updates": "74600", "lr": "8.23797e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "12.8", "wall": "13998"} [2023-11-01 21:28:47,878][train_inner][INFO] - {"epoch": 19, "update": 18.443, "loss": "3.423", "ntokens": "3157.2", "nsentences": "45.36", "prob_perplexity": "67.779", "code_perplexity": "66.582", "temp": "1.377", "loss_0": "3.26", "loss_1": "0.129", "loss_2": "0.034", "accuracy": "0.46768", "wps": "17447.8", "ups": "5.53", "wpb": "3157.2", "bsz": "45.4", "num_updates": "74800", "lr": "8.23291e-05", "gnorm": "0.708", "loss_scale": "8", "train_wall": "36", "gb_free": "13.6", "wall": "14034"} [2023-11-01 21:29:23,920][train_inner][INFO] - {"epoch": 19, "update": 18.493, "loss": "3.474", "ntokens": "3164.24", "nsentences": "42.6", "prob_perplexity": "67.64", "code_perplexity": "66.438", "temp": "1.375", "loss_0": "3.311", "loss_1": "0.129", "loss_2": "0.034", "accuracy": "0.45621", "wps": "17559.8", "ups": "5.55", "wpb": "3164.2", "bsz": "42.6", "num_updates": "75000", "lr": "8.22785e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "35", "gb_free": "13.1", "wall": "14070"} [2023-11-01 21:29:23,921][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 21:29:23,923][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:29:23,944][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 22 [2023-11-01 21:29:49,412][valid][INFO] - {"epoch": 19, "valid_loss": "3.264", "valid_ntokens": "3148.49", "valid_nsentences": "44.1685", "valid_prob_perplexity": "66.7", "valid_code_perplexity": "65.804", "valid_temp": "1.375", "valid_loss_0": "3.102", "valid_loss_1": "0.129", "valid_loss_2": "0.033", "valid_accuracy": "0.49615", "valid_wps": "56041.2", "valid_wpb": "3148.5", "valid_bsz": "44.2", "valid_num_updates": "75000", "valid_best_loss": "3.264"} [2023-11-01 21:29:49,414][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 19 @ 75000 updates [2023-11-01 21:29:49,416][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_19_75000.pt [2023-11-01 21:29:50,754][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_19_75000.pt [2023-11-01 21:29:52,637][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_19_75000.pt (epoch 19 @ 75000 updates, score 3.264) (writing took 3.223350137937814 seconds) [2023-11-01 21:30:29,222][train_inner][INFO] - {"epoch": 19, "update": 18.542, "loss": "3.473", "ntokens": "3168.52", "nsentences": "43.08", "prob_perplexity": "67.989", "code_perplexity": "66.812", "temp": "1.374", "loss_0": "3.31", "loss_1": "0.129", "loss_2": "0.034", "accuracy": "0.45625", "wps": "9704.5", "ups": "3.06", "wpb": "3168.5", "bsz": "43.1", "num_updates": "75200", "lr": "8.22278e-05", "gnorm": "0.713", "loss_scale": "8", "train_wall": "36", "gb_free": "14", "wall": "14135"} [2023-11-01 21:31:05,589][train_inner][INFO] - {"epoch": 19, "update": 18.591, "loss": "3.378", "ntokens": "3156.6", "nsentences": "45.68", "prob_perplexity": "68.671", "code_perplexity": "67.411", "temp": "1.373", "loss_0": "3.216", "loss_1": "0.129", "loss_2": "0.034", "accuracy": "0.47317", "wps": "17360.6", "ups": "5.5", "wpb": "3156.6", "bsz": "45.7", "num_updates": "75400", "lr": "8.21772e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "14172"} [2023-11-01 21:31:42,042][train_inner][INFO] - {"epoch": 19, "update": 18.641, "loss": "3.439", "ntokens": "3196.04", "nsentences": "45.08", "prob_perplexity": "68.543", "code_perplexity": "67.29", "temp": "1.371", "loss_0": "3.277", "loss_1": "0.129", "loss_2": "0.034", "accuracy": "0.46316", "wps": "17536.2", "ups": "5.49", "wpb": "3196", "bsz": "45.1", "num_updates": "75600", "lr": "8.21266e-05", "gnorm": "0.702", "loss_scale": "8", "train_wall": "36", "gb_free": "15.2", "wall": "14208"} [2023-11-01 21:32:18,242][train_inner][INFO] - {"epoch": 19, "update": 18.69, "loss": "3.468", "ntokens": "3212.68", "nsentences": "43.52", "prob_perplexity": "68.936", "code_perplexity": "67.662", "temp": "1.37", "loss_0": "3.305", "loss_1": "0.129", "loss_2": "0.034", "accuracy": "0.45713", "wps": "17750.5", "ups": "5.53", "wpb": "3212.7", "bsz": "43.5", "num_updates": "75800", "lr": "8.20759e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "36", "gb_free": "13.5", "wall": "14244"} [2023-11-01 21:32:54,778][train_inner][INFO] - {"epoch": 19, "update": 18.739, "loss": "3.528", "ntokens": "3203.04", "nsentences": "41.88", "prob_perplexity": "69.039", "code_perplexity": "67.747", "temp": "1.368", "loss_0": "3.366", "loss_1": "0.129", "loss_2": "0.033", "accuracy": "0.44678", "wps": "17534.6", "ups": "5.47", "wpb": "3203", "bsz": "41.9", "num_updates": "76000", "lr": "8.20253e-05", "gnorm": "0.707", "loss_scale": "8", "train_wall": "36", "gb_free": "14.7", "wall": "14281"} [2023-11-01 21:33:31,699][train_inner][INFO] - {"epoch": 19, "update": 18.788, "loss": "3.466", "ntokens": "3184.04", "nsentences": "43.72", "prob_perplexity": "69.284", "code_perplexity": "67.992", "temp": "1.367", "loss_0": "3.304", "loss_1": "0.129", "loss_2": "0.033", "accuracy": "0.45786", "wps": "17248.9", "ups": "5.42", "wpb": "3184", "bsz": "43.7", "num_updates": "76200", "lr": "8.19747e-05", "gnorm": "0.702", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "14318"} [2023-11-01 21:34:07,931][train_inner][INFO] - {"epoch": 19, "update": 18.838, "loss": "3.482", "ntokens": "3190", "nsentences": "43.84", "prob_perplexity": "69.606", "code_perplexity": "68.305", "temp": "1.366", "loss_0": "3.32", "loss_1": "0.129", "loss_2": "0.033", "accuracy": "0.45457", "wps": "17609.7", "ups": "5.52", "wpb": "3190", "bsz": "43.8", "num_updates": "76400", "lr": "8.19241e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "36", "gb_free": "14.6", "wall": "14354"} [2023-11-01 21:34:44,435][train_inner][INFO] - {"epoch": 19, "update": 18.887, "loss": "3.455", "ntokens": "3217.32", "nsentences": "45.72", "prob_perplexity": "69.893", "code_perplexity": "68.563", "temp": "1.364", "loss_0": "3.293", "loss_1": "0.129", "loss_2": "0.033", "accuracy": "0.46002", "wps": "17628.7", "ups": "5.48", "wpb": "3217.3", "bsz": "45.7", "num_updates": "76600", "lr": "8.18734e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "14391"} [2023-11-01 21:35:20,719][train_inner][INFO] - {"epoch": 19, "update": 18.936, "loss": "3.429", "ntokens": "3198.64", "nsentences": "45.76", "prob_perplexity": "70.096", "code_perplexity": "68.743", "temp": "1.363", "loss_0": "3.268", "loss_1": "0.128", "loss_2": "0.033", "accuracy": "0.46478", "wps": "17632.3", "ups": "5.51", "wpb": "3198.6", "bsz": "45.8", "num_updates": "76800", "lr": "8.18228e-05", "gnorm": "0.698", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "14427"} [2023-11-01 21:35:57,457][train_inner][INFO] - {"epoch": 19, "update": 18.986, "loss": "3.423", "ntokens": "3196.48", "nsentences": "45.48", "prob_perplexity": "70.34", "code_perplexity": "68.934", "temp": "1.362", "loss_0": "3.262", "loss_1": "0.128", "loss_2": "0.033", "accuracy": "0.46432", "wps": "17402.6", "ups": "5.44", "wpb": "3196.5", "bsz": "45.5", "num_updates": "77000", "lr": "8.17722e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "36", "gb_free": "17.4", "wall": "14464"} [2023-11-01 21:36:07,774][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 21:36:07,775][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:36:07,794][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 23 [2023-11-01 21:36:33,476][valid][INFO] - {"epoch": 19, "valid_loss": "3.275", "valid_ntokens": "3161.65", "valid_nsentences": "44.1685", "valid_prob_perplexity": "69.51", "valid_code_perplexity": "68.32", "valid_temp": "1.361", "valid_loss_0": "3.114", "valid_loss_1": "0.129", "valid_loss_2": "0.033", "valid_accuracy": "0.49369", "valid_wps": "55809.6", "valid_wpb": "3161.6", "valid_bsz": "44.2", "valid_num_updates": "77058", "valid_best_loss": "3.264"} [2023-11-01 21:36:33,478][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 19 @ 77058 updates [2023-11-01 21:36:33,480][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 21:36:34,895][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 21:36:34,942][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 19 @ 77058 updates, score 3.275) (writing took 1.4643314089626074 seconds) [2023-11-01 21:36:34,943][fairseq_cli.train][INFO] - end of epoch 19 (average epoch stats below) [2023-11-01 21:36:34,945][train][INFO] - {"epoch": 19, "train_loss": "3.46", "train_ntokens": "3184.82", "train_nsentences": "44.2682", "train_prob_perplexity": "68.113", "train_code_perplexity": "66.906", "train_temp": "1.374", "train_loss_0": "3.297", "train_loss_1": "0.129", "train_loss_2": "0.034", "train_accuracy": "0.45973", "train_wps": "16336.1", "train_ups": "5.13", "train_wpb": "3184.8", "train_bsz": "44.3", "train_num_updates": "77058", "train_lr": "8.17575e-05", "train_gnorm": "0.704", "train_loss_scale": "8", "train_train_wall": "722", "train_gb_free": "12.9", "train_wall": "14501"} [2023-11-01 21:36:34,948][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:36:34,969][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 20 [2023-11-01 21:36:35,144][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 21:36:35,171][fairseq.trainer][INFO] - begin training epoch 20 [2023-11-01 21:36:35,171][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 21:37:00,581][train_inner][INFO] - {"epoch": 20, "update": 19.035, "loss": "3.42", "ntokens": "3188.04", "nsentences": "44.48", "prob_perplexity": "70.682", "code_perplexity": "69.299", "temp": "1.36", "loss_0": "3.259", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.46434", "wps": "10103.5", "ups": "3.17", "wpb": "3188", "bsz": "44.5", "num_updates": "77200", "lr": "8.17215e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "35", "gb_free": "14.6", "wall": "14527"} [2023-11-01 21:37:36,210][train_inner][INFO] - {"epoch": 20, "update": 19.084, "loss": "3.478", "ntokens": "3217.16", "nsentences": "43.12", "prob_perplexity": "70.92", "code_perplexity": "69.517", "temp": "1.359", "loss_0": "3.317", "loss_1": "0.128", "loss_2": "0.033", "accuracy": "0.4534", "wps": "18060.5", "ups": "5.61", "wpb": "3217.2", "bsz": "43.1", "num_updates": "77400", "lr": "8.16709e-05", "gnorm": "0.699", "loss_scale": "8", "train_wall": "35", "gb_free": "13.2", "wall": "14562"} [2023-11-01 21:38:12,227][train_inner][INFO] - {"epoch": 20, "update": 19.134, "loss": "3.493", "ntokens": "3205.84", "nsentences": "43.04", "prob_perplexity": "71.037", "code_perplexity": "69.625", "temp": "1.358", "loss_0": "3.333", "loss_1": "0.128", "loss_2": "0.033", "accuracy": "0.45223", "wps": "17802.7", "ups": "5.55", "wpb": "3205.8", "bsz": "43", "num_updates": "77600", "lr": "8.16203e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "12.8", "wall": "14598"} [2023-11-01 21:38:47,902][train_inner][INFO] - {"epoch": 20, "update": 19.183, "loss": "3.437", "ntokens": "3174.2", "nsentences": "46", "prob_perplexity": "71.322", "code_perplexity": "69.838", "temp": "1.356", "loss_0": "3.276", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.46486", "wps": "17796.6", "ups": "5.61", "wpb": "3174.2", "bsz": "46", "num_updates": "77800", "lr": "8.15696e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "13.8", "wall": "14634"} [2023-11-01 21:39:24,199][train_inner][INFO] - {"epoch": 20, "update": 19.232, "loss": "3.453", "ntokens": "3164.52", "nsentences": "46.04", "prob_perplexity": "71.693", "code_perplexity": "70.206", "temp": "1.355", "loss_0": "3.292", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.46153", "wps": "17437.7", "ups": "5.51", "wpb": "3164.5", "bsz": "46", "num_updates": "78000", "lr": "8.1519e-05", "gnorm": "0.708", "loss_scale": "8", "train_wall": "36", "gb_free": "12.7", "wall": "14670"} [2023-11-01 21:40:00,690][train_inner][INFO] - {"epoch": 20, "update": 19.282, "loss": "3.459", "ntokens": "3190.68", "nsentences": "43.8", "prob_perplexity": "72.269", "code_perplexity": "70.725", "temp": "1.353", "loss_0": "3.299", "loss_1": "0.128", "loss_2": "0.033", "accuracy": "0.45728", "wps": "17488.6", "ups": "5.48", "wpb": "3190.7", "bsz": "43.8", "num_updates": "78200", "lr": "8.14684e-05", "gnorm": "0.698", "loss_scale": "8", "train_wall": "36", "gb_free": "13.5", "wall": "14707"} [2023-11-01 21:40:36,456][train_inner][INFO] - {"epoch": 20, "update": 19.331, "loss": "3.52", "ntokens": "3233.12", "nsentences": "42.52", "prob_perplexity": "72.579", "code_perplexity": "71.064", "temp": "1.352", "loss_0": "3.36", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.44683", "wps": "18080", "ups": "5.59", "wpb": "3233.1", "bsz": "42.5", "num_updates": "78400", "lr": "8.14177e-05", "gnorm": "0.696", "loss_scale": "8", "train_wall": "35", "gb_free": "14.3", "wall": "14743"} [2023-11-01 21:41:11,947][train_inner][INFO] - {"epoch": 20, "update": 19.38, "loss": "3.405", "ntokens": "3157.8", "nsentences": "45", "prob_perplexity": "72.436", "code_perplexity": "70.934", "temp": "1.351", "loss_0": "3.246", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.46732", "wps": "17796", "ups": "5.64", "wpb": "3157.8", "bsz": "45", "num_updates": "78600", "lr": "8.13671e-05", "gnorm": "0.699", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "14778"} [2023-11-01 21:41:47,631][train_inner][INFO] - {"epoch": 20, "update": 19.429, "loss": "3.383", "ntokens": "3163.84", "nsentences": "47.36", "prob_perplexity": "72.612", "code_perplexity": "71.118", "temp": "1.349", "loss_0": "3.223", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.47359", "wps": "17733.9", "ups": "5.61", "wpb": "3163.8", "bsz": "47.4", "num_updates": "78800", "lr": "8.13165e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "15", "wall": "14814"} [2023-11-01 21:42:23,552][train_inner][INFO] - {"epoch": 20, "update": 19.479, "loss": "3.402", "ntokens": "3200.76", "nsentences": "44.84", "prob_perplexity": "73.131", "code_perplexity": "71.604", "temp": "1.348", "loss_0": "3.243", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.46546", "wps": "17821.8", "ups": "5.57", "wpb": "3200.8", "bsz": "44.8", "num_updates": "79000", "lr": "8.12658e-05", "gnorm": "0.699", "loss_scale": "8", "train_wall": "35", "gb_free": "15.8", "wall": "14850"} [2023-11-01 21:42:59,703][train_inner][INFO] - {"epoch": 20, "update": 19.528, "loss": "3.416", "ntokens": "3203.64", "nsentences": "44.92", "prob_perplexity": "73.051", "code_perplexity": "71.514", "temp": "1.347", "loss_0": "3.256", "loss_1": "0.128", "loss_2": "0.032", "accuracy": "0.46531", "wps": "17724.9", "ups": "5.53", "wpb": "3203.6", "bsz": "44.9", "num_updates": "79200", "lr": "8.12152e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "14886"} [2023-11-01 21:43:36,140][train_inner][INFO] - {"epoch": 20, "update": 19.577, "loss": "3.471", "ntokens": "3183.84", "nsentences": "42.2", "prob_perplexity": "73.856", "code_perplexity": "72.294", "temp": "1.345", "loss_0": "3.312", "loss_1": "0.128", "loss_2": "0.031", "accuracy": "0.45316", "wps": "17476.8", "ups": "5.49", "wpb": "3183.8", "bsz": "42.2", "num_updates": "79400", "lr": "8.11646e-05", "gnorm": "0.701", "loss_scale": "8", "train_wall": "36", "gb_free": "13.4", "wall": "14922"} [2023-11-01 21:44:12,299][train_inner][INFO] - {"epoch": 20, "update": 19.627, "loss": "3.435", "ntokens": "3169.48", "nsentences": "44.48", "prob_perplexity": "73.617", "code_perplexity": "72.066", "temp": "1.344", "loss_0": "3.276", "loss_1": "0.128", "loss_2": "0.031", "accuracy": "0.46168", "wps": "17531.7", "ups": "5.53", "wpb": "3169.5", "bsz": "44.5", "num_updates": "79600", "lr": "8.11139e-05", "gnorm": "0.708", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "14959"} [2023-11-01 21:44:48,885][train_inner][INFO] - {"epoch": 20, "update": 19.676, "loss": "3.435", "ntokens": "3168.8", "nsentences": "42.76", "prob_perplexity": "74.112", "code_perplexity": "72.528", "temp": "1.343", "loss_0": "3.277", "loss_1": "0.128", "loss_2": "0.031", "accuracy": "0.45901", "wps": "17323.8", "ups": "5.47", "wpb": "3168.8", "bsz": "42.8", "num_updates": "79800", "lr": "8.10633e-05", "gnorm": "0.706", "loss_scale": "16", "train_wall": "36", "gb_free": "13.3", "wall": "14995"} [2023-11-01 21:45:25,069][train_inner][INFO] - {"epoch": 20, "update": 19.725, "loss": "3.426", "ntokens": "3191.64", "nsentences": "44.2", "prob_perplexity": "74.343", "code_perplexity": "72.738", "temp": "1.341", "loss_0": "3.267", "loss_1": "0.128", "loss_2": "0.031", "accuracy": "0.46174", "wps": "17642.3", "ups": "5.53", "wpb": "3191.6", "bsz": "44.2", "num_updates": "80000", "lr": "8.10127e-05", "gnorm": "0.695", "loss_scale": "16", "train_wall": "36", "gb_free": "14.2", "wall": "15031"} [2023-11-01 21:46:02,062][train_inner][INFO] - {"epoch": 20, "update": 19.775, "loss": "3.456", "ntokens": "3196.12", "nsentences": "43.88", "prob_perplexity": "74.57", "code_perplexity": "72.909", "temp": "1.34", "loss_0": "3.297", "loss_1": "0.127", "loss_2": "0.031", "accuracy": "0.45793", "wps": "17280.8", "ups": "5.41", "wpb": "3196.1", "bsz": "43.9", "num_updates": "80200", "lr": "8.0962e-05", "gnorm": "0.706", "loss_scale": "16", "train_wall": "36", "gb_free": "13.8", "wall": "15068"} [2023-11-01 21:46:38,872][train_inner][INFO] - {"epoch": 20, "update": 19.824, "loss": "3.492", "ntokens": "3206.76", "nsentences": "42.52", "prob_perplexity": "74.55", "code_perplexity": "72.939", "temp": "1.339", "loss_0": "3.333", "loss_1": "0.127", "loss_2": "0.031", "accuracy": "0.44968", "wps": "17424", "ups": "5.43", "wpb": "3206.8", "bsz": "42.5", "num_updates": "80400", "lr": "8.09114e-05", "gnorm": "0.706", "loss_scale": "16", "train_wall": "36", "gb_free": "13.7", "wall": "15105"} [2023-11-01 21:47:14,535][train_inner][INFO] - {"epoch": 20, "update": 19.873, "loss": "3.402", "ntokens": "3162.16", "nsentences": "44.92", "prob_perplexity": "75.1", "code_perplexity": "73.377", "temp": "1.337", "loss_0": "3.244", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.46682", "wps": "17735.2", "ups": "5.61", "wpb": "3162.2", "bsz": "44.9", "num_updates": "80600", "lr": "8.08608e-05", "gnorm": "0.708", "loss_scale": "16", "train_wall": "35", "gb_free": "13", "wall": "15141"} [2023-11-01 21:47:50,435][train_inner][INFO] - {"epoch": 20, "update": 19.923, "loss": "3.41", "ntokens": "3173.52", "nsentences": "44.32", "prob_perplexity": "75.169", "code_perplexity": "73.435", "temp": "1.336", "loss_0": "3.253", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.4642", "wps": "17680.7", "ups": "5.57", "wpb": "3173.5", "bsz": "44.3", "num_updates": "80800", "lr": "8.08101e-05", "gnorm": "0.703", "loss_scale": "16", "train_wall": "35", "gb_free": "13.9", "wall": "15177"} [2023-11-01 21:48:05,208][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 8.0 [2023-11-01 21:48:27,183][train_inner][INFO] - {"epoch": 20, "update": 19.972, "loss": "3.391", "ntokens": "3187.48", "nsentences": "45.04", "prob_perplexity": "75.507", "code_perplexity": "73.825", "temp": "1.335", "loss_0": "3.233", "loss_1": "0.127", "loss_2": "0.031", "accuracy": "0.46753", "wps": "17349.1", "ups": "5.44", "wpb": "3187.5", "bsz": "45", "num_updates": "81000", "lr": "8.07595e-05", "gnorm": "0.696", "loss_scale": "8", "train_wall": "36", "gb_free": "13.4", "wall": "15213"} [2023-11-01 21:48:47,199][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 21:48:47,201][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:48:47,220][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 24 [2023-11-01 21:49:12,933][valid][INFO] - {"epoch": 20, "valid_loss": "3.252", "valid_ntokens": "3166.23", "valid_nsentences": "44.1685", "valid_prob_perplexity": "75.499", "valid_code_perplexity": "73.849", "valid_temp": "1.333", "valid_loss_0": "3.091", "valid_loss_1": "0.127", "valid_loss_2": "0.033", "valid_accuracy": "0.49747", "valid_wps": "55790.9", "valid_wpb": "3166.2", "valid_bsz": "44.2", "valid_num_updates": "81113", "valid_best_loss": "3.252"} [2023-11-01 21:49:12,935][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 20 @ 81113 updates [2023-11-01 21:49:12,936][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 21:49:14,357][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 21:49:15,326][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 20 @ 81113 updates, score 3.252) (writing took 2.3916617669165134 seconds) [2023-11-01 21:49:15,327][fairseq_cli.train][INFO] - end of epoch 20 (average epoch stats below) [2023-11-01 21:49:15,329][train][INFO] - {"epoch": 20, "train_loss": "3.439", "train_ntokens": "3187.28", "train_nsentences": "44.2693", "train_prob_perplexity": "73.233", "train_code_perplexity": "71.673", "train_temp": "1.347", "train_loss_0": "3.279", "train_loss_1": "0.128", "train_loss_2": "0.032", "train_accuracy": "0.46073", "train_wps": "16997.3", "train_ups": "5.33", "train_wpb": "3187.3", "train_bsz": "44.3", "train_num_updates": "81113", "train_lr": "8.07309e-05", "train_gnorm": "0.702", "train_loss_scale": "8", "train_train_wall": "719", "train_gb_free": "13.2", "train_wall": "15262"} [2023-11-01 21:49:15,332][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 21:49:15,351][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 21 [2023-11-01 21:49:15,524][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 21:49:15,551][fairseq.trainer][INFO] - begin training epoch 21 [2023-11-01 21:49:15,552][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 21:49:31,061][train_inner][INFO] - {"epoch": 21, "update": 20.021, "loss": "3.386", "ntokens": "3160.56", "nsentences": "44.96", "prob_perplexity": "75.54", "code_perplexity": "73.763", "temp": "1.333", "loss_0": "3.229", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.46892", "wps": "9895.9", "ups": "3.13", "wpb": "3160.6", "bsz": "45", "num_updates": "81200", "lr": "8.07089e-05", "gnorm": "0.702", "loss_scale": "8", "train_wall": "35", "gb_free": "16.5", "wall": "15277"} [2023-11-01 21:50:06,463][train_inner][INFO] - {"epoch": 21, "update": 20.071, "loss": "3.445", "ntokens": "3187.4", "nsentences": "44.08", "prob_perplexity": "75.839", "code_perplexity": "74.131", "temp": "1.332", "loss_0": "3.288", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.45769", "wps": "18015.6", "ups": "5.65", "wpb": "3187.4", "bsz": "44.1", "num_updates": "81400", "lr": "8.06582e-05", "gnorm": "0.72", "loss_scale": "8", "train_wall": "35", "gb_free": "13", "wall": "15313"} [2023-11-01 21:50:41,803][train_inner][INFO] - {"epoch": 21, "update": 20.12, "loss": "3.431", "ntokens": "3185.44", "nsentences": "45.04", "prob_perplexity": "76.105", "code_perplexity": "74.38", "temp": "1.331", "loss_0": "3.274", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.46226", "wps": "18028.4", "ups": "5.66", "wpb": "3185.4", "bsz": "45", "num_updates": "81600", "lr": "8.06076e-05", "gnorm": "0.706", "loss_scale": "8", "train_wall": "35", "gb_free": "13", "wall": "15348"} [2023-11-01 21:51:17,571][train_inner][INFO] - {"epoch": 21, "update": 20.169, "loss": "3.429", "ntokens": "3189.92", "nsentences": "43.32", "prob_perplexity": "76.632", "code_perplexity": "74.791", "temp": "1.329", "loss_0": "3.272", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.45938", "wps": "17838.2", "ups": "5.59", "wpb": "3189.9", "bsz": "43.3", "num_updates": "81800", "lr": "8.0557e-05", "gnorm": "0.707", "loss_scale": "8", "train_wall": "35", "gb_free": "12.7", "wall": "15384"} [2023-11-01 21:51:53,554][train_inner][INFO] - {"epoch": 21, "update": 20.219, "loss": "3.465", "ntokens": "3210.72", "nsentences": "43.96", "prob_perplexity": "76.793", "code_perplexity": "74.987", "temp": "1.328", "loss_0": "3.309", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.45481", "wps": "17846.5", "ups": "5.56", "wpb": "3210.7", "bsz": "44", "num_updates": "82000", "lr": "8.05063e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "13.5", "wall": "15420"} [2023-11-01 21:52:29,761][train_inner][INFO] - {"epoch": 21, "update": 20.268, "loss": "3.407", "ntokens": "3192.24", "nsentences": "44.64", "prob_perplexity": "77.374", "code_perplexity": "75.53", "temp": "1.327", "loss_0": "3.25", "loss_1": "0.127", "loss_2": "0.029", "accuracy": "0.46378", "wps": "17634.2", "ups": "5.52", "wpb": "3192.2", "bsz": "44.6", "num_updates": "82200", "lr": "8.04557e-05", "gnorm": "0.706", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "15456"} [2023-11-01 21:53:05,731][train_inner][INFO] - {"epoch": 21, "update": 20.317, "loss": "3.399", "ntokens": "3187.96", "nsentences": "45.68", "prob_perplexity": "77.48", "code_perplexity": "75.705", "temp": "1.325", "loss_0": "3.243", "loss_1": "0.127", "loss_2": "0.029", "accuracy": "0.46597", "wps": "17726.8", "ups": "5.56", "wpb": "3188", "bsz": "45.7", "num_updates": "82400", "lr": "8.04051e-05", "gnorm": "0.708", "loss_scale": "8", "train_wall": "35", "gb_free": "14.1", "wall": "15492"} [2023-11-01 21:53:41,723][train_inner][INFO] - {"epoch": 21, "update": 20.367, "loss": "3.411", "ntokens": "3166.4", "nsentences": "45.8", "prob_perplexity": "77.679", "code_perplexity": "75.821", "temp": "1.324", "loss_0": "3.255", "loss_1": "0.127", "loss_2": "0.029", "accuracy": "0.46553", "wps": "17596.4", "ups": "5.56", "wpb": "3166.4", "bsz": "45.8", "num_updates": "82600", "lr": "8.03544e-05", "gnorm": "0.707", "loss_scale": "8", "train_wall": "35", "gb_free": "14.2", "wall": "15528"} [2023-11-01 21:54:17,836][train_inner][INFO] - {"epoch": 21, "update": 20.416, "loss": "3.466", "ntokens": "3181.52", "nsentences": "42.84", "prob_perplexity": "78.087", "code_perplexity": "76.249", "temp": "1.323", "loss_0": "3.309", "loss_1": "0.127", "loss_2": "0.03", "accuracy": "0.45284", "wps": "17620.7", "ups": "5.54", "wpb": "3181.5", "bsz": "42.8", "num_updates": "82800", "lr": "8.03038e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "35", "gb_free": "12.6", "wall": "15564"} [2023-11-01 21:54:53,963][train_inner][INFO] - {"epoch": 21, "update": 20.465, "loss": "3.435", "ntokens": "3238.44", "nsentences": "45.56", "prob_perplexity": "78.295", "code_perplexity": "76.5", "temp": "1.321", "loss_0": "3.279", "loss_1": "0.127", "loss_2": "0.029", "accuracy": "0.46028", "wps": "17929.2", "ups": "5.54", "wpb": "3238.4", "bsz": "45.6", "num_updates": "83000", "lr": "8.02532e-05", "gnorm": "0.695", "loss_scale": "8", "train_wall": "35", "gb_free": "13.5", "wall": "15600"} [2023-11-01 21:55:30,095][train_inner][INFO] - {"epoch": 21, "update": 20.515, "loss": "3.431", "ntokens": "3165.48", "nsentences": "42.56", "prob_perplexity": "78.401", "code_perplexity": "76.508", "temp": "1.32", "loss_0": "3.275", "loss_1": "0.127", "loss_2": "0.029", "accuracy": "0.45782", "wps": "17522.8", "ups": "5.54", "wpb": "3165.5", "bsz": "42.6", "num_updates": "83200", "lr": "8.02025e-05", "gnorm": "0.709", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "15636"} [2023-11-01 21:56:05,843][train_inner][INFO] - {"epoch": 21, "update": 20.564, "loss": "3.464", "ntokens": "3205.4", "nsentences": "43.84", "prob_perplexity": "78.097", "code_perplexity": "76.286", "temp": "1.319", "loss_0": "3.308", "loss_1": "0.127", "loss_2": "0.029", "accuracy": "0.4537", "wps": "17934.5", "ups": "5.6", "wpb": "3205.4", "bsz": "43.8", "num_updates": "83400", "lr": "8.01519e-05", "gnorm": "0.707", "loss_scale": "8", "train_wall": "35", "gb_free": "13.1", "wall": "15672"} [2023-11-01 21:56:41,727][train_inner][INFO] - {"epoch": 21, "update": 20.613, "loss": "3.454", "ntokens": "3212.2", "nsentences": "42.88", "prob_perplexity": "79.012", "code_perplexity": "77.115", "temp": "1.317", "loss_0": "3.298", "loss_1": "0.126", "loss_2": "0.029", "accuracy": "0.45336", "wps": "17904.3", "ups": "5.57", "wpb": "3212.2", "bsz": "42.9", "num_updates": "83600", "lr": "8.01013e-05", "gnorm": "0.702", "loss_scale": "8", "train_wall": "35", "gb_free": "12.9", "wall": "15708"} [2023-11-01 21:57:18,603][train_inner][INFO] - {"epoch": 21, "update": 20.662, "loss": "3.435", "ntokens": "3213.68", "nsentences": "44.32", "prob_perplexity": "79.46", "code_perplexity": "77.57", "temp": "1.316", "loss_0": "3.28", "loss_1": "0.126", "loss_2": "0.029", "accuracy": "0.45914", "wps": "17430.7", "ups": "5.42", "wpb": "3213.7", "bsz": "44.3", "num_updates": "83800", "lr": "8.00506e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "36", "gb_free": "14.2", "wall": "15745"} [2023-11-01 21:57:54,945][train_inner][INFO] - {"epoch": 21, "update": 20.712, "loss": "3.383", "ntokens": "3184.76", "nsentences": "45.84", "prob_perplexity": "79.74", "code_perplexity": "77.876", "temp": "1.315", "loss_0": "3.228", "loss_1": "0.126", "loss_2": "0.029", "accuracy": "0.46891", "wps": "17527.7", "ups": "5.5", "wpb": "3184.8", "bsz": "45.8", "num_updates": "84000", "lr": "8e-05", "gnorm": "0.7", "loss_scale": "8", "train_wall": "36", "gb_free": "13.2", "wall": "15781"} [2023-11-01 21:58:31,250][train_inner][INFO] - {"epoch": 21, "update": 20.761, "loss": "3.395", "ntokens": "3157.04", "nsentences": "43.72", "prob_perplexity": "79.426", "code_perplexity": "77.556", "temp": "1.313", "loss_0": "3.24", "loss_1": "0.126", "loss_2": "0.029", "accuracy": "0.46406", "wps": "17392.9", "ups": "5.51", "wpb": "3157", "bsz": "43.7", "num_updates": "84200", "lr": "7.99494e-05", "gnorm": "0.698", "loss_scale": "8", "train_wall": "36", "gb_free": "14.1", "wall": "15817"} [2023-11-01 21:59:07,446][train_inner][INFO] - {"epoch": 21, "update": 20.81, "loss": "3.434", "ntokens": "3215.64", "nsentences": "45.2", "prob_perplexity": "80.125", "code_perplexity": "78.229", "temp": "1.312", "loss_0": "3.279", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.45917", "wps": "17768.9", "ups": "5.53", "wpb": "3215.6", "bsz": "45.2", "num_updates": "84400", "lr": "7.98987e-05", "gnorm": "0.697", "loss_scale": "8", "train_wall": "36", "gb_free": "13.3", "wall": "15854"} [2023-11-01 21:59:43,294][train_inner][INFO] - {"epoch": 21, "update": 20.86, "loss": "3.427", "ntokens": "3149.32", "nsentences": "43.56", "prob_perplexity": "80.018", "code_perplexity": "78.124", "temp": "1.311", "loss_0": "3.272", "loss_1": "0.126", "loss_2": "0.029", "accuracy": "0.46126", "wps": "17571.4", "ups": "5.58", "wpb": "3149.3", "bsz": "43.6", "num_updates": "84600", "lr": "7.98481e-05", "gnorm": "0.704", "loss_scale": "8", "train_wall": "35", "gb_free": "14.5", "wall": "15890"} [2023-11-01 22:00:19,240][train_inner][INFO] - {"epoch": 21, "update": 20.909, "loss": "3.408", "ntokens": "3157.8", "nsentences": "43.96", "prob_perplexity": "80.247", "code_perplexity": "78.338", "temp": "1.31", "loss_0": "3.253", "loss_1": "0.126", "loss_2": "0.029", "accuracy": "0.46255", "wps": "17571", "ups": "5.56", "wpb": "3157.8", "bsz": "44", "num_updates": "84800", "lr": "7.97975e-05", "gnorm": "0.708", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "15925"} [2023-11-01 22:00:55,399][train_inner][INFO] - {"epoch": 21, "update": 20.958, "loss": "3.411", "ntokens": "3187.12", "nsentences": "43.48", "prob_perplexity": "80.956", "code_perplexity": "79.027", "temp": "1.308", "loss_0": "3.256", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.46008", "wps": "17629.4", "ups": "5.53", "wpb": "3187.1", "bsz": "43.5", "num_updates": "85000", "lr": "7.97468e-05", "gnorm": "0.71", "loss_scale": "8", "train_wall": "36", "gb_free": "13.5", "wall": "15962"} [2023-11-01 22:01:26,316][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 22:01:26,317][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:01:26,334][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 25 [2023-11-01 22:01:51,801][valid][INFO] - {"epoch": 21, "valid_loss": "3.211", "valid_ntokens": "3153.01", "valid_nsentences": "44.1685", "valid_prob_perplexity": "80.419", "valid_code_perplexity": "78.899", "valid_temp": "1.306", "valid_loss_0": "3.056", "valid_loss_1": "0.126", "valid_loss_2": "0.029", "valid_accuracy": "0.5017", "valid_wps": "56071", "valid_wpb": "3153", "valid_bsz": "44.2", "valid_num_updates": "85169", "valid_best_loss": "3.211"} [2023-11-01 22:01:51,803][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 21 @ 85169 updates [2023-11-01 22:01:51,805][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 22:01:53,213][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 22:01:54,171][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 21 @ 85169 updates, score 3.211) (writing took 2.368702960666269 seconds) [2023-11-01 22:01:54,172][fairseq_cli.train][INFO] - end of epoch 21 (average epoch stats below) [2023-11-01 22:01:54,174][train][INFO] - {"epoch": 21, "train_loss": "3.425", "train_ntokens": "3187.8", "train_nsentences": "44.2682", "train_prob_perplexity": "78.468", "train_code_perplexity": "76.623", "train_temp": "1.32", "train_loss_0": "3.27", "train_loss_1": "0.127", "train_loss_2": "0.029", "train_accuracy": "0.46064", "train_wps": "17038.7", "train_ups": "5.34", "train_wpb": "3187.8", "train_bsz": "44.3", "train_num_updates": "85169", "train_lr": "7.97041e-05", "train_gnorm": "0.705", "train_loss_scale": "8", "train_train_wall": "718", "train_gb_free": "14.2", "train_wall": "16020"} [2023-11-01 22:01:54,177][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:01:54,203][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 22 [2023-11-01 22:01:54,376][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 22:01:54,403][fairseq.trainer][INFO] - begin training epoch 22 [2023-11-01 22:01:54,404][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 22:02:00,228][train_inner][INFO] - {"epoch": 22, "update": 21.008, "loss": "3.394", "ntokens": "3201.24", "nsentences": "45.08", "prob_perplexity": "81.278", "code_perplexity": "79.372", "temp": "1.307", "loss_0": "3.24", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.46629", "wps": "9876.4", "ups": "3.09", "wpb": "3201.2", "bsz": "45.1", "num_updates": "85200", "lr": "7.96962e-05", "gnorm": "0.696", "loss_scale": "8", "train_wall": "36", "gb_free": "14", "wall": "16026"} [2023-11-01 22:02:36,177][train_inner][INFO] - {"epoch": 22, "update": 21.057, "loss": "3.438", "ntokens": "3205.04", "nsentences": "44.32", "prob_perplexity": "81.592", "code_perplexity": "79.663", "temp": "1.306", "loss_0": "3.284", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.4575", "wps": "17839.7", "ups": "5.57", "wpb": "3205", "bsz": "44.3", "num_updates": "85400", "lr": "7.96456e-05", "gnorm": "0.699", "loss_scale": "8", "train_wall": "35", "gb_free": "14.1", "wall": "16062"} [2023-11-01 22:03:11,658][train_inner][INFO] - {"epoch": 22, "update": 21.106, "loss": "3.365", "ntokens": "3147.56", "nsentences": "44.92", "prob_perplexity": "81.85", "code_perplexity": "79.924", "temp": "1.304", "loss_0": "3.21", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.46922", "wps": "17743", "ups": "5.64", "wpb": "3147.6", "bsz": "44.9", "num_updates": "85600", "lr": "7.95949e-05", "gnorm": "0.702", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "16098"} [2023-11-01 22:03:47,371][train_inner][INFO] - {"epoch": 22, "update": 21.156, "loss": "3.44", "ntokens": "3189.12", "nsentences": "43.16", "prob_perplexity": "82.01", "code_perplexity": "80.127", "temp": "1.303", "loss_0": "3.286", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.45629", "wps": "17861", "ups": "5.6", "wpb": "3189.1", "bsz": "43.2", "num_updates": "85800", "lr": "7.95443e-05", "gnorm": "0.708", "loss_scale": "8", "train_wall": "35", "gb_free": "14", "wall": "16134"} [2023-11-01 22:04:22,770][train_inner][INFO] - {"epoch": 22, "update": 21.205, "loss": "3.366", "ntokens": "3138.72", "nsentences": "45.32", "prob_perplexity": "81.95", "code_perplexity": "80.061", "temp": "1.302", "loss_0": "3.212", "loss_1": "0.126", "loss_2": "0.027", "accuracy": "0.46992", "wps": "17734.1", "ups": "5.65", "wpb": "3138.7", "bsz": "45.3", "num_updates": "86000", "lr": "7.94937e-05", "gnorm": "0.711", "loss_scale": "8", "train_wall": "35", "gb_free": "13.7", "wall": "16169"} [2023-11-01 22:04:59,351][train_inner][INFO] - {"epoch": 22, "update": 21.254, "loss": "3.474", "ntokens": "3186.92", "nsentences": "41.08", "prob_perplexity": "82.413", "code_perplexity": "80.476", "temp": "1.3", "loss_0": "3.321", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.44788", "wps": "17425", "ups": "5.47", "wpb": "3186.9", "bsz": "41.1", "num_updates": "86200", "lr": "7.9443e-05", "gnorm": "0.714", "loss_scale": "8", "train_wall": "36", "gb_free": "14", "wall": "16206"} [2023-11-01 22:05:35,320][train_inner][INFO] - {"epoch": 22, "update": 21.304, "loss": "3.349", "ntokens": "3172.04", "nsentences": "46.6", "prob_perplexity": "82.842", "code_perplexity": "80.933", "temp": "1.299", "loss_0": "3.197", "loss_1": "0.126", "loss_2": "0.027", "accuracy": "0.47348", "wps": "17639.1", "ups": "5.56", "wpb": "3172", "bsz": "46.6", "num_updates": "86400", "lr": "7.93924e-05", "gnorm": "0.705", "loss_scale": "8", "train_wall": "35", "gb_free": "13.4", "wall": "16242"} [2023-11-01 22:06:11,383][train_inner][INFO] - {"epoch": 22, "update": 21.353, "loss": "3.416", "ntokens": "3219.36", "nsentences": "43.36", "prob_perplexity": "82.953", "code_perplexity": "81.016", "temp": "1.298", "loss_0": "3.263", "loss_1": "0.126", "loss_2": "0.028", "accuracy": "0.45922", "wps": "17854.9", "ups": "5.55", "wpb": "3219.4", "bsz": "43.4", "num_updates": "86600", "lr": "7.93418e-05", "gnorm": "0.699", "loss_scale": "8", "train_wall": "35", "gb_free": "13.8", "wall": "16278"} [2023-11-01 22:06:47,763][train_inner][INFO] - {"epoch": 22, "update": 21.402, "loss": "3.438", "ntokens": "3204.04", "nsentences": "43", "prob_perplexity": "83.567", "code_perplexity": "81.594", "temp": "1.296", "loss_0": "3.285", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.45601", "wps": "17615.1", "ups": "5.5", "wpb": "3204", "bsz": "43", "num_updates": "86800", "lr": "7.92911e-05", "gnorm": "0.699", "loss_scale": "8", "train_wall": "36", "gb_free": "13.1", "wall": "16314"} [2023-11-01 22:07:24,186][train_inner][INFO] - {"epoch": 22, "update": 21.451, "loss": "3.372", "ntokens": "3216.52", "nsentences": "45.56", "prob_perplexity": "84.1", "code_perplexity": "82.156", "temp": "1.295", "loss_0": "3.219", "loss_1": "0.125", "loss_2": "0.028", "accuracy": "0.46838", "wps": "17663.4", "ups": "5.49", "wpb": "3216.5", "bsz": "45.6", "num_updates": "87000", "lr": "7.92405e-05", "gnorm": "0.695", "loss_scale": "8", "train_wall": "36", "gb_free": "14.2", "wall": "16350"} [2023-11-01 22:07:48,760][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2023-11-01 22:08:00,200][train_inner][INFO] - {"epoch": 22, "update": 21.501, "loss": "3.437", "ntokens": "3178.44", "nsentences": "42.72", "prob_perplexity": "84.365", "code_perplexity": "82.367", "temp": "1.294", "loss_0": "3.284", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.4558", "wps": "17651.9", "ups": "5.55", "wpb": "3178.4", "bsz": "42.7", "num_updates": "87200", "lr": "7.91899e-05", "gnorm": "0.707", "loss_scale": "4", "train_wall": "35", "gb_free": "14.2", "wall": "16386"} [2023-11-01 22:08:37,067][train_inner][INFO] - {"epoch": 22, "update": 21.55, "loss": "3.427", "ntokens": "3230.2", "nsentences": "43.2", "prob_perplexity": "84.547", "code_perplexity": "82.53", "temp": "1.293", "loss_0": "3.274", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.45682", "wps": "17524.5", "ups": "5.43", "wpb": "3230.2", "bsz": "43.2", "num_updates": "87400", "lr": "7.91392e-05", "gnorm": "0.7", "loss_scale": "4", "train_wall": "36", "gb_free": "15", "wall": "16423"} [2023-11-01 22:09:13,428][train_inner][INFO] - {"epoch": 22, "update": 21.6, "loss": "3.406", "ntokens": "3185.08", "nsentences": "44.72", "prob_perplexity": "85.036", "code_perplexity": "83.05", "temp": "1.291", "loss_0": "3.254", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.46209", "wps": "17520.5", "ups": "5.5", "wpb": "3185.1", "bsz": "44.7", "num_updates": "87600", "lr": "7.90886e-05", "gnorm": "0.702", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "16460"} [2023-11-01 22:09:49,952][train_inner][INFO] - {"epoch": 22, "update": 21.649, "loss": "3.408", "ntokens": "3201.64", "nsentences": "44.76", "prob_perplexity": "84.576", "code_perplexity": "82.602", "temp": "1.29", "loss_0": "3.256", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.46292", "wps": "17533.1", "ups": "5.48", "wpb": "3201.6", "bsz": "44.8", "num_updates": "87800", "lr": "7.9038e-05", "gnorm": "0.706", "loss_scale": "4", "train_wall": "36", "gb_free": "14.3", "wall": "16496"} [2023-11-01 22:10:26,383][train_inner][INFO] - {"epoch": 22, "update": 21.698, "loss": "3.404", "ntokens": "3190.6", "nsentences": "43.52", "prob_perplexity": "85.843", "code_perplexity": "83.808", "temp": "1.289", "loss_0": "3.252", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.4604", "wps": "17516.7", "ups": "5.49", "wpb": "3190.6", "bsz": "43.5", "num_updates": "88000", "lr": "7.89873e-05", "gnorm": "0.703", "loss_scale": "4", "train_wall": "36", "gb_free": "14.4", "wall": "16533"} [2023-11-01 22:11:02,628][train_inner][INFO] - {"epoch": 22, "update": 21.748, "loss": "3.449", "ntokens": "3221.88", "nsentences": "43.72", "prob_perplexity": "86.019", "code_perplexity": "84.038", "temp": "1.287", "loss_0": "3.298", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.45318", "wps": "17779.7", "ups": "5.52", "wpb": "3221.9", "bsz": "43.7", "num_updates": "88200", "lr": "7.89367e-05", "gnorm": "0.708", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "16569"} [2023-11-01 22:11:38,403][train_inner][INFO] - {"epoch": 22, "update": 21.797, "loss": "3.33", "ntokens": "3163.6", "nsentences": "45.72", "prob_perplexity": "86.099", "code_perplexity": "84.12", "temp": "1.286", "loss_0": "3.178", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.47454", "wps": "17686.8", "ups": "5.59", "wpb": "3163.6", "bsz": "45.7", "num_updates": "88400", "lr": "7.88861e-05", "gnorm": "0.7", "loss_scale": "4", "train_wall": "35", "gb_free": "13.6", "wall": "16605"} [2023-11-01 22:12:14,454][train_inner][INFO] - {"epoch": 22, "update": 21.846, "loss": "3.4", "ntokens": "3187.44", "nsentences": "44.32", "prob_perplexity": "86.411", "code_perplexity": "84.425", "temp": "1.285", "loss_0": "3.248", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.46229", "wps": "17684.1", "ups": "5.55", "wpb": "3187.4", "bsz": "44.3", "num_updates": "88600", "lr": "7.88354e-05", "gnorm": "0.697", "loss_scale": "4", "train_wall": "35", "gb_free": "12.7", "wall": "16641"} [2023-11-01 22:12:50,802][train_inner][INFO] - {"epoch": 22, "update": 21.895, "loss": "3.371", "ntokens": "3190.12", "nsentences": "45.24", "prob_perplexity": "86.805", "code_perplexity": "84.797", "temp": "1.284", "loss_0": "3.22", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.4672", "wps": "17554.1", "ups": "5.5", "wpb": "3190.1", "bsz": "45.2", "num_updates": "88800", "lr": "7.87848e-05", "gnorm": "0.698", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "16677"} [2023-11-01 22:13:27,190][train_inner][INFO] - {"epoch": 22, "update": 21.945, "loss": "3.338", "ntokens": "3195.76", "nsentences": "46.52", "prob_perplexity": "87.277", "code_perplexity": "85.246", "temp": "1.282", "loss_0": "3.186", "loss_1": "0.125", "loss_2": "0.027", "accuracy": "0.4749", "wps": "17566", "ups": "5.5", "wpb": "3195.8", "bsz": "46.5", "num_updates": "89000", "lr": "7.87342e-05", "gnorm": "0.697", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "16713"} [2023-11-01 22:14:03,708][train_inner][INFO] - {"epoch": 22, "update": 21.994, "loss": "3.43", "ntokens": "3194.08", "nsentences": "43.92", "prob_perplexity": "87.261", "code_perplexity": "85.24", "temp": "1.281", "loss_0": "3.279", "loss_1": "0.125", "loss_2": "0.026", "accuracy": "0.45773", "wps": "17494.1", "ups": "5.48", "wpb": "3194.1", "bsz": "43.9", "num_updates": "89200", "lr": "7.86835e-05", "gnorm": "0.703", "loss_scale": "4", "train_wall": "36", "gb_free": "14.7", "wall": "16750"} [2023-11-01 22:14:07,807][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 22:14:07,809][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:14:07,826][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 26 [2023-11-01 22:14:33,521][valid][INFO] - {"epoch": 22, "valid_loss": "3.215", "valid_ntokens": "3167.31", "valid_nsentences": "44.1685", "valid_prob_perplexity": "87.673", "valid_code_perplexity": "85.667", "valid_temp": "1.28", "valid_loss_0": "3.062", "valid_loss_1": "0.124", "valid_loss_2": "0.028", "valid_accuracy": "0.49903", "valid_wps": "55870.6", "valid_wpb": "3167.3", "valid_bsz": "44.2", "valid_num_updates": "89224", "valid_best_loss": "3.211"} [2023-11-01 22:14:33,523][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 22 @ 89224 updates [2023-11-01 22:14:33,524][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 22:14:34,957][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 22:14:35,004][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 22 @ 89224 updates, score 3.215) (writing took 1.4817161527462304 seconds) [2023-11-01 22:14:35,005][fairseq_cli.train][INFO] - end of epoch 22 (average epoch stats below) [2023-11-01 22:14:35,007][train][INFO] - {"epoch": 22, "train_loss": "3.404", "train_ntokens": "3191", "train_nsentences": "44.2713", "train_prob_perplexity": "84.373", "train_code_perplexity": "82.405", "train_temp": "1.293", "train_loss_0": "3.251", "train_loss_1": "0.125", "train_loss_2": "0.027", "train_accuracy": "0.46212", "train_wps": "17007.1", "train_ups": "5.33", "train_wpb": "3191", "train_bsz": "44.3", "train_num_updates": "89224", "train_lr": "7.86775e-05", "train_gnorm": "0.703", "train_loss_scale": "4", "train_train_wall": "720", "train_gb_free": "15.4", "train_wall": "16781"} [2023-11-01 22:14:35,010][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:14:35,031][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 23 [2023-11-01 22:14:35,206][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 22:14:35,234][fairseq.trainer][INFO] - begin training epoch 23 [2023-11-01 22:14:35,235][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 22:15:06,448][train_inner][INFO] - {"epoch": 23, "update": 22.043, "loss": "3.398", "ntokens": "3169.88", "nsentences": "43.84", "prob_perplexity": "87.265", "code_perplexity": "85.246", "temp": "1.28", "loss_0": "3.247", "loss_1": "0.125", "loss_2": "0.026", "accuracy": "0.46179", "wps": "10105.2", "ups": "3.19", "wpb": "3169.9", "bsz": "43.8", "num_updates": "89400", "lr": "7.86329e-05", "gnorm": "0.708", "loss_scale": "4", "train_wall": "35", "gb_free": "13.4", "wall": "16813"} [2023-11-01 22:15:42,263][train_inner][INFO] - {"epoch": 23, "update": 22.093, "loss": "3.391", "ntokens": "3190.56", "nsentences": "44.4", "prob_perplexity": "88.129", "code_perplexity": "86.1", "temp": "1.278", "loss_0": "3.241", "loss_1": "0.124", "loss_2": "0.027", "accuracy": "0.46284", "wps": "17828.6", "ups": "5.59", "wpb": "3190.6", "bsz": "44.4", "num_updates": "89600", "lr": "7.85823e-05", "gnorm": "0.703", "loss_scale": "4", "train_wall": "35", "gb_free": "15.5", "wall": "16848"} [2023-11-01 22:16:18,366][train_inner][INFO] - {"epoch": 23, "update": 22.142, "loss": "3.441", "ntokens": "3230.52", "nsentences": "43.56", "prob_perplexity": "88.214", "code_perplexity": "86.181", "temp": "1.277", "loss_0": "3.29", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.45347", "wps": "17897.2", "ups": "5.54", "wpb": "3230.5", "bsz": "43.6", "num_updates": "89800", "lr": "7.85316e-05", "gnorm": "0.701", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "16885"} [2023-11-01 22:16:54,073][train_inner][INFO] - {"epoch": 23, "update": 22.191, "loss": "3.369", "ntokens": "3200.88", "nsentences": "47.6", "prob_perplexity": "89.143", "code_perplexity": "87.069", "temp": "1.276", "loss_0": "3.219", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.47041", "wps": "17930.4", "ups": "5.6", "wpb": "3200.9", "bsz": "47.6", "num_updates": "90000", "lr": "7.8481e-05", "gnorm": "0.696", "loss_scale": "4", "train_wall": "35", "gb_free": "13", "wall": "16920"} [2023-11-01 22:17:30,335][train_inner][INFO] - {"epoch": 23, "update": 22.241, "loss": "3.428", "ntokens": "3191.68", "nsentences": "42.08", "prob_perplexity": "89.135", "code_perplexity": "87.087", "temp": "1.275", "loss_0": "3.277", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.45436", "wps": "17604.1", "ups": "5.52", "wpb": "3191.7", "bsz": "42.1", "num_updates": "90200", "lr": "7.84304e-05", "gnorm": "0.704", "loss_scale": "4", "train_wall": "36", "gb_free": "12.6", "wall": "16957"} [2023-11-01 22:18:06,430][train_inner][INFO] - {"epoch": 23, "update": 22.29, "loss": "3.405", "ntokens": "3168.12", "nsentences": "43.2", "prob_perplexity": "89.28", "code_perplexity": "87.194", "temp": "1.273", "loss_0": "3.254", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.46015", "wps": "17555.6", "ups": "5.54", "wpb": "3168.1", "bsz": "43.2", "num_updates": "90400", "lr": "7.83797e-05", "gnorm": "0.715", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "16993"} [2023-11-01 22:18:42,546][train_inner][INFO] - {"epoch": 23, "update": 22.339, "loss": "3.36", "ntokens": "3156.72", "nsentences": "45.44", "prob_perplexity": "89.432", "code_perplexity": "87.329", "temp": "1.272", "loss_0": "3.21", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.46975", "wps": "17481.8", "ups": "5.54", "wpb": "3156.7", "bsz": "45.4", "num_updates": "90600", "lr": "7.83291e-05", "gnorm": "0.711", "loss_scale": "4", "train_wall": "35", "gb_free": "13.7", "wall": "17029"} [2023-11-01 22:19:18,759][train_inner][INFO] - {"epoch": 23, "update": 22.389, "loss": "3.411", "ntokens": "3196.76", "nsentences": "43.76", "prob_perplexity": "90.06", "code_perplexity": "88.018", "temp": "1.271", "loss_0": "3.261", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.45919", "wps": "17656.6", "ups": "5.52", "wpb": "3196.8", "bsz": "43.8", "num_updates": "90800", "lr": "7.82785e-05", "gnorm": "0.701", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "17065"} [2023-11-01 22:19:54,752][train_inner][INFO] - {"epoch": 23, "update": 22.438, "loss": "3.377", "ntokens": "3179.44", "nsentences": "43.96", "prob_perplexity": "90.224", "code_perplexity": "88.15", "temp": "1.27", "loss_0": "3.227", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.46389", "wps": "17668.1", "ups": "5.56", "wpb": "3179.4", "bsz": "44", "num_updates": "91000", "lr": "7.82278e-05", "gnorm": "0.705", "loss_scale": "4", "train_wall": "35", "gb_free": "13.7", "wall": "17101"} [2023-11-01 22:20:31,238][train_inner][INFO] - {"epoch": 23, "update": 22.487, "loss": "3.341", "ntokens": "3181.56", "nsentences": "47.2", "prob_perplexity": "90.523", "code_perplexity": "88.469", "temp": "1.268", "loss_0": "3.191", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.47404", "wps": "17441.1", "ups": "5.48", "wpb": "3181.6", "bsz": "47.2", "num_updates": "91200", "lr": "7.81772e-05", "gnorm": "0.706", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "17137"} [2023-11-01 22:21:07,701][train_inner][INFO] - {"epoch": 23, "update": 22.536, "loss": "3.426", "ntokens": "3238.64", "nsentences": "42.84", "prob_perplexity": "91.219", "code_perplexity": "89.12", "temp": "1.267", "loss_0": "3.277", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.45354", "wps": "17764.9", "ups": "5.49", "wpb": "3238.6", "bsz": "42.8", "num_updates": "91400", "lr": "7.81266e-05", "gnorm": "0.695", "loss_scale": "4", "train_wall": "36", "gb_free": "13.9", "wall": "17174"} [2023-11-01 22:21:44,142][train_inner][INFO] - {"epoch": 23, "update": 22.586, "loss": "3.371", "ntokens": "3179.2", "nsentences": "45.92", "prob_perplexity": "91.233", "code_perplexity": "89.14", "temp": "1.266", "loss_0": "3.221", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.46755", "wps": "17449.3", "ups": "5.49", "wpb": "3179.2", "bsz": "45.9", "num_updates": "91600", "lr": "7.80759e-05", "gnorm": "0.711", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "17210"} [2023-11-01 22:22:20,254][train_inner][INFO] - {"epoch": 23, "update": 22.635, "loss": "3.377", "ntokens": "3186.08", "nsentences": "44.04", "prob_perplexity": "91.692", "code_perplexity": "89.548", "temp": "1.264", "loss_0": "3.228", "loss_1": "0.124", "loss_2": "0.026", "accuracy": "0.46387", "wps": "17646.9", "ups": "5.54", "wpb": "3186.1", "bsz": "44", "num_updates": "91800", "lr": "7.80253e-05", "gnorm": "0.704", "loss_scale": "4", "train_wall": "35", "gb_free": "14", "wall": "17246"} [2023-11-01 22:22:56,480][train_inner][INFO] - {"epoch": 23, "update": 22.684, "loss": "3.335", "ntokens": "3158.6", "nsentences": "45.16", "prob_perplexity": "92.238", "code_perplexity": "90.05", "temp": "1.263", "loss_0": "3.186", "loss_1": "0.123", "loss_2": "0.026", "accuracy": "0.47258", "wps": "17439.5", "ups": "5.52", "wpb": "3158.6", "bsz": "45.2", "num_updates": "92000", "lr": "7.79747e-05", "gnorm": "0.703", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "17283"} [2023-11-01 22:23:33,312][train_inner][INFO] - {"epoch": 23, "update": 22.734, "loss": "3.361", "ntokens": "3210.36", "nsentences": "44.64", "prob_perplexity": "92.518", "code_perplexity": "90.373", "temp": "1.262", "loss_0": "3.212", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.46595", "wps": "17433.1", "ups": "5.43", "wpb": "3210.4", "bsz": "44.6", "num_updates": "92200", "lr": "7.79241e-05", "gnorm": "0.698", "loss_scale": "4", "train_wall": "36", "gb_free": "12.5", "wall": "17320"} [2023-11-01 22:24:09,934][train_inner][INFO] - {"epoch": 23, "update": 22.783, "loss": "3.444", "ntokens": "3189.84", "nsentences": "41", "prob_perplexity": "92.162", "code_perplexity": "89.975", "temp": "1.261", "loss_0": "3.295", "loss_1": "0.123", "loss_2": "0.026", "accuracy": "0.45052", "wps": "17421.5", "ups": "5.46", "wpb": "3189.8", "bsz": "41", "num_updates": "92400", "lr": "7.78734e-05", "gnorm": "0.709", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "17356"} [2023-11-01 22:24:46,577][train_inner][INFO] - {"epoch": 23, "update": 22.832, "loss": "3.374", "ntokens": "3210.56", "nsentences": "44.6", "prob_perplexity": "93.373", "code_perplexity": "91.151", "temp": "1.259", "loss_0": "3.225", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.46521", "wps": "17524.5", "ups": "5.46", "wpb": "3210.6", "bsz": "44.6", "num_updates": "92600", "lr": "7.78228e-05", "gnorm": "0.705", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "17393"} [2023-11-01 22:25:23,410][train_inner][INFO] - {"epoch": 23, "update": 22.882, "loss": "3.41", "ntokens": "3226.4", "nsentences": "43.68", "prob_perplexity": "93.056", "code_perplexity": "90.848", "temp": "1.258", "loss_0": "3.261", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.45808", "wps": "17520.1", "ups": "5.43", "wpb": "3226.4", "bsz": "43.7", "num_updates": "92800", "lr": "7.77722e-05", "gnorm": "0.7", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "17430"} [2023-11-01 22:26:00,093][train_inner][INFO] - {"epoch": 23, "update": 22.931, "loss": "3.343", "ntokens": "3186", "nsentences": "46.4", "prob_perplexity": "93.855", "code_perplexity": "91.616", "temp": "1.257", "loss_0": "3.195", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.47247", "wps": "17371.7", "ups": "5.45", "wpb": "3186", "bsz": "46.4", "num_updates": "93000", "lr": "7.77215e-05", "gnorm": "0.699", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "17466"} [2023-11-01 22:26:36,856][train_inner][INFO] - {"epoch": 23, "update": 22.98, "loss": "3.426", "ntokens": "3197.84", "nsentences": "41.44", "prob_perplexity": "93.542", "code_perplexity": "91.291", "temp": "1.256", "loss_0": "3.277", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.45279", "wps": "17398.3", "ups": "5.44", "wpb": "3197.8", "bsz": "41.4", "num_updates": "93200", "lr": "7.76709e-05", "gnorm": "0.713", "loss_scale": "4", "train_wall": "36", "gb_free": "14.1", "wall": "17503"} [2023-11-01 22:26:51,582][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 22:26:51,583][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:26:51,603][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 27 [2023-11-01 22:27:17,156][valid][INFO] - {"epoch": 23, "valid_loss": "3.185", "valid_ntokens": "3170.86", "valid_nsentences": "44.1685", "valid_prob_perplexity": "94.021", "valid_code_perplexity": "91.916", "valid_temp": "1.255", "valid_loss_0": "3.036", "valid_loss_1": "0.123", "valid_loss_2": "0.025", "valid_accuracy": "0.50279", "valid_wps": "56209", "valid_wpb": "3170.9", "valid_bsz": "44.2", "valid_num_updates": "93280", "valid_best_loss": "3.185"} [2023-11-01 22:27:17,158][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 23 @ 93280 updates [2023-11-01 22:27:17,160][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 22:27:18,603][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 22:27:19,577][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 23 @ 93280 updates, score 3.185) (writing took 2.419030208606273 seconds) [2023-11-01 22:27:19,578][fairseq_cli.train][INFO] - end of epoch 23 (average epoch stats below) [2023-11-01 22:27:19,580][train][INFO] - {"epoch": 23, "train_loss": "3.388", "train_ntokens": "3193.17", "train_nsentences": "44.2682", "train_prob_perplexity": "90.899", "train_code_perplexity": "88.78", "train_temp": "1.267", "train_loss_0": "3.238", "train_loss_1": "0.124", "train_loss_2": "0.026", "train_accuracy": "0.46288", "train_wps": "16939.6", "train_ups": "5.3", "train_wpb": "3193.2", "train_bsz": "44.3", "train_num_updates": "93280", "train_lr": "7.76506e-05", "train_gnorm": "0.704", "train_loss_scale": "4", "train_train_wall": "723", "train_gb_free": "15.1", "train_wall": "17546"} [2023-11-01 22:27:19,583][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:27:19,601][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 24 [2023-11-01 22:27:19,773][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 22:27:19,801][fairseq.trainer][INFO] - begin training epoch 24 [2023-11-01 22:27:19,802][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 22:27:41,192][train_inner][INFO] - {"epoch": 24, "update": 23.03, "loss": "3.333", "ntokens": "3174", "nsentences": "46.04", "prob_perplexity": "94.299", "code_perplexity": "92.131", "temp": "1.254", "loss_0": "3.185", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.47324", "wps": "9867.3", "ups": "3.11", "wpb": "3174", "bsz": "46", "num_updates": "93400", "lr": "7.76203e-05", "gnorm": "0.707", "loss_scale": "4", "train_wall": "35", "gb_free": "14.4", "wall": "17567"} [2023-11-01 22:28:16,923][train_inner][INFO] - {"epoch": 24, "update": 23.079, "loss": "3.349", "ntokens": "3147.2", "nsentences": "44.6", "prob_perplexity": "94.376", "code_perplexity": "92.081", "temp": "1.253", "loss_0": "3.201", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.46999", "wps": "17624.6", "ups": "5.6", "wpb": "3147.2", "bsz": "44.6", "num_updates": "93600", "lr": "7.75696e-05", "gnorm": "0.715", "loss_scale": "4", "train_wall": "35", "gb_free": "12.9", "wall": "17603"} [2023-11-01 22:28:53,281][train_inner][INFO] - {"epoch": 24, "update": 23.128, "loss": "3.379", "ntokens": "3246.8", "nsentences": "44.96", "prob_perplexity": "95.134", "code_perplexity": "92.866", "temp": "1.252", "loss_0": "3.231", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.46309", "wps": "17861.1", "ups": "5.5", "wpb": "3246.8", "bsz": "45", "num_updates": "93800", "lr": "7.7519e-05", "gnorm": "0.697", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "17640"} [2023-11-01 22:29:29,682][train_inner][INFO] - {"epoch": 24, "update": 23.178, "loss": "3.413", "ntokens": "3221", "nsentences": "42.52", "prob_perplexity": "95.606", "code_perplexity": "93.324", "temp": "1.251", "loss_0": "3.266", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.45636", "wps": "17697.9", "ups": "5.49", "wpb": "3221", "bsz": "42.5", "num_updates": "94000", "lr": "7.74684e-05", "gnorm": "0.704", "loss_scale": "4", "train_wall": "36", "gb_free": "14.1", "wall": "17676"} [2023-11-01 22:30:05,624][train_inner][INFO] - {"epoch": 24, "update": 23.227, "loss": "3.339", "ntokens": "3158.92", "nsentences": "43.84", "prob_perplexity": "95.006", "code_perplexity": "92.747", "temp": "1.249", "loss_0": "3.192", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.47049", "wps": "17579.3", "ups": "5.56", "wpb": "3158.9", "bsz": "43.8", "num_updates": "94200", "lr": "7.74177e-05", "gnorm": "0.711", "loss_scale": "4", "train_wall": "35", "gb_free": "14.3", "wall": "17712"} [2023-11-01 22:30:41,785][train_inner][INFO] - {"epoch": 24, "update": 23.276, "loss": "3.323", "ntokens": "3153.72", "nsentences": "44.4", "prob_perplexity": "95.565", "code_perplexity": "93.252", "temp": "1.248", "loss_0": "3.176", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.47274", "wps": "17443.4", "ups": "5.53", "wpb": "3153.7", "bsz": "44.4", "num_updates": "94400", "lr": "7.73671e-05", "gnorm": "0.71", "loss_scale": "4", "train_wall": "36", "gb_free": "12.9", "wall": "17748"} [2023-11-01 22:31:17,717][train_inner][INFO] - {"epoch": 24, "update": 23.325, "loss": "3.348", "ntokens": "3167.12", "nsentences": "44.8", "prob_perplexity": "95.669", "code_perplexity": "93.404", "temp": "1.247", "loss_0": "3.2", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.47002", "wps": "17630", "ups": "5.57", "wpb": "3167.1", "bsz": "44.8", "num_updates": "94600", "lr": "7.73165e-05", "gnorm": "0.711", "loss_scale": "4", "train_wall": "35", "gb_free": "14.7", "wall": "17784"} [2023-11-01 22:31:53,461][train_inner][INFO] - {"epoch": 24, "update": 23.375, "loss": "3.386", "ntokens": "3183.32", "nsentences": "43.24", "prob_perplexity": "95.839", "code_perplexity": "93.567", "temp": "1.246", "loss_0": "3.239", "loss_1": "0.123", "loss_2": "0.025", "accuracy": "0.46156", "wps": "17813", "ups": "5.6", "wpb": "3183.3", "bsz": "43.2", "num_updates": "94800", "lr": "7.72658e-05", "gnorm": "0.71", "loss_scale": "4", "train_wall": "35", "gb_free": "14.6", "wall": "17820"} [2023-11-01 22:32:29,556][train_inner][INFO] - {"epoch": 24, "update": 23.424, "loss": "3.331", "ntokens": "3194.88", "nsentences": "44.8", "prob_perplexity": "96.558", "code_perplexity": "94.248", "temp": "1.244", "loss_0": "3.183", "loss_1": "0.122", "loss_2": "0.025", "accuracy": "0.47139", "wps": "17703.3", "ups": "5.54", "wpb": "3194.9", "bsz": "44.8", "num_updates": "95000", "lr": "7.72152e-05", "gnorm": "0.708", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "17856"} [2023-11-01 22:33:05,460][train_inner][INFO] - {"epoch": 24, "update": 23.473, "loss": "3.348", "ntokens": "3163.92", "nsentences": "44.92", "prob_perplexity": "96.81", "code_perplexity": "94.511", "temp": "1.243", "loss_0": "3.201", "loss_1": "0.122", "loss_2": "0.025", "accuracy": "0.46955", "wps": "17626.1", "ups": "5.57", "wpb": "3163.9", "bsz": "44.9", "num_updates": "95200", "lr": "7.71646e-05", "gnorm": "0.715", "loss_scale": "4", "train_wall": "35", "gb_free": "12.4", "wall": "17892"} [2023-11-01 22:33:42,000][train_inner][INFO] - {"epoch": 24, "update": 23.523, "loss": "3.365", "ntokens": "3220.32", "nsentences": "44.24", "prob_perplexity": "97.48", "code_perplexity": "95.116", "temp": "1.242", "loss_0": "3.218", "loss_1": "0.122", "loss_2": "0.025", "accuracy": "0.46449", "wps": "17627.5", "ups": "5.47", "wpb": "3220.3", "bsz": "44.2", "num_updates": "95400", "lr": "7.71139e-05", "gnorm": "0.707", "loss_scale": "4", "train_wall": "36", "gb_free": "14.7", "wall": "17928"} [2023-11-01 22:34:18,533][train_inner][INFO] - {"epoch": 24, "update": 23.572, "loss": "3.28", "ntokens": "3162.68", "nsentences": "46.36", "prob_perplexity": "97.669", "code_perplexity": "95.364", "temp": "1.241", "loss_0": "3.134", "loss_1": "0.122", "loss_2": "0.025", "accuracy": "0.47969", "wps": "17315.2", "ups": "5.47", "wpb": "3162.7", "bsz": "46.4", "num_updates": "95600", "lr": "7.70633e-05", "gnorm": "0.71", "loss_scale": "4", "train_wall": "36", "gb_free": "12.4", "wall": "17965"} [2023-11-01 22:34:54,805][train_inner][INFO] - {"epoch": 24, "update": 23.621, "loss": "3.333", "ntokens": "3197.32", "nsentences": "45.32", "prob_perplexity": "98.179", "code_perplexity": "95.802", "temp": "1.239", "loss_0": "3.186", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.47045", "wps": "17630.6", "ups": "5.51", "wpb": "3197.3", "bsz": "45.3", "num_updates": "95800", "lr": "7.70127e-05", "gnorm": "0.703", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "18001"} [2023-11-01 22:35:31,065][train_inner][INFO] - {"epoch": 24, "update": 23.671, "loss": "3.33", "ntokens": "3169.36", "nsentences": "45.2", "prob_perplexity": "98.336", "code_perplexity": "95.98", "temp": "1.238", "loss_0": "3.184", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.47195", "wps": "17482.4", "ups": "5.52", "wpb": "3169.4", "bsz": "45.2", "num_updates": "96000", "lr": "7.6962e-05", "gnorm": "0.705", "loss_scale": "4", "train_wall": "36", "gb_free": "13.3", "wall": "18037"} [2023-11-01 22:36:07,338][train_inner][INFO] - {"epoch": 24, "update": 23.72, "loss": "3.306", "ntokens": "3210.12", "nsentences": "47.48", "prob_perplexity": "98.1", "code_perplexity": "95.795", "temp": "1.237", "loss_0": "3.16", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.47671", "wps": "17700.9", "ups": "5.51", "wpb": "3210.1", "bsz": "47.5", "num_updates": "96200", "lr": "7.69114e-05", "gnorm": "0.704", "loss_scale": "4", "train_wall": "36", "gb_free": "12.9", "wall": "18074"} [2023-11-01 22:36:44,129][train_inner][INFO] - {"epoch": 24, "update": 23.769, "loss": "3.432", "ntokens": "3212.68", "nsentences": "40.64", "prob_perplexity": "98.87", "code_perplexity": "96.428", "temp": "1.236", "loss_0": "3.286", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.45025", "wps": "17465.3", "ups": "5.44", "wpb": "3212.7", "bsz": "40.6", "num_updates": "96400", "lr": "7.68608e-05", "gnorm": "0.715", "loss_scale": "4", "train_wall": "36", "gb_free": "15.3", "wall": "18110"} [2023-11-01 22:37:20,175][train_inner][INFO] - {"epoch": 24, "update": 23.819, "loss": "3.296", "ntokens": "3154.08", "nsentences": "44.84", "prob_perplexity": "99.021", "code_perplexity": "96.699", "temp": "1.234", "loss_0": "3.15", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.47717", "wps": "17501.7", "ups": "5.55", "wpb": "3154.1", "bsz": "44.8", "num_updates": "96600", "lr": "7.68101e-05", "gnorm": "0.719", "loss_scale": "4", "train_wall": "35", "gb_free": "13.9", "wall": "18146"} [2023-11-01 22:37:56,371][train_inner][INFO] - {"epoch": 24, "update": 23.868, "loss": "3.359", "ntokens": "3190.64", "nsentences": "43.2", "prob_perplexity": "99.788", "code_perplexity": "97.371", "temp": "1.233", "loss_0": "3.213", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.4647", "wps": "17630.7", "ups": "5.53", "wpb": "3190.6", "bsz": "43.2", "num_updates": "96800", "lr": "7.67595e-05", "gnorm": "0.709", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "18183"} [2023-11-01 22:38:32,715][train_inner][INFO] - {"epoch": 24, "update": 23.917, "loss": "3.35", "ntokens": "3186.24", "nsentences": "43.16", "prob_perplexity": "99.507", "code_perplexity": "97.011", "temp": "1.232", "loss_0": "3.205", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.46643", "wps": "17535.1", "ups": "5.5", "wpb": "3186.2", "bsz": "43.2", "num_updates": "97000", "lr": "7.67089e-05", "gnorm": "0.715", "loss_scale": "4", "train_wall": "36", "gb_free": "13.9", "wall": "18219"} [2023-11-01 22:39:08,575][train_inner][INFO] - {"epoch": 24, "update": 23.966, "loss": "3.41", "ntokens": "3155.84", "nsentences": "42.36", "prob_perplexity": "100.083", "code_perplexity": "97.653", "temp": "1.231", "loss_0": "3.265", "loss_1": "0.122", "loss_2": "0.023", "accuracy": "0.45679", "wps": "17602", "ups": "5.58", "wpb": "3155.8", "bsz": "42.4", "num_updates": "97200", "lr": "7.66582e-05", "gnorm": "0.732", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "18255"} [2023-11-01 22:39:32,983][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 22:39:32,984][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:39:33,003][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 28 [2023-11-01 22:39:58,711][valid][INFO] - {"epoch": 24, "valid_loss": "3.147", "valid_ntokens": "3145.93", "valid_nsentences": "44.1685", "valid_prob_perplexity": "99.669", "valid_code_perplexity": "97.264", "valid_temp": "1.229", "valid_loss_0": "3", "valid_loss_1": "0.122", "valid_loss_2": "0.025", "valid_accuracy": "0.50828", "valid_wps": "55510.7", "valid_wpb": "3145.9", "valid_bsz": "44.2", "valid_num_updates": "97336", "valid_best_loss": "3.147"} [2023-11-01 22:39:58,713][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 24 @ 97336 updates [2023-11-01 22:39:58,715][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 22:40:00,143][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 22:40:01,123][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 24 @ 97336 updates, score 3.147) (writing took 2.409964661113918 seconds) [2023-11-01 22:40:01,123][fairseq_cli.train][INFO] - end of epoch 24 (average epoch stats below) [2023-11-01 22:40:01,126][train][INFO] - {"epoch": 24, "train_loss": "3.353", "train_ntokens": "3184.45", "train_nsentences": "44.2682", "train_prob_perplexity": "97.263", "train_code_perplexity": "94.929", "train_temp": "1.242", "train_loss_0": "3.206", "train_loss_1": "0.122", "train_loss_2": "0.024", "train_accuracy": "0.46744", "train_wps": "16960.4", "train_ups": "5.33", "train_wpb": "3184.4", "train_bsz": "44.3", "train_num_updates": "97336", "train_lr": "7.66238e-05", "train_gnorm": "0.711", "train_loss_scale": "4", "train_train_wall": "720", "train_gb_free": "14.4", "train_wall": "18307"} [2023-11-01 22:40:01,129][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:40:01,147][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 25 [2023-11-01 22:40:01,317][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 22:40:01,346][fairseq.trainer][INFO] - begin training epoch 25 [2023-11-01 22:40:01,346][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 22:40:12,785][train_inner][INFO] - {"epoch": 25, "update": 24.016, "loss": "3.394", "ntokens": "3208.96", "nsentences": "42.16", "prob_perplexity": "100.582", "code_perplexity": "98.069", "temp": "1.23", "loss_0": "3.249", "loss_1": "0.122", "loss_2": "0.024", "accuracy": "0.4578", "wps": "9995.5", "ups": "3.11", "wpb": "3209", "bsz": "42.2", "num_updates": "97400", "lr": "7.66076e-05", "gnorm": "0.719", "loss_scale": "4", "train_wall": "35", "gb_free": "13.8", "wall": "18319"} [2023-11-01 22:40:48,758][train_inner][INFO] - {"epoch": 25, "update": 24.065, "loss": "3.37", "ntokens": "3197.8", "nsentences": "43.28", "prob_perplexity": "100.456", "code_perplexity": "98.028", "temp": "1.228", "loss_0": "3.225", "loss_1": "0.122", "loss_2": "0.023", "accuracy": "0.46279", "wps": "17779.8", "ups": "5.56", "wpb": "3197.8", "bsz": "43.3", "num_updates": "97600", "lr": "7.6557e-05", "gnorm": "0.714", "loss_scale": "4", "train_wall": "35", "gb_free": "13.3", "wall": "18355"} [2023-11-01 22:41:23,778][train_inner][INFO] - {"epoch": 25, "update": 24.114, "loss": "3.328", "ntokens": "3151.64", "nsentences": "44.16", "prob_perplexity": "101.048", "code_perplexity": "98.638", "temp": "1.227", "loss_0": "3.183", "loss_1": "0.121", "loss_2": "0.024", "accuracy": "0.47051", "wps": "18007.7", "ups": "5.71", "wpb": "3151.6", "bsz": "44.2", "num_updates": "97800", "lr": "7.65063e-05", "gnorm": "0.716", "loss_scale": "4", "train_wall": "34", "gb_free": "14.7", "wall": "18390"} [2023-11-01 22:41:59,618][train_inner][INFO] - {"epoch": 25, "update": 24.164, "loss": "3.339", "ntokens": "3199.92", "nsentences": "44.72", "prob_perplexity": "101.854", "code_perplexity": "99.299", "temp": "1.226", "loss_0": "3.195", "loss_1": "0.121", "loss_2": "0.024", "accuracy": "0.46897", "wps": "17857.6", "ups": "5.58", "wpb": "3199.9", "bsz": "44.7", "num_updates": "98000", "lr": "7.64557e-05", "gnorm": "0.713", "loss_scale": "4", "train_wall": "35", "gb_free": "13.2", "wall": "18426"} [2023-11-01 22:42:35,339][train_inner][INFO] - {"epoch": 25, "update": 24.213, "loss": "3.318", "ntokens": "3181.24", "nsentences": "44.2", "prob_perplexity": "102.166", "code_perplexity": "99.665", "temp": "1.225", "loss_0": "3.173", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.47126", "wps": "17812.6", "ups": "5.6", "wpb": "3181.2", "bsz": "44.2", "num_updates": "98200", "lr": "7.64051e-05", "gnorm": "0.71", "loss_scale": "4", "train_wall": "35", "gb_free": "12.9", "wall": "18462"} [2023-11-01 22:43:11,353][train_inner][INFO] - {"epoch": 25, "update": 24.262, "loss": "3.323", "ntokens": "3230.48", "nsentences": "44.88", "prob_perplexity": "102.757", "code_perplexity": "100.19", "temp": "1.223", "loss_0": "3.178", "loss_1": "0.121", "loss_2": "0.024", "accuracy": "0.47045", "wps": "17941.1", "ups": "5.55", "wpb": "3230.5", "bsz": "44.9", "num_updates": "98400", "lr": "7.63544e-05", "gnorm": "0.698", "loss_scale": "4", "train_wall": "35", "gb_free": "14.2", "wall": "18498"} [2023-11-01 22:43:47,301][train_inner][INFO] - {"epoch": 25, "update": 24.312, "loss": "3.363", "ntokens": "3194.24", "nsentences": "42.92", "prob_perplexity": "102.757", "code_perplexity": "100.175", "temp": "1.222", "loss_0": "3.219", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.4633", "wps": "17772.7", "ups": "5.56", "wpb": "3194.2", "bsz": "42.9", "num_updates": "98600", "lr": "7.63038e-05", "gnorm": "0.728", "loss_scale": "4", "train_wall": "35", "gb_free": "12.6", "wall": "18534"} [2023-11-01 22:44:23,426][train_inner][INFO] - {"epoch": 25, "update": 24.361, "loss": "3.34", "ntokens": "3176.24", "nsentences": "43", "prob_perplexity": "102.937", "code_perplexity": "100.323", "temp": "1.221", "loss_0": "3.195", "loss_1": "0.121", "loss_2": "0.024", "accuracy": "0.46696", "wps": "17585.6", "ups": "5.54", "wpb": "3176.2", "bsz": "43", "num_updates": "98800", "lr": "7.62532e-05", "gnorm": "0.72", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "18570"} [2023-11-01 22:44:59,671][train_inner][INFO] - {"epoch": 25, "update": 24.41, "loss": "3.352", "ntokens": "3219.8", "nsentences": "43.52", "prob_perplexity": "103.586", "code_perplexity": "101.053", "temp": "1.22", "loss_0": "3.208", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.46448", "wps": "17768", "ups": "5.52", "wpb": "3219.8", "bsz": "43.5", "num_updates": "99000", "lr": "7.62025e-05", "gnorm": "0.711", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "18606"} [2023-11-01 22:45:35,535][train_inner][INFO] - {"epoch": 25, "update": 24.46, "loss": "3.268", "ntokens": "3184.24", "nsentences": "48.36", "prob_perplexity": "104.115", "code_perplexity": "101.62", "temp": "1.219", "loss_0": "3.125", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.48256", "wps": "17758.4", "ups": "5.58", "wpb": "3184.2", "bsz": "48.4", "num_updates": "99200", "lr": "7.61519e-05", "gnorm": "0.704", "loss_scale": "4", "train_wall": "35", "gb_free": "13", "wall": "18642"} [2023-11-01 22:46:11,818][train_inner][INFO] - {"epoch": 25, "update": 24.509, "loss": "3.339", "ntokens": "3170.08", "nsentences": "42.52", "prob_perplexity": "103.931", "code_perplexity": "101.377", "temp": "1.217", "loss_0": "3.195", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.46546", "wps": "17475.5", "ups": "5.51", "wpb": "3170.1", "bsz": "42.5", "num_updates": "99400", "lr": "7.61013e-05", "gnorm": "0.712", "loss_scale": "4", "train_wall": "36", "gb_free": "14.1", "wall": "18678"} [2023-11-01 22:46:48,185][train_inner][INFO] - {"epoch": 25, "update": 24.558, "loss": "3.332", "ntokens": "3205.96", "nsentences": "45.04", "prob_perplexity": "104.743", "code_perplexity": "102.167", "temp": "1.216", "loss_0": "3.188", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.47", "wps": "17631.8", "ups": "5.5", "wpb": "3206", "bsz": "45", "num_updates": "99600", "lr": "7.60506e-05", "gnorm": "0.707", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "18714"} [2023-11-01 22:47:24,290][train_inner][INFO] - {"epoch": 25, "update": 24.607, "loss": "3.301", "ntokens": "3188.44", "nsentences": "44.88", "prob_perplexity": "104.616", "code_perplexity": "102.191", "temp": "1.215", "loss_0": "3.158", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.47436", "wps": "17663.2", "ups": "5.54", "wpb": "3188.4", "bsz": "44.9", "num_updates": "99800", "lr": "7.6e-05", "gnorm": "0.71", "loss_scale": "4", "train_wall": "35", "gb_free": "15", "wall": "18751"} [2023-11-01 22:48:00,757][train_inner][INFO] - {"epoch": 25, "update": 24.657, "loss": "3.381", "ntokens": "3194.44", "nsentences": "42.84", "prob_perplexity": "104.87", "code_perplexity": "102.349", "temp": "1.214", "loss_0": "3.237", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.46036", "wps": "17521.1", "ups": "5.48", "wpb": "3194.4", "bsz": "42.8", "num_updates": "100000", "lr": "7.59494e-05", "gnorm": "0.724", "loss_scale": "4", "train_wall": "36", "gb_free": "13.9", "wall": "18787"} [2023-11-01 22:48:00,758][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 22:48:00,760][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:48:00,780][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 29 [2023-11-01 22:48:26,915][valid][INFO] - {"epoch": 25, "valid_loss": "3.128", "valid_ntokens": "3164.22", "valid_nsentences": "44.1685", "valid_prob_perplexity": "104.327", "valid_code_perplexity": "102.065", "valid_temp": "1.213", "valid_loss_0": "2.984", "valid_loss_1": "0.121", "valid_loss_2": "0.023", "valid_accuracy": "0.50984", "valid_wps": "54894.1", "valid_wpb": "3164.2", "valid_bsz": "44.2", "valid_num_updates": "100000", "valid_best_loss": "3.128"} [2023-11-01 22:48:26,917][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 25 @ 100000 updates [2023-11-01 22:48:26,919][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_25_100000.pt [2023-11-01 22:48:28,264][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_25_100000.pt [2023-11-01 22:48:30,141][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_25_100000.pt (epoch 25 @ 100000 updates, score 3.128) (writing took 3.2242278503254056 seconds) [2023-11-01 22:49:06,804][train_inner][INFO] - {"epoch": 25, "update": 24.706, "loss": "3.293", "ntokens": "3212.24", "nsentences": "48.12", "prob_perplexity": "105.364", "code_perplexity": "102.855", "temp": "1.212", "loss_0": "3.149", "loss_1": "0.121", "loss_2": "0.023", "accuracy": "0.47916", "wps": "9727.4", "ups": "3.03", "wpb": "3212.2", "bsz": "48.1", "num_updates": "100200", "lr": "7.58987e-05", "gnorm": "0.704", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "18853"} [2023-11-01 22:49:43,225][train_inner][INFO] - {"epoch": 25, "update": 24.755, "loss": "3.318", "ntokens": "3185.64", "nsentences": "45.08", "prob_perplexity": "105.494", "code_perplexity": "102.987", "temp": "1.211", "loss_0": "3.175", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.47229", "wps": "17494.4", "ups": "5.49", "wpb": "3185.6", "bsz": "45.1", "num_updates": "100400", "lr": "7.58481e-05", "gnorm": "0.71", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "18889"} [2023-11-01 22:49:54,714][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-01 22:50:19,094][train_inner][INFO] - {"epoch": 25, "update": 24.805, "loss": "3.31", "ntokens": "3161", "nsentences": "44.8", "prob_perplexity": "105.645", "code_perplexity": "103.137", "temp": "1.21", "loss_0": "3.166", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.47451", "wps": "17626.3", "ups": "5.58", "wpb": "3161", "bsz": "44.8", "num_updates": "100600", "lr": "7.57975e-05", "gnorm": "0.714", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "18925"} [2023-11-01 22:50:55,200][train_inner][INFO] - {"epoch": 25, "update": 24.854, "loss": "3.279", "ntokens": "3159.8", "nsentences": "44.36", "prob_perplexity": "106.494", "code_perplexity": "103.93", "temp": "1.209", "loss_0": "3.135", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.47826", "wps": "17504", "ups": "5.54", "wpb": "3159.8", "bsz": "44.4", "num_updates": "100800", "lr": "7.57468e-05", "gnorm": "0.712", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "18961"} [2023-11-01 22:51:31,719][train_inner][INFO] - {"epoch": 25, "update": 24.904, "loss": "3.315", "ntokens": "3171.32", "nsentences": "43.24", "prob_perplexity": "106.989", "code_perplexity": "104.516", "temp": "1.208", "loss_0": "3.172", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.47153", "wps": "17368.9", "ups": "5.48", "wpb": "3171.3", "bsz": "43.2", "num_updates": "101000", "lr": "7.56962e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "18998"} [2023-11-01 22:52:07,907][train_inner][INFO] - {"epoch": 25, "update": 24.953, "loss": "3.393", "ntokens": "3197.16", "nsentences": "40.64", "prob_perplexity": "106.984", "code_perplexity": "104.41", "temp": "1.206", "loss_0": "3.25", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.45492", "wps": "17670.8", "ups": "5.53", "wpb": "3197.2", "bsz": "40.6", "num_updates": "101200", "lr": "7.56456e-05", "gnorm": "0.724", "loss_scale": "2", "train_wall": "36", "gb_free": "15.6", "wall": "19034"} [2023-11-01 22:52:42,466][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 22:52:42,467][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:52:42,486][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 30 [2023-11-01 22:53:08,100][valid][INFO] - {"epoch": 25, "valid_loss": "3.14", "valid_ntokens": "3165.57", "valid_nsentences": "44.1685", "valid_prob_perplexity": "106.725", "valid_code_perplexity": "104.218", "valid_temp": "1.205", "valid_loss_0": "2.995", "valid_loss_1": "0.12", "valid_loss_2": "0.025", "valid_accuracy": "0.50855", "valid_wps": "56000.8", "valid_wpb": "3165.6", "valid_bsz": "44.2", "valid_num_updates": "101391", "valid_best_loss": "3.128"} [2023-11-01 22:53:08,102][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 25 @ 101391 updates [2023-11-01 22:53:08,104][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 22:53:09,517][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 22:53:09,561][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 25 @ 101391 updates, score 3.14) (writing took 1.4587292992509902 seconds) [2023-11-01 22:53:09,562][fairseq_cli.train][INFO] - end of epoch 25 (average epoch stats below) [2023-11-01 22:53:09,564][train][INFO] - {"epoch": 25, "train_loss": "3.329", "train_ntokens": "3188.86", "train_nsentences": "44.2594", "train_prob_perplexity": "104.154", "train_code_perplexity": "101.631", "train_temp": "1.217", "train_loss_0": "3.185", "train_loss_1": "0.121", "train_loss_2": "0.023", "train_accuracy": "0.46961", "train_wps": "16400.6", "train_ups": "5.14", "train_wpb": "3188.9", "train_bsz": "44.3", "train_num_updates": "101391", "train_lr": "7.55972e-05", "train_gnorm": "0.713", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.1", "train_wall": "19096"} [2023-11-01 22:53:09,566][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 22:53:09,584][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 26 [2023-11-01 22:53:09,763][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 22:53:09,793][fairseq.trainer][INFO] - begin training epoch 26 [2023-11-01 22:53:09,794][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 22:53:11,557][train_inner][INFO] - {"epoch": 26, "update": 25.002, "loss": "3.294", "ntokens": "3198.68", "nsentences": "45.72", "prob_perplexity": "107.501", "code_perplexity": "104.962", "temp": "1.205", "loss_0": "3.151", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.47602", "wps": "10051.3", "ups": "3.14", "wpb": "3198.7", "bsz": "45.7", "num_updates": "101400", "lr": "7.55949e-05", "gnorm": "0.711", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "19098"} [2023-11-01 22:53:47,168][train_inner][INFO] - {"epoch": 26, "update": 25.052, "loss": "3.285", "ntokens": "3186.44", "nsentences": "46.4", "prob_perplexity": "108.142", "code_perplexity": "105.575", "temp": "1.204", "loss_0": "3.143", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.47839", "wps": "17905.8", "ups": "5.62", "wpb": "3186.4", "bsz": "46.4", "num_updates": "101600", "lr": "7.55443e-05", "gnorm": "0.713", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "19133"} [2023-11-01 22:54:22,822][train_inner][INFO] - {"epoch": 26, "update": 25.101, "loss": "3.33", "ntokens": "3171.36", "nsentences": "43.12", "prob_perplexity": "107.998", "code_perplexity": "105.521", "temp": "1.203", "loss_0": "3.187", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.4684", "wps": "17790.5", "ups": "5.61", "wpb": "3171.4", "bsz": "43.1", "num_updates": "101800", "lr": "7.54937e-05", "gnorm": "0.715", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "19169"} [2023-11-01 22:54:58,700][train_inner][INFO] - {"epoch": 26, "update": 25.15, "loss": "3.327", "ntokens": "3184.6", "nsentences": "45", "prob_perplexity": "108.439", "code_perplexity": "105.863", "temp": "1.202", "loss_0": "3.185", "loss_1": "0.12", "loss_2": "0.022", "accuracy": "0.46997", "wps": "17753.7", "ups": "5.57", "wpb": "3184.6", "bsz": "45", "num_updates": "102000", "lr": "7.5443e-05", "gnorm": "0.715", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "19205"} [2023-11-01 22:55:34,336][train_inner][INFO] - {"epoch": 26, "update": 25.199, "loss": "3.341", "ntokens": "3175.52", "nsentences": "44.6", "prob_perplexity": "108.578", "code_perplexity": "106.097", "temp": "1.2", "loss_0": "3.199", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.46777", "wps": "17823.1", "ups": "5.61", "wpb": "3175.5", "bsz": "44.6", "num_updates": "102200", "lr": "7.53924e-05", "gnorm": "0.708", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "19241"} [2023-11-01 22:56:10,392][train_inner][INFO] - {"epoch": 26, "update": 25.249, "loss": "3.364", "ntokens": "3219.68", "nsentences": "41.88", "prob_perplexity": "109.595", "code_perplexity": "107.033", "temp": "1.199", "loss_0": "3.222", "loss_1": "0.12", "loss_2": "0.023", "accuracy": "0.46024", "wps": "17861", "ups": "5.55", "wpb": "3219.7", "bsz": "41.9", "num_updates": "102400", "lr": "7.53418e-05", "gnorm": "0.721", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "19277"} [2023-11-01 22:56:46,140][train_inner][INFO] - {"epoch": 26, "update": 25.298, "loss": "3.326", "ntokens": "3160.08", "nsentences": "41.96", "prob_perplexity": "109.331", "code_perplexity": "106.73", "temp": "1.198", "loss_0": "3.184", "loss_1": "0.12", "loss_2": "0.022", "accuracy": "0.46706", "wps": "17680.5", "ups": "5.59", "wpb": "3160.1", "bsz": "42", "num_updates": "102600", "lr": "7.52911e-05", "gnorm": "0.716", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "19312"} [2023-11-01 22:57:21,828][train_inner][INFO] - {"epoch": 26, "update": 25.347, "loss": "3.286", "ntokens": "3190.36", "nsentences": "45.12", "prob_perplexity": "109.323", "code_perplexity": "106.798", "temp": "1.197", "loss_0": "3.144", "loss_1": "0.12", "loss_2": "0.022", "accuracy": "0.47683", "wps": "17880", "ups": "5.6", "wpb": "3190.4", "bsz": "45.1", "num_updates": "102800", "lr": "7.52405e-05", "gnorm": "0.711", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "19348"} [2023-11-01 22:57:58,195][train_inner][INFO] - {"epoch": 26, "update": 25.397, "loss": "3.326", "ntokens": "3178.44", "nsentences": "42.92", "prob_perplexity": "109.361", "code_perplexity": "106.771", "temp": "1.196", "loss_0": "3.184", "loss_1": "0.12", "loss_2": "0.022", "accuracy": "0.46766", "wps": "17481.2", "ups": "5.5", "wpb": "3178.4", "bsz": "42.9", "num_updates": "103000", "lr": "7.51899e-05", "gnorm": "0.727", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "19384"} [2023-11-01 22:58:34,334][train_inner][INFO] - {"epoch": 26, "update": 25.446, "loss": "3.339", "ntokens": "3171.64", "nsentences": "42.84", "prob_perplexity": "109.948", "code_perplexity": "107.47", "temp": "1.194", "loss_0": "3.197", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.46627", "wps": "17553.5", "ups": "5.53", "wpb": "3171.6", "bsz": "42.8", "num_updates": "103200", "lr": "7.51392e-05", "gnorm": "0.721", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "19421"} [2023-11-01 22:59:11,141][train_inner][INFO] - {"epoch": 26, "update": 25.495, "loss": "3.294", "ntokens": "3179.12", "nsentences": "44.4", "prob_perplexity": "110.376", "code_perplexity": "107.798", "temp": "1.193", "loss_0": "3.153", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.47439", "wps": "17275.4", "ups": "5.43", "wpb": "3179.1", "bsz": "44.4", "num_updates": "103400", "lr": "7.50886e-05", "gnorm": "0.717", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "19457"} [2023-11-01 22:59:47,583][train_inner][INFO] - {"epoch": 26, "update": 25.545, "loss": "3.314", "ntokens": "3174.6", "nsentences": "43.44", "prob_perplexity": "110.663", "code_perplexity": "108.01", "temp": "1.192", "loss_0": "3.173", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.47049", "wps": "17423.5", "ups": "5.49", "wpb": "3174.6", "bsz": "43.4", "num_updates": "103600", "lr": "7.5038e-05", "gnorm": "0.719", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "19494"} [2023-11-01 23:00:24,488][train_inner][INFO] - {"epoch": 26, "update": 25.594, "loss": "3.347", "ntokens": "3221.04", "nsentences": "43.76", "prob_perplexity": "111.083", "code_perplexity": "108.452", "temp": "1.191", "loss_0": "3.206", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.46537", "wps": "17457.2", "ups": "5.42", "wpb": "3221", "bsz": "43.8", "num_updates": "103800", "lr": "7.49873e-05", "gnorm": "0.71", "loss_scale": "2", "train_wall": "36", "gb_free": "15.3", "wall": "19531"} [2023-11-01 23:01:00,681][train_inner][INFO] - {"epoch": 26, "update": 25.643, "loss": "3.2", "ntokens": "3158.72", "nsentences": "49.48", "prob_perplexity": "111.493", "code_perplexity": "108.964", "temp": "1.19", "loss_0": "3.059", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.49568", "wps": "17455.6", "ups": "5.53", "wpb": "3158.7", "bsz": "49.5", "num_updates": "104000", "lr": "7.49367e-05", "gnorm": "0.707", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "19567"} [2023-11-01 23:01:37,301][train_inner][INFO] - {"epoch": 26, "update": 25.693, "loss": "3.302", "ntokens": "3212.32", "nsentences": "44.12", "prob_perplexity": "111.786", "code_perplexity": "109.146", "temp": "1.188", "loss_0": "3.161", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.47195", "wps": "17545.2", "ups": "5.46", "wpb": "3212.3", "bsz": "44.1", "num_updates": "104200", "lr": "7.48861e-05", "gnorm": "0.717", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "19604"} [2023-11-01 23:02:13,606][train_inner][INFO] - {"epoch": 26, "update": 25.742, "loss": "3.272", "ntokens": "3211.12", "nsentences": "45.64", "prob_perplexity": "111.99", "code_perplexity": "109.363", "temp": "1.187", "loss_0": "3.131", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.47882", "wps": "17690.9", "ups": "5.51", "wpb": "3211.1", "bsz": "45.6", "num_updates": "104400", "lr": "7.48354e-05", "gnorm": "0.711", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "19640"} [2023-11-01 23:02:50,257][train_inner][INFO] - {"epoch": 26, "update": 25.791, "loss": "3.348", "ntokens": "3226.8", "nsentences": "42.32", "prob_perplexity": "112.451", "code_perplexity": "109.841", "temp": "1.186", "loss_0": "3.207", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.4628", "wps": "17609.1", "ups": "5.46", "wpb": "3226.8", "bsz": "42.3", "num_updates": "104600", "lr": "7.47848e-05", "gnorm": "0.706", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "19676"} [2023-11-01 23:03:26,741][train_inner][INFO] - {"epoch": 26, "update": 25.84, "loss": "3.265", "ntokens": "3175.68", "nsentences": "45.6", "prob_perplexity": "112.601", "code_perplexity": "109.946", "temp": "1.185", "loss_0": "3.124", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.47984", "wps": "17409.9", "ups": "5.48", "wpb": "3175.7", "bsz": "45.6", "num_updates": "104800", "lr": "7.47342e-05", "gnorm": "0.713", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "19713"} [2023-11-01 23:04:02,825][train_inner][INFO] - {"epoch": 26, "update": 25.89, "loss": "3.333", "ntokens": "3215.6", "nsentences": "43.12", "prob_perplexity": "113.194", "code_perplexity": "110.509", "temp": "1.184", "loss_0": "3.192", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.46574", "wps": "17823.7", "ups": "5.54", "wpb": "3215.6", "bsz": "43.1", "num_updates": "105000", "lr": "7.46835e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "19749"} [2023-11-01 23:04:39,351][train_inner][INFO] - {"epoch": 26, "update": 25.939, "loss": "3.307", "ntokens": "3178.24", "nsentences": "42.2", "prob_perplexity": "113.567", "code_perplexity": "110.88", "temp": "1.183", "loss_0": "3.167", "loss_1": "0.119", "loss_2": "0.022", "accuracy": "0.46898", "wps": "17404.2", "ups": "5.48", "wpb": "3178.2", "bsz": "42.2", "num_updates": "105200", "lr": "7.46329e-05", "gnorm": "0.713", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "19786"} [2023-11-01 23:05:15,661][train_inner][INFO] - {"epoch": 26, "update": 25.988, "loss": "3.238", "ntokens": "3176.08", "nsentences": "46.12", "prob_perplexity": "113.963", "code_perplexity": "111.216", "temp": "1.181", "loss_0": "3.098", "loss_1": "0.119", "loss_2": "0.021", "accuracy": "0.48354", "wps": "17495.2", "ups": "5.51", "wpb": "3176.1", "bsz": "46.1", "num_updates": "105400", "lr": "7.45823e-05", "gnorm": "0.716", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "19822"} [2023-11-01 23:05:24,229][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 23:05:24,231][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:05:24,248][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 31 [2023-11-01 23:05:50,219][valid][INFO] - {"epoch": 26, "valid_loss": "3.111", "valid_ntokens": "3173.71", "valid_nsentences": "44.1685", "valid_prob_perplexity": "112.658", "valid_code_perplexity": "110.421", "valid_temp": "1.18", "valid_loss_0": "2.97", "valid_loss_1": "0.119", "valid_loss_2": "0.021", "valid_accuracy": "0.51021", "valid_wps": "55355", "valid_wpb": "3173.7", "valid_bsz": "44.2", "valid_num_updates": "105447", "valid_best_loss": "3.111"} [2023-11-01 23:05:50,221][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 26 @ 105447 updates [2023-11-01 23:05:50,223][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:05:51,643][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:05:52,578][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 26 @ 105447 updates, score 3.111) (writing took 2.357001183088869 seconds) [2023-11-01 23:05:52,579][fairseq_cli.train][INFO] - end of epoch 26 (average epoch stats below) [2023-11-01 23:05:52,581][train][INFO] - {"epoch": 26, "train_loss": "3.305", "train_ntokens": "3187.44", "train_nsentences": "44.2682", "train_prob_perplexity": "110.727", "train_code_perplexity": "108.133", "train_temp": "1.193", "train_loss_0": "3.164", "train_loss_1": "0.119", "train_loss_2": "0.022", "train_accuracy": "0.47243", "train_wps": "16943.7", "train_ups": "5.32", "train_wpb": "3187.4", "train_bsz": "44.3", "train_num_updates": "105447", "train_lr": "7.45704e-05", "train_gnorm": "0.715", "train_loss_scale": "2", "train_train_wall": "721", "train_gb_free": "14.6", "train_wall": "19859"} [2023-11-01 23:05:52,584][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:05:52,603][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 27 [2023-11-01 23:05:52,794][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 23:05:52,825][fairseq.trainer][INFO] - begin training epoch 27 [2023-11-01 23:05:52,826][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 23:06:20,278][train_inner][INFO] - {"epoch": 27, "update": 26.038, "loss": "3.216", "ntokens": "3163.04", "nsentences": "46.52", "prob_perplexity": "114.037", "code_perplexity": "111.384", "temp": "1.18", "loss_0": "3.076", "loss_1": "0.119", "loss_2": "0.021", "accuracy": "0.48937", "wps": "9790.5", "ups": "3.1", "wpb": "3163", "bsz": "46.5", "num_updates": "105600", "lr": "7.45316e-05", "gnorm": "0.71", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "19887"} [2023-11-01 23:06:56,052][train_inner][INFO] - {"epoch": 27, "update": 26.087, "loss": "3.299", "ntokens": "3173.56", "nsentences": "43.44", "prob_perplexity": "114.361", "code_perplexity": "111.613", "temp": "1.179", "loss_0": "3.159", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.4718", "wps": "17751.1", "ups": "5.59", "wpb": "3173.6", "bsz": "43.4", "num_updates": "105800", "lr": "7.4481e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "19922"} [2023-11-01 23:07:31,720][train_inner][INFO] - {"epoch": 27, "update": 26.136, "loss": "3.301", "ntokens": "3183.16", "nsentences": "45.32", "prob_perplexity": "115", "code_perplexity": "112.21", "temp": "1.178", "loss_0": "3.162", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47346", "wps": "17849.4", "ups": "5.61", "wpb": "3183.2", "bsz": "45.3", "num_updates": "106000", "lr": "7.44304e-05", "gnorm": "0.72", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "19958"} [2023-11-01 23:08:07,558][train_inner][INFO] - {"epoch": 27, "update": 26.186, "loss": "3.302", "ntokens": "3204.16", "nsentences": "44.52", "prob_perplexity": "115.262", "code_perplexity": "112.423", "temp": "1.177", "loss_0": "3.162", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47169", "wps": "17882.4", "ups": "5.58", "wpb": "3204.2", "bsz": "44.5", "num_updates": "106200", "lr": "7.43797e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "35", "gb_free": "16.2", "wall": "19994"} [2023-11-01 23:08:44,208][train_inner][INFO] - {"epoch": 27, "update": 26.235, "loss": "3.353", "ntokens": "3211.44", "nsentences": "41.28", "prob_perplexity": "115.11", "code_perplexity": "112.335", "temp": "1.175", "loss_0": "3.214", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.46034", "wps": "17526.2", "ups": "5.46", "wpb": "3211.4", "bsz": "41.3", "num_updates": "106400", "lr": "7.43291e-05", "gnorm": "0.716", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "20030"} [2023-11-01 23:09:19,954][train_inner][INFO] - {"epoch": 27, "update": 26.284, "loss": "3.271", "ntokens": "3163.68", "nsentences": "44.8", "prob_perplexity": "116.038", "code_perplexity": "113.293", "temp": "1.174", "loss_0": "3.131", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47784", "wps": "17702.1", "ups": "5.6", "wpb": "3163.7", "bsz": "44.8", "num_updates": "106600", "lr": "7.42785e-05", "gnorm": "0.712", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "20066"} [2023-11-01 23:09:55,878][train_inner][INFO] - {"epoch": 27, "update": 26.334, "loss": "3.291", "ntokens": "3184.76", "nsentences": "44.2", "prob_perplexity": "115.65", "code_perplexity": "112.902", "temp": "1.173", "loss_0": "3.152", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47395", "wps": "17731.6", "ups": "5.57", "wpb": "3184.8", "bsz": "44.2", "num_updates": "106800", "lr": "7.42278e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "20102"} [2023-11-01 23:10:31,856][train_inner][INFO] - {"epoch": 27, "update": 26.383, "loss": "3.203", "ntokens": "3134.08", "nsentences": "45.56", "prob_perplexity": "116.166", "code_perplexity": "113.371", "temp": "1.172", "loss_0": "3.064", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.48919", "wps": "17423", "ups": "5.56", "wpb": "3134.1", "bsz": "45.6", "num_updates": "107000", "lr": "7.41772e-05", "gnorm": "0.717", "loss_scale": "2", "train_wall": "35", "gb_free": "12.3", "wall": "20138"} [2023-11-01 23:11:08,478][train_inner][INFO] - {"epoch": 27, "update": 26.432, "loss": "3.306", "ntokens": "3223.16", "nsentences": "43.28", "prob_perplexity": "116.63", "code_perplexity": "113.879", "temp": "1.171", "loss_0": "3.166", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.4694", "wps": "17603.2", "ups": "5.46", "wpb": "3223.2", "bsz": "43.3", "num_updates": "107200", "lr": "7.41266e-05", "gnorm": "0.706", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "20175"} [2023-11-01 23:11:45,064][train_inner][INFO] - {"epoch": 27, "update": 26.482, "loss": "3.277", "ntokens": "3199.28", "nsentences": "43.16", "prob_perplexity": "117.048", "code_perplexity": "114.288", "temp": "1.17", "loss_0": "3.138", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47387", "wps": "17490.4", "ups": "5.47", "wpb": "3199.3", "bsz": "43.2", "num_updates": "107400", "lr": "7.40759e-05", "gnorm": "0.717", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "20211"} [2023-11-01 23:12:21,303][train_inner][INFO] - {"epoch": 27, "update": 26.531, "loss": "3.276", "ntokens": "3177.12", "nsentences": "44.2", "prob_perplexity": "117.234", "code_perplexity": "114.44", "temp": "1.168", "loss_0": "3.137", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.4756", "wps": "17535.3", "ups": "5.52", "wpb": "3177.1", "bsz": "44.2", "num_updates": "107600", "lr": "7.40253e-05", "gnorm": "0.715", "loss_scale": "2", "train_wall": "36", "gb_free": "15.6", "wall": "20248"} [2023-11-01 23:12:57,554][train_inner][INFO] - {"epoch": 27, "update": 26.58, "loss": "3.339", "ntokens": "3184.84", "nsentences": "42.12", "prob_perplexity": "116.952", "code_perplexity": "114.163", "temp": "1.167", "loss_0": "3.2", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.46337", "wps": "17572", "ups": "5.52", "wpb": "3184.8", "bsz": "42.1", "num_updates": "107800", "lr": "7.39747e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "20284"} [2023-11-01 23:13:34,232][train_inner][INFO] - {"epoch": 27, "update": 26.629, "loss": "3.278", "ntokens": "3218.16", "nsentences": "46.44", "prob_perplexity": "118.449", "code_perplexity": "115.61", "temp": "1.166", "loss_0": "3.14", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47713", "wps": "17549.5", "ups": "5.45", "wpb": "3218.2", "bsz": "46.4", "num_updates": "108000", "lr": "7.39241e-05", "gnorm": "0.703", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "20320"} [2023-11-01 23:14:10,580][train_inner][INFO] - {"epoch": 27, "update": 26.679, "loss": "3.278", "ntokens": "3164.68", "nsentences": "44.56", "prob_perplexity": "118.536", "code_perplexity": "115.598", "temp": "1.165", "loss_0": "3.14", "loss_1": "0.118", "loss_2": "0.021", "accuracy": "0.47686", "wps": "17414.4", "ups": "5.5", "wpb": "3164.7", "bsz": "44.6", "num_updates": "108200", "lr": "7.38734e-05", "gnorm": "0.717", "loss_scale": "2", "train_wall": "36", "gb_free": "15.2", "wall": "20357"} [2023-11-01 23:14:46,370][train_inner][INFO] - {"epoch": 27, "update": 26.728, "loss": "3.292", "ntokens": "3192.32", "nsentences": "43.92", "prob_perplexity": "119.483", "code_perplexity": "116.644", "temp": "1.164", "loss_0": "3.154", "loss_1": "0.117", "loss_2": "0.021", "accuracy": "0.47238", "wps": "17840.2", "ups": "5.59", "wpb": "3192.3", "bsz": "43.9", "num_updates": "108400", "lr": "7.38228e-05", "gnorm": "0.719", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "20393"} [2023-11-01 23:15:22,239][train_inner][INFO] - {"epoch": 27, "update": 26.777, "loss": "3.324", "ntokens": "3206.64", "nsentences": "43.2", "prob_perplexity": "118.689", "code_perplexity": "115.812", "temp": "1.163", "loss_0": "3.185", "loss_1": "0.117", "loss_2": "0.021", "accuracy": "0.46639", "wps": "17880.8", "ups": "5.58", "wpb": "3206.6", "bsz": "43.2", "num_updates": "108600", "lr": "7.37722e-05", "gnorm": "0.711", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "20428"} [2023-11-01 23:15:58,698][train_inner][INFO] - {"epoch": 27, "update": 26.827, "loss": "3.286", "ntokens": "3196.2", "nsentences": "44.76", "prob_perplexity": "119.318", "code_perplexity": "116.479", "temp": "1.161", "loss_0": "3.148", "loss_1": "0.117", "loss_2": "0.021", "accuracy": "0.47587", "wps": "17534.2", "ups": "5.49", "wpb": "3196.2", "bsz": "44.8", "num_updates": "108800", "lr": "7.37215e-05", "gnorm": "0.716", "loss_scale": "2", "train_wall": "36", "gb_free": "15.9", "wall": "20465"} [2023-11-01 23:16:34,966][train_inner][INFO] - {"epoch": 27, "update": 26.876, "loss": "3.223", "ntokens": "3172.56", "nsentences": "47.12", "prob_perplexity": "119.562", "code_perplexity": "116.725", "temp": "1.16", "loss_0": "3.085", "loss_1": "0.117", "loss_2": "0.021", "accuracy": "0.48752", "wps": "17496.1", "ups": "5.51", "wpb": "3172.6", "bsz": "47.1", "num_updates": "109000", "lr": "7.36709e-05", "gnorm": "0.712", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "20501"} [2023-11-01 23:17:10,809][train_inner][INFO] - {"epoch": 27, "update": 26.925, "loss": "3.324", "ntokens": "3227.56", "nsentences": "43.84", "prob_perplexity": "119.87", "code_perplexity": "117.009", "temp": "1.159", "loss_0": "3.187", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.4668", "wps": "18010.4", "ups": "5.58", "wpb": "3227.6", "bsz": "43.8", "num_updates": "109200", "lr": "7.36203e-05", "gnorm": "0.719", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "20537"} [2023-11-01 23:17:47,024][train_inner][INFO] - {"epoch": 27, "update": 26.975, "loss": "3.221", "ntokens": "3177.76", "nsentences": "45.48", "prob_perplexity": "120.499", "code_perplexity": "117.691", "temp": "1.158", "loss_0": "3.084", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.4847", "wps": "17550.4", "ups": "5.52", "wpb": "3177.8", "bsz": "45.5", "num_updates": "109400", "lr": "7.35696e-05", "gnorm": "0.715", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "20573"} [2023-11-01 23:18:05,378][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 23:18:05,380][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:18:05,396][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 32 [2023-11-01 23:18:30,798][valid][INFO] - {"epoch": 27, "valid_loss": "3.092", "valid_ntokens": "3164.1", "valid_nsentences": "44.1685", "valid_prob_perplexity": "119.804", "valid_code_perplexity": "117.087", "valid_temp": "1.157", "valid_loss_0": "2.953", "valid_loss_1": "0.117", "valid_loss_2": "0.021", "valid_accuracy": "0.51181", "valid_wps": "56453.1", "valid_wpb": "3164.1", "valid_bsz": "44.2", "valid_num_updates": "109503", "valid_best_loss": "3.092"} [2023-11-01 23:18:30,800][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 27 @ 109503 updates [2023-11-01 23:18:30,802][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:18:32,214][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:18:33,195][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 27 @ 109503 updates, score 3.092) (writing took 2.3955133105628192 seconds) [2023-11-01 23:18:33,196][fairseq_cli.train][INFO] - end of epoch 27 (average epoch stats below) [2023-11-01 23:18:33,198][train][INFO] - {"epoch": 27, "train_loss": "3.285", "train_ntokens": "3188.65", "train_nsentences": "44.2682", "train_prob_perplexity": "117.313", "train_code_perplexity": "114.505", "train_temp": "1.169", "train_loss_0": "3.146", "train_loss_1": "0.118", "train_loss_2": "0.021", "train_accuracy": "0.47438", "train_wps": "17003.5", "train_ups": "5.33", "train_wpb": "3188.6", "train_bsz": "44.3", "train_num_updates": "109503", "train_lr": "7.35435e-05", "train_gnorm": "0.716", "train_loss_scale": "2", "train_train_wall": "720", "train_gb_free": "13.2", "train_wall": "20619"} [2023-11-01 23:18:33,201][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:18:33,230][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 28 [2023-11-01 23:18:33,436][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 23:18:33,467][fairseq.trainer][INFO] - begin training epoch 28 [2023-11-01 23:18:33,468][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 23:18:50,929][train_inner][INFO] - {"epoch": 28, "update": 27.024, "loss": "3.323", "ntokens": "3191", "nsentences": "41.64", "prob_perplexity": "120.623", "code_perplexity": "117.748", "temp": "1.157", "loss_0": "3.185", "loss_1": "0.117", "loss_2": "0.021", "accuracy": "0.46572", "wps": "9987.2", "ups": "3.13", "wpb": "3191", "bsz": "41.6", "num_updates": "109600", "lr": "7.3519e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "20637"} [2023-11-01 23:19:26,837][train_inner][INFO] - {"epoch": 28, "update": 27.073, "loss": "3.179", "ntokens": "3180.04", "nsentences": "47.56", "prob_perplexity": "120.451", "code_perplexity": "117.594", "temp": "1.156", "loss_0": "3.042", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.49413", "wps": "17720.2", "ups": "5.57", "wpb": "3180", "bsz": "47.6", "num_updates": "109800", "lr": "7.34684e-05", "gnorm": "0.714", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "20673"} [2023-11-01 23:20:02,923][train_inner][INFO] - {"epoch": 28, "update": 27.123, "loss": "3.301", "ntokens": "3180.72", "nsentences": "43.2", "prob_perplexity": "120.882", "code_perplexity": "118.033", "temp": "1.154", "loss_0": "3.163", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.46974", "wps": "17629.6", "ups": "5.54", "wpb": "3180.7", "bsz": "43.2", "num_updates": "110000", "lr": "7.34177e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "20709"} [2023-11-01 23:20:38,647][train_inner][INFO] - {"epoch": 28, "update": 27.172, "loss": "3.256", "ntokens": "3164.52", "nsentences": "45.12", "prob_perplexity": "121.113", "code_perplexity": "118.223", "temp": "1.153", "loss_0": "3.119", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.4798", "wps": "17717.6", "ups": "5.6", "wpb": "3164.5", "bsz": "45.1", "num_updates": "110200", "lr": "7.33671e-05", "gnorm": "0.715", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "20745"} [2023-11-01 23:21:14,825][train_inner][INFO] - {"epoch": 28, "update": 27.221, "loss": "3.221", "ntokens": "3184.96", "nsentences": "45.6", "prob_perplexity": "121.326", "code_perplexity": "118.526", "temp": "1.152", "loss_0": "3.084", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.48593", "wps": "17608.3", "ups": "5.53", "wpb": "3185", "bsz": "45.6", "num_updates": "110400", "lr": "7.33165e-05", "gnorm": "0.713", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "20781"} [2023-11-01 23:21:50,792][train_inner][INFO] - {"epoch": 28, "update": 27.27, "loss": "3.326", "ntokens": "3192.56", "nsentences": "41.04", "prob_perplexity": "121.337", "code_perplexity": "118.417", "temp": "1.151", "loss_0": "3.189", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.4642", "wps": "17753.6", "ups": "5.56", "wpb": "3192.6", "bsz": "41", "num_updates": "110600", "lr": "7.32658e-05", "gnorm": "0.722", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "20817"} [2023-11-01 23:22:27,580][train_inner][INFO] - {"epoch": 28, "update": 27.32, "loss": "3.318", "ntokens": "3225.4", "nsentences": "41.04", "prob_perplexity": "121.483", "code_perplexity": "118.614", "temp": "1.15", "loss_0": "3.18", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.46459", "wps": "17536.7", "ups": "5.44", "wpb": "3225.4", "bsz": "41", "num_updates": "110800", "lr": "7.32152e-05", "gnorm": "0.721", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "20854"} [2023-11-01 23:23:04,088][train_inner][INFO] - {"epoch": 28, "update": 27.369, "loss": "3.319", "ntokens": "3188.32", "nsentences": "43", "prob_perplexity": "121.951", "code_perplexity": "118.997", "temp": "1.149", "loss_0": "3.182", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.46738", "wps": "17467.2", "ups": "5.48", "wpb": "3188.3", "bsz": "43", "num_updates": "111000", "lr": "7.31646e-05", "gnorm": "0.723", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "20890"} [2023-11-01 23:23:40,269][train_inner][INFO] - {"epoch": 28, "update": 27.418, "loss": "3.279", "ntokens": "3160.64", "nsentences": "43", "prob_perplexity": "122.249", "code_perplexity": "119.279", "temp": "1.148", "loss_0": "3.142", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.47318", "wps": "17472.3", "ups": "5.53", "wpb": "3160.6", "bsz": "43", "num_updates": "111200", "lr": "7.31139e-05", "gnorm": "0.725", "loss_scale": "2", "train_wall": "36", "gb_free": "15.4", "wall": "20927"} [2023-11-01 23:24:16,505][train_inner][INFO] - {"epoch": 28, "update": 27.468, "loss": "3.24", "ntokens": "3182.04", "nsentences": "45.52", "prob_perplexity": "122.841", "code_perplexity": "119.97", "temp": "1.146", "loss_0": "3.103", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.48276", "wps": "17564.1", "ups": "5.52", "wpb": "3182", "bsz": "45.5", "num_updates": "111400", "lr": "7.30633e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "20963"} [2023-11-01 23:24:52,732][train_inner][INFO] - {"epoch": 28, "update": 27.517, "loss": "3.259", "ntokens": "3208.04", "nsentences": "44.04", "prob_perplexity": "122.86", "code_perplexity": "119.957", "temp": "1.145", "loss_0": "3.122", "loss_1": "0.117", "loss_2": "0.02", "accuracy": "0.47739", "wps": "17711.8", "ups": "5.52", "wpb": "3208", "bsz": "44", "num_updates": "111600", "lr": "7.30127e-05", "gnorm": "0.714", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "20999"} [2023-11-01 23:25:28,917][train_inner][INFO] - {"epoch": 28, "update": 27.566, "loss": "3.329", "ntokens": "3246.96", "nsentences": "43.08", "prob_perplexity": "123.781", "code_perplexity": "120.84", "temp": "1.144", "loss_0": "3.193", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.46557", "wps": "17947.9", "ups": "5.53", "wpb": "3247", "bsz": "43.1", "num_updates": "111800", "lr": "7.2962e-05", "gnorm": "0.71", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "21035"} [2023-11-01 23:26:05,105][train_inner][INFO] - {"epoch": 28, "update": 27.616, "loss": "3.206", "ntokens": "3166.6", "nsentences": "47.16", "prob_perplexity": "123.765", "code_perplexity": "120.891", "temp": "1.143", "loss_0": "3.07", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.48926", "wps": "17501.5", "ups": "5.53", "wpb": "3166.6", "bsz": "47.2", "num_updates": "112000", "lr": "7.29114e-05", "gnorm": "0.716", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "21071"} [2023-11-01 23:26:41,000][train_inner][INFO] - {"epoch": 28, "update": 27.665, "loss": "3.304", "ntokens": "3167.08", "nsentences": "42.48", "prob_perplexity": "123.367", "code_perplexity": "120.423", "temp": "1.142", "loss_0": "3.167", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.46953", "wps": "17647.7", "ups": "5.57", "wpb": "3167.1", "bsz": "42.5", "num_updates": "112200", "lr": "7.28608e-05", "gnorm": "0.727", "loss_scale": "2", "train_wall": "35", "gb_free": "14.9", "wall": "21107"} [2023-11-01 23:27:17,290][train_inner][INFO] - {"epoch": 28, "update": 27.714, "loss": "3.227", "ntokens": "3175.88", "nsentences": "45.24", "prob_perplexity": "124.067", "code_perplexity": "121.131", "temp": "1.141", "loss_0": "3.091", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.48345", "wps": "17503.8", "ups": "5.51", "wpb": "3175.9", "bsz": "45.2", "num_updates": "112400", "lr": "7.28101e-05", "gnorm": "0.723", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "21144"} [2023-11-01 23:27:53,894][train_inner][INFO] - {"epoch": 28, "update": 27.764, "loss": "3.3", "ntokens": "3164.44", "nsentences": "41.84", "prob_perplexity": "124.183", "code_perplexity": "121.201", "temp": "1.14", "loss_0": "3.163", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.46885", "wps": "17291.1", "ups": "5.46", "wpb": "3164.4", "bsz": "41.8", "num_updates": "112600", "lr": "7.27595e-05", "gnorm": "0.723", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "21180"} [2023-11-01 23:28:30,996][train_inner][INFO] - {"epoch": 28, "update": 27.813, "loss": "3.123", "ntokens": "3147.2", "nsentences": "49.04", "prob_perplexity": "125.02", "code_perplexity": "122.03", "temp": "1.138", "loss_0": "2.988", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.50348", "wps": "16965.9", "ups": "5.39", "wpb": "3147.2", "bsz": "49", "num_updates": "112800", "lr": "7.27089e-05", "gnorm": "0.715", "loss_scale": "2", "train_wall": "36", "gb_free": "15.4", "wall": "21217"} [2023-11-01 23:29:07,832][train_inner][INFO] - {"epoch": 28, "update": 27.862, "loss": "3.247", "ntokens": "3217.92", "nsentences": "44.92", "prob_perplexity": "125.648", "code_perplexity": "122.696", "temp": "1.137", "loss_0": "3.112", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.47914", "wps": "17472.7", "ups": "5.43", "wpb": "3217.9", "bsz": "44.9", "num_updates": "113000", "lr": "7.26582e-05", "gnorm": "0.723", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "21254"} [2023-11-01 23:29:44,106][train_inner][INFO] - {"epoch": 28, "update": 27.911, "loss": "3.23", "ntokens": "3189.72", "nsentences": "44.56", "prob_perplexity": "124.791", "code_perplexity": "121.831", "temp": "1.136", "loss_0": "3.094", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.48182", "wps": "17587.7", "ups": "5.51", "wpb": "3189.7", "bsz": "44.6", "num_updates": "113200", "lr": "7.26076e-05", "gnorm": "0.72", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "21290"} [2023-11-01 23:30:20,637][train_inner][INFO] - {"epoch": 28, "update": 27.961, "loss": "3.259", "ntokens": "3232.28", "nsentences": "44.92", "prob_perplexity": "125.655", "code_perplexity": "122.766", "temp": "1.135", "loss_0": "3.123", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.47768", "wps": "17697.2", "ups": "5.48", "wpb": "3232.3", "bsz": "44.9", "num_updates": "113400", "lr": "7.2557e-05", "gnorm": "0.744", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "21327"} [2023-11-01 23:30:49,379][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 23:30:49,380][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:30:49,400][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 33 [2023-11-01 23:31:14,926][valid][INFO] - {"epoch": 28, "valid_loss": "3.091", "valid_ntokens": "3164.84", "valid_nsentences": "44.1685", "valid_prob_perplexity": "123.795", "valid_code_perplexity": "121.093", "valid_temp": "1.134", "valid_loss_0": "2.954", "valid_loss_1": "0.116", "valid_loss_2": "0.02", "valid_accuracy": "0.51307", "valid_wps": "56218.3", "valid_wpb": "3164.8", "valid_bsz": "44.2", "valid_num_updates": "113559", "valid_best_loss": "3.091"} [2023-11-01 23:31:14,927][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 28 @ 113559 updates [2023-11-01 23:31:14,929][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:31:16,342][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:31:17,305][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 28 @ 113559 updates, score 3.091) (writing took 2.377332412172109 seconds) [2023-11-01 23:31:17,306][fairseq_cli.train][INFO] - end of epoch 28 (average epoch stats below) [2023-11-01 23:31:17,308][train][INFO] - {"epoch": 28, "train_loss": "3.262", "train_ntokens": "3189.94", "train_nsentences": "44.2682", "train_prob_perplexity": "123.041", "train_code_perplexity": "120.129", "train_temp": "1.145", "train_loss_0": "3.126", "train_loss_1": "0.117", "train_loss_2": "0.02", "train_accuracy": "0.47721", "train_wps": "16932.7", "train_ups": "5.31", "train_wpb": "3189.9", "train_bsz": "44.3", "train_num_updates": "113559", "train_lr": "7.25167e-05", "train_gnorm": "0.722", "train_loss_scale": "2", "train_train_wall": "723", "train_gb_free": "13.5", "train_wall": "21384"} [2023-11-01 23:31:17,311][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:31:17,333][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 29 [2023-11-01 23:31:17,523][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 23:31:17,554][fairseq.trainer][INFO] - begin training epoch 29 [2023-11-01 23:31:17,555][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 23:31:24,980][train_inner][INFO] - {"epoch": 29, "update": 28.01, "loss": "3.233", "ntokens": "3202.4", "nsentences": "45.88", "prob_perplexity": "125.527", "code_perplexity": "122.578", "temp": "1.134", "loss_0": "3.097", "loss_1": "0.116", "loss_2": "0.02", "accuracy": "0.48267", "wps": "9954.6", "ups": "3.11", "wpb": "3202.4", "bsz": "45.9", "num_updates": "113600", "lr": "7.25063e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "21391"} [2023-11-01 23:32:00,815][train_inner][INFO] - {"epoch": 29, "update": 28.059, "loss": "3.228", "ntokens": "3196.84", "nsentences": "44.84", "prob_perplexity": "126.118", "code_perplexity": "123.205", "temp": "1.133", "loss_0": "3.093", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.48318", "wps": "17843", "ups": "5.58", "wpb": "3196.8", "bsz": "44.8", "num_updates": "113800", "lr": "7.24557e-05", "gnorm": "0.72", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "21427"} [2023-11-01 23:32:36,428][train_inner][INFO] - {"epoch": 29, "update": 28.109, "loss": "3.235", "ntokens": "3171.92", "nsentences": "44.76", "prob_perplexity": "126.493", "code_perplexity": "123.642", "temp": "1.132", "loss_0": "3.1", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.48138", "wps": "17823.8", "ups": "5.62", "wpb": "3171.9", "bsz": "44.8", "num_updates": "114000", "lr": "7.24051e-05", "gnorm": "0.725", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "21463"} [2023-11-01 23:33:12,409][train_inner][INFO] - {"epoch": 29, "update": 28.158, "loss": "3.174", "ntokens": "3198.4", "nsentences": "47", "prob_perplexity": "126.64", "code_perplexity": "123.675", "temp": "1.13", "loss_0": "3.039", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.49272", "wps": "17779.5", "ups": "5.56", "wpb": "3198.4", "bsz": "47", "num_updates": "114200", "lr": "7.23544e-05", "gnorm": "0.711", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "21499"} [2023-11-01 23:33:48,468][train_inner][INFO] - {"epoch": 29, "update": 28.207, "loss": "3.188", "ntokens": "3166.88", "nsentences": "46.8", "prob_perplexity": "126.466", "code_perplexity": "123.595", "temp": "1.129", "loss_0": "3.053", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.49265", "wps": "17566.3", "ups": "5.55", "wpb": "3166.9", "bsz": "46.8", "num_updates": "114400", "lr": "7.23038e-05", "gnorm": "0.723", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "21535"} [2023-11-01 23:34:24,202][train_inner][INFO] - {"epoch": 29, "update": 28.257, "loss": "3.22", "ntokens": "3155.92", "nsentences": "43.32", "prob_perplexity": "126.794", "code_perplexity": "123.92", "temp": "1.128", "loss_0": "3.085", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.48315", "wps": "17664.3", "ups": "5.6", "wpb": "3155.9", "bsz": "43.3", "num_updates": "114600", "lr": "7.22532e-05", "gnorm": "0.72", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "21570"} [2023-11-01 23:35:00,235][train_inner][INFO] - {"epoch": 29, "update": 28.306, "loss": "3.239", "ntokens": "3172.44", "nsentences": "42.56", "prob_perplexity": "126.993", "code_perplexity": "124.001", "temp": "1.127", "loss_0": "3.104", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.47886", "wps": "17609.6", "ups": "5.55", "wpb": "3172.4", "bsz": "42.6", "num_updates": "114800", "lr": "7.22025e-05", "gnorm": "0.725", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "21606"} [2023-11-01 23:35:37,195][train_inner][INFO] - {"epoch": 29, "update": 28.355, "loss": "3.304", "ntokens": "3236.24", "nsentences": "41.64", "prob_perplexity": "127.041", "code_perplexity": "124.149", "temp": "1.126", "loss_0": "3.17", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.46651", "wps": "17513.4", "ups": "5.41", "wpb": "3236.2", "bsz": "41.6", "num_updates": "115000", "lr": "7.21519e-05", "gnorm": "0.721", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "21643"} [2023-11-01 23:36:13,713][train_inner][INFO] - {"epoch": 29, "update": 28.405, "loss": "3.245", "ntokens": "3186.32", "nsentences": "45.16", "prob_perplexity": "127.294", "code_perplexity": "124.291", "temp": "1.125", "loss_0": "3.11", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.48064", "wps": "17452.1", "ups": "5.48", "wpb": "3186.3", "bsz": "45.2", "num_updates": "115200", "lr": "7.21013e-05", "gnorm": "0.777", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "21680"} [2023-11-01 23:36:49,601][train_inner][INFO] - {"epoch": 29, "update": 28.454, "loss": "3.241", "ntokens": "3158.24", "nsentences": "45.68", "prob_perplexity": "127.474", "code_perplexity": "124.567", "temp": "1.124", "loss_0": "3.106", "loss_1": "0.116", "loss_2": "0.019", "accuracy": "0.48217", "wps": "17601.2", "ups": "5.57", "wpb": "3158.2", "bsz": "45.7", "num_updates": "115400", "lr": "7.20506e-05", "gnorm": "0.74", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "21716"} [2023-11-01 23:37:25,484][train_inner][INFO] - {"epoch": 29, "update": 28.503, "loss": "3.269", "ntokens": "3171", "nsentences": "42.48", "prob_perplexity": "128.111", "code_perplexity": "125.273", "temp": "1.123", "loss_0": "3.134", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47454", "wps": "17675.3", "ups": "5.57", "wpb": "3171", "bsz": "42.5", "num_updates": "115600", "lr": "7.2e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "21752"} [2023-11-01 23:38:01,570][train_inner][INFO] - {"epoch": 29, "update": 28.553, "loss": "3.281", "ntokens": "3200.8", "nsentences": "42.08", "prob_perplexity": "128.962", "code_perplexity": "126.026", "temp": "1.121", "loss_0": "3.147", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47087", "wps": "17740.8", "ups": "5.54", "wpb": "3200.8", "bsz": "42.1", "num_updates": "115800", "lr": "7.19494e-05", "gnorm": "0.722", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "21788"} [2023-11-01 23:38:38,248][train_inner][INFO] - {"epoch": 29, "update": 28.602, "loss": "3.249", "ntokens": "3235.92", "nsentences": "43.24", "prob_perplexity": "129.08", "code_perplexity": "126.233", "temp": "1.12", "loss_0": "3.115", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47744", "wps": "17646.4", "ups": "5.45", "wpb": "3235.9", "bsz": "43.2", "num_updates": "116000", "lr": "7.18987e-05", "gnorm": "0.719", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "21824"} [2023-11-01 23:39:14,683][train_inner][INFO] - {"epoch": 29, "update": 28.651, "loss": "3.152", "ntokens": "3182.64", "nsentences": "48.04", "prob_perplexity": "129.413", "code_perplexity": "126.542", "temp": "1.119", "loss_0": "3.018", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.49786", "wps": "17471.1", "ups": "5.49", "wpb": "3182.6", "bsz": "48", "num_updates": "116200", "lr": "7.18481e-05", "gnorm": "0.724", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "21861"} [2023-11-01 23:39:51,014][train_inner][INFO] - {"epoch": 29, "update": 28.7, "loss": "3.233", "ntokens": "3191.32", "nsentences": "45", "prob_perplexity": "129.186", "code_perplexity": "126.285", "temp": "1.118", "loss_0": "3.099", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.48248", "wps": "17569.3", "ups": "5.51", "wpb": "3191.3", "bsz": "45", "num_updates": "116400", "lr": "7.17975e-05", "gnorm": "0.723", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "21897"} [2023-11-01 23:40:27,039][train_inner][INFO] - {"epoch": 29, "update": 28.75, "loss": "3.205", "ntokens": "3198.56", "nsentences": "45.36", "prob_perplexity": "129.968", "code_perplexity": "127.044", "temp": "1.117", "loss_0": "3.071", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.48728", "wps": "17758.6", "ups": "5.55", "wpb": "3198.6", "bsz": "45.4", "num_updates": "116600", "lr": "7.17468e-05", "gnorm": "0.721", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "21933"} [2023-11-01 23:41:02,837][train_inner][INFO] - {"epoch": 29, "update": 28.799, "loss": "3.263", "ntokens": "3208.24", "nsentences": "44.12", "prob_perplexity": "129.946", "code_perplexity": "127.073", "temp": "1.116", "loss_0": "3.13", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47588", "wps": "17925.3", "ups": "5.59", "wpb": "3208.2", "bsz": "44.1", "num_updates": "116800", "lr": "7.16962e-05", "gnorm": "0.716", "loss_scale": "2", "train_wall": "35", "gb_free": "16", "wall": "21969"} [2023-11-01 23:41:38,778][train_inner][INFO] - {"epoch": 29, "update": 28.848, "loss": "3.285", "ntokens": "3202.16", "nsentences": "43", "prob_perplexity": "130.358", "code_perplexity": "127.385", "temp": "1.115", "loss_0": "3.151", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47188", "wps": "17820.2", "ups": "5.57", "wpb": "3202.2", "bsz": "43", "num_updates": "117000", "lr": "7.16456e-05", "gnorm": "0.727", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "22005"} [2023-11-01 23:42:15,370][train_inner][INFO] - {"epoch": 29, "update": 28.898, "loss": "3.261", "ntokens": "3185.4", "nsentences": "43.44", "prob_perplexity": "130.739", "code_perplexity": "127.732", "temp": "1.114", "loss_0": "3.128", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47604", "wps": "17411.4", "ups": "5.47", "wpb": "3185.4", "bsz": "43.4", "num_updates": "117200", "lr": "7.15949e-05", "gnorm": "0.723", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "22042"} [2023-11-01 23:42:51,898][train_inner][INFO] - {"epoch": 29, "update": 28.947, "loss": "3.232", "ntokens": "3184.92", "nsentences": "44.2", "prob_perplexity": "130.71", "code_perplexity": "127.819", "temp": "1.113", "loss_0": "3.098", "loss_1": "0.115", "loss_2": "0.018", "accuracy": "0.48121", "wps": "17439.4", "ups": "5.48", "wpb": "3184.9", "bsz": "44.2", "num_updates": "117400", "lr": "7.15443e-05", "gnorm": "0.726", "loss_scale": "4", "train_wall": "36", "gb_free": "14.7", "wall": "22078"} [2023-11-01 23:43:28,572][train_inner][INFO] - {"epoch": 29, "update": 28.996, "loss": "3.296", "ntokens": "3207.96", "nsentences": "42.16", "prob_perplexity": "130.658", "code_perplexity": "127.782", "temp": "1.111", "loss_0": "3.163", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.46885", "wps": "17495.2", "ups": "5.45", "wpb": "3208", "bsz": "42.2", "num_updates": "117600", "lr": "7.14937e-05", "gnorm": "0.729", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "22115"} [2023-11-01 23:43:31,353][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 23:43:31,355][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:43:31,371][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 34 [2023-11-01 23:43:56,835][valid][INFO] - {"epoch": 29, "valid_loss": "3.035", "valid_ntokens": "3162.29", "valid_nsentences": "44.1685", "valid_prob_perplexity": "129.251", "valid_code_perplexity": "126.786", "valid_temp": "1.111", "valid_loss_0": "2.902", "valid_loss_1": "0.115", "valid_loss_2": "0.018", "valid_accuracy": "0.52043", "valid_wps": "56266.1", "valid_wpb": "3162.3", "valid_bsz": "44.2", "valid_num_updates": "117615", "valid_best_loss": "3.035"} [2023-11-01 23:43:56,837][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 29 @ 117615 updates [2023-11-01 23:43:56,839][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:43:58,257][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-01 23:43:59,200][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 29 @ 117615 updates, score 3.035) (writing took 2.3628720361739397 seconds) [2023-11-01 23:43:59,201][fairseq_cli.train][INFO] - end of epoch 29 (average epoch stats below) [2023-11-01 23:43:59,203][train][INFO] - {"epoch": 29, "train_loss": "3.24", "train_ntokens": "3190.02", "train_nsentences": "44.2682", "train_prob_perplexity": "128.401", "train_code_perplexity": "125.491", "train_temp": "1.122", "train_loss_0": "3.105", "train_loss_1": "0.115", "train_loss_2": "0.019", "train_accuracy": "0.48033", "train_wps": "16982.3", "train_ups": "5.32", "train_wpb": "3190", "train_bsz": "44.3", "train_num_updates": "117615", "train_lr": "7.14899e-05", "train_gnorm": "0.726", "train_loss_scale": "4", "train_train_wall": "721", "train_gb_free": "12.8", "train_wall": "22145"} [2023-11-01 23:43:59,205][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:43:59,227][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 30 [2023-11-01 23:43:59,414][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 23:43:59,448][fairseq.trainer][INFO] - begin training epoch 30 [2023-11-01 23:43:59,449][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 23:44:32,726][train_inner][INFO] - {"epoch": 30, "update": 29.046, "loss": "3.257", "ntokens": "3199.72", "nsentences": "42.96", "prob_perplexity": "131.538", "code_perplexity": "128.545", "temp": "1.11", "loss_0": "3.124", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.47565", "wps": "9975.5", "ups": "3.12", "wpb": "3199.7", "bsz": "43", "num_updates": "117800", "lr": "7.1443e-05", "gnorm": "0.721", "loss_scale": "4", "train_wall": "35", "gb_free": "13.4", "wall": "22179"} [2023-11-01 23:45:08,748][train_inner][INFO] - {"epoch": 30, "update": 29.095, "loss": "3.197", "ntokens": "3169.08", "nsentences": "44.68", "prob_perplexity": "131.997", "code_perplexity": "129.039", "temp": "1.109", "loss_0": "3.064", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48638", "wps": "17596.1", "ups": "5.55", "wpb": "3169.1", "bsz": "44.7", "num_updates": "118000", "lr": "7.13924e-05", "gnorm": "0.727", "loss_scale": "4", "train_wall": "35", "gb_free": "13.2", "wall": "22215"} [2023-11-01 23:45:44,937][train_inner][INFO] - {"epoch": 30, "update": 29.144, "loss": "3.266", "ntokens": "3244.44", "nsentences": "42.92", "prob_perplexity": "132.049", "code_perplexity": "129.066", "temp": "1.108", "loss_0": "3.133", "loss_1": "0.114", "loss_2": "0.019", "accuracy": "0.47373", "wps": "17931.7", "ups": "5.53", "wpb": "3244.4", "bsz": "42.9", "num_updates": "118200", "lr": "7.13418e-05", "gnorm": "0.72", "loss_scale": "4", "train_wall": "36", "gb_free": "14.8", "wall": "22251"} [2023-11-01 23:46:21,014][train_inner][INFO] - {"epoch": 30, "update": 29.194, "loss": "3.223", "ntokens": "3168.32", "nsentences": "43.52", "prob_perplexity": "131.822", "code_perplexity": "128.758", "temp": "1.107", "loss_0": "3.09", "loss_1": "0.115", "loss_2": "0.019", "accuracy": "0.48212", "wps": "17565.2", "ups": "5.54", "wpb": "3168.3", "bsz": "43.5", "num_updates": "118400", "lr": "7.12911e-05", "gnorm": "0.735", "loss_scale": "4", "train_wall": "35", "gb_free": "15.3", "wall": "22287"} [2023-11-01 23:46:36,050][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-01 23:46:57,120][train_inner][INFO] - {"epoch": 30, "update": 29.243, "loss": "3.221", "ntokens": "3206.4", "nsentences": "45", "prob_perplexity": "132.39", "code_perplexity": "129.426", "temp": "1.106", "loss_0": "3.088", "loss_1": "0.114", "loss_2": "0.019", "accuracy": "0.48294", "wps": "17762", "ups": "5.54", "wpb": "3206.4", "bsz": "45", "num_updates": "118600", "lr": "7.12405e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "22323"} [2023-11-01 23:47:33,105][train_inner][INFO] - {"epoch": 30, "update": 29.292, "loss": "3.257", "ntokens": "3186.84", "nsentences": "41.8", "prob_perplexity": "131.988", "code_perplexity": "129.081", "temp": "1.105", "loss_0": "3.124", "loss_1": "0.114", "loss_2": "0.019", "accuracy": "0.47442", "wps": "17713.2", "ups": "5.56", "wpb": "3186.8", "bsz": "41.8", "num_updates": "118800", "lr": "7.11899e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "22359"} [2023-11-01 23:48:09,828][train_inner][INFO] - {"epoch": 30, "update": 29.342, "loss": "3.187", "ntokens": "3190.96", "nsentences": "46.32", "prob_perplexity": "132.394", "code_perplexity": "129.451", "temp": "1.104", "loss_0": "3.054", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.49041", "wps": "17379.6", "ups": "5.45", "wpb": "3191", "bsz": "46.3", "num_updates": "119000", "lr": "7.11392e-05", "gnorm": "0.727", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "22396"} [2023-11-01 23:48:46,439][train_inner][INFO] - {"epoch": 30, "update": 29.391, "loss": "3.224", "ntokens": "3184.2", "nsentences": "45.48", "prob_perplexity": "132.742", "code_perplexity": "129.854", "temp": "1.103", "loss_0": "3.091", "loss_1": "0.114", "loss_2": "0.019", "accuracy": "0.48382", "wps": "17395.9", "ups": "5.46", "wpb": "3184.2", "bsz": "45.5", "num_updates": "119200", "lr": "7.10886e-05", "gnorm": "0.817", "loss_scale": "2", "train_wall": "36", "gb_free": "15.1", "wall": "22433"} [2023-11-01 23:49:22,579][train_inner][INFO] - {"epoch": 30, "update": 29.44, "loss": "3.226", "ntokens": "3209.48", "nsentences": "45.28", "prob_perplexity": "133.436", "code_perplexity": "130.531", "temp": "1.101", "loss_0": "3.093", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48255", "wps": "17762.5", "ups": "5.53", "wpb": "3209.5", "bsz": "45.3", "num_updates": "119400", "lr": "7.1038e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "22469"} [2023-11-01 23:49:58,814][train_inner][INFO] - {"epoch": 30, "update": 29.49, "loss": "3.171", "ntokens": "3123.36", "nsentences": "45.16", "prob_perplexity": "133.09", "code_perplexity": "130.162", "temp": "1.1", "loss_0": "3.038", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.49253", "wps": "17240.3", "ups": "5.52", "wpb": "3123.4", "bsz": "45.2", "num_updates": "119600", "lr": "7.09873e-05", "gnorm": "0.761", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "22505"} [2023-11-01 23:50:35,499][train_inner][INFO] - {"epoch": 30, "update": 29.539, "loss": "3.251", "ntokens": "3175", "nsentences": "44.04", "prob_perplexity": "133.249", "code_perplexity": "130.326", "temp": "1.099", "loss_0": "3.118", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.47784", "wps": "17310.3", "ups": "5.45", "wpb": "3175", "bsz": "44", "num_updates": "119800", "lr": "7.09367e-05", "gnorm": "0.736", "loss_scale": "2", "train_wall": "36", "gb_free": "12.7", "wall": "22542"} [2023-11-01 23:51:11,487][train_inner][INFO] - {"epoch": 30, "update": 29.588, "loss": "3.22", "ntokens": "3180.6", "nsentences": "44.4", "prob_perplexity": "134.481", "code_perplexity": "131.579", "temp": "1.098", "loss_0": "3.088", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.4837", "wps": "17677.3", "ups": "5.56", "wpb": "3180.6", "bsz": "44.4", "num_updates": "120000", "lr": "7.08861e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "22578"} [2023-11-01 23:51:47,321][train_inner][INFO] - {"epoch": 30, "update": 29.638, "loss": "3.195", "ntokens": "3149.68", "nsentences": "45.48", "prob_perplexity": "133.618", "code_perplexity": "130.642", "temp": "1.097", "loss_0": "3.062", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48829", "wps": "17579.8", "ups": "5.58", "wpb": "3149.7", "bsz": "45.5", "num_updates": "120200", "lr": "7.08354e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "15.5", "wall": "22614"} [2023-11-01 23:52:24,217][train_inner][INFO] - {"epoch": 30, "update": 29.687, "loss": "3.231", "ntokens": "3251.96", "nsentences": "43.96", "prob_perplexity": "133.913", "code_perplexity": "131.107", "temp": "1.096", "loss_0": "3.099", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48026", "wps": "17628.7", "ups": "5.42", "wpb": "3252", "bsz": "44", "num_updates": "120400", "lr": "7.07848e-05", "gnorm": "0.718", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "22650"} [2023-11-01 23:53:00,552][train_inner][INFO] - {"epoch": 30, "update": 29.736, "loss": "3.219", "ntokens": "3198.76", "nsentences": "43.16", "prob_perplexity": "134.199", "code_perplexity": "131.359", "temp": "1.095", "loss_0": "3.087", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48185", "wps": "17608.2", "ups": "5.5", "wpb": "3198.8", "bsz": "43.2", "num_updates": "120600", "lr": "7.07342e-05", "gnorm": "0.729", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "22687"} [2023-11-01 23:53:36,469][train_inner][INFO] - {"epoch": 30, "update": 29.786, "loss": "3.178", "ntokens": "3172.84", "nsentences": "44.24", "prob_perplexity": "134.373", "code_perplexity": "131.499", "temp": "1.094", "loss_0": "3.047", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48968", "wps": "17668.9", "ups": "5.57", "wpb": "3172.8", "bsz": "44.2", "num_updates": "120800", "lr": "7.06835e-05", "gnorm": "0.729", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "22723"} [2023-11-01 23:54:12,860][train_inner][INFO] - {"epoch": 30, "update": 29.835, "loss": "3.216", "ntokens": "3173.68", "nsentences": "43.56", "prob_perplexity": "134.456", "code_perplexity": "131.556", "temp": "1.093", "loss_0": "3.084", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48308", "wps": "17442.9", "ups": "5.5", "wpb": "3173.7", "bsz": "43.6", "num_updates": "121000", "lr": "7.06329e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "22759"} [2023-11-01 23:54:49,217][train_inner][INFO] - {"epoch": 30, "update": 29.884, "loss": "3.22", "ntokens": "3174.96", "nsentences": "44.36", "prob_perplexity": "134.88", "code_perplexity": "131.972", "temp": "1.092", "loss_0": "3.088", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48303", "wps": "17466.5", "ups": "5.5", "wpb": "3175", "bsz": "44.4", "num_updates": "121200", "lr": "7.05823e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "22795"} [2023-11-01 23:55:25,044][train_inner][INFO] - {"epoch": 30, "update": 29.933, "loss": "3.178", "ntokens": "3157.88", "nsentences": "44.52", "prob_perplexity": "135.747", "code_perplexity": "132.831", "temp": "1.091", "loss_0": "3.047", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48897", "wps": "17629.4", "ups": "5.58", "wpb": "3157.9", "bsz": "44.5", "num_updates": "121400", "lr": "7.05316e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "22831"} [2023-11-01 23:56:01,160][train_inner][INFO] - {"epoch": 30, "update": 29.983, "loss": "3.261", "ntokens": "3207.44", "nsentences": "43.6", "prob_perplexity": "135.897", "code_perplexity": "133.003", "temp": "1.089", "loss_0": "3.129", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.4768", "wps": "17762.9", "ups": "5.54", "wpb": "3207.4", "bsz": "43.6", "num_updates": "121600", "lr": "7.0481e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "22867"} [2023-11-01 23:56:13,961][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-01 23:56:13,962][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:56:13,979][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 35 [2023-11-01 23:56:39,445][valid][INFO] - {"epoch": 30, "valid_loss": "3.041", "valid_ntokens": "3170.31", "valid_nsentences": "44.1685", "valid_prob_perplexity": "136.902", "valid_code_perplexity": "134.132", "valid_temp": "1.089", "valid_loss_0": "2.909", "valid_loss_1": "0.113", "valid_loss_2": "0.019", "valid_accuracy": "0.51873", "valid_wps": "56392.6", "valid_wpb": "3170.3", "valid_bsz": "44.2", "valid_num_updates": "121670", "valid_best_loss": "3.035"} [2023-11-01 23:56:39,447][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 30 @ 121670 updates [2023-11-01 23:56:39,449][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 23:56:40,879][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-01 23:56:40,929][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 30 @ 121670 updates, score 3.041) (writing took 1.4821034292690456 seconds) [2023-11-01 23:56:40,930][fairseq_cli.train][INFO] - end of epoch 30 (average epoch stats below) [2023-11-01 23:56:40,932][train][INFO] - {"epoch": 30, "train_loss": "3.219", "train_ntokens": "3187.16", "train_nsentences": "44.2772", "train_prob_perplexity": "133.453", "train_code_perplexity": "130.529", "train_temp": "1.1", "train_loss_0": "3.086", "train_loss_1": "0.114", "train_loss_2": "0.018", "train_accuracy": "0.48315", "train_wps": "16966.6", "train_ups": "5.32", "train_wpb": "3187.2", "train_bsz": "44.3", "train_num_updates": "121670", "train_lr": "7.04633e-05", "train_gnorm": "0.737", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "12", "train_wall": "22907"} [2023-11-01 23:56:40,934][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-01 23:56:40,954][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 31 [2023-11-01 23:56:41,156][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-01 23:56:41,188][fairseq.trainer][INFO] - begin training epoch 31 [2023-11-01 23:56:41,189][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-01 23:57:04,438][train_inner][INFO] - {"epoch": 31, "update": 30.032, "loss": "3.219", "ntokens": "3206.96", "nsentences": "44.48", "prob_perplexity": "135.833", "code_perplexity": "132.881", "temp": "1.088", "loss_0": "3.087", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.48439", "wps": "10136.6", "ups": "3.16", "wpb": "3207", "bsz": "44.5", "num_updates": "121800", "lr": "7.04304e-05", "gnorm": "0.726", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "22931"} [2023-11-01 23:57:40,322][train_inner][INFO] - {"epoch": 31, "update": 30.081, "loss": "3.151", "ntokens": "3170.76", "nsentences": "46.44", "prob_perplexity": "135.629", "code_perplexity": "132.772", "temp": "1.087", "loss_0": "3.02", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.49591", "wps": "17673.1", "ups": "5.57", "wpb": "3170.8", "bsz": "46.4", "num_updates": "122000", "lr": "7.03797e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "22967"} [2023-11-01 23:58:15,724][train_inner][INFO] - {"epoch": 31, "update": 30.131, "loss": "3.169", "ntokens": "3168.88", "nsentences": "46.08", "prob_perplexity": "136.253", "code_perplexity": "133.282", "temp": "1.086", "loss_0": "3.037", "loss_1": "0.114", "loss_2": "0.018", "accuracy": "0.49318", "wps": "17903.8", "ups": "5.65", "wpb": "3168.9", "bsz": "46.1", "num_updates": "122200", "lr": "7.03291e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "23002"} [2023-11-01 23:58:51,395][train_inner][INFO] - {"epoch": 31, "update": 30.18, "loss": "3.175", "ntokens": "3187.48", "nsentences": "43.48", "prob_perplexity": "137.261", "code_perplexity": "134.306", "temp": "1.085", "loss_0": "3.044", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.48847", "wps": "17872.4", "ups": "5.61", "wpb": "3187.5", "bsz": "43.5", "num_updates": "122400", "lr": "7.02785e-05", "gnorm": "0.722", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "23038"} [2023-11-01 23:59:27,089][train_inner][INFO] - {"epoch": 31, "update": 30.229, "loss": "3.219", "ntokens": "3200.48", "nsentences": "42.72", "prob_perplexity": "136.539", "code_perplexity": "133.587", "temp": "1.084", "loss_0": "3.088", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.48102", "wps": "17941.7", "ups": "5.61", "wpb": "3200.5", "bsz": "42.7", "num_updates": "122600", "lr": "7.02278e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "23073"} [2023-11-02 00:00:03,257][train_inner][INFO] - {"epoch": 31, "update": 30.279, "loss": "3.227", "ntokens": "3226.88", "nsentences": "44.36", "prob_perplexity": "136.988", "code_perplexity": "134.059", "temp": "1.083", "loss_0": "3.096", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.48186", "wps": "17844.6", "ups": "5.53", "wpb": "3226.9", "bsz": "44.4", "num_updates": "122800", "lr": "7.01772e-05", "gnorm": "0.728", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "23109"} [2023-11-02 00:00:39,402][train_inner][INFO] - {"epoch": 31, "update": 30.328, "loss": "3.216", "ntokens": "3208.96", "nsentences": "44.48", "prob_perplexity": "136.943", "code_perplexity": "134.04", "temp": "1.082", "loss_0": "3.085", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.4831", "wps": "17757.4", "ups": "5.53", "wpb": "3209", "bsz": "44.5", "num_updates": "123000", "lr": "7.01266e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "23146"} [2023-11-02 00:01:15,613][train_inner][INFO] - {"epoch": 31, "update": 30.377, "loss": "3.212", "ntokens": "3175.8", "nsentences": "43.28", "prob_perplexity": "137.791", "code_perplexity": "134.871", "temp": "1.081", "loss_0": "3.081", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.48293", "wps": "17541.4", "ups": "5.52", "wpb": "3175.8", "bsz": "43.3", "num_updates": "123200", "lr": "7.00759e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "23182"} [2023-11-02 00:01:51,230][train_inner][INFO] - {"epoch": 31, "update": 30.427, "loss": "3.166", "ntokens": "3126.6", "nsentences": "43.84", "prob_perplexity": "138.045", "code_perplexity": "135.198", "temp": "1.08", "loss_0": "3.035", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.49179", "wps": "17558.2", "ups": "5.62", "wpb": "3126.6", "bsz": "43.8", "num_updates": "123400", "lr": "7.00253e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "35", "gb_free": "12.6", "wall": "23217"} [2023-11-02 00:02:27,787][train_inner][INFO] - {"epoch": 31, "update": 30.476, "loss": "3.164", "ntokens": "3185.8", "nsentences": "46.44", "prob_perplexity": "138.452", "code_perplexity": "135.524", "temp": "1.079", "loss_0": "3.033", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.49354", "wps": "17429.9", "ups": "5.47", "wpb": "3185.8", "bsz": "46.4", "num_updates": "123600", "lr": "6.99747e-05", "gnorm": "0.731", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "23254"} [2023-11-02 00:03:04,268][train_inner][INFO] - {"epoch": 31, "update": 30.525, "loss": "3.245", "ntokens": "3184.64", "nsentences": "43.4", "prob_perplexity": "138.2", "code_perplexity": "135.359", "temp": "1.078", "loss_0": "3.115", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.47797", "wps": "17460.6", "ups": "5.48", "wpb": "3184.6", "bsz": "43.4", "num_updates": "123800", "lr": "6.99241e-05", "gnorm": "0.737", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "23290"} [2023-11-02 00:03:40,781][train_inner][INFO] - {"epoch": 31, "update": 30.574, "loss": "3.223", "ntokens": "3218.92", "nsentences": "43.04", "prob_perplexity": "138.457", "code_perplexity": "135.461", "temp": "1.076", "loss_0": "3.092", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.48092", "wps": "17632.3", "ups": "5.48", "wpb": "3218.9", "bsz": "43", "num_updates": "124000", "lr": "6.98734e-05", "gnorm": "0.731", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "23327"} [2023-11-02 00:04:17,403][train_inner][INFO] - {"epoch": 31, "update": 30.624, "loss": "3.192", "ntokens": "3156.24", "nsentences": "43.48", "prob_perplexity": "138.511", "code_perplexity": "135.652", "temp": "1.075", "loss_0": "3.061", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.48632", "wps": "17237.8", "ups": "5.46", "wpb": "3156.2", "bsz": "43.5", "num_updates": "124200", "lr": "6.98228e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "23364"} [2023-11-02 00:04:53,594][train_inner][INFO] - {"epoch": 31, "update": 30.673, "loss": "3.183", "ntokens": "3201.32", "nsentences": "45.8", "prob_perplexity": "138.823", "code_perplexity": "135.822", "temp": "1.074", "loss_0": "3.052", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.49054", "wps": "17692.2", "ups": "5.53", "wpb": "3201.3", "bsz": "45.8", "num_updates": "124400", "lr": "6.97722e-05", "gnorm": "0.731", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "23400"} [2023-11-02 00:05:29,353][train_inner][INFO] - {"epoch": 31, "update": 30.722, "loss": "3.225", "ntokens": "3163", "nsentences": "43.32", "prob_perplexity": "138.526", "code_perplexity": "135.607", "temp": "1.073", "loss_0": "3.094", "loss_1": "0.113", "loss_2": "0.018", "accuracy": "0.4808", "wps": "17692", "ups": "5.59", "wpb": "3163", "bsz": "43.3", "num_updates": "124600", "lr": "6.97215e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "16.2", "wall": "23436"} [2023-11-02 00:06:04,738][train_inner][INFO] - {"epoch": 31, "update": 30.772, "loss": "3.166", "ntokens": "3154.32", "nsentences": "44.72", "prob_perplexity": "139.854", "code_perplexity": "136.9", "temp": "1.072", "loss_0": "3.036", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.49246", "wps": "17829.5", "ups": "5.65", "wpb": "3154.3", "bsz": "44.7", "num_updates": "124800", "lr": "6.96709e-05", "gnorm": "0.735", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "23471"} [2023-11-02 00:06:40,700][train_inner][INFO] - {"epoch": 31, "update": 30.821, "loss": "3.219", "ntokens": "3201.24", "nsentences": "43.64", "prob_perplexity": "140.12", "code_perplexity": "137.201", "temp": "1.071", "loss_0": "3.089", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.48137", "wps": "17804.3", "ups": "5.56", "wpb": "3201.2", "bsz": "43.6", "num_updates": "125000", "lr": "6.96203e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "23507"} [2023-11-02 00:06:40,702][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 00:06:40,703][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:06:40,722][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 36 [2023-11-02 00:07:06,370][valid][INFO] - {"epoch": 31, "valid_loss": "3.022", "valid_ntokens": "3157.76", "valid_nsentences": "44.1685", "valid_prob_perplexity": "138.573", "valid_code_perplexity": "136.156", "valid_temp": "1.071", "valid_loss_0": "2.893", "valid_loss_1": "0.113", "valid_loss_2": "0.016", "valid_accuracy": "0.522", "valid_wps": "55849.4", "valid_wpb": "3157.8", "valid_bsz": "44.2", "valid_num_updates": "125000", "valid_best_loss": "3.022"} [2023-11-02 00:07:06,372][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 31 @ 125000 updates [2023-11-02 00:07:06,374][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_31_125000.pt [2023-11-02 00:07:07,792][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_31_125000.pt [2023-11-02 00:07:09,709][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_31_125000.pt (epoch 31 @ 125000 updates, score 3.022) (writing took 3.3369545750319958 seconds) [2023-11-02 00:07:46,030][train_inner][INFO] - {"epoch": 31, "update": 30.87, "loss": "3.189", "ntokens": "3196.44", "nsentences": "44.12", "prob_perplexity": "139.514", "code_perplexity": "136.639", "temp": "1.07", "loss_0": "3.059", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.48754", "wps": "9785.9", "ups": "3.06", "wpb": "3196.4", "bsz": "44.1", "num_updates": "125200", "lr": "6.95696e-05", "gnorm": "0.73", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "23572"} [2023-11-02 00:08:23,271][train_inner][INFO] - {"epoch": 31, "update": 30.92, "loss": "3.195", "ntokens": "3225.48", "nsentences": "45.36", "prob_perplexity": "139.828", "code_perplexity": "136.94", "temp": "1.069", "loss_0": "3.065", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.48716", "wps": "17323.1", "ups": "5.37", "wpb": "3225.5", "bsz": "45.4", "num_updates": "125400", "lr": "6.9519e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "37", "gb_free": "13.8", "wall": "23610"} [2023-11-02 00:08:59,569][train_inner][INFO] - {"epoch": 31, "update": 30.969, "loss": "3.189", "ntokens": "3206.72", "nsentences": "44.72", "prob_perplexity": "140.105", "code_perplexity": "137.192", "temp": "1.068", "loss_0": "3.059", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.4881", "wps": "17669.6", "ups": "5.51", "wpb": "3206.7", "bsz": "44.7", "num_updates": "125600", "lr": "6.94684e-05", "gnorm": "0.73", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "23646"} [2023-11-02 00:09:22,482][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 00:09:22,483][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:09:22,503][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 37 [2023-11-02 00:09:48,205][valid][INFO] - {"epoch": 31, "valid_loss": "3.011", "valid_ntokens": "3176.73", "valid_nsentences": "44.1685", "valid_prob_perplexity": "139.429", "valid_code_perplexity": "136.905", "valid_temp": "1.067", "valid_loss_0": "2.881", "valid_loss_1": "0.113", "valid_loss_2": "0.017", "valid_accuracy": "0.52286", "valid_wps": "56006.2", "valid_wpb": "3176.7", "valid_bsz": "44.2", "valid_num_updates": "125726", "valid_best_loss": "3.011"} [2023-11-02 00:09:48,207][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 31 @ 125726 updates [2023-11-02 00:09:48,209][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 00:09:49,651][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 00:09:50,616][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 31 @ 125726 updates, score 3.011) (writing took 2.40913813887164 seconds) [2023-11-02 00:09:50,617][fairseq_cli.train][INFO] - end of epoch 31 (average epoch stats below) [2023-11-02 00:09:50,619][train][INFO] - {"epoch": 31, "train_loss": "3.197", "train_ntokens": "3186.37", "train_nsentences": "44.2682", "train_prob_perplexity": "138.157", "train_code_perplexity": "135.24", "train_temp": "1.078", "train_loss_0": "3.067", "train_loss_1": "0.113", "train_loss_2": "0.018", "train_accuracy": "0.48631", "train_wps": "16365.9", "train_ups": "5.14", "train_wpb": "3186.4", "train_bsz": "44.3", "train_num_updates": "125726", "train_lr": "6.94365e-05", "train_gnorm": "0.734", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.5", "train_wall": "23697"} [2023-11-02 00:09:50,622][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:09:50,653][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 32 [2023-11-02 00:09:50,877][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 00:09:50,910][fairseq.trainer][INFO] - begin training epoch 32 [2023-11-02 00:09:50,911][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 00:10:04,484][train_inner][INFO] - {"epoch": 32, "update": 31.018, "loss": "3.156", "ntokens": "3168.84", "nsentences": "45.28", "prob_perplexity": "139.723", "code_perplexity": "136.827", "temp": "1.067", "loss_0": "3.027", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.49356", "wps": "9763.7", "ups": "3.08", "wpb": "3168.8", "bsz": "45.3", "num_updates": "125800", "lr": "6.94177e-05", "gnorm": "0.733", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "23711"} [2023-11-02 00:10:40,350][train_inner][INFO] - {"epoch": 32, "update": 31.068, "loss": "3.218", "ntokens": "3184.6", "nsentences": "44.4", "prob_perplexity": "140.728", "code_perplexity": "137.78", "temp": "1.066", "loss_0": "3.089", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.48356", "wps": "17759.9", "ups": "5.58", "wpb": "3184.6", "bsz": "44.4", "num_updates": "126000", "lr": "6.93671e-05", "gnorm": "0.735", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "23747"} [2023-11-02 00:11:16,128][train_inner][INFO] - {"epoch": 32, "update": 31.117, "loss": "3.159", "ntokens": "3180.72", "nsentences": "45.52", "prob_perplexity": "141.559", "code_perplexity": "138.638", "temp": "1.065", "loss_0": "3.029", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.49321", "wps": "17781.4", "ups": "5.59", "wpb": "3180.7", "bsz": "45.5", "num_updates": "126200", "lr": "6.93165e-05", "gnorm": "0.731", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "23782"} [2023-11-02 00:11:51,780][train_inner][INFO] - {"epoch": 32, "update": 31.166, "loss": "3.142", "ntokens": "3158.56", "nsentences": "45.52", "prob_perplexity": "140.86", "code_perplexity": "138.014", "temp": "1.064", "loss_0": "3.013", "loss_1": "0.113", "loss_2": "0.017", "accuracy": "0.49724", "wps": "17720.3", "ups": "5.61", "wpb": "3158.6", "bsz": "45.5", "num_updates": "126400", "lr": "6.92658e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "35", "gb_free": "16", "wall": "23818"} [2023-11-02 00:12:27,321][train_inner][INFO] - {"epoch": 32, "update": 31.215, "loss": "3.222", "ntokens": "3189.8", "nsentences": "43.6", "prob_perplexity": "141.234", "code_perplexity": "138.345", "temp": "1.063", "loss_0": "3.092", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48204", "wps": "17951", "ups": "5.63", "wpb": "3189.8", "bsz": "43.6", "num_updates": "126600", "lr": "6.92152e-05", "gnorm": "0.729", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "23854"} [2023-11-02 00:13:03,162][train_inner][INFO] - {"epoch": 32, "update": 31.265, "loss": "3.175", "ntokens": "3204.56", "nsentences": "45.36", "prob_perplexity": "141.114", "code_perplexity": "138.233", "temp": "1.061", "loss_0": "3.045", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.49056", "wps": "17890.6", "ups": "5.58", "wpb": "3204.6", "bsz": "45.4", "num_updates": "126800", "lr": "6.91646e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "23889"} [2023-11-02 00:13:38,768][train_inner][INFO] - {"epoch": 32, "update": 31.314, "loss": "3.2", "ntokens": "3189.16", "nsentences": "43.6", "prob_perplexity": "141.456", "code_perplexity": "138.45", "temp": "1.06", "loss_0": "3.07", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48533", "wps": "17914.2", "ups": "5.62", "wpb": "3189.2", "bsz": "43.6", "num_updates": "127000", "lr": "6.91139e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "23925"} [2023-11-02 00:14:14,706][train_inner][INFO] - {"epoch": 32, "update": 31.363, "loss": "3.17", "ntokens": "3161.8", "nsentences": "45.96", "prob_perplexity": "141.052", "code_perplexity": "138.134", "temp": "1.059", "loss_0": "3.04", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.49366", "wps": "17597.1", "ups": "5.57", "wpb": "3161.8", "bsz": "46", "num_updates": "127200", "lr": "6.90633e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "23961"} [2023-11-02 00:14:50,966][train_inner][INFO] - {"epoch": 32, "update": 31.413, "loss": "3.221", "ntokens": "3202.36", "nsentences": "42.52", "prob_perplexity": "141.463", "code_perplexity": "138.581", "temp": "1.058", "loss_0": "3.092", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48044", "wps": "17664.2", "ups": "5.52", "wpb": "3202.4", "bsz": "42.5", "num_updates": "127400", "lr": "6.90127e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "23997"} [2023-11-02 00:15:27,095][train_inner][INFO] - {"epoch": 32, "update": 31.462, "loss": "3.177", "ntokens": "3197.48", "nsentences": "44.8", "prob_perplexity": "142.596", "code_perplexity": "139.686", "temp": "1.057", "loss_0": "3.048", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48911", "wps": "17701.3", "ups": "5.54", "wpb": "3197.5", "bsz": "44.8", "num_updates": "127600", "lr": "6.8962e-05", "gnorm": "0.728", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "24033"} [2023-11-02 00:16:03,436][train_inner][INFO] - {"epoch": 32, "update": 31.511, "loss": "3.226", "ntokens": "3239.2", "nsentences": "42.84", "prob_perplexity": "142.904", "code_perplexity": "139.885", "temp": "1.056", "loss_0": "3.097", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.47909", "wps": "17827.8", "ups": "5.5", "wpb": "3239.2", "bsz": "42.8", "num_updates": "127800", "lr": "6.89114e-05", "gnorm": "0.735", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "24070"} [2023-11-02 00:16:39,811][train_inner][INFO] - {"epoch": 32, "update": 31.561, "loss": "3.22", "ntokens": "3189.44", "nsentences": "42.08", "prob_perplexity": "142.791", "code_perplexity": "139.805", "temp": "1.055", "loss_0": "3.091", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.47973", "wps": "17537.6", "ups": "5.5", "wpb": "3189.4", "bsz": "42.1", "num_updates": "128000", "lr": "6.88608e-05", "gnorm": "0.735", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "24106"} [2023-11-02 00:17:15,819][train_inner][INFO] - {"epoch": 32, "update": 31.61, "loss": "3.105", "ntokens": "3172.48", "nsentences": "46.84", "prob_perplexity": "143.258", "code_perplexity": "140.303", "temp": "1.054", "loss_0": "2.976", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.50326", "wps": "17622.2", "ups": "5.55", "wpb": "3172.5", "bsz": "46.8", "num_updates": "128200", "lr": "6.88101e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "24142"} [2023-11-02 00:17:52,092][train_inner][INFO] - {"epoch": 32, "update": 31.659, "loss": "3.209", "ntokens": "3212.28", "nsentences": "44.04", "prob_perplexity": "142.51", "code_perplexity": "139.495", "temp": "1.053", "loss_0": "3.08", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.4837", "wps": "17712.8", "ups": "5.51", "wpb": "3212.3", "bsz": "44", "num_updates": "128400", "lr": "6.87595e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "24178"} [2023-11-02 00:18:28,547][train_inner][INFO] - {"epoch": 32, "update": 31.709, "loss": "3.152", "ntokens": "3187.84", "nsentences": "45.28", "prob_perplexity": "143.358", "code_perplexity": "140.434", "temp": "1.052", "loss_0": "3.024", "loss_1": "0.112", "loss_2": "0.016", "accuracy": "0.49397", "wps": "17490", "ups": "5.49", "wpb": "3187.8", "bsz": "45.3", "num_updates": "128600", "lr": "6.87089e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "24215"} [2023-11-02 00:19:05,124][train_inner][INFO] - {"epoch": 32, "update": 31.758, "loss": "3.189", "ntokens": "3190.8", "nsentences": "44.24", "prob_perplexity": "143.705", "code_perplexity": "140.775", "temp": "1.051", "loss_0": "3.06", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48692", "wps": "17447.8", "ups": "5.47", "wpb": "3190.8", "bsz": "44.2", "num_updates": "128800", "lr": "6.86582e-05", "gnorm": "0.729", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "24251"} [2023-11-02 00:19:41,693][train_inner][INFO] - {"epoch": 32, "update": 31.807, "loss": "3.209", "ntokens": "3224.44", "nsentences": "43.6", "prob_perplexity": "144.017", "code_perplexity": "141.13", "temp": "1.05", "loss_0": "3.081", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48249", "wps": "17636", "ups": "5.47", "wpb": "3224.4", "bsz": "43.6", "num_updates": "129000", "lr": "6.86076e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "24288"} [2023-11-02 00:20:18,040][train_inner][INFO] - {"epoch": 32, "update": 31.857, "loss": "3.16", "ntokens": "3207", "nsentences": "45.48", "prob_perplexity": "143.991", "code_perplexity": "141.155", "temp": "1.049", "loss_0": "3.032", "loss_1": "0.112", "loss_2": "0.016", "accuracy": "0.49207", "wps": "17648.1", "ups": "5.5", "wpb": "3207", "bsz": "45.5", "num_updates": "129200", "lr": "6.8557e-05", "gnorm": "0.734", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "24324"} [2023-11-02 00:20:54,590][train_inner][INFO] - {"epoch": 32, "update": 31.906, "loss": "3.212", "ntokens": "3212.32", "nsentences": "43.36", "prob_perplexity": "143.598", "code_perplexity": "140.756", "temp": "1.048", "loss_0": "3.083", "loss_1": "0.112", "loss_2": "0.016", "accuracy": "0.48144", "wps": "17578.5", "ups": "5.47", "wpb": "3212.3", "bsz": "43.4", "num_updates": "129400", "lr": "6.85063e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "36", "gb_free": "16", "wall": "24361"} [2023-11-02 00:21:30,652][train_inner][INFO] - {"epoch": 32, "update": 31.955, "loss": "3.128", "ntokens": "3143.28", "nsentences": "43.72", "prob_perplexity": "143.698", "code_perplexity": "140.825", "temp": "1.047", "loss_0": "3", "loss_1": "0.112", "loss_2": "0.016", "accuracy": "0.49666", "wps": "17433.8", "ups": "5.55", "wpb": "3143.3", "bsz": "43.7", "num_updates": "129600", "lr": "6.84557e-05", "gnorm": "0.745", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "24397"} [2023-11-02 00:22:03,327][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 00:22:03,328][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:22:03,347][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 38 [2023-11-02 00:22:29,015][valid][INFO] - {"epoch": 32, "valid_loss": "2.981", "valid_ntokens": "3161.24", "valid_nsentences": "44.1685", "valid_prob_perplexity": "143.62", "valid_code_perplexity": "140.749", "valid_temp": "1.045", "valid_loss_0": "2.851", "valid_loss_1": "0.112", "valid_loss_2": "0.018", "valid_accuracy": "0.52832", "valid_wps": "55796.3", "valid_wpb": "3161.2", "valid_bsz": "44.2", "valid_num_updates": "129782", "valid_best_loss": "2.981"} [2023-11-02 00:22:29,017][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 32 @ 129782 updates [2023-11-02 00:22:29,019][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 00:22:30,473][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 00:22:31,461][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 32 @ 129782 updates, score 2.981) (writing took 2.44343936489895 seconds) [2023-11-02 00:22:31,462][fairseq_cli.train][INFO] - end of epoch 32 (average epoch stats below) [2023-11-02 00:22:31,464][train][INFO] - {"epoch": 32, "train_loss": "3.182", "train_ntokens": "3188.02", "train_nsentences": "44.2682", "train_prob_perplexity": "142.447", "train_code_perplexity": "139.523", "train_temp": "1.056", "train_loss_0": "3.054", "train_loss_1": "0.112", "train_loss_2": "0.017", "train_accuracy": "0.48827", "train_wps": "16995.1", "train_ups": "5.33", "train_wpb": "3188", "train_bsz": "44.3", "train_num_updates": "129782", "train_lr": "6.84096e-05", "train_gnorm": "0.736", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.4", "train_wall": "24458"} [2023-11-02 00:22:31,467][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:22:31,487][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 33 [2023-11-02 00:22:31,657][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 00:22:31,691][fairseq.trainer][INFO] - begin training epoch 33 [2023-11-02 00:22:31,692][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 00:22:35,101][train_inner][INFO] - {"epoch": 33, "update": 32.004, "loss": "3.189", "ntokens": "3116.24", "nsentences": "40.92", "prob_perplexity": "143.831", "code_perplexity": "140.82", "temp": "1.046", "loss_0": "3.061", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48382", "wps": "9670.7", "ups": "3.1", "wpb": "3116.2", "bsz": "40.9", "num_updates": "129800", "lr": "6.84051e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "24461"} [2023-11-02 00:23:11,148][train_inner][INFO] - {"epoch": 33, "update": 32.054, "loss": "3.137", "ntokens": "3161.72", "nsentences": "44.84", "prob_perplexity": "144.148", "code_perplexity": "141.237", "temp": "1.045", "loss_0": "3.009", "loss_1": "0.112", "loss_2": "0.016", "accuracy": "0.49604", "wps": "17543.6", "ups": "5.55", "wpb": "3161.7", "bsz": "44.8", "num_updates": "130000", "lr": "6.83544e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "24497"} [2023-11-02 00:23:47,521][train_inner][INFO] - {"epoch": 33, "update": 32.103, "loss": "3.165", "ntokens": "3208.44", "nsentences": "45.32", "prob_perplexity": "144.642", "code_perplexity": "141.715", "temp": "1.044", "loss_0": "3.037", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.49133", "wps": "17642.7", "ups": "5.5", "wpb": "3208.4", "bsz": "45.3", "num_updates": "130200", "lr": "6.83038e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "36", "gb_free": "15.8", "wall": "24534"} [2023-11-02 00:24:24,412][train_inner][INFO] - {"epoch": 33, "update": 32.152, "loss": "3.186", "ntokens": "3186.88", "nsentences": "42.44", "prob_perplexity": "145.111", "code_perplexity": "142.103", "temp": "1.043", "loss_0": "3.057", "loss_1": "0.112", "loss_2": "0.017", "accuracy": "0.48587", "wps": "17278.5", "ups": "5.42", "wpb": "3186.9", "bsz": "42.4", "num_updates": "130400", "lr": "6.82532e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "24571"} [2023-11-02 00:25:00,437][train_inner][INFO] - {"epoch": 33, "update": 32.202, "loss": "3.176", "ntokens": "3210.28", "nsentences": "44.24", "prob_perplexity": "145.194", "code_perplexity": "142.339", "temp": "1.041", "loss_0": "3.048", "loss_1": "0.112", "loss_2": "0.016", "accuracy": "0.48893", "wps": "17823.3", "ups": "5.55", "wpb": "3210.3", "bsz": "44.2", "num_updates": "130600", "lr": "6.82025e-05", "gnorm": "0.737", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "24607"} [2023-11-02 00:25:36,641][train_inner][INFO] - {"epoch": 33, "update": 32.251, "loss": "3.145", "ntokens": "3182.56", "nsentences": "47.12", "prob_perplexity": "145.91", "code_perplexity": "143.13", "temp": "1.04", "loss_0": "3.017", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49704", "wps": "17582.2", "ups": "5.52", "wpb": "3182.6", "bsz": "47.1", "num_updates": "130800", "lr": "6.81519e-05", "gnorm": "0.732", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "24643"} [2023-11-02 00:26:13,355][train_inner][INFO] - {"epoch": 33, "update": 32.3, "loss": "3.208", "ntokens": "3249.48", "nsentences": "44.72", "prob_perplexity": "145.711", "code_perplexity": "142.823", "temp": "1.039", "loss_0": "3.08", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48368", "wps": "17702.8", "ups": "5.45", "wpb": "3249.5", "bsz": "44.7", "num_updates": "131000", "lr": "6.81013e-05", "gnorm": "0.729", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "24680"} [2023-11-02 00:26:49,632][train_inner][INFO] - {"epoch": 33, "update": 32.35, "loss": "3.174", "ntokens": "3201.52", "nsentences": "43.56", "prob_perplexity": "146.239", "code_perplexity": "143.274", "temp": "1.038", "loss_0": "3.046", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48898", "wps": "17658", "ups": "5.52", "wpb": "3201.5", "bsz": "43.6", "num_updates": "131200", "lr": "6.80506e-05", "gnorm": "0.733", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "24716"} [2023-11-02 00:27:25,516][train_inner][INFO] - {"epoch": 33, "update": 32.399, "loss": "3.172", "ntokens": "3190.88", "nsentences": "43.76", "prob_perplexity": "145.802", "code_perplexity": "142.971", "temp": "1.037", "loss_0": "3.044", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48917", "wps": "17785.7", "ups": "5.57", "wpb": "3190.9", "bsz": "43.8", "num_updates": "131400", "lr": "6.8e-05", "gnorm": "0.748", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "24752"} [2023-11-02 00:28:01,603][train_inner][INFO] - {"epoch": 33, "update": 32.448, "loss": "3.117", "ntokens": "3155.04", "nsentences": "45.96", "prob_perplexity": "146.123", "code_perplexity": "143.229", "temp": "1.036", "loss_0": "2.989", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.50088", "wps": "17486.5", "ups": "5.54", "wpb": "3155", "bsz": "46", "num_updates": "131600", "lr": "6.79494e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "24788"} [2023-11-02 00:28:37,473][train_inner][INFO] - {"epoch": 33, "update": 32.498, "loss": "3.194", "ntokens": "3193.88", "nsentences": "42.64", "prob_perplexity": "145.661", "code_perplexity": "142.793", "temp": "1.035", "loss_0": "3.067", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.4848", "wps": "17809.5", "ups": "5.58", "wpb": "3193.9", "bsz": "42.6", "num_updates": "131800", "lr": "6.78987e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "24824"} [2023-11-02 00:29:13,837][train_inner][INFO] - {"epoch": 33, "update": 32.547, "loss": "3.147", "ntokens": "3181.44", "nsentences": "44.64", "prob_perplexity": "146.602", "code_perplexity": "143.601", "temp": "1.034", "loss_0": "3.02", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49369", "wps": "17498.7", "ups": "5.5", "wpb": "3181.4", "bsz": "44.6", "num_updates": "132000", "lr": "6.78481e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "24860"} [2023-11-02 00:29:50,264][train_inner][INFO] - {"epoch": 33, "update": 32.596, "loss": "3.171", "ntokens": "3213.08", "nsentences": "43.12", "prob_perplexity": "146.884", "code_perplexity": "143.911", "temp": "1.033", "loss_0": "3.044", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48803", "wps": "17641.9", "ups": "5.49", "wpb": "3213.1", "bsz": "43.1", "num_updates": "132200", "lr": "6.77975e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "36", "gb_free": "15.6", "wall": "24896"} [2023-11-02 00:30:26,475][train_inner][INFO] - {"epoch": 33, "update": 32.645, "loss": "3.213", "ntokens": "3158.36", "nsentences": "41.48", "prob_perplexity": "146.66", "code_perplexity": "143.674", "temp": "1.032", "loss_0": "3.086", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.47973", "wps": "17445.4", "ups": "5.52", "wpb": "3158.4", "bsz": "41.5", "num_updates": "132400", "lr": "6.77468e-05", "gnorm": "0.756", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "24933"} [2023-11-02 00:31:02,018][train_inner][INFO] - {"epoch": 33, "update": 32.695, "loss": "3.152", "ntokens": "3180.04", "nsentences": "44.32", "prob_perplexity": "146.467", "code_perplexity": "143.503", "temp": "1.031", "loss_0": "3.025", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49264", "wps": "17895.3", "ups": "5.63", "wpb": "3180", "bsz": "44.3", "num_updates": "132600", "lr": "6.76962e-05", "gnorm": "0.745", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "24968"} [2023-11-02 00:31:38,314][train_inner][INFO] - {"epoch": 33, "update": 32.744, "loss": "3.161", "ntokens": "3210.52", "nsentences": "44.4", "prob_perplexity": "147.328", "code_perplexity": "144.437", "temp": "1.03", "loss_0": "3.035", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49107", "wps": "17691.5", "ups": "5.51", "wpb": "3210.5", "bsz": "44.4", "num_updates": "132800", "lr": "6.76456e-05", "gnorm": "0.737", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "25005"} [2023-11-02 00:32:15,009][train_inner][INFO] - {"epoch": 33, "update": 32.793, "loss": "3.16", "ntokens": "3214.72", "nsentences": "45.56", "prob_perplexity": "146.541", "code_perplexity": "143.663", "temp": "1.029", "loss_0": "3.033", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49245", "wps": "17522.7", "ups": "5.45", "wpb": "3214.7", "bsz": "45.6", "num_updates": "133000", "lr": "6.75949e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "25041"} [2023-11-02 00:32:50,844][train_inner][INFO] - {"epoch": 33, "update": 32.843, "loss": "3.133", "ntokens": "3213.16", "nsentences": "46.28", "prob_perplexity": "147.608", "code_perplexity": "144.672", "temp": "1.028", "loss_0": "3.006", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49697", "wps": "17934", "ups": "5.58", "wpb": "3213.2", "bsz": "46.3", "num_updates": "133200", "lr": "6.75443e-05", "gnorm": "0.725", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "25077"} [2023-11-02 00:33:27,223][train_inner][INFO] - {"epoch": 33, "update": 32.892, "loss": "3.183", "ntokens": "3200.52", "nsentences": "45.48", "prob_perplexity": "148.079", "code_perplexity": "145.134", "temp": "1.027", "loss_0": "3.057", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48884", "wps": "17596.4", "ups": "5.5", "wpb": "3200.5", "bsz": "45.5", "num_updates": "133400", "lr": "6.74937e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "25113"} [2023-11-02 00:34:03,671][train_inner][INFO] - {"epoch": 33, "update": 32.941, "loss": "3.168", "ntokens": "3164.08", "nsentences": "44", "prob_perplexity": "147.822", "code_perplexity": "144.817", "temp": "1.026", "loss_0": "3.041", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48978", "wps": "17363.4", "ups": "5.49", "wpb": "3164.1", "bsz": "44", "num_updates": "133600", "lr": "6.7443e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "25150"} [2023-11-02 00:34:40,289][train_inner][INFO] - {"epoch": 33, "update": 32.991, "loss": "3.253", "ntokens": "3215.36", "nsentences": "40.6", "prob_perplexity": "147.959", "code_perplexity": "144.958", "temp": "1.025", "loss_0": "3.126", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.47269", "wps": "17562.4", "ups": "5.46", "wpb": "3215.4", "bsz": "40.6", "num_updates": "133800", "lr": "6.73924e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "25187"} [2023-11-02 00:34:47,219][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 00:34:47,220][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:34:47,237][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 39 [2023-11-02 00:35:13,303][valid][INFO] - {"epoch": 33, "valid_loss": "2.971", "valid_ntokens": "3164.65", "valid_nsentences": "44.1685", "valid_prob_perplexity": "147.018", "valid_code_perplexity": "144.337", "valid_temp": "1.024", "valid_loss_0": "2.845", "valid_loss_1": "0.111", "valid_loss_2": "0.015", "valid_accuracy": "0.52921", "valid_wps": "55031.2", "valid_wpb": "3164.6", "valid_bsz": "44.2", "valid_num_updates": "133838", "valid_best_loss": "2.971"} [2023-11-02 00:35:13,305][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 33 @ 133838 updates [2023-11-02 00:35:13,307][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 00:35:14,755][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 00:35:15,736][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 33 @ 133838 updates, score 2.971) (writing took 2.43115011183545 seconds) [2023-11-02 00:35:15,737][fairseq_cli.train][INFO] - end of epoch 33 (average epoch stats below) [2023-11-02 00:35:15,739][train][INFO] - {"epoch": 33, "train_loss": "3.17", "train_ntokens": "3194.79", "train_nsentences": "44.2682", "train_prob_perplexity": "146.344", "train_code_perplexity": "143.417", "train_temp": "1.035", "train_loss_0": "3.043", "train_loss_1": "0.111", "train_loss_2": "0.016", "train_accuracy": "0.48971", "train_wps": "16954.8", "train_ups": "5.31", "train_wpb": "3194.8", "train_bsz": "44.3", "train_num_updates": "133838", "train_lr": "6.73828e-05", "train_gnorm": "0.741", "train_loss_scale": "2", "train_train_wall": "723", "train_gb_free": "13.8", "train_wall": "25222"} [2023-11-02 00:35:15,741][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:35:15,769][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 34 [2023-11-02 00:35:15,969][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 00:35:16,003][fairseq.trainer][INFO] - begin training epoch 34 [2023-11-02 00:35:16,004][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 00:35:45,189][train_inner][INFO] - {"epoch": 34, "update": 33.04, "loss": "3.192", "ntokens": "3226", "nsentences": "44", "prob_perplexity": "147.848", "code_perplexity": "144.938", "temp": "1.024", "loss_0": "3.065", "loss_1": "0.111", "loss_2": "0.015", "accuracy": "0.48466", "wps": "9941.9", "ups": "3.08", "wpb": "3226", "bsz": "44", "num_updates": "134000", "lr": "6.73418e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "35", "gb_free": "15", "wall": "25251"} [2023-11-02 00:36:20,657][train_inner][INFO] - {"epoch": 34, "update": 33.089, "loss": "3.113", "ntokens": "3157.32", "nsentences": "45.52", "prob_perplexity": "147.916", "code_perplexity": "144.958", "temp": "1.023", "loss_0": "2.987", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.50037", "wps": "17805.1", "ups": "5.64", "wpb": "3157.3", "bsz": "45.5", "num_updates": "134200", "lr": "6.72911e-05", "gnorm": "0.745", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "25287"} [2023-11-02 00:36:56,477][train_inner][INFO] - {"epoch": 34, "update": 33.139, "loss": "3.229", "ntokens": "3219.24", "nsentences": "41.84", "prob_perplexity": "148.743", "code_perplexity": "145.787", "temp": "1.022", "loss_0": "3.102", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.47636", "wps": "17975.4", "ups": "5.58", "wpb": "3219.2", "bsz": "41.8", "num_updates": "134400", "lr": "6.72405e-05", "gnorm": "0.742", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "25323"} [2023-11-02 00:37:32,129][train_inner][INFO] - {"epoch": 34, "update": 33.188, "loss": "3.124", "ntokens": "3148.08", "nsentences": "43.2", "prob_perplexity": "147.789", "code_perplexity": "144.927", "temp": "1.021", "loss_0": "2.997", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49653", "wps": "17660.9", "ups": "5.61", "wpb": "3148.1", "bsz": "43.2", "num_updates": "134600", "lr": "6.71899e-05", "gnorm": "0.749", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "25358"} [2023-11-02 00:38:08,078][train_inner][INFO] - {"epoch": 34, "update": 33.237, "loss": "3.151", "ntokens": "3199.68", "nsentences": "42.4", "prob_perplexity": "149.467", "code_perplexity": "146.431", "temp": "1.02", "loss_0": "3.024", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48975", "wps": "17802.7", "ups": "5.56", "wpb": "3199.7", "bsz": "42.4", "num_updates": "134800", "lr": "6.71392e-05", "gnorm": "0.738", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "25394"} [2023-11-02 00:38:44,412][train_inner][INFO] - {"epoch": 34, "update": 33.286, "loss": "3.144", "ntokens": "3133.92", "nsentences": "43.52", "prob_perplexity": "148.962", "code_perplexity": "145.91", "temp": "1.019", "loss_0": "3.018", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49372", "wps": "17251.8", "ups": "5.5", "wpb": "3133.9", "bsz": "43.5", "num_updates": "135000", "lr": "6.70886e-05", "gnorm": "0.744", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "25431"} [2023-11-02 00:39:20,652][train_inner][INFO] - {"epoch": 34, "update": 33.336, "loss": "3.077", "ntokens": "3189.92", "nsentences": "47.28", "prob_perplexity": "148.949", "code_perplexity": "146.017", "temp": "1.018", "loss_0": "2.951", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.50767", "wps": "17605.6", "ups": "5.52", "wpb": "3189.9", "bsz": "47.3", "num_updates": "135200", "lr": "6.7038e-05", "gnorm": "0.729", "loss_scale": "4", "train_wall": "36", "gb_free": "13.3", "wall": "25467"} [2023-11-02 00:39:57,070][train_inner][INFO] - {"epoch": 34, "update": 33.385, "loss": "3.148", "ntokens": "3201.28", "nsentences": "44.84", "prob_perplexity": "149.021", "code_perplexity": "146.036", "temp": "1.017", "loss_0": "3.022", "loss_1": "0.111", "loss_2": "0.015", "accuracy": "0.49376", "wps": "17589.2", "ups": "5.49", "wpb": "3201.3", "bsz": "44.8", "num_updates": "135400", "lr": "6.69873e-05", "gnorm": "0.746", "loss_scale": "4", "train_wall": "36", "gb_free": "13.9", "wall": "25503"} [2023-11-02 00:40:33,389][train_inner][INFO] - {"epoch": 34, "update": 33.434, "loss": "3.152", "ntokens": "3191.36", "nsentences": "44.52", "prob_perplexity": "149.136", "code_perplexity": "146.082", "temp": "1.016", "loss_0": "3.026", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.49285", "wps": "17574.9", "ups": "5.51", "wpb": "3191.4", "bsz": "44.5", "num_updates": "135600", "lr": "6.69367e-05", "gnorm": "0.739", "loss_scale": "4", "train_wall": "36", "gb_free": "17.2", "wall": "25540"} [2023-11-02 00:41:09,481][train_inner][INFO] - {"epoch": 34, "update": 33.484, "loss": "3.096", "ntokens": "3164.88", "nsentences": "46.04", "prob_perplexity": "149.55", "code_perplexity": "146.613", "temp": "1.015", "loss_0": "2.97", "loss_1": "0.111", "loss_2": "0.015", "accuracy": "0.50287", "wps": "17539.2", "ups": "5.54", "wpb": "3164.9", "bsz": "46", "num_updates": "135800", "lr": "6.68861e-05", "gnorm": "0.746", "loss_scale": "4", "train_wall": "35", "gb_free": "13.6", "wall": "25576"} [2023-11-02 00:41:45,310][train_inner][INFO] - {"epoch": 34, "update": 33.533, "loss": "3.144", "ntokens": "3199.52", "nsentences": "44.24", "prob_perplexity": "149.035", "code_perplexity": "146.099", "temp": "1.014", "loss_0": "3.018", "loss_1": "0.111", "loss_2": "0.015", "accuracy": "0.49327", "wps": "17860.8", "ups": "5.58", "wpb": "3199.5", "bsz": "44.2", "num_updates": "136000", "lr": "6.68354e-05", "gnorm": "0.748", "loss_scale": "4", "train_wall": "35", "gb_free": "15.1", "wall": "25612"} [2023-11-02 00:42:21,340][train_inner][INFO] - {"epoch": 34, "update": 33.582, "loss": "3.151", "ntokens": "3200.76", "nsentences": "44.08", "prob_perplexity": "149.428", "code_perplexity": "146.454", "temp": "1.013", "loss_0": "3.025", "loss_1": "0.111", "loss_2": "0.015", "accuracy": "0.4926", "wps": "17768.1", "ups": "5.55", "wpb": "3200.8", "bsz": "44.1", "num_updates": "136200", "lr": "6.67848e-05", "gnorm": "0.754", "loss_scale": "4", "train_wall": "35", "gb_free": "13.3", "wall": "25648"} [2023-11-02 00:42:57,214][train_inner][INFO] - {"epoch": 34, "update": 33.632, "loss": "3.192", "ntokens": "3189.44", "nsentences": "41.44", "prob_perplexity": "149.645", "code_perplexity": "146.663", "temp": "1.012", "loss_0": "3.065", "loss_1": "0.111", "loss_2": "0.016", "accuracy": "0.48272", "wps": "17782.6", "ups": "5.58", "wpb": "3189.4", "bsz": "41.4", "num_updates": "136400", "lr": "6.67342e-05", "gnorm": "0.742", "loss_scale": "4", "train_wall": "35", "gb_free": "15.7", "wall": "25683"} [2023-11-02 00:43:15,674][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 00:43:33,670][train_inner][INFO] - {"epoch": 34, "update": 33.681, "loss": "3.14", "ntokens": "3226", "nsentences": "45.64", "prob_perplexity": "150.373", "code_perplexity": "147.402", "temp": "1.011", "loss_0": "3.014", "loss_1": "0.11", "loss_2": "0.016", "accuracy": "0.4951", "wps": "17698.7", "ups": "5.49", "wpb": "3226", "bsz": "45.6", "num_updates": "136600", "lr": "6.66835e-05", "gnorm": "0.735", "loss_scale": "2", "train_wall": "36", "gb_free": "11.9", "wall": "25720"} [2023-11-02 00:44:10,184][train_inner][INFO] - {"epoch": 34, "update": 33.731, "loss": "3.084", "ntokens": "3138.12", "nsentences": "45.8", "prob_perplexity": "149.881", "code_perplexity": "146.916", "temp": "1.01", "loss_0": "2.958", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.50566", "wps": "17189.7", "ups": "5.48", "wpb": "3138.1", "bsz": "45.8", "num_updates": "136800", "lr": "6.66329e-05", "gnorm": "0.75", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "25756"} [2023-11-02 00:44:47,042][train_inner][INFO] - {"epoch": 34, "update": 33.78, "loss": "3.174", "ntokens": "3237.36", "nsentences": "43.12", "prob_perplexity": "150.125", "code_perplexity": "147.171", "temp": "1.009", "loss_0": "3.048", "loss_1": "0.11", "loss_2": "0.016", "accuracy": "0.4878", "wps": "17567.8", "ups": "5.43", "wpb": "3237.4", "bsz": "43.1", "num_updates": "137000", "lr": "6.65823e-05", "gnorm": "0.735", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "25793"} [2023-11-02 00:45:23,043][train_inner][INFO] - {"epoch": 34, "update": 33.829, "loss": "3.129", "ntokens": "3171.28", "nsentences": "44.76", "prob_perplexity": "149.408", "code_perplexity": "146.532", "temp": "1.008", "loss_0": "3.003", "loss_1": "0.111", "loss_2": "0.015", "accuracy": "0.49654", "wps": "17618.5", "ups": "5.56", "wpb": "3171.3", "bsz": "44.8", "num_updates": "137200", "lr": "6.65316e-05", "gnorm": "0.748", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "25829"} [2023-11-02 00:45:59,111][train_inner][INFO] - {"epoch": 34, "update": 33.878, "loss": "3.162", "ntokens": "3181.04", "nsentences": "42.76", "prob_perplexity": "150.045", "code_perplexity": "147.063", "temp": "1.007", "loss_0": "3.037", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.48975", "wps": "17640.2", "ups": "5.55", "wpb": "3181", "bsz": "42.8", "num_updates": "137400", "lr": "6.6481e-05", "gnorm": "0.756", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "25865"} [2023-11-02 00:46:35,335][train_inner][INFO] - {"epoch": 34, "update": 33.928, "loss": "3.117", "ntokens": "3199.96", "nsentences": "46.64", "prob_perplexity": "150.333", "code_perplexity": "147.325", "temp": "1.006", "loss_0": "2.991", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.50001", "wps": "17668.9", "ups": "5.52", "wpb": "3200", "bsz": "46.6", "num_updates": "137600", "lr": "6.64304e-05", "gnorm": "0.74", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "25902"} [2023-11-02 00:47:11,343][train_inner][INFO] - {"epoch": 34, "update": 33.977, "loss": "3.208", "ntokens": "3227.48", "nsentences": "43.6", "prob_perplexity": "150.938", "code_perplexity": "147.987", "temp": "1.005", "loss_0": "3.083", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.48269", "wps": "17927.5", "ups": "5.55", "wpb": "3227.5", "bsz": "43.6", "num_updates": "137800", "lr": "6.63797e-05", "gnorm": "0.744", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "25938"} [2023-11-02 00:47:28,271][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 00:47:28,273][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:47:28,291][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 40 [2023-11-02 00:47:53,933][valid][INFO] - {"epoch": 34, "valid_loss": "2.975", "valid_ntokens": "3171.05", "valid_nsentences": "44.1685", "valid_prob_perplexity": "151.184", "valid_code_perplexity": "148.439", "valid_temp": "1.004", "valid_loss_0": "2.85", "valid_loss_1": "0.11", "valid_loss_2": "0.015", "valid_accuracy": "0.52794", "valid_wps": "56008.6", "valid_wpb": "3171.1", "valid_bsz": "44.2", "valid_num_updates": "137893", "valid_best_loss": "2.971"} [2023-11-02 00:47:53,935][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 34 @ 137893 updates [2023-11-02 00:47:53,937][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 00:47:55,376][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 00:47:55,426][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 34 @ 137893 updates, score 2.975) (writing took 1.4905301630496979 seconds) [2023-11-02 00:47:55,427][fairseq_cli.train][INFO] - end of epoch 34 (average epoch stats below) [2023-11-02 00:47:55,429][train][INFO] - {"epoch": 34, "train_loss": "3.147", "train_ntokens": "3191.43", "train_nsentences": "44.2634", "train_prob_perplexity": "149.384", "train_code_perplexity": "146.42", "train_temp": "1.014", "train_loss_0": "3.021", "train_loss_1": "0.111", "train_loss_2": "0.016", "train_accuracy": "0.4931", "train_wps": "17035", "train_ups": "5.34", "train_wpb": "3191.4", "train_bsz": "44.3", "train_num_updates": "137893", "train_lr": "6.63562e-05", "train_gnorm": "0.744", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.1", "train_wall": "25982"} [2023-11-02 00:47:55,431][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 00:47:55,451][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 35 [2023-11-02 00:47:55,614][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 00:47:55,648][fairseq.trainer][INFO] - begin training epoch 35 [2023-11-02 00:47:55,649][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 00:48:15,513][train_inner][INFO] - {"epoch": 35, "update": 34.026, "loss": "3.127", "ntokens": "3235.48", "nsentences": "45.96", "prob_perplexity": "151.489", "code_perplexity": "148.503", "temp": "1.004", "loss_0": "3.001", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49753", "wps": "10084.5", "ups": "3.12", "wpb": "3235.5", "bsz": "46", "num_updates": "138000", "lr": "6.63291e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "26002"} [2023-11-02 00:48:51,278][train_inner][INFO] - {"epoch": 35, "update": 34.076, "loss": "3.159", "ntokens": "3236.24", "nsentences": "44.88", "prob_perplexity": "151.4", "code_perplexity": "148.441", "temp": "1.003", "loss_0": "3.034", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49085", "wps": "18098.4", "ups": "5.59", "wpb": "3236.2", "bsz": "44.9", "num_updates": "138200", "lr": "6.62785e-05", "gnorm": "0.744", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "26038"} [2023-11-02 00:49:27,324][train_inner][INFO] - {"epoch": 35, "update": 34.125, "loss": "3.146", "ntokens": "3204.24", "nsentences": "43.4", "prob_perplexity": "151.485", "code_perplexity": "148.554", "temp": "1.002", "loss_0": "3.02", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49276", "wps": "17779.4", "ups": "5.55", "wpb": "3204.2", "bsz": "43.4", "num_updates": "138400", "lr": "6.62278e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "26074"} [2023-11-02 00:50:03,230][train_inner][INFO] - {"epoch": 35, "update": 34.174, "loss": "3.143", "ntokens": "3192.88", "nsentences": "43.12", "prob_perplexity": "151.446", "code_perplexity": "148.473", "temp": "1.001", "loss_0": "3.018", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49248", "wps": "17785.9", "ups": "5.57", "wpb": "3192.9", "bsz": "43.1", "num_updates": "138600", "lr": "6.61772e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "12.6", "wall": "26109"} [2023-11-02 00:50:39,291][train_inner][INFO] - {"epoch": 35, "update": 34.224, "loss": "3.147", "ntokens": "3216.6", "nsentences": "43.32", "prob_perplexity": "151.709", "code_perplexity": "148.748", "temp": "1", "loss_0": "3.022", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49068", "wps": "17840.4", "ups": "5.55", "wpb": "3216.6", "bsz": "43.3", "num_updates": "138800", "lr": "6.61266e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "26146"} [2023-11-02 00:51:15,231][train_inner][INFO] - {"epoch": 35, "update": 34.273, "loss": "3.147", "ntokens": "3217.84", "nsentences": "44.4", "prob_perplexity": "152.282", "code_perplexity": "149.248", "temp": "0.999", "loss_0": "3.022", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49284", "wps": "17908.1", "ups": "5.57", "wpb": "3217.8", "bsz": "44.4", "num_updates": "139000", "lr": "6.60759e-05", "gnorm": "0.744", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "26181"} [2023-11-02 00:51:51,081][train_inner][INFO] - {"epoch": 35, "update": 34.322, "loss": "3.114", "ntokens": "3184.08", "nsentences": "45.96", "prob_perplexity": "152.809", "code_perplexity": "149.766", "temp": "0.998", "loss_0": "2.989", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49982", "wps": "17764.4", "ups": "5.58", "wpb": "3184.1", "bsz": "46", "num_updates": "139200", "lr": "6.60253e-05", "gnorm": "0.747", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "26217"} [2023-11-02 00:52:27,533][train_inner][INFO] - {"epoch": 35, "update": 34.372, "loss": "3.164", "ntokens": "3209.4", "nsentences": "43.44", "prob_perplexity": "152.127", "code_perplexity": "149.092", "temp": "0.997", "loss_0": "3.039", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.48968", "wps": "17609.8", "ups": "5.49", "wpb": "3209.4", "bsz": "43.4", "num_updates": "139400", "lr": "6.59747e-05", "gnorm": "0.75", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "26254"} [2023-11-02 00:53:04,049][train_inner][INFO] - {"epoch": 35, "update": 34.421, "loss": "3.145", "ntokens": "3217.04", "nsentences": "45.08", "prob_perplexity": "152.117", "code_perplexity": "149.075", "temp": "0.996", "loss_0": "3.02", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49411", "wps": "17621.1", "ups": "5.48", "wpb": "3217", "bsz": "45.1", "num_updates": "139600", "lr": "6.59241e-05", "gnorm": "0.742", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "26290"} [2023-11-02 00:53:40,405][train_inner][INFO] - {"epoch": 35, "update": 34.47, "loss": "3.124", "ntokens": "3185.92", "nsentences": "44.84", "prob_perplexity": "152.391", "code_perplexity": "149.394", "temp": "0.995", "loss_0": "2.999", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49729", "wps": "17526.9", "ups": "5.5", "wpb": "3185.9", "bsz": "44.8", "num_updates": "139800", "lr": "6.58734e-05", "gnorm": "0.737", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "26327"} [2023-11-02 00:54:16,670][train_inner][INFO] - {"epoch": 35, "update": 34.519, "loss": "3.09", "ntokens": "3158.44", "nsentences": "45", "prob_perplexity": "152.128", "code_perplexity": "149.137", "temp": "0.994", "loss_0": "2.966", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.50445", "wps": "17419.6", "ups": "5.52", "wpb": "3158.4", "bsz": "45", "num_updates": "140000", "lr": "6.58228e-05", "gnorm": "0.743", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "26363"} [2023-11-02 00:54:52,995][train_inner][INFO] - {"epoch": 35, "update": 34.569, "loss": "3.073", "ntokens": "3122.04", "nsentences": "44.88", "prob_perplexity": "152.119", "code_perplexity": "149.153", "temp": "0.993", "loss_0": "2.948", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.5062", "wps": "17190.7", "ups": "5.51", "wpb": "3122", "bsz": "44.9", "num_updates": "140200", "lr": "6.57722e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "26399"} [2023-11-02 00:55:29,485][train_inner][INFO] - {"epoch": 35, "update": 34.618, "loss": "3.149", "ntokens": "3195.36", "nsentences": "42.96", "prob_perplexity": "152.578", "code_perplexity": "149.521", "temp": "0.992", "loss_0": "3.024", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49097", "wps": "17514.7", "ups": "5.48", "wpb": "3195.4", "bsz": "43", "num_updates": "140400", "lr": "6.57215e-05", "gnorm": "0.751", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "26436"} [2023-11-02 00:56:05,685][train_inner][INFO] - {"epoch": 35, "update": 34.667, "loss": "3.166", "ntokens": "3197.64", "nsentences": "44.2", "prob_perplexity": "152.785", "code_perplexity": "149.8", "temp": "0.991", "loss_0": "3.041", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49011", "wps": "17667.6", "ups": "5.53", "wpb": "3197.6", "bsz": "44.2", "num_updates": "140600", "lr": "6.56709e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "26472"} [2023-11-02 00:56:41,608][train_inner][INFO] - {"epoch": 35, "update": 34.717, "loss": "3.134", "ntokens": "3156.92", "nsentences": "42.68", "prob_perplexity": "153.655", "code_perplexity": "150.625", "temp": "0.99", "loss_0": "3.01", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49361", "wps": "17577", "ups": "5.57", "wpb": "3156.9", "bsz": "42.7", "num_updates": "140800", "lr": "6.56203e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "26508"} [2023-11-02 00:57:17,737][train_inner][INFO] - {"epoch": 35, "update": 34.766, "loss": "3.08", "ntokens": "3149.8", "nsentences": "44.52", "prob_perplexity": "153.01", "code_perplexity": "149.991", "temp": "0.989", "loss_0": "2.955", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.50409", "wps": "17437.2", "ups": "5.54", "wpb": "3149.8", "bsz": "44.5", "num_updates": "141000", "lr": "6.55696e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "26544"} [2023-11-02 00:57:53,733][train_inner][INFO] - {"epoch": 35, "update": 34.815, "loss": "3.083", "ntokens": "3197.92", "nsentences": "46.48", "prob_perplexity": "153.018", "code_perplexity": "150.087", "temp": "0.988", "loss_0": "2.959", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.50548", "wps": "17769.5", "ups": "5.56", "wpb": "3197.9", "bsz": "46.5", "num_updates": "141200", "lr": "6.5519e-05", "gnorm": "0.747", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "26580"} [2023-11-02 00:58:29,765][train_inner][INFO] - {"epoch": 35, "update": 34.865, "loss": "3.171", "ntokens": "3180.84", "nsentences": "42.44", "prob_perplexity": "153.119", "code_perplexity": "150.105", "temp": "0.987", "loss_0": "3.047", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.48816", "wps": "17656.6", "ups": "5.55", "wpb": "3180.8", "bsz": "42.4", "num_updates": "141400", "lr": "6.54684e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "35", "gb_free": "16.5", "wall": "26616"} [2023-11-02 00:59:05,477][train_inner][INFO] - {"epoch": 35, "update": 34.914, "loss": "3.125", "ntokens": "3214.76", "nsentences": "44.96", "prob_perplexity": "153.337", "code_perplexity": "150.308", "temp": "0.986", "loss_0": "3", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.49667", "wps": "18004.9", "ups": "5.6", "wpb": "3214.8", "bsz": "45", "num_updates": "141600", "lr": "6.54177e-05", "gnorm": "0.744", "loss_scale": "2", "train_wall": "35", "gb_free": "12.4", "wall": "26652"} [2023-11-02 00:59:41,345][train_inner][INFO] - {"epoch": 35, "update": 34.963, "loss": "3.184", "ntokens": "3168.36", "nsentences": "43.88", "prob_perplexity": "153.337", "code_perplexity": "150.306", "temp": "0.985", "loss_0": "3.06", "loss_1": "0.11", "loss_2": "0.015", "accuracy": "0.48689", "wps": "17667.4", "ups": "5.58", "wpb": "3168.4", "bsz": "43.9", "num_updates": "141800", "lr": "6.53671e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "26688"} [2023-11-02 01:00:08,117][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 01:00:08,118][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:00:08,137][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 41 [2023-11-02 01:00:33,846][valid][INFO] - {"epoch": 35, "valid_loss": "2.93", "valid_ntokens": "3152.5", "valid_nsentences": "44.1685", "valid_prob_perplexity": "153.817", "valid_code_perplexity": "150.979", "valid_temp": "0.984", "valid_loss_0": "2.806", "valid_loss_1": "0.11", "valid_loss_2": "0.015", "valid_accuracy": "0.53519", "valid_wps": "55621.8", "valid_wpb": "3152.5", "valid_bsz": "44.2", "valid_num_updates": "141949", "valid_best_loss": "2.93"} [2023-11-02 01:00:33,848][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 35 @ 141949 updates [2023-11-02 01:00:33,850][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 01:00:35,291][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 01:00:36,271][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 35 @ 141949 updates, score 2.93) (writing took 2.4224466192536056 seconds) [2023-11-02 01:00:36,271][fairseq_cli.train][INFO] - end of epoch 35 (average epoch stats below) [2023-11-02 01:00:36,274][train][INFO] - {"epoch": 35, "train_loss": "3.133", "train_ntokens": "3191.93", "train_nsentences": "44.2682", "train_prob_perplexity": "152.504", "train_code_perplexity": "149.5", "train_temp": "0.994", "train_loss_0": "3.009", "train_loss_1": "0.11", "train_loss_2": "0.015", "train_accuracy": "0.49518", "train_wps": "17015.9", "train_ups": "5.33", "train_wpb": "3191.9", "train_bsz": "44.3", "train_num_updates": "141949", "train_lr": "6.53294e-05", "train_gnorm": "0.747", "train_loss_scale": "2", "train_train_wall": "720", "train_gb_free": "15.9", "train_wall": "26743"} [2023-11-02 01:00:36,277][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:00:36,302][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 36 [2023-11-02 01:00:36,479][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 01:00:36,516][fairseq.trainer][INFO] - begin training epoch 36 [2023-11-02 01:00:36,516][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 01:00:45,859][train_inner][INFO] - {"epoch": 36, "update": 35.013, "loss": "3.14", "ntokens": "3226.8", "nsentences": "43.92", "prob_perplexity": "154.302", "code_perplexity": "151.187", "temp": "0.984", "loss_0": "3.015", "loss_1": "0.109", "loss_2": "0.015", "accuracy": "0.49303", "wps": "10003.7", "ups": "3.1", "wpb": "3226.8", "bsz": "43.9", "num_updates": "142000", "lr": "6.53165e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "26752"} [2023-11-02 01:01:21,695][train_inner][INFO] - {"epoch": 36, "update": 35.062, "loss": "3.125", "ntokens": "3194.56", "nsentences": "43.8", "prob_perplexity": "154.301", "code_perplexity": "151.252", "temp": "0.983", "loss_0": "3.001", "loss_1": "0.109", "loss_2": "0.015", "accuracy": "0.49641", "wps": "17830.1", "ups": "5.58", "wpb": "3194.6", "bsz": "43.8", "num_updates": "142200", "lr": "6.52658e-05", "gnorm": "0.745", "loss_scale": "2", "train_wall": "35", "gb_free": "15.6", "wall": "26788"} [2023-11-02 01:01:57,421][train_inner][INFO] - {"epoch": 36, "update": 35.111, "loss": "3.149", "ntokens": "3188.12", "nsentences": "43.8", "prob_perplexity": "154.084", "code_perplexity": "151.103", "temp": "0.982", "loss_0": "3.025", "loss_1": "0.11", "loss_2": "0.014", "accuracy": "0.4926", "wps": "17848.6", "ups": "5.6", "wpb": "3188.1", "bsz": "43.8", "num_updates": "142400", "lr": "6.52152e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "35", "gb_free": "16.7", "wall": "26824"} [2023-11-02 01:02:32,881][train_inner][INFO] - {"epoch": 36, "update": 35.161, "loss": "3.077", "ntokens": "3152.36", "nsentences": "45.08", "prob_perplexity": "154.431", "code_perplexity": "151.436", "temp": "0.981", "loss_0": "2.953", "loss_1": "0.109", "loss_2": "0.015", "accuracy": "0.5045", "wps": "17780.6", "ups": "5.64", "wpb": "3152.4", "bsz": "45.1", "num_updates": "142600", "lr": "6.51646e-05", "gnorm": "0.751", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "26859"} [2023-11-02 01:03:08,131][train_inner][INFO] - {"epoch": 36, "update": 35.21, "loss": "3.074", "ntokens": "3160.24", "nsentences": "46.08", "prob_perplexity": "154.477", "code_perplexity": "151.514", "temp": "0.98", "loss_0": "2.95", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50669", "wps": "17931.6", "ups": "5.67", "wpb": "3160.2", "bsz": "46.1", "num_updates": "142800", "lr": "6.51139e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "26894"} [2023-11-02 01:03:44,076][train_inner][INFO] - {"epoch": 36, "update": 35.259, "loss": "3.092", "ntokens": "3161.4", "nsentences": "45.88", "prob_perplexity": "153.912", "code_perplexity": "150.919", "temp": "0.979", "loss_0": "2.968", "loss_1": "0.11", "loss_2": "0.014", "accuracy": "0.50343", "wps": "17591.3", "ups": "5.56", "wpb": "3161.4", "bsz": "45.9", "num_updates": "143000", "lr": "6.50633e-05", "gnorm": "0.749", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "26930"} [2023-11-02 01:04:20,353][train_inner][INFO] - {"epoch": 36, "update": 35.308, "loss": "3.223", "ntokens": "3215.36", "nsentences": "39.96", "prob_perplexity": "154.242", "code_perplexity": "151.144", "temp": "0.978", "loss_0": "3.099", "loss_1": "0.109", "loss_2": "0.015", "accuracy": "0.47668", "wps": "17727.8", "ups": "5.51", "wpb": "3215.4", "bsz": "40", "num_updates": "143200", "lr": "6.50127e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "26967"} [2023-11-02 01:04:57,110][train_inner][INFO] - {"epoch": 36, "update": 35.358, "loss": "3.104", "ntokens": "3187.36", "nsentences": "44.16", "prob_perplexity": "155.075", "code_perplexity": "152.053", "temp": "0.977", "loss_0": "2.98", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49923", "wps": "17343.7", "ups": "5.44", "wpb": "3187.4", "bsz": "44.2", "num_updates": "143400", "lr": "6.4962e-05", "gnorm": "0.749", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "27003"} [2023-11-02 01:05:33,360][train_inner][INFO] - {"epoch": 36, "update": 35.407, "loss": "3.101", "ntokens": "3153.32", "nsentences": "43.88", "prob_perplexity": "154.797", "code_perplexity": "151.819", "temp": "0.976", "loss_0": "2.977", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50062", "wps": "17398.9", "ups": "5.52", "wpb": "3153.3", "bsz": "43.9", "num_updates": "143600", "lr": "6.49114e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "27040"} [2023-11-02 01:06:09,968][train_inner][INFO] - {"epoch": 36, "update": 35.456, "loss": "3.103", "ntokens": "3206.96", "nsentences": "46.4", "prob_perplexity": "154.542", "code_perplexity": "151.534", "temp": "0.975", "loss_0": "2.98", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50215", "wps": "17521.4", "ups": "5.46", "wpb": "3207", "bsz": "46.4", "num_updates": "143800", "lr": "6.48608e-05", "gnorm": "0.748", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "27076"} [2023-11-02 01:06:46,189][train_inner][INFO] - {"epoch": 36, "update": 35.506, "loss": "3.083", "ntokens": "3209.24", "nsentences": "46.64", "prob_perplexity": "154.825", "code_perplexity": "151.854", "temp": "0.974", "loss_0": "2.96", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50466", "wps": "17728.6", "ups": "5.52", "wpb": "3209.2", "bsz": "46.6", "num_updates": "144000", "lr": "6.48101e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "27112"} [2023-11-02 01:07:22,837][train_inner][INFO] - {"epoch": 36, "update": 35.555, "loss": "3.159", "ntokens": "3219.52", "nsentences": "42.48", "prob_perplexity": "154.65", "code_perplexity": "151.732", "temp": "0.973", "loss_0": "3.035", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.48851", "wps": "17570.9", "ups": "5.46", "wpb": "3219.5", "bsz": "42.5", "num_updates": "144200", "lr": "6.47595e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "27149"} [2023-11-02 01:07:59,155][train_inner][INFO] - {"epoch": 36, "update": 35.604, "loss": "3.088", "ntokens": "3211.52", "nsentences": "45.84", "prob_perplexity": "155.614", "code_perplexity": "152.695", "temp": "0.972", "loss_0": "2.965", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50235", "wps": "17686.8", "ups": "5.51", "wpb": "3211.5", "bsz": "45.8", "num_updates": "144400", "lr": "6.47089e-05", "gnorm": "0.751", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "27185"} [2023-11-02 01:08:35,042][train_inner][INFO] - {"epoch": 36, "update": 35.654, "loss": "3.088", "ntokens": "3174.24", "nsentences": "44.24", "prob_perplexity": "155.701", "code_perplexity": "152.799", "temp": "0.971", "loss_0": "2.965", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50203", "wps": "17691.3", "ups": "5.57", "wpb": "3174.2", "bsz": "44.2", "num_updates": "144600", "lr": "6.46582e-05", "gnorm": "0.757", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "27221"} [2023-11-02 01:09:11,125][train_inner][INFO] - {"epoch": 36, "update": 35.703, "loss": "3.161", "ntokens": "3170.12", "nsentences": "42.64", "prob_perplexity": "155.964", "code_perplexity": "153.009", "temp": "0.97", "loss_0": "3.037", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.48992", "wps": "17572.1", "ups": "5.54", "wpb": "3170.1", "bsz": "42.6", "num_updates": "144800", "lr": "6.46076e-05", "gnorm": "0.759", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "27257"} [2023-11-02 01:09:47,345][train_inner][INFO] - {"epoch": 36, "update": 35.752, "loss": "3.103", "ntokens": "3160.4", "nsentences": "46.2", "prob_perplexity": "155.597", "code_perplexity": "152.598", "temp": "0.969", "loss_0": "2.98", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50254", "wps": "17452.4", "ups": "5.52", "wpb": "3160.4", "bsz": "46.2", "num_updates": "145000", "lr": "6.4557e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "27294"} [2023-11-02 01:10:23,252][train_inner][INFO] - {"epoch": 36, "update": 35.802, "loss": "3.094", "ntokens": "3209.36", "nsentences": "45.92", "prob_perplexity": "156.197", "code_perplexity": "153.249", "temp": "0.968", "loss_0": "2.971", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50301", "wps": "17876.9", "ups": "5.57", "wpb": "3209.4", "bsz": "45.9", "num_updates": "145200", "lr": "6.45063e-05", "gnorm": "0.739", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "27329"} [2023-11-02 01:10:59,581][train_inner][INFO] - {"epoch": 36, "update": 35.851, "loss": "3.119", "ntokens": "3205", "nsentences": "43.8", "prob_perplexity": "156.745", "code_perplexity": "153.711", "temp": "0.967", "loss_0": "2.996", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49702", "wps": "17645.4", "ups": "5.51", "wpb": "3205", "bsz": "43.8", "num_updates": "145400", "lr": "6.44557e-05", "gnorm": "0.751", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "27366"} [2023-11-02 01:11:35,576][train_inner][INFO] - {"epoch": 36, "update": 35.9, "loss": "3.12", "ntokens": "3209.96", "nsentences": "44.84", "prob_perplexity": "156.33", "code_perplexity": "153.364", "temp": "0.966", "loss_0": "2.997", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49728", "wps": "17836.9", "ups": "5.56", "wpb": "3210", "bsz": "44.8", "num_updates": "145600", "lr": "6.44051e-05", "gnorm": "0.741", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "27402"} [2023-11-02 01:12:11,432][train_inner][INFO] - {"epoch": 36, "update": 35.949, "loss": "3.173", "ntokens": "3200.4", "nsentences": "41.88", "prob_perplexity": "156.222", "code_perplexity": "153.162", "temp": "0.965", "loss_0": "3.05", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.48618", "wps": "17852.2", "ups": "5.58", "wpb": "3200.4", "bsz": "41.9", "num_updates": "145800", "lr": "6.43544e-05", "gnorm": "0.753", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "27438"} [2023-11-02 01:12:47,797][train_inner][INFO] - {"epoch": 36, "update": 35.999, "loss": "3.178", "ntokens": "3192.8", "nsentences": "41.76", "prob_perplexity": "156.328", "code_perplexity": "153.268", "temp": "0.964", "loss_0": "3.055", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.48526", "wps": "17560.9", "ups": "5.5", "wpb": "3192.8", "bsz": "41.8", "num_updates": "146000", "lr": "6.43038e-05", "gnorm": "0.759", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "27474"} [2023-11-02 01:12:48,784][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 01:12:48,785][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:12:48,803][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 42 [2023-11-02 01:13:14,578][valid][INFO] - {"epoch": 36, "valid_loss": "2.941", "valid_ntokens": "3170.31", "valid_nsentences": "44.1685", "valid_prob_perplexity": "156.568", "valid_code_perplexity": "153.798", "valid_temp": "0.964", "valid_loss_0": "2.818", "valid_loss_1": "0.109", "valid_loss_2": "0.014", "valid_accuracy": "0.53294", "valid_wps": "55705.2", "valid_wpb": "3170.3", "valid_bsz": "44.2", "valid_num_updates": "146005", "valid_best_loss": "2.93"} [2023-11-02 01:13:14,580][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 36 @ 146005 updates [2023-11-02 01:13:14,582][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 01:13:16,008][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 01:13:16,055][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 36 @ 146005 updates, score 2.941) (writing took 1.4748790203593671 seconds) [2023-11-02 01:13:16,056][fairseq_cli.train][INFO] - end of epoch 36 (average epoch stats below) [2023-11-02 01:13:16,058][train][INFO] - {"epoch": 36, "train_loss": "3.121", "train_ntokens": "3189.74", "train_nsentences": "44.2682", "train_prob_perplexity": "155.196", "train_code_perplexity": "152.203", "train_temp": "0.974", "train_loss_0": "2.997", "train_loss_1": "0.109", "train_loss_2": "0.014", "train_accuracy": "0.497", "train_wps": "17028", "train_ups": "5.34", "train_wpb": "3189.7", "train_bsz": "44.3", "train_num_updates": "146005", "train_lr": "6.43025e-05", "train_gnorm": "0.75", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "14.1", "train_wall": "27502"} [2023-11-02 01:13:16,060][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:13:16,081][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 37 [2023-11-02 01:13:16,249][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 01:13:16,284][fairseq.trainer][INFO] - begin training epoch 37 [2023-11-02 01:13:16,285][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 01:13:51,907][train_inner][INFO] - {"epoch": 37, "update": 36.048, "loss": "3.132", "ntokens": "3192.96", "nsentences": "43.64", "prob_perplexity": "156.739", "code_perplexity": "153.692", "temp": "0.963", "loss_0": "3.009", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49463", "wps": "9961.3", "ups": "3.12", "wpb": "3193", "bsz": "43.6", "num_updates": "146200", "lr": "6.42532e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "27538"} [2023-11-02 01:14:27,515][train_inner][INFO] - {"epoch": 37, "update": 36.097, "loss": "3.124", "ntokens": "3165.24", "nsentences": "41.68", "prob_perplexity": "156.907", "code_perplexity": "153.875", "temp": "0.962", "loss_0": "3.001", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49462", "wps": "17779.2", "ups": "5.62", "wpb": "3165.2", "bsz": "41.7", "num_updates": "146400", "lr": "6.42025e-05", "gnorm": "0.751", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "27574"} [2023-11-02 01:15:03,021][train_inner][INFO] - {"epoch": 37, "update": 36.147, "loss": "3.094", "ntokens": "3215.56", "nsentences": "44.44", "prob_perplexity": "156.725", "code_perplexity": "153.757", "temp": "0.961", "loss_0": "2.971", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50123", "wps": "18113.8", "ups": "5.63", "wpb": "3215.6", "bsz": "44.4", "num_updates": "146600", "lr": "6.41519e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "27609"} [2023-11-02 01:15:38,652][train_inner][INFO] - {"epoch": 37, "update": 36.196, "loss": "3.013", "ntokens": "3157.76", "nsentences": "47.72", "prob_perplexity": "157.446", "code_perplexity": "154.415", "temp": "0.96", "loss_0": "2.89", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.51896", "wps": "17725.7", "ups": "5.61", "wpb": "3157.8", "bsz": "47.7", "num_updates": "146800", "lr": "6.41013e-05", "gnorm": "0.737", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "27645"} [2023-11-02 01:16:14,667][train_inner][INFO] - {"epoch": 37, "update": 36.245, "loss": "3.123", "ntokens": "3214.2", "nsentences": "44.16", "prob_perplexity": "156.74", "code_perplexity": "153.683", "temp": "0.959", "loss_0": "3", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49647", "wps": "17850.2", "ups": "5.55", "wpb": "3214.2", "bsz": "44.2", "num_updates": "147000", "lr": "6.40506e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "27681"} [2023-11-02 01:16:50,292][train_inner][INFO] - {"epoch": 37, "update": 36.295, "loss": "3.068", "ntokens": "3159.04", "nsentences": "44.36", "prob_perplexity": "157.335", "code_perplexity": "154.298", "temp": "0.959", "loss_0": "2.945", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50555", "wps": "17736.1", "ups": "5.61", "wpb": "3159", "bsz": "44.4", "num_updates": "147200", "lr": "6.4e-05", "gnorm": "0.76", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "27717"} [2023-11-02 01:17:26,507][train_inner][INFO] - {"epoch": 37, "update": 36.344, "loss": "3.121", "ntokens": "3222.12", "nsentences": "43.24", "prob_perplexity": "157.374", "code_perplexity": "154.405", "temp": "0.958", "loss_0": "2.998", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49442", "wps": "17795.4", "ups": "5.52", "wpb": "3222.1", "bsz": "43.2", "num_updates": "147400", "lr": "6.39494e-05", "gnorm": "0.776", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "27753"} [2023-11-02 01:18:02,762][train_inner][INFO] - {"epoch": 37, "update": 36.393, "loss": "3.096", "ntokens": "3167.92", "nsentences": "44.84", "prob_perplexity": "157.405", "code_perplexity": "154.417", "temp": "0.957", "loss_0": "2.974", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50206", "wps": "17477", "ups": "5.52", "wpb": "3167.9", "bsz": "44.8", "num_updates": "147600", "lr": "6.38987e-05", "gnorm": "0.751", "loss_scale": "2", "train_wall": "36", "gb_free": "12.3", "wall": "27789"} [2023-11-02 01:18:38,689][train_inner][INFO] - {"epoch": 37, "update": 36.443, "loss": "3.149", "ntokens": "3139.76", "nsentences": "42.24", "prob_perplexity": "157.214", "code_perplexity": "154.26", "temp": "0.956", "loss_0": "3.026", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49126", "wps": "17479.6", "ups": "5.57", "wpb": "3139.8", "bsz": "42.2", "num_updates": "147800", "lr": "6.38481e-05", "gnorm": "0.771", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "27825"} [2023-11-02 01:19:15,018][train_inner][INFO] - {"epoch": 37, "update": 36.492, "loss": "3.127", "ntokens": "3176.48", "nsentences": "43.92", "prob_perplexity": "157.604", "code_perplexity": "154.546", "temp": "0.955", "loss_0": "3.004", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49542", "wps": "17488.1", "ups": "5.51", "wpb": "3176.5", "bsz": "43.9", "num_updates": "148000", "lr": "6.37975e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "27861"} [2023-11-02 01:19:50,860][train_inner][INFO] - {"epoch": 37, "update": 36.541, "loss": "3.047", "ntokens": "3179.4", "nsentences": "46.56", "prob_perplexity": "158.306", "code_perplexity": "155.33", "temp": "0.954", "loss_0": "2.925", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.5106", "wps": "17742.4", "ups": "5.58", "wpb": "3179.4", "bsz": "46.6", "num_updates": "148200", "lr": "6.37468e-05", "gnorm": "0.749", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "27897"} [2023-11-02 01:20:27,030][train_inner][INFO] - {"epoch": 37, "update": 36.59, "loss": "3.137", "ntokens": "3176", "nsentences": "43.16", "prob_perplexity": "157.494", "code_perplexity": "154.5", "temp": "0.953", "loss_0": "3.014", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.49357", "wps": "17570.4", "ups": "5.53", "wpb": "3176", "bsz": "43.2", "num_updates": "148400", "lr": "6.36962e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "36", "gb_free": "16.9", "wall": "27933"} [2023-11-02 01:21:03,308][train_inner][INFO] - {"epoch": 37, "update": 36.64, "loss": "3.029", "ntokens": "3179.56", "nsentences": "45.48", "prob_perplexity": "158.499", "code_perplexity": "155.535", "temp": "0.952", "loss_0": "2.906", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.51287", "wps": "17530", "ups": "5.51", "wpb": "3179.6", "bsz": "45.5", "num_updates": "148600", "lr": "6.36456e-05", "gnorm": "0.753", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "27970"} [2023-11-02 01:21:39,875][train_inner][INFO] - {"epoch": 37, "update": 36.689, "loss": "3.036", "ntokens": "3167.28", "nsentences": "45.28", "prob_perplexity": "159.099", "code_perplexity": "156.145", "temp": "0.951", "loss_0": "2.914", "loss_1": "0.108", "loss_2": "0.014", "accuracy": "0.51142", "wps": "17324.3", "ups": "5.47", "wpb": "3167.3", "bsz": "45.3", "num_updates": "148800", "lr": "6.35949e-05", "gnorm": "0.748", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "28006"} [2023-11-02 01:22:16,373][train_inner][INFO] - {"epoch": 37, "update": 36.738, "loss": "3.06", "ntokens": "3185.2", "nsentences": "44.44", "prob_perplexity": "158.541", "code_perplexity": "155.565", "temp": "0.95", "loss_0": "2.938", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.5062", "wps": "17455.4", "ups": "5.48", "wpb": "3185.2", "bsz": "44.4", "num_updates": "149000", "lr": "6.35443e-05", "gnorm": "0.753", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "28043"} [2023-11-02 01:22:52,769][train_inner][INFO] - {"epoch": 37, "update": 36.788, "loss": "3.085", "ntokens": "3210", "nsentences": "44.84", "prob_perplexity": "158.367", "code_perplexity": "155.409", "temp": "0.949", "loss_0": "2.963", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.50205", "wps": "17640.3", "ups": "5.5", "wpb": "3210", "bsz": "44.8", "num_updates": "149200", "lr": "6.34937e-05", "gnorm": "0.745", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "28079"} [2023-11-02 01:23:29,231][train_inner][INFO] - {"epoch": 37, "update": 36.837, "loss": "3.082", "ntokens": "3194.56", "nsentences": "44.56", "prob_perplexity": "158.383", "code_perplexity": "155.454", "temp": "0.948", "loss_0": "2.959", "loss_1": "0.109", "loss_2": "0.014", "accuracy": "0.5027", "wps": "17523.8", "ups": "5.49", "wpb": "3194.6", "bsz": "44.6", "num_updates": "149400", "lr": "6.3443e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "28115"} [2023-11-02 01:24:05,151][train_inner][INFO] - {"epoch": 37, "update": 36.886, "loss": "3.088", "ntokens": "3145.56", "nsentences": "43.8", "prob_perplexity": "158.65", "code_perplexity": "155.7", "temp": "0.947", "loss_0": "2.966", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50172", "wps": "17515.1", "ups": "5.57", "wpb": "3145.6", "bsz": "43.8", "num_updates": "149600", "lr": "6.33924e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "28151"} [2023-11-02 01:24:41,233][train_inner][INFO] - {"epoch": 37, "update": 36.936, "loss": "3.093", "ntokens": "3154.8", "nsentences": "44.24", "prob_perplexity": "159.414", "code_perplexity": "156.313", "temp": "0.946", "loss_0": "2.972", "loss_1": "0.108", "loss_2": "0.014", "accuracy": "0.50184", "wps": "17487.8", "ups": "5.54", "wpb": "3154.8", "bsz": "44.2", "num_updates": "149800", "lr": "6.33418e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "28187"} [2023-11-02 01:25:17,158][train_inner][INFO] - {"epoch": 37, "update": 36.985, "loss": "3.096", "ntokens": "3172.96", "nsentences": "43.12", "prob_perplexity": "159.326", "code_perplexity": "156.281", "temp": "0.945", "loss_0": "2.975", "loss_1": "0.108", "loss_2": "0.014", "accuracy": "0.49963", "wps": "17665.4", "ups": "5.57", "wpb": "3173", "bsz": "43.1", "num_updates": "150000", "lr": "6.32911e-05", "gnorm": "0.761", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "28223"} [2023-11-02 01:25:17,160][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 01:25:17,161][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:25:17,178][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 43 [2023-11-02 01:25:43,118][valid][INFO] - {"epoch": 37, "valid_loss": "2.934", "valid_ntokens": "3168.05", "valid_nsentences": "44.1685", "valid_prob_perplexity": "158.113", "valid_code_perplexity": "155.292", "valid_temp": "0.945", "valid_loss_0": "2.811", "valid_loss_1": "0.109", "valid_loss_2": "0.014", "valid_accuracy": "0.53409", "valid_wps": "55377.1", "valid_wpb": "3168.1", "valid_bsz": "44.2", "valid_num_updates": "150000", "valid_best_loss": "2.93"} [2023-11-02 01:25:43,120][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 37 @ 150000 updates [2023-11-02 01:25:43,122][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_37_150000.pt [2023-11-02 01:25:44,464][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_37_150000.pt [2023-11-02 01:25:45,435][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_37_150000.pt (epoch 37 @ 150000 updates, score 2.934) (writing took 2.3149167243391275 seconds) [2023-11-02 01:25:56,581][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 01:25:56,582][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:25:56,599][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 44 [2023-11-02 01:26:22,095][valid][INFO] - {"epoch": 37, "valid_loss": "2.908", "valid_ntokens": "3155.76", "valid_nsentences": "44.1685", "valid_prob_perplexity": "159.597", "valid_code_perplexity": "156.742", "valid_temp": "0.944", "valid_loss_0": "2.786", "valid_loss_1": "0.108", "valid_loss_2": "0.014", "valid_accuracy": "0.53776", "valid_wps": "56111.3", "valid_wpb": "3155.8", "valid_bsz": "44.2", "valid_num_updates": "150061", "valid_best_loss": "2.908"} [2023-11-02 01:26:22,097][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 37 @ 150061 updates [2023-11-02 01:26:22,099][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 01:26:23,516][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 01:26:24,508][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 37 @ 150061 updates, score 2.908) (writing took 2.4113264880143106 seconds) [2023-11-02 01:26:24,509][fairseq_cli.train][INFO] - end of epoch 37 (average epoch stats below) [2023-11-02 01:26:24,511][train][INFO] - {"epoch": 37, "train_loss": "3.09", "train_ntokens": "3179.81", "train_nsentences": "44.2682", "train_prob_perplexity": "157.909", "train_code_perplexity": "154.911", "train_temp": "0.954", "train_loss_0": "2.968", "train_loss_1": "0.109", "train_loss_2": "0.014", "train_accuracy": "0.50179", "train_wps": "16357.8", "train_ups": "5.14", "train_wpb": "3179.8", "train_bsz": "44.3", "train_num_updates": "150061", "train_lr": "6.32757e-05", "train_gnorm": "0.755", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.3", "train_wall": "28291"} [2023-11-02 01:26:24,513][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:26:24,547][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 38 [2023-11-02 01:26:24,763][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 01:26:24,799][fairseq.trainer][INFO] - begin training epoch 38 [2023-11-02 01:26:24,800][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 01:26:49,395][train_inner][INFO] - {"epoch": 38, "update": 37.034, "loss": "3.097", "ntokens": "3188.12", "nsentences": "43.36", "prob_perplexity": "160.171", "code_perplexity": "157.158", "temp": "0.944", "loss_0": "2.975", "loss_1": "0.108", "loss_2": "0.014", "accuracy": "0.49882", "wps": "6913", "ups": "2.17", "wpb": "3188.1", "bsz": "43.4", "num_updates": "150200", "lr": "6.32405e-05", "gnorm": "0.759", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "28316"} [2023-11-02 01:27:25,213][train_inner][INFO] - {"epoch": 38, "update": 37.084, "loss": "3.078", "ntokens": "3189.32", "nsentences": "45.84", "prob_perplexity": "159.193", "code_perplexity": "156.159", "temp": "0.943", "loss_0": "2.956", "loss_1": "0.108", "loss_2": "0.014", "accuracy": "0.50504", "wps": "17809.5", "ups": "5.58", "wpb": "3189.3", "bsz": "45.8", "num_updates": "150400", "lr": "6.31899e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "28351"} [2023-11-02 01:28:01,101][train_inner][INFO] - {"epoch": 38, "update": 37.133, "loss": "3.1", "ntokens": "3201.56", "nsentences": "43.48", "prob_perplexity": "159.63", "code_perplexity": "156.655", "temp": "0.942", "loss_0": "2.979", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.49938", "wps": "17843.4", "ups": "5.57", "wpb": "3201.6", "bsz": "43.5", "num_updates": "150600", "lr": "6.31392e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "28387"} [2023-11-02 01:28:37,230][train_inner][INFO] - {"epoch": 38, "update": 37.182, "loss": "3.142", "ntokens": "3194.76", "nsentences": "43.04", "prob_perplexity": "160.008", "code_perplexity": "156.917", "temp": "0.941", "loss_0": "3.021", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.49257", "wps": "17686.4", "ups": "5.54", "wpb": "3194.8", "bsz": "43", "num_updates": "150800", "lr": "6.30886e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "28423"} [2023-11-02 01:29:13,400][train_inner][INFO] - {"epoch": 38, "update": 37.232, "loss": "3.094", "ntokens": "3198.72", "nsentences": "43.56", "prob_perplexity": "160.605", "code_perplexity": "157.651", "temp": "0.94", "loss_0": "2.973", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50025", "wps": "17688.3", "ups": "5.53", "wpb": "3198.7", "bsz": "43.6", "num_updates": "151000", "lr": "6.3038e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "28460"} [2023-11-02 01:29:49,866][train_inner][INFO] - {"epoch": 38, "update": 37.281, "loss": "3.114", "ntokens": "3217.68", "nsentences": "43.24", "prob_perplexity": "160.191", "code_perplexity": "157.215", "temp": "0.94", "loss_0": "2.992", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.49566", "wps": "17648.4", "ups": "5.48", "wpb": "3217.7", "bsz": "43.2", "num_updates": "151200", "lr": "6.29873e-05", "gnorm": "0.747", "loss_scale": "2", "train_wall": "36", "gb_free": "14.8", "wall": "28496"} [2023-11-02 01:30:26,233][train_inner][INFO] - {"epoch": 38, "update": 37.33, "loss": "3.183", "ntokens": "3182.68", "nsentences": "41.88", "prob_perplexity": "159.496", "code_perplexity": "156.579", "temp": "0.939", "loss_0": "3.062", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.4848", "wps": "17504.6", "ups": "5.5", "wpb": "3182.7", "bsz": "41.9", "num_updates": "151400", "lr": "6.29367e-05", "gnorm": "0.765", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "28532"} [2023-11-02 01:31:03,041][train_inner][INFO] - {"epoch": 38, "update": 37.379, "loss": "3.137", "ntokens": "3171.44", "nsentences": "41.84", "prob_perplexity": "159.894", "code_perplexity": "156.876", "temp": "0.938", "loss_0": "3.015", "loss_1": "0.108", "loss_2": "0.014", "accuracy": "0.49205", "wps": "17233.1", "ups": "5.43", "wpb": "3171.4", "bsz": "41.8", "num_updates": "151600", "lr": "6.28861e-05", "gnorm": "0.773", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "28569"} [2023-11-02 01:31:39,690][train_inner][INFO] - {"epoch": 38, "update": 37.429, "loss": "3.067", "ntokens": "3185.96", "nsentences": "43.36", "prob_perplexity": "159.939", "code_perplexity": "156.914", "temp": "0.937", "loss_0": "2.945", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50393", "wps": "17387.3", "ups": "5.46", "wpb": "3186", "bsz": "43.4", "num_updates": "151800", "lr": "6.28354e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "28606"} [2023-11-02 01:32:15,622][train_inner][INFO] - {"epoch": 38, "update": 37.478, "loss": "3.036", "ntokens": "3169.48", "nsentences": "46.48", "prob_perplexity": "161.019", "code_perplexity": "158.015", "temp": "0.936", "loss_0": "2.914", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.51315", "wps": "17642.5", "ups": "5.57", "wpb": "3169.5", "bsz": "46.5", "num_updates": "152000", "lr": "6.27848e-05", "gnorm": "0.749", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "28642"} [2023-11-02 01:32:51,574][train_inner][INFO] - {"epoch": 38, "update": 37.527, "loss": "3.101", "ntokens": "3206.68", "nsentences": "44.6", "prob_perplexity": "160.991", "code_perplexity": "157.986", "temp": "0.935", "loss_0": "2.98", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50024", "wps": "17840", "ups": "5.56", "wpb": "3206.7", "bsz": "44.6", "num_updates": "152200", "lr": "6.27342e-05", "gnorm": "0.761", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "28678"} [2023-11-02 01:33:28,075][train_inner][INFO] - {"epoch": 38, "update": 37.577, "loss": "3.013", "ntokens": "3200.6", "nsentences": "47.68", "prob_perplexity": "161.247", "code_perplexity": "158.279", "temp": "0.934", "loss_0": "2.892", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.51667", "wps": "17544.8", "ups": "5.48", "wpb": "3200.6", "bsz": "47.7", "num_updates": "152400", "lr": "6.26835e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "28714"} [2023-11-02 01:34:04,228][train_inner][INFO] - {"epoch": 38, "update": 37.626, "loss": "3.047", "ntokens": "3182.36", "nsentences": "44.8", "prob_perplexity": "161.045", "code_perplexity": "158.051", "temp": "0.933", "loss_0": "2.926", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50886", "wps": "17606.1", "ups": "5.53", "wpb": "3182.4", "bsz": "44.8", "num_updates": "152600", "lr": "6.26329e-05", "gnorm": "0.76", "loss_scale": "2", "train_wall": "36", "gb_free": "12.7", "wall": "28750"} [2023-11-02 01:34:40,445][train_inner][INFO] - {"epoch": 38, "update": 37.675, "loss": "3.063", "ntokens": "3168.56", "nsentences": "44.44", "prob_perplexity": "160.924", "code_perplexity": "157.914", "temp": "0.932", "loss_0": "2.942", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50606", "wps": "17498.4", "ups": "5.52", "wpb": "3168.6", "bsz": "44.4", "num_updates": "152800", "lr": "6.25823e-05", "gnorm": "0.765", "loss_scale": "2", "train_wall": "36", "gb_free": "15.6", "wall": "28787"} [2023-11-02 01:35:16,488][train_inner][INFO] - {"epoch": 38, "update": 37.725, "loss": "3.066", "ntokens": "3194.96", "nsentences": "46.16", "prob_perplexity": "161.869", "code_perplexity": "158.95", "temp": "0.931", "loss_0": "2.946", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50734", "wps": "17729.6", "ups": "5.55", "wpb": "3195", "bsz": "46.2", "num_updates": "153000", "lr": "6.25316e-05", "gnorm": "0.754", "loss_scale": "4", "train_wall": "35", "gb_free": "12.8", "wall": "28823"} [2023-11-02 01:35:52,487][train_inner][INFO] - {"epoch": 38, "update": 37.774, "loss": "3.111", "ntokens": "3180.52", "nsentences": "42.84", "prob_perplexity": "160.866", "code_perplexity": "157.898", "temp": "0.93", "loss_0": "2.99", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.49584", "wps": "17671.5", "ups": "5.56", "wpb": "3180.5", "bsz": "42.8", "num_updates": "153200", "lr": "6.2481e-05", "gnorm": "0.783", "loss_scale": "4", "train_wall": "35", "gb_free": "13", "wall": "28859"} [2023-11-02 01:36:21,136][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 01:36:29,270][train_inner][INFO] - {"epoch": 38, "update": 37.823, "loss": "3.112", "ntokens": "3215.84", "nsentences": "43.52", "prob_perplexity": "161.706", "code_perplexity": "158.841", "temp": "0.929", "loss_0": "2.991", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.49661", "wps": "17486.4", "ups": "5.44", "wpb": "3215.8", "bsz": "43.5", "num_updates": "153400", "lr": "6.24304e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "28896"} [2023-11-02 01:37:05,477][train_inner][INFO] - {"epoch": 38, "update": 37.873, "loss": "3.042", "ntokens": "3175.92", "nsentences": "46.44", "prob_perplexity": "161.992", "code_perplexity": "159.03", "temp": "0.928", "loss_0": "2.921", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.51212", "wps": "17544", "ups": "5.52", "wpb": "3175.9", "bsz": "46.4", "num_updates": "153600", "lr": "6.23797e-05", "gnorm": "0.746", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "28932"} [2023-11-02 01:37:42,396][train_inner][INFO] - {"epoch": 38, "update": 37.922, "loss": "3.193", "ntokens": "3214.84", "nsentences": "40.84", "prob_perplexity": "162.985", "code_perplexity": "159.905", "temp": "0.927", "loss_0": "3.073", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.48178", "wps": "17416.8", "ups": "5.42", "wpb": "3214.8", "bsz": "40.8", "num_updates": "153800", "lr": "6.23291e-05", "gnorm": "0.765", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "28969"} [2023-11-02 01:38:18,570][train_inner][INFO] - {"epoch": 38, "update": 37.971, "loss": "3.067", "ntokens": "3196.28", "nsentences": "45.56", "prob_perplexity": "162.693", "code_perplexity": "159.688", "temp": "0.926", "loss_0": "2.947", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50553", "wps": "17672.4", "ups": "5.53", "wpb": "3196.3", "bsz": "45.6", "num_updates": "154000", "lr": "6.22785e-05", "gnorm": "0.754", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "29005"} [2023-11-02 01:38:39,464][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 01:38:39,465][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:38:39,483][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 45 [2023-11-02 01:39:05,252][valid][INFO] - {"epoch": 38, "valid_loss": "2.91", "valid_ntokens": "3158.9", "valid_nsentences": "44.1685", "valid_prob_perplexity": "161.31", "valid_code_perplexity": "158.668", "valid_temp": "0.925", "valid_loss_0": "2.79", "valid_loss_1": "0.108", "valid_loss_2": "0.013", "valid_accuracy": "0.53721", "valid_wps": "55608.7", "valid_wpb": "3158.9", "valid_bsz": "44.2", "valid_num_updates": "154116", "valid_best_loss": "2.908"} [2023-11-02 01:39:05,254][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 38 @ 154116 updates [2023-11-02 01:39:05,256][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 01:39:06,721][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 01:39:06,774][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 38 @ 154116 updates, score 2.91) (writing took 1.5204339995980263 seconds) [2023-11-02 01:39:06,775][fairseq_cli.train][INFO] - end of epoch 38 (average epoch stats below) [2023-11-02 01:39:06,779][train][INFO] - {"epoch": 38, "train_loss": "3.091", "train_ntokens": "3189.96", "train_nsentences": "44.2732", "train_prob_perplexity": "160.874", "train_code_perplexity": "157.885", "train_temp": "0.935", "train_loss_0": "2.97", "train_loss_1": "0.108", "train_loss_2": "0.013", "train_accuracy": "0.50132", "train_wps": "16969.6", "train_ups": "5.32", "train_wpb": "3190", "train_bsz": "44.3", "train_num_updates": "154116", "train_lr": "6.22491e-05", "train_gnorm": "0.759", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "13.5", "train_wall": "29053"} [2023-11-02 01:39:06,782][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:39:06,805][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 39 [2023-11-02 01:39:07,008][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 01:39:07,053][fairseq.trainer][INFO] - begin training epoch 39 [2023-11-02 01:39:07,054][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 01:39:21,956][train_inner][INFO] - {"epoch": 39, "update": 38.021, "loss": "3.074", "ntokens": "3164.88", "nsentences": "45.4", "prob_perplexity": "161.956", "code_perplexity": "159.013", "temp": "0.926", "loss_0": "2.953", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.5061", "wps": "9986.5", "ups": "3.16", "wpb": "3164.9", "bsz": "45.4", "num_updates": "154200", "lr": "6.22278e-05", "gnorm": "0.768", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "29068"} [2023-11-02 01:39:57,999][train_inner][INFO] - {"epoch": 39, "update": 38.07, "loss": "3.094", "ntokens": "3209", "nsentences": "44.48", "prob_perplexity": "162.363", "code_perplexity": "159.388", "temp": "0.925", "loss_0": "2.973", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50071", "wps": "17807.7", "ups": "5.55", "wpb": "3209", "bsz": "44.5", "num_updates": "154400", "lr": "6.21772e-05", "gnorm": "0.76", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "29104"} [2023-11-02 01:40:33,708][train_inner][INFO] - {"epoch": 39, "update": 38.119, "loss": "3.051", "ntokens": "3170.68", "nsentences": "44.8", "prob_perplexity": "162.314", "code_perplexity": "159.288", "temp": "0.924", "loss_0": "2.93", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50809", "wps": "17759.6", "ups": "5.6", "wpb": "3170.7", "bsz": "44.8", "num_updates": "154600", "lr": "6.21266e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "29140"} [2023-11-02 01:41:09,518][train_inner][INFO] - {"epoch": 39, "update": 38.169, "loss": "3.021", "ntokens": "3201.24", "nsentences": "46.96", "prob_perplexity": "162.488", "code_perplexity": "159.495", "temp": "0.923", "loss_0": "2.901", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.51482", "wps": "17879.7", "ups": "5.59", "wpb": "3201.2", "bsz": "47", "num_updates": "154800", "lr": "6.20759e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "29176"} [2023-11-02 01:41:45,678][train_inner][INFO] - {"epoch": 39, "update": 38.218, "loss": "3.056", "ntokens": "3181.16", "nsentences": "45.52", "prob_perplexity": "162.358", "code_perplexity": "159.346", "temp": "0.922", "loss_0": "2.935", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.50849", "wps": "17596.3", "ups": "5.53", "wpb": "3181.2", "bsz": "45.5", "num_updates": "155000", "lr": "6.20253e-05", "gnorm": "0.759", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "29212"} [2023-11-02 01:42:22,208][train_inner][INFO] - {"epoch": 39, "update": 38.267, "loss": "3.118", "ntokens": "3199.72", "nsentences": "42.4", "prob_perplexity": "162.228", "code_perplexity": "159.298", "temp": "0.921", "loss_0": "2.997", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.4952", "wps": "17519.4", "ups": "5.48", "wpb": "3199.7", "bsz": "42.4", "num_updates": "155200", "lr": "6.19747e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "29248"} [2023-11-02 01:42:58,217][train_inner][INFO] - {"epoch": 39, "update": 38.317, "loss": "3.044", "ntokens": "3162.88", "nsentences": "45.04", "prob_perplexity": "163.053", "code_perplexity": "160.127", "temp": "0.92", "loss_0": "2.923", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.51029", "wps": "17567.8", "ups": "5.55", "wpb": "3162.9", "bsz": "45", "num_updates": "155400", "lr": "6.19241e-05", "gnorm": "0.761", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "29284"} [2023-11-02 01:43:34,604][train_inner][INFO] - {"epoch": 39, "update": 38.366, "loss": "3.148", "ntokens": "3184.24", "nsentences": "41.44", "prob_perplexity": "162.56", "code_perplexity": "159.544", "temp": "0.919", "loss_0": "3.027", "loss_1": "0.108", "loss_2": "0.013", "accuracy": "0.48959", "wps": "17503.4", "ups": "5.5", "wpb": "3184.2", "bsz": "41.4", "num_updates": "155600", "lr": "6.18734e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "29321"} [2023-11-02 01:44:11,059][train_inner][INFO] - {"epoch": 39, "update": 38.415, "loss": "3.161", "ntokens": "3213.6", "nsentences": "40.88", "prob_perplexity": "163.426", "code_perplexity": "160.484", "temp": "0.918", "loss_0": "3.041", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.48602", "wps": "17631.6", "ups": "5.49", "wpb": "3213.6", "bsz": "40.9", "num_updates": "155800", "lr": "6.18228e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "36", "gb_free": "16.9", "wall": "29357"} [2023-11-02 01:44:47,872][train_inner][INFO] - {"epoch": 39, "update": 38.464, "loss": "3.139", "ntokens": "3228.36", "nsentences": "43.2", "prob_perplexity": "163.683", "code_perplexity": "160.725", "temp": "0.917", "loss_0": "3.019", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.49099", "wps": "17540.3", "ups": "5.43", "wpb": "3228.4", "bsz": "43.2", "num_updates": "156000", "lr": "6.17722e-05", "gnorm": "0.761", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "29394"} [2023-11-02 01:45:24,439][train_inner][INFO] - {"epoch": 39, "update": 38.514, "loss": "3.031", "ntokens": "3223.08", "nsentences": "46.32", "prob_perplexity": "163.458", "code_perplexity": "160.495", "temp": "0.916", "loss_0": "2.911", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.51222", "wps": "17629.3", "ups": "5.47", "wpb": "3223.1", "bsz": "46.3", "num_updates": "156200", "lr": "6.17215e-05", "gnorm": "0.756", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "29431"} [2023-11-02 01:46:00,442][train_inner][INFO] - {"epoch": 39, "update": 38.563, "loss": "2.993", "ntokens": "3149.4", "nsentences": "46.8", "prob_perplexity": "163.395", "code_perplexity": "160.459", "temp": "0.915", "loss_0": "2.873", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.51939", "wps": "17496.6", "ups": "5.56", "wpb": "3149.4", "bsz": "46.8", "num_updates": "156400", "lr": "6.16709e-05", "gnorm": "0.768", "loss_scale": "2", "train_wall": "35", "gb_free": "15.2", "wall": "29467"} [2023-11-02 01:46:36,984][train_inner][INFO] - {"epoch": 39, "update": 38.612, "loss": "3.061", "ntokens": "3213.72", "nsentences": "44.36", "prob_perplexity": "163.096", "code_perplexity": "160.081", "temp": "0.915", "loss_0": "2.941", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50628", "wps": "17590.2", "ups": "5.47", "wpb": "3213.7", "bsz": "44.4", "num_updates": "156600", "lr": "6.16203e-05", "gnorm": "0.781", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "29503"} [2023-11-02 01:47:13,440][train_inner][INFO] - {"epoch": 39, "update": 38.662, "loss": "3.099", "ntokens": "3192.52", "nsentences": "43.16", "prob_perplexity": "163.261", "code_perplexity": "160.352", "temp": "0.914", "loss_0": "2.978", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.49934", "wps": "17523", "ups": "5.49", "wpb": "3192.5", "bsz": "43.2", "num_updates": "156800", "lr": "6.15696e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "29540"} [2023-11-02 01:47:50,065][train_inner][INFO] - {"epoch": 39, "update": 38.711, "loss": "3.056", "ntokens": "3163.84", "nsentences": "44.84", "prob_perplexity": "163.665", "code_perplexity": "160.726", "temp": "0.913", "loss_0": "2.936", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50699", "wps": "17278.5", "ups": "5.46", "wpb": "3163.8", "bsz": "44.8", "num_updates": "157000", "lr": "6.1519e-05", "gnorm": "0.771", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "29576"} [2023-11-02 01:48:26,910][train_inner][INFO] - {"epoch": 39, "update": 38.76, "loss": "3.051", "ntokens": "3187.8", "nsentences": "44.84", "prob_perplexity": "163.869", "code_perplexity": "160.876", "temp": "0.912", "loss_0": "2.931", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50787", "wps": "17304.5", "ups": "5.43", "wpb": "3187.8", "bsz": "44.8", "num_updates": "157200", "lr": "6.14684e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "29613"} [2023-11-02 01:49:03,290][train_inner][INFO] - {"epoch": 39, "update": 38.81, "loss": "3.129", "ntokens": "3213.08", "nsentences": "43.48", "prob_perplexity": "164.072", "code_perplexity": "161.175", "temp": "0.911", "loss_0": "3.009", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.49343", "wps": "17665.4", "ups": "5.5", "wpb": "3213.1", "bsz": "43.5", "num_updates": "157400", "lr": "6.14177e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "36", "gb_free": "15.4", "wall": "29650"} [2023-11-02 01:49:39,564][train_inner][INFO] - {"epoch": 39, "update": 38.859, "loss": "3.089", "ntokens": "3179.4", "nsentences": "42.4", "prob_perplexity": "164.314", "code_perplexity": "161.306", "temp": "0.91", "loss_0": "2.969", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.49843", "wps": "17530.8", "ups": "5.51", "wpb": "3179.4", "bsz": "42.4", "num_updates": "157600", "lr": "6.13671e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "29686"} [2023-11-02 01:50:15,980][train_inner][INFO] - {"epoch": 39, "update": 38.908, "loss": "3.116", "ntokens": "3185.6", "nsentences": "42.68", "prob_perplexity": "164.525", "code_perplexity": "161.4", "temp": "0.909", "loss_0": "2.996", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.49531", "wps": "17496.8", "ups": "5.49", "wpb": "3185.6", "bsz": "42.7", "num_updates": "157800", "lr": "6.13165e-05", "gnorm": "0.774", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "29722"} [2023-11-02 01:50:52,190][train_inner][INFO] - {"epoch": 39, "update": 38.958, "loss": "3.039", "ntokens": "3202.84", "nsentences": "44.68", "prob_perplexity": "164.625", "code_perplexity": "161.602", "temp": "0.908", "loss_0": "2.919", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50903", "wps": "17691.6", "ups": "5.52", "wpb": "3202.8", "bsz": "44.7", "num_updates": "158000", "lr": "6.12658e-05", "gnorm": "0.755", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "29758"} [2023-11-02 01:51:23,674][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 01:51:23,675][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:51:23,695][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 46 [2023-11-02 01:51:49,020][valid][INFO] - {"epoch": 39, "valid_loss": "2.892", "valid_ntokens": "3158.63", "valid_nsentences": "44.1685", "valid_prob_perplexity": "162.648", "valid_code_perplexity": "159.936", "valid_temp": "0.907", "valid_loss_0": "2.773", "valid_loss_1": "0.108", "valid_loss_2": "0.012", "valid_accuracy": "0.54004", "valid_wps": "56562.1", "valid_wpb": "3158.6", "valid_bsz": "44.2", "valid_num_updates": "158172", "valid_best_loss": "2.892"} [2023-11-02 01:51:49,022][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 39 @ 158172 updates [2023-11-02 01:51:49,025][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 01:51:50,464][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 01:51:51,448][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 39 @ 158172 updates, score 2.892) (writing took 2.4252156377770007 seconds) [2023-11-02 01:51:51,448][fairseq_cli.train][INFO] - end of epoch 39 (average epoch stats below) [2023-11-02 01:51:51,451][train][INFO] - {"epoch": 39, "train_loss": "3.077", "train_ntokens": "3193.68", "train_nsentences": "44.2682", "train_prob_perplexity": "163.325", "train_code_perplexity": "160.348", "train_temp": "0.916", "train_loss_0": "2.957", "train_loss_1": "0.107", "train_loss_2": "0.013", "train_accuracy": "0.50314", "train_wps": "16940.1", "train_ups": "5.3", "train_wpb": "3193.7", "train_bsz": "44.3", "train_num_updates": "158172", "train_lr": "6.12223e-05", "train_gnorm": "0.763", "train_loss_scale": "2", "train_train_wall": "723", "train_gb_free": "13.8", "train_wall": "29818"} [2023-11-02 01:51:51,454][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 01:51:51,488][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 40 [2023-11-02 01:51:51,718][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 01:51:51,758][fairseq.trainer][INFO] - begin training epoch 40 [2023-11-02 01:51:51,759][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 01:51:56,770][train_inner][INFO] - {"epoch": 40, "update": 39.007, "loss": "3.023", "ntokens": "3222.24", "nsentences": "47.56", "prob_perplexity": "164.494", "code_perplexity": "161.513", "temp": "0.907", "loss_0": "2.903", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.51418", "wps": "9979.3", "ups": "3.1", "wpb": "3222.2", "bsz": "47.6", "num_updates": "158200", "lr": "6.12152e-05", "gnorm": "0.752", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "29823"} [2023-11-02 01:52:32,656][train_inner][INFO] - {"epoch": 40, "update": 39.056, "loss": "3.101", "ntokens": "3189.52", "nsentences": "42.44", "prob_perplexity": "164.551", "code_perplexity": "161.479", "temp": "0.906", "loss_0": "2.981", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.4973", "wps": "17777.1", "ups": "5.57", "wpb": "3189.5", "bsz": "42.4", "num_updates": "158400", "lr": "6.11646e-05", "gnorm": "0.76", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "29859"} [2023-11-02 01:53:09,040][train_inner][INFO] - {"epoch": 40, "update": 39.106, "loss": "3.075", "ntokens": "3190.2", "nsentences": "43.28", "prob_perplexity": "164.002", "code_perplexity": "160.957", "temp": "0.905", "loss_0": "2.955", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50228", "wps": "17537.6", "ups": "5.5", "wpb": "3190.2", "bsz": "43.3", "num_updates": "158600", "lr": "6.11139e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "29895"} [2023-11-02 01:53:45,305][train_inner][INFO] - {"epoch": 40, "update": 39.155, "loss": "3.069", "ntokens": "3174.04", "nsentences": "43.8", "prob_perplexity": "164.622", "code_perplexity": "161.74", "temp": "0.905", "loss_0": "2.949", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50477", "wps": "17505.6", "ups": "5.52", "wpb": "3174", "bsz": "43.8", "num_updates": "158800", "lr": "6.10633e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "29932"} [2023-11-02 01:54:22,138][train_inner][INFO] - {"epoch": 40, "update": 39.204, "loss": "3.01", "ntokens": "3150.4", "nsentences": "45.48", "prob_perplexity": "165.319", "code_perplexity": "162.305", "temp": "0.904", "loss_0": "2.891", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51512", "wps": "17107.5", "ups": "5.43", "wpb": "3150.4", "bsz": "45.5", "num_updates": "159000", "lr": "6.10127e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "29968"} [2023-11-02 01:54:58,265][train_inner][INFO] - {"epoch": 40, "update": 39.253, "loss": "3.056", "ntokens": "3174.84", "nsentences": "45", "prob_perplexity": "164.136", "code_perplexity": "161.206", "temp": "0.903", "loss_0": "2.936", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50748", "wps": "17577.4", "ups": "5.54", "wpb": "3174.8", "bsz": "45", "num_updates": "159200", "lr": "6.0962e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "30004"} [2023-11-02 01:55:34,561][train_inner][INFO] - {"epoch": 40, "update": 39.303, "loss": "3.09", "ntokens": "3181.88", "nsentences": "43.8", "prob_perplexity": "164.781", "code_perplexity": "161.814", "temp": "0.902", "loss_0": "2.97", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.50093", "wps": "17534", "ups": "5.51", "wpb": "3181.9", "bsz": "43.8", "num_updates": "159400", "lr": "6.09114e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "30041"} [2023-11-02 01:56:10,933][train_inner][INFO] - {"epoch": 40, "update": 39.352, "loss": "3.042", "ntokens": "3192.56", "nsentences": "44.72", "prob_perplexity": "164.785", "code_perplexity": "161.781", "temp": "0.901", "loss_0": "2.923", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50944", "wps": "17556.5", "ups": "5.5", "wpb": "3192.6", "bsz": "44.7", "num_updates": "159600", "lr": "6.08608e-05", "gnorm": "0.761", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "30077"} [2023-11-02 01:56:47,390][train_inner][INFO] - {"epoch": 40, "update": 39.401, "loss": "3.093", "ntokens": "3178.8", "nsentences": "43.2", "prob_perplexity": "164.891", "code_perplexity": "161.829", "temp": "0.9", "loss_0": "2.973", "loss_1": "0.107", "loss_2": "0.013", "accuracy": "0.49979", "wps": "17439.9", "ups": "5.49", "wpb": "3178.8", "bsz": "43.2", "num_updates": "159800", "lr": "6.08101e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "36", "gb_free": "12.4", "wall": "30114"} [2023-11-02 01:57:24,338][train_inner][INFO] - {"epoch": 40, "update": 39.451, "loss": "3.082", "ntokens": "3235.2", "nsentences": "45.32", "prob_perplexity": "164.667", "code_perplexity": "161.703", "temp": "0.899", "loss_0": "2.962", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.503", "wps": "17512.8", "ups": "5.41", "wpb": "3235.2", "bsz": "45.3", "num_updates": "160000", "lr": "6.07595e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "30151"} [2023-11-02 01:58:00,489][train_inner][INFO] - {"epoch": 40, "update": 39.5, "loss": "3.019", "ntokens": "3153.24", "nsentences": "45.08", "prob_perplexity": "164.759", "code_perplexity": "161.752", "temp": "0.898", "loss_0": "2.899", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51395", "wps": "17446", "ups": "5.53", "wpb": "3153.2", "bsz": "45.1", "num_updates": "160200", "lr": "6.07089e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "30187"} [2023-11-02 01:58:36,877][train_inner][INFO] - {"epoch": 40, "update": 39.549, "loss": "3.074", "ntokens": "3200.96", "nsentences": "43.68", "prob_perplexity": "165.447", "code_perplexity": "162.419", "temp": "0.897", "loss_0": "2.955", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50313", "wps": "17594.5", "ups": "5.5", "wpb": "3201", "bsz": "43.7", "num_updates": "160400", "lr": "6.06582e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "30223"} [2023-11-02 01:59:12,671][train_inner][INFO] - {"epoch": 40, "update": 39.599, "loss": "3.02", "ntokens": "3207.48", "nsentences": "45.72", "prob_perplexity": "165.566", "code_perplexity": "162.525", "temp": "0.896", "loss_0": "2.901", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51245", "wps": "17923", "ups": "5.59", "wpb": "3207.5", "bsz": "45.7", "num_updates": "160600", "lr": "6.06076e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "30259"} [2023-11-02 01:59:48,871][train_inner][INFO] - {"epoch": 40, "update": 39.648, "loss": "3.112", "ntokens": "3198.8", "nsentences": "42.48", "prob_perplexity": "166.023", "code_perplexity": "162.997", "temp": "0.896", "loss_0": "2.992", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.49515", "wps": "17674.2", "ups": "5.53", "wpb": "3198.8", "bsz": "42.5", "num_updates": "160800", "lr": "6.0557e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "30295"} [2023-11-02 02:00:24,921][train_inner][INFO] - {"epoch": 40, "update": 39.697, "loss": "3.038", "ntokens": "3178.52", "nsentences": "44.92", "prob_perplexity": "164.942", "code_perplexity": "161.942", "temp": "0.895", "loss_0": "2.919", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51068", "wps": "17642.7", "ups": "5.55", "wpb": "3178.5", "bsz": "44.9", "num_updates": "161000", "lr": "6.05063e-05", "gnorm": "0.772", "loss_scale": "2", "train_wall": "35", "gb_free": "15.8", "wall": "30331"} [2023-11-02 02:01:01,498][train_inner][INFO] - {"epoch": 40, "update": 39.747, "loss": "3.077", "ntokens": "3223.52", "nsentences": "43.76", "prob_perplexity": "165.735", "code_perplexity": "162.759", "temp": "0.894", "loss_0": "2.958", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50175", "wps": "17626.8", "ups": "5.47", "wpb": "3223.5", "bsz": "43.8", "num_updates": "161200", "lr": "6.04557e-05", "gnorm": "0.767", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "30368"} [2023-11-02 02:01:37,513][train_inner][INFO] - {"epoch": 40, "update": 39.796, "loss": "3.085", "ntokens": "3210.16", "nsentences": "43.08", "prob_perplexity": "165.971", "code_perplexity": "162.939", "temp": "0.893", "loss_0": "2.966", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50075", "wps": "17828", "ups": "5.55", "wpb": "3210.2", "bsz": "43.1", "num_updates": "161400", "lr": "6.04051e-05", "gnorm": "0.763", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "30404"} [2023-11-02 02:02:13,670][train_inner][INFO] - {"epoch": 40, "update": 39.845, "loss": "2.999", "ntokens": "3218.72", "nsentences": "46.88", "prob_perplexity": "166.82", "code_perplexity": "163.861", "temp": "0.892", "loss_0": "2.88", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51734", "wps": "17805.3", "ups": "5.53", "wpb": "3218.7", "bsz": "46.9", "num_updates": "161600", "lr": "6.03544e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "30440"} [2023-11-02 02:02:49,540][train_inner][INFO] - {"epoch": 40, "update": 39.894, "loss": "3.105", "ntokens": "3186.28", "nsentences": "43.28", "prob_perplexity": "165.196", "code_perplexity": "162.16", "temp": "0.891", "loss_0": "2.985", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.49815", "wps": "17766.3", "ups": "5.58", "wpb": "3186.3", "bsz": "43.3", "num_updates": "161800", "lr": "6.03038e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "30476"} [2023-11-02 02:03:25,640][train_inner][INFO] - {"epoch": 40, "update": 39.944, "loss": "3.061", "ntokens": "3173.56", "nsentences": "45.72", "prob_perplexity": "166.539", "code_perplexity": "163.515", "temp": "0.89", "loss_0": "2.942", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50723", "wps": "17583.1", "ups": "5.54", "wpb": "3173.6", "bsz": "45.7", "num_updates": "162000", "lr": "6.02532e-05", "gnorm": "0.769", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "30512"} [2023-11-02 02:04:01,892][train_inner][INFO] - {"epoch": 40, "update": 39.993, "loss": "3.034", "ntokens": "3162.76", "nsentences": "44.12", "prob_perplexity": "166.432", "code_perplexity": "163.363", "temp": "0.889", "loss_0": "2.915", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50969", "wps": "17450.3", "ups": "5.52", "wpb": "3162.8", "bsz": "44.1", "num_updates": "162200", "lr": "6.02025e-05", "gnorm": "0.767", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "30548"} [2023-11-02 02:04:06,895][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 02:04:06,896][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:04:06,915][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 47 [2023-11-02 02:04:32,678][valid][INFO] - {"epoch": 40, "valid_loss": "2.878", "valid_ntokens": "3150.07", "valid_nsentences": "44.1685", "valid_prob_perplexity": "167.29", "valid_code_perplexity": "164.224", "valid_temp": "0.889", "valid_loss_0": "2.758", "valid_loss_1": "0.107", "valid_loss_2": "0.013", "valid_accuracy": "0.54162", "valid_wps": "55440", "valid_wpb": "3150.1", "valid_bsz": "44.2", "valid_num_updates": "162228", "valid_best_loss": "2.878"} [2023-11-02 02:04:32,680][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 40 @ 162228 updates [2023-11-02 02:04:32,682][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 02:04:34,125][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 02:04:35,085][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 40 @ 162228 updates, score 2.878) (writing took 2.405232140328735 seconds) [2023-11-02 02:04:35,086][fairseq_cli.train][INFO] - end of epoch 40 (average epoch stats below) [2023-11-02 02:04:35,088][train][INFO] - {"epoch": 40, "train_loss": "3.062", "train_ntokens": "3189.32", "train_nsentences": "44.2682", "train_prob_perplexity": "165.258", "train_code_perplexity": "162.251", "train_temp": "0.898", "train_loss_0": "2.943", "train_loss_1": "0.107", "train_loss_2": "0.012", "train_accuracy": "0.50545", "train_wps": "16939.9", "train_ups": "5.31", "train_wpb": "3189.3", "train_bsz": "44.3", "train_num_updates": "162228", "train_lr": "6.01954e-05", "train_gnorm": "0.765", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "13.5", "train_wall": "30581"} [2023-11-02 02:04:35,091][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:04:35,113][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 41 [2023-11-02 02:04:35,324][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 02:04:35,365][fairseq.trainer][INFO] - begin training epoch 41 [2023-11-02 02:04:35,366][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 02:05:06,276][train_inner][INFO] - {"epoch": 41, "update": 40.042, "loss": "3.073", "ntokens": "3191.08", "nsentences": "43.56", "prob_perplexity": "166.606", "code_perplexity": "163.584", "temp": "0.888", "loss_0": "2.954", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50336", "wps": "9912.9", "ups": "3.11", "wpb": "3191.1", "bsz": "43.6", "num_updates": "162400", "lr": "6.01519e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "30613"} [2023-11-02 02:05:42,086][train_inner][INFO] - {"epoch": 41, "update": 40.092, "loss": "3.062", "ntokens": "3188.84", "nsentences": "42.8", "prob_perplexity": "166.244", "code_perplexity": "163.221", "temp": "0.887", "loss_0": "2.943", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50377", "wps": "17810.8", "ups": "5.59", "wpb": "3188.8", "bsz": "42.8", "num_updates": "162600", "lr": "6.01013e-05", "gnorm": "0.777", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "30648"} [2023-11-02 02:06:18,093][train_inner][INFO] - {"epoch": 41, "update": 40.141, "loss": "3.126", "ntokens": "3203.84", "nsentences": "41.08", "prob_perplexity": "166.788", "code_perplexity": "163.773", "temp": "0.887", "loss_0": "3.008", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.49156", "wps": "17796.8", "ups": "5.55", "wpb": "3203.8", "bsz": "41.1", "num_updates": "162800", "lr": "6.00506e-05", "gnorm": "0.78", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "30684"} [2023-11-02 02:06:53,961][train_inner][INFO] - {"epoch": 41, "update": 40.19, "loss": "3.029", "ntokens": "3214.6", "nsentences": "45.12", "prob_perplexity": "167.644", "code_perplexity": "164.509", "temp": "0.886", "loss_0": "2.91", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51122", "wps": "17925.4", "ups": "5.58", "wpb": "3214.6", "bsz": "45.1", "num_updates": "163000", "lr": "6e-05", "gnorm": "0.765", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "30720"} [2023-11-02 02:07:29,161][train_inner][INFO] - {"epoch": 41, "update": 40.24, "loss": "3.079", "ntokens": "3156.92", "nsentences": "42.72", "prob_perplexity": "167.563", "code_perplexity": "164.439", "temp": "0.885", "loss_0": "2.961", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50172", "wps": "17938.2", "ups": "5.68", "wpb": "3156.9", "bsz": "42.7", "num_updates": "163200", "lr": "5.99494e-05", "gnorm": "0.774", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "30755"} [2023-11-02 02:08:05,079][train_inner][INFO] - {"epoch": 41, "update": 40.289, "loss": "3.022", "ntokens": "3160.8", "nsentences": "43.84", "prob_perplexity": "167.123", "code_perplexity": "164.111", "temp": "0.884", "loss_0": "2.903", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51248", "wps": "17601.2", "ups": "5.57", "wpb": "3160.8", "bsz": "43.8", "num_updates": "163400", "lr": "5.98987e-05", "gnorm": "0.769", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "30791"} [2023-11-02 02:08:41,737][train_inner][INFO] - {"epoch": 41, "update": 40.338, "loss": "3.075", "ntokens": "3175.68", "nsentences": "43.72", "prob_perplexity": "167.352", "code_perplexity": "164.315", "temp": "0.883", "loss_0": "2.957", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50395", "wps": "17327.1", "ups": "5.46", "wpb": "3175.7", "bsz": "43.7", "num_updates": "163600", "lr": "5.98481e-05", "gnorm": "0.769", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "30828"} [2023-11-02 02:09:17,871][train_inner][INFO] - {"epoch": 41, "update": 40.388, "loss": "3.01", "ntokens": "3184.08", "nsentences": "45.8", "prob_perplexity": "167.094", "code_perplexity": "164.146", "temp": "0.882", "loss_0": "2.892", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.51475", "wps": "17624.7", "ups": "5.54", "wpb": "3184.1", "bsz": "45.8", "num_updates": "163800", "lr": "5.97975e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "30864"} [2023-11-02 02:09:54,136][train_inner][INFO] - {"epoch": 41, "update": 40.437, "loss": "3.038", "ntokens": "3203.2", "nsentences": "45.28", "prob_perplexity": "167.977", "code_perplexity": "165.003", "temp": "0.881", "loss_0": "2.92", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51009", "wps": "17666.5", "ups": "5.52", "wpb": "3203.2", "bsz": "45.3", "num_updates": "164000", "lr": "5.97468e-05", "gnorm": "0.764", "loss_scale": "2", "train_wall": "36", "gb_free": "12.7", "wall": "30900"} [2023-11-02 02:10:31,189][train_inner][INFO] - {"epoch": 41, "update": 40.486, "loss": "3.076", "ntokens": "3223.2", "nsentences": "43.2", "prob_perplexity": "169.054", "code_perplexity": "166.02", "temp": "0.88", "loss_0": "2.958", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50092", "wps": "17399.6", "ups": "5.4", "wpb": "3223.2", "bsz": "43.2", "num_updates": "164200", "lr": "5.96962e-05", "gnorm": "0.769", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "30937"} [2023-11-02 02:11:07,609][train_inner][INFO] - {"epoch": 41, "update": 40.536, "loss": "3.019", "ntokens": "3175.92", "nsentences": "45.4", "prob_perplexity": "167.446", "code_perplexity": "164.473", "temp": "0.88", "loss_0": "2.901", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51315", "wps": "17441.6", "ups": "5.49", "wpb": "3175.9", "bsz": "45.4", "num_updates": "164400", "lr": "5.96456e-05", "gnorm": "0.769", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "30974"} [2023-11-02 02:11:43,594][train_inner][INFO] - {"epoch": 41, "update": 40.585, "loss": "3.062", "ntokens": "3209.24", "nsentences": "44.64", "prob_perplexity": "167.702", "code_perplexity": "164.699", "temp": "0.879", "loss_0": "2.943", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50547", "wps": "17837.4", "ups": "5.56", "wpb": "3209.2", "bsz": "44.6", "num_updates": "164600", "lr": "5.95949e-05", "gnorm": "0.762", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "31010"} [2023-11-02 02:12:19,562][train_inner][INFO] - {"epoch": 41, "update": 40.634, "loss": "3.055", "ntokens": "3182.36", "nsentences": "43.84", "prob_perplexity": "167.206", "code_perplexity": "164.209", "temp": "0.878", "loss_0": "2.937", "loss_1": "0.107", "loss_2": "0.012", "accuracy": "0.50637", "wps": "17696.6", "ups": "5.56", "wpb": "3182.4", "bsz": "43.8", "num_updates": "164800", "lr": "5.95443e-05", "gnorm": "0.777", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "31046"} [2023-11-02 02:12:55,901][train_inner][INFO] - {"epoch": 41, "update": 40.683, "loss": "3.028", "ntokens": "3225.16", "nsentences": "45.24", "prob_perplexity": "167.797", "code_perplexity": "164.724", "temp": "0.877", "loss_0": "2.909", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51119", "wps": "17751.4", "ups": "5.5", "wpb": "3225.2", "bsz": "45.2", "num_updates": "165000", "lr": "5.94937e-05", "gnorm": "0.771", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "31082"} [2023-11-02 02:13:32,305][train_inner][INFO] - {"epoch": 41, "update": 40.733, "loss": "3.029", "ntokens": "3197", "nsentences": "44.6", "prob_perplexity": "167.977", "code_perplexity": "164.957", "temp": "0.876", "loss_0": "2.911", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51045", "wps": "17564.9", "ups": "5.49", "wpb": "3197", "bsz": "44.6", "num_updates": "165200", "lr": "5.9443e-05", "gnorm": "0.784", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "31119"} [2023-11-02 02:14:08,080][train_inner][INFO] - {"epoch": 41, "update": 40.782, "loss": "3.066", "ntokens": "3170.6", "nsentences": "41.84", "prob_perplexity": "168.073", "code_perplexity": "165.008", "temp": "0.875", "loss_0": "2.948", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.502", "wps": "17734", "ups": "5.59", "wpb": "3170.6", "bsz": "41.8", "num_updates": "165400", "lr": "5.93924e-05", "gnorm": "0.779", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "31154"} [2023-11-02 02:14:44,607][train_inner][INFO] - {"epoch": 41, "update": 40.831, "loss": "3.027", "ntokens": "3186.72", "nsentences": "44.2", "prob_perplexity": "168.108", "code_perplexity": "164.959", "temp": "0.874", "loss_0": "2.909", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51004", "wps": "17449.7", "ups": "5.48", "wpb": "3186.7", "bsz": "44.2", "num_updates": "165600", "lr": "5.93418e-05", "gnorm": "0.77", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "31191"} [2023-11-02 02:15:20,789][train_inner][INFO] - {"epoch": 41, "update": 40.881, "loss": "3.021", "ntokens": "3175.08", "nsentences": "44.36", "prob_perplexity": "168.204", "code_perplexity": "165.157", "temp": "0.873", "loss_0": "2.903", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51213", "wps": "17551.6", "ups": "5.53", "wpb": "3175.1", "bsz": "44.4", "num_updates": "165800", "lr": "5.92911e-05", "gnorm": "0.777", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "31227"} [2023-11-02 02:15:56,753][train_inner][INFO] - {"epoch": 41, "update": 40.93, "loss": "2.997", "ntokens": "3154.48", "nsentences": "46.92", "prob_perplexity": "167.556", "code_perplexity": "164.469", "temp": "0.873", "loss_0": "2.879", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51856", "wps": "17543.3", "ups": "5.56", "wpb": "3154.5", "bsz": "46.9", "num_updates": "166000", "lr": "5.92405e-05", "gnorm": "0.771", "loss_scale": "2", "train_wall": "35", "gb_free": "14.8", "wall": "31263"} [2023-11-02 02:16:32,844][train_inner][INFO] - {"epoch": 41, "update": 40.979, "loss": "2.975", "ntokens": "3187.96", "nsentences": "46.48", "prob_perplexity": "167.89", "code_perplexity": "164.888", "temp": "0.872", "loss_0": "2.857", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.52069", "wps": "17667.4", "ups": "5.54", "wpb": "3188", "bsz": "46.5", "num_updates": "166200", "lr": "5.91899e-05", "gnorm": "0.772", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "31299"} [2023-11-02 02:16:48,209][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 02:16:48,211][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:16:48,230][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 48 [2023-11-02 02:17:13,827][valid][INFO] - {"epoch": 41, "valid_loss": "2.882", "valid_ntokens": "3165.45", "valid_nsentences": "44.1685", "valid_prob_perplexity": "169.009", "valid_code_perplexity": "165.978", "valid_temp": "0.871", "valid_loss_0": "2.764", "valid_loss_1": "0.106", "valid_loss_2": "0.012", "valid_accuracy": "0.54069", "valid_wps": "56112", "valid_wpb": "3165.4", "valid_bsz": "44.2", "valid_num_updates": "166284", "valid_best_loss": "2.878"} [2023-11-02 02:17:13,830][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 41 @ 166284 updates [2023-11-02 02:17:13,832][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 02:17:15,270][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 02:17:15,319][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 41 @ 166284 updates, score 2.882) (writing took 1.4896948346868157 seconds) [2023-11-02 02:17:15,320][fairseq_cli.train][INFO] - end of epoch 41 (average epoch stats below) [2023-11-02 02:17:15,322][train][INFO] - {"epoch": 41, "train_loss": "3.042", "train_ntokens": "3188.58", "train_nsentences": "44.2682", "train_prob_perplexity": "167.612", "train_code_perplexity": "164.574", "train_temp": "0.88", "train_loss_0": "2.923", "train_loss_1": "0.106", "train_loss_2": "0.012", "train_accuracy": "0.50847", "train_wps": "17011.8", "train_ups": "5.34", "train_wpb": "3188.6", "train_bsz": "44.3", "train_num_updates": "166284", "train_lr": "5.91686e-05", "train_gnorm": "0.772", "train_loss_scale": "2", "train_train_wall": "720", "train_gb_free": "13.5", "train_wall": "31342"} [2023-11-02 02:17:15,325][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:17:15,344][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 42 [2023-11-02 02:17:15,517][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 02:17:15,557][fairseq.trainer][INFO] - begin training epoch 42 [2023-11-02 02:17:15,558][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 02:17:36,606][train_inner][INFO] - {"epoch": 42, "update": 41.029, "loss": "2.999", "ntokens": "3188.8", "nsentences": "44.12", "prob_perplexity": "168.591", "code_perplexity": "165.53", "temp": "0.871", "loss_0": "2.881", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51492", "wps": "10002.6", "ups": "3.14", "wpb": "3188.8", "bsz": "44.1", "num_updates": "166400", "lr": "5.91392e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "31363"} [2023-11-02 02:18:12,795][train_inner][INFO] - {"epoch": 42, "update": 41.078, "loss": "2.997", "ntokens": "3182.08", "nsentences": "45.36", "prob_perplexity": "168.359", "code_perplexity": "165.32", "temp": "0.87", "loss_0": "2.879", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51635", "wps": "17586.5", "ups": "5.53", "wpb": "3182.1", "bsz": "45.4", "num_updates": "166600", "lr": "5.90886e-05", "gnorm": "0.776", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "31399"} [2023-11-02 02:18:48,508][train_inner][INFO] - {"epoch": 42, "update": 41.127, "loss": "3.04", "ntokens": "3176.28", "nsentences": "44.52", "prob_perplexity": "168.795", "code_perplexity": "165.83", "temp": "0.869", "loss_0": "2.922", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50946", "wps": "17789.1", "ups": "5.6", "wpb": "3176.3", "bsz": "44.5", "num_updates": "166800", "lr": "5.9038e-05", "gnorm": "0.772", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "31435"} [2023-11-02 02:19:24,616][train_inner][INFO] - {"epoch": 42, "update": 41.177, "loss": "3.006", "ntokens": "3175.76", "nsentences": "45.12", "prob_perplexity": "167.99", "code_perplexity": "164.879", "temp": "0.868", "loss_0": "2.888", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51517", "wps": "17591.6", "ups": "5.54", "wpb": "3175.8", "bsz": "45.1", "num_updates": "167000", "lr": "5.89873e-05", "gnorm": "0.773", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "31471"} [2023-11-02 02:20:00,821][train_inner][INFO] - {"epoch": 42, "update": 41.226, "loss": "3.041", "ntokens": "3191.12", "nsentences": "43.2", "prob_perplexity": "169.273", "code_perplexity": "166.252", "temp": "0.867", "loss_0": "2.924", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50714", "wps": "17628.9", "ups": "5.52", "wpb": "3191.1", "bsz": "43.2", "num_updates": "167200", "lr": "5.89367e-05", "gnorm": "0.768", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "31507"} [2023-11-02 02:20:36,915][train_inner][INFO] - {"epoch": 42, "update": 41.275, "loss": "3.023", "ntokens": "3219", "nsentences": "45.84", "prob_perplexity": "169.902", "code_perplexity": "166.73", "temp": "0.866", "loss_0": "2.905", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51158", "wps": "17838.2", "ups": "5.54", "wpb": "3219", "bsz": "45.8", "num_updates": "167400", "lr": "5.88861e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "31543"} [2023-11-02 02:21:13,144][train_inner][INFO] - {"epoch": 42, "update": 41.324, "loss": "3.05", "ntokens": "3169.68", "nsentences": "43.24", "prob_perplexity": "168.671", "code_perplexity": "165.67", "temp": "0.866", "loss_0": "2.932", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50678", "wps": "17498.9", "ups": "5.52", "wpb": "3169.7", "bsz": "43.2", "num_updates": "167600", "lr": "5.88354e-05", "gnorm": "0.774", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "31579"} [2023-11-02 02:21:48,884][train_inner][INFO] - {"epoch": 42, "update": 41.374, "loss": "3.038", "ntokens": "3201.2", "nsentences": "44.44", "prob_perplexity": "168.774", "code_perplexity": "165.649", "temp": "0.865", "loss_0": "2.921", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50907", "wps": "17914.6", "ups": "5.6", "wpb": "3201.2", "bsz": "44.4", "num_updates": "167800", "lr": "5.87848e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "31615"} [2023-11-02 02:22:24,720][train_inner][INFO] - {"epoch": 42, "update": 41.423, "loss": "2.995", "ntokens": "3169.28", "nsentences": "45.88", "prob_perplexity": "169.97", "code_perplexity": "166.856", "temp": "0.864", "loss_0": "2.878", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51877", "wps": "17689.1", "ups": "5.58", "wpb": "3169.3", "bsz": "45.9", "num_updates": "168000", "lr": "5.87342e-05", "gnorm": "0.775", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "31651"} [2023-11-02 02:23:00,398][train_inner][INFO] - {"epoch": 42, "update": 41.472, "loss": "3.069", "ntokens": "3182.64", "nsentences": "43.68", "prob_perplexity": "169.794", "code_perplexity": "166.609", "temp": "0.863", "loss_0": "2.951", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.50472", "wps": "17841.8", "ups": "5.61", "wpb": "3182.6", "bsz": "43.7", "num_updates": "168200", "lr": "5.86835e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "31687"} [2023-11-02 02:23:36,670][train_inner][INFO] - {"epoch": 42, "update": 41.522, "loss": "3.054", "ntokens": "3173.72", "nsentences": "42.92", "prob_perplexity": "169.522", "code_perplexity": "166.364", "temp": "0.862", "loss_0": "2.936", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50557", "wps": "17500.8", "ups": "5.51", "wpb": "3173.7", "bsz": "42.9", "num_updates": "168400", "lr": "5.86329e-05", "gnorm": "0.781", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "31723"} [2023-11-02 02:24:12,847][train_inner][INFO] - {"epoch": 42, "update": 41.571, "loss": "3.012", "ntokens": "3160.76", "nsentences": "44", "prob_perplexity": "170.171", "code_perplexity": "167.045", "temp": "0.861", "loss_0": "2.895", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51343", "wps": "17474.8", "ups": "5.53", "wpb": "3160.8", "bsz": "44", "num_updates": "168600", "lr": "5.85823e-05", "gnorm": "0.773", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "31759"} [2023-11-02 02:24:49,627][train_inner][INFO] - {"epoch": 42, "update": 41.62, "loss": "3.021", "ntokens": "3185.92", "nsentences": "43.76", "prob_perplexity": "170.037", "code_perplexity": "166.897", "temp": "0.86", "loss_0": "2.903", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.51097", "wps": "17325.6", "ups": "5.44", "wpb": "3185.9", "bsz": "43.8", "num_updates": "168800", "lr": "5.85316e-05", "gnorm": "0.779", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "31796"} [2023-11-02 02:25:26,146][train_inner][INFO] - {"epoch": 42, "update": 41.67, "loss": "3.058", "ntokens": "3245.64", "nsentences": "43.08", "prob_perplexity": "170.559", "code_perplexity": "167.414", "temp": "0.86", "loss_0": "2.941", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.50355", "wps": "17775.9", "ups": "5.48", "wpb": "3245.6", "bsz": "43.1", "num_updates": "169000", "lr": "5.8481e-05", "gnorm": "0.769", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "31832"} [2023-11-02 02:26:02,986][train_inner][INFO] - {"epoch": 42, "update": 41.719, "loss": "3.008", "ntokens": "3182.32", "nsentences": "44.84", "prob_perplexity": "170.609", "code_perplexity": "167.532", "temp": "0.859", "loss_0": "2.891", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51492", "wps": "17277.7", "ups": "5.43", "wpb": "3182.3", "bsz": "44.8", "num_updates": "169200", "lr": "5.84304e-05", "gnorm": "0.767", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "31869"} [2023-11-02 02:26:39,037][train_inner][INFO] - {"epoch": 42, "update": 41.768, "loss": "3.054", "ntokens": "3193.04", "nsentences": "43.16", "prob_perplexity": "170.06", "code_perplexity": "167.085", "temp": "0.858", "loss_0": "2.937", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.50588", "wps": "17714.7", "ups": "5.55", "wpb": "3193", "bsz": "43.2", "num_updates": "169400", "lr": "5.83797e-05", "gnorm": "0.766", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "31905"} [2023-11-02 02:27:14,840][train_inner][INFO] - {"epoch": 42, "update": 41.818, "loss": "2.998", "ntokens": "3156.32", "nsentences": "46.44", "prob_perplexity": "170.4", "code_perplexity": "167.32", "temp": "0.857", "loss_0": "2.881", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51855", "wps": "17632.9", "ups": "5.59", "wpb": "3156.3", "bsz": "46.4", "num_updates": "169600", "lr": "5.83291e-05", "gnorm": "0.776", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "31941"} [2023-11-02 02:27:51,729][train_inner][INFO] - {"epoch": 42, "update": 41.867, "loss": "3.083", "ntokens": "3234.72", "nsentences": "44.08", "prob_perplexity": "170.715", "code_perplexity": "167.599", "temp": "0.856", "loss_0": "2.966", "loss_1": "0.106", "loss_2": "0.012", "accuracy": "0.5014", "wps": "17538.5", "ups": "5.42", "wpb": "3234.7", "bsz": "44.1", "num_updates": "169800", "lr": "5.82785e-05", "gnorm": "0.772", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "31978"} [2023-11-02 02:28:28,073][train_inner][INFO] - {"epoch": 42, "update": 41.916, "loss": "2.973", "ntokens": "3157.56", "nsentences": "45.04", "prob_perplexity": "170.689", "code_perplexity": "167.533", "temp": "0.855", "loss_0": "2.856", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.52056", "wps": "17377.3", "ups": "5.5", "wpb": "3157.6", "bsz": "45", "num_updates": "170000", "lr": "5.82278e-05", "gnorm": "0.774", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "32014"} [2023-11-02 02:29:04,761][train_inner][INFO] - {"epoch": 42, "update": 41.965, "loss": "3.075", "ntokens": "3171.92", "nsentences": "42.2", "prob_perplexity": "170.007", "code_perplexity": "166.998", "temp": "0.854", "loss_0": "2.958", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.50103", "wps": "17292.2", "ups": "5.45", "wpb": "3171.9", "bsz": "42.2", "num_updates": "170200", "lr": "5.81772e-05", "gnorm": "0.773", "loss_scale": "4", "train_wall": "36", "gb_free": "15", "wall": "32051"} [2023-11-02 02:29:30,377][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 02:29:30,379][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:29:30,400][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 49 [2023-11-02 02:29:56,132][valid][INFO] - {"epoch": 42, "valid_loss": "2.886", "valid_ntokens": "3177.12", "valid_nsentences": "44.1685", "valid_prob_perplexity": "167.689", "valid_code_perplexity": "164.655", "valid_temp": "0.853", "valid_loss_0": "2.767", "valid_loss_1": "0.106", "valid_loss_2": "0.012", "valid_accuracy": "0.54005", "valid_wps": "55975.7", "valid_wpb": "3177.1", "valid_bsz": "44.2", "valid_num_updates": "170340", "valid_best_loss": "2.878"} [2023-11-02 02:29:56,134][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 42 @ 170340 updates [2023-11-02 02:29:56,136][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 02:29:57,571][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 02:29:57,627][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 42 @ 170340 updates, score 2.886) (writing took 1.4933795761317015 seconds) [2023-11-02 02:29:57,628][fairseq_cli.train][INFO] - end of epoch 42 (average epoch stats below) [2023-11-02 02:29:57,630][train][INFO] - {"epoch": 42, "train_loss": "3.03", "train_ntokens": "3184.44", "train_nsentences": "44.2682", "train_prob_perplexity": "169.659", "train_code_perplexity": "166.57", "train_temp": "0.862", "train_loss_0": "2.913", "train_loss_1": "0.106", "train_loss_2": "0.012", "train_accuracy": "0.51043", "train_wps": "16943.5", "train_ups": "5.32", "train_wpb": "3184.4", "train_bsz": "44.3", "train_num_updates": "170340", "train_lr": "5.81418e-05", "train_gnorm": "0.772", "train_loss_scale": "4", "train_train_wall": "722", "train_gb_free": "12.9", "train_wall": "32104"} [2023-11-02 02:29:57,633][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:29:57,652][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 43 [2023-11-02 02:29:57,820][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 02:29:57,862][fairseq.trainer][INFO] - begin training epoch 43 [2023-11-02 02:29:57,862][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 02:30:08,915][train_inner][INFO] - {"epoch": 43, "update": 42.015, "loss": "3.012", "ntokens": "3189", "nsentences": "45.64", "prob_perplexity": "169.937", "code_perplexity": "166.819", "temp": "0.854", "loss_0": "2.894", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51428", "wps": "9942.1", "ups": "3.12", "wpb": "3189", "bsz": "45.6", "num_updates": "170400", "lr": "5.81266e-05", "gnorm": "0.771", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "32115"} [2023-11-02 02:30:44,418][train_inner][INFO] - {"epoch": 43, "update": 42.064, "loss": "3.022", "ntokens": "3204.72", "nsentences": "44.4", "prob_perplexity": "170.338", "code_perplexity": "167.248", "temp": "0.853", "loss_0": "2.905", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51057", "wps": "18054.4", "ups": "5.63", "wpb": "3204.7", "bsz": "44.4", "num_updates": "170600", "lr": "5.80759e-05", "gnorm": "0.772", "loss_scale": "4", "train_wall": "35", "gb_free": "14.3", "wall": "32151"} [2023-11-02 02:31:20,550][train_inner][INFO] - {"epoch": 43, "update": 42.113, "loss": "3.026", "ntokens": "3210.32", "nsentences": "46.12", "prob_perplexity": "171.873", "code_perplexity": "168.715", "temp": "0.852", "loss_0": "2.909", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51291", "wps": "17770.9", "ups": "5.54", "wpb": "3210.3", "bsz": "46.1", "num_updates": "170800", "lr": "5.80253e-05", "gnorm": "0.768", "loss_scale": "4", "train_wall": "35", "gb_free": "12.6", "wall": "32187"} [2023-11-02 02:31:57,078][train_inner][INFO] - {"epoch": 43, "update": 42.163, "loss": "3.015", "ntokens": "3173.28", "nsentences": "44.4", "prob_perplexity": "170.69", "code_perplexity": "167.631", "temp": "0.851", "loss_0": "2.898", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51287", "wps": "17375.9", "ups": "5.48", "wpb": "3173.3", "bsz": "44.4", "num_updates": "171000", "lr": "5.79747e-05", "gnorm": "0.778", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "32223"} [2023-11-02 02:32:33,282][train_inner][INFO] - {"epoch": 43, "update": 42.212, "loss": "3.045", "ntokens": "3209.8", "nsentences": "43.32", "prob_perplexity": "170.68", "code_perplexity": "167.626", "temp": "0.85", "loss_0": "2.928", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.50715", "wps": "17732.9", "ups": "5.52", "wpb": "3209.8", "bsz": "43.3", "num_updates": "171200", "lr": "5.79241e-05", "gnorm": "0.772", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "32260"} [2023-11-02 02:33:09,449][train_inner][INFO] - {"epoch": 43, "update": 42.261, "loss": "3.046", "ntokens": "3192.8", "nsentences": "44.04", "prob_perplexity": "172.019", "code_perplexity": "168.865", "temp": "0.849", "loss_0": "2.929", "loss_1": "0.105", "loss_2": "0.012", "accuracy": "0.50765", "wps": "17656.7", "ups": "5.53", "wpb": "3192.8", "bsz": "44", "num_updates": "171400", "lr": "5.78734e-05", "gnorm": "0.777", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "32296"} [2023-11-02 02:33:45,907][train_inner][INFO] - {"epoch": 43, "update": 42.311, "loss": "3.084", "ntokens": "3210.4", "nsentences": "41.8", "prob_perplexity": "171.523", "code_perplexity": "168.34", "temp": "0.848", "loss_0": "2.966", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.49922", "wps": "17612.9", "ups": "5.49", "wpb": "3210.4", "bsz": "41.8", "num_updates": "171600", "lr": "5.78228e-05", "gnorm": "0.769", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "32332"} [2023-11-02 02:34:22,012][train_inner][INFO] - {"epoch": 43, "update": 42.36, "loss": "2.987", "ntokens": "3201.04", "nsentences": "46.44", "prob_perplexity": "172.263", "code_perplexity": "169.173", "temp": "0.848", "loss_0": "2.87", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51894", "wps": "17733", "ups": "5.54", "wpb": "3201", "bsz": "46.4", "num_updates": "171800", "lr": "5.77722e-05", "gnorm": "0.767", "loss_scale": "4", "train_wall": "35", "gb_free": "13.9", "wall": "32368"} [2023-11-02 02:34:58,575][train_inner][INFO] - {"epoch": 43, "update": 42.409, "loss": "3.045", "ntokens": "3164.68", "nsentences": "43.32", "prob_perplexity": "171.734", "code_perplexity": "168.489", "temp": "0.847", "loss_0": "2.928", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.50697", "wps": "17311.9", "ups": "5.47", "wpb": "3164.7", "bsz": "43.3", "num_updates": "172000", "lr": "5.77215e-05", "gnorm": "0.782", "loss_scale": "4", "train_wall": "36", "gb_free": "13.9", "wall": "32405"} [2023-11-02 02:35:35,164][train_inner][INFO] - {"epoch": 43, "update": 42.459, "loss": "3.064", "ntokens": "3210.12", "nsentences": "43.12", "prob_perplexity": "172.407", "code_perplexity": "169.267", "temp": "0.846", "loss_0": "2.947", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50367", "wps": "17547.9", "ups": "5.47", "wpb": "3210.1", "bsz": "43.1", "num_updates": "172200", "lr": "5.76709e-05", "gnorm": "0.776", "loss_scale": "4", "train_wall": "36", "gb_free": "15.9", "wall": "32441"} [2023-11-02 02:36:11,384][train_inner][INFO] - {"epoch": 43, "update": 42.508, "loss": "2.99", "ntokens": "3181.24", "nsentences": "45.64", "prob_perplexity": "172.327", "code_perplexity": "169.107", "temp": "0.845", "loss_0": "2.873", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51837", "wps": "17567.5", "ups": "5.52", "wpb": "3181.2", "bsz": "45.6", "num_updates": "172400", "lr": "5.76203e-05", "gnorm": "0.779", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "32478"} [2023-11-02 02:36:47,549][train_inner][INFO] - {"epoch": 43, "update": 42.557, "loss": "2.992", "ntokens": "3168.52", "nsentences": "45.96", "prob_perplexity": "172.109", "code_perplexity": "169.007", "temp": "0.844", "loss_0": "2.876", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51785", "wps": "17524", "ups": "5.53", "wpb": "3168.5", "bsz": "46", "num_updates": "172600", "lr": "5.75696e-05", "gnorm": "0.782", "loss_scale": "4", "train_wall": "36", "gb_free": "13", "wall": "32514"} [2023-11-02 02:37:23,878][train_inner][INFO] - {"epoch": 43, "update": 42.607, "loss": "3.005", "ntokens": "3154", "nsentences": "43.48", "prob_perplexity": "171.497", "code_perplexity": "168.392", "temp": "0.843", "loss_0": "2.888", "loss_1": "0.106", "loss_2": "0.011", "accuracy": "0.51375", "wps": "17364.5", "ups": "5.51", "wpb": "3154", "bsz": "43.5", "num_updates": "172800", "lr": "5.7519e-05", "gnorm": "0.782", "loss_scale": "4", "train_wall": "36", "gb_free": "14.1", "wall": "32550"} [2023-11-02 02:38:00,375][train_inner][INFO] - {"epoch": 43, "update": 42.656, "loss": "3.028", "ntokens": "3186.08", "nsentences": "44.2", "prob_perplexity": "172.518", "code_perplexity": "169.436", "temp": "0.843", "loss_0": "2.911", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50992", "wps": "17460.8", "ups": "5.48", "wpb": "3186.1", "bsz": "44.2", "num_updates": "173000", "lr": "5.74684e-05", "gnorm": "0.777", "loss_scale": "4", "train_wall": "36", "gb_free": "13.3", "wall": "32587"} [2023-11-02 02:38:36,925][train_inner][INFO] - {"epoch": 43, "update": 42.705, "loss": "3.044", "ntokens": "3196.96", "nsentences": "43.44", "prob_perplexity": "172.329", "code_perplexity": "169.262", "temp": "0.842", "loss_0": "2.927", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50721", "wps": "17494.8", "ups": "5.47", "wpb": "3197", "bsz": "43.4", "num_updates": "173200", "lr": "5.74177e-05", "gnorm": "0.784", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "32623"} [2023-11-02 02:39:13,254][train_inner][INFO] - {"epoch": 43, "update": 42.754, "loss": "2.993", "ntokens": "3189.24", "nsentences": "45.36", "prob_perplexity": "172.47", "code_perplexity": "169.373", "temp": "0.841", "loss_0": "2.876", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51749", "wps": "17558.4", "ups": "5.51", "wpb": "3189.2", "bsz": "45.4", "num_updates": "173400", "lr": "5.73671e-05", "gnorm": "0.773", "loss_scale": "4", "train_wall": "36", "gb_free": "12.5", "wall": "32659"} [2023-11-02 02:39:49,511][train_inner][INFO] - {"epoch": 43, "update": 42.804, "loss": "3.019", "ntokens": "3181", "nsentences": "43.96", "prob_perplexity": "172.43", "code_perplexity": "169.346", "temp": "0.84", "loss_0": "2.903", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51089", "wps": "17548.3", "ups": "5.52", "wpb": "3181", "bsz": "44", "num_updates": "173600", "lr": "5.73165e-05", "gnorm": "0.776", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "32696"} [2023-11-02 02:40:26,005][train_inner][INFO] - {"epoch": 43, "update": 42.853, "loss": "3.056", "ntokens": "3198.4", "nsentences": "43.16", "prob_perplexity": "173.643", "code_perplexity": "170.487", "temp": "0.839", "loss_0": "2.939", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50376", "wps": "17529.6", "ups": "5.48", "wpb": "3198.4", "bsz": "43.2", "num_updates": "173800", "lr": "5.72658e-05", "gnorm": "0.787", "loss_scale": "4", "train_wall": "36", "gb_free": "12.8", "wall": "32732"} [2023-11-02 02:41:02,437][train_inner][INFO] - {"epoch": 43, "update": 42.902, "loss": "2.967", "ntokens": "3178.64", "nsentences": "45.68", "prob_perplexity": "173.584", "code_perplexity": "170.449", "temp": "0.838", "loss_0": "2.85", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.52172", "wps": "17458.9", "ups": "5.49", "wpb": "3178.6", "bsz": "45.7", "num_updates": "174000", "lr": "5.72152e-05", "gnorm": "0.822", "loss_scale": "4", "train_wall": "36", "gb_free": "13", "wall": "32769"} [2023-11-02 02:41:38,933][train_inner][INFO] - {"epoch": 43, "update": 42.952, "loss": "3.042", "ntokens": "3165.28", "nsentences": "43.08", "prob_perplexity": "173.742", "code_perplexity": "170.477", "temp": "0.837", "loss_0": "2.926", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50684", "wps": "17346.9", "ups": "5.48", "wpb": "3165.3", "bsz": "43.1", "num_updates": "174200", "lr": "5.71646e-05", "gnorm": "0.776", "loss_scale": "4", "train_wall": "36", "gb_free": "13", "wall": "32805"} [2023-11-02 02:42:14,307][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 02:42:14,308][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:42:14,325][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 50 [2023-11-02 02:42:39,938][valid][INFO] - {"epoch": 43, "valid_loss": "2.845", "valid_ntokens": "3149.78", "valid_nsentences": "44.1685", "valid_prob_perplexity": "172.479", "valid_code_perplexity": "169.631", "valid_temp": "0.836", "valid_loss_0": "2.729", "valid_loss_1": "0.105", "valid_loss_2": "0.011", "valid_accuracy": "0.54684", "valid_wps": "55757.8", "valid_wpb": "3149.8", "valid_bsz": "44.2", "valid_num_updates": "174396", "valid_best_loss": "2.845"} [2023-11-02 02:42:39,940][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 43 @ 174396 updates [2023-11-02 02:42:39,942][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 02:42:41,453][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 02:42:42,477][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 43 @ 174396 updates, score 2.845) (writing took 2.537090924102813 seconds) [2023-11-02 02:42:42,478][fairseq_cli.train][INFO] - end of epoch 43 (average epoch stats below) [2023-11-02 02:42:42,480][train][INFO] - {"epoch": 43, "train_loss": "3.026", "train_ntokens": "3189.86", "train_nsentences": "44.2682", "train_prob_perplexity": "172.124", "train_code_perplexity": "168.989", "train_temp": "0.845", "train_loss_0": "2.909", "train_loss_1": "0.105", "train_loss_2": "0.011", "train_accuracy": "0.51078", "train_wps": "16915.9", "train_ups": "5.3", "train_wpb": "3189.9", "train_bsz": "44.3", "train_num_updates": "174396", "train_lr": "5.71149e-05", "train_gnorm": "0.779", "train_loss_scale": "4", "train_train_wall": "723", "train_gb_free": "13.4", "train_wall": "32869"} [2023-11-02 02:42:42,483][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:42:42,515][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 44 [2023-11-02 02:42:42,721][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 02:42:42,762][fairseq.trainer][INFO] - begin training epoch 44 [2023-11-02 02:42:42,763][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 02:42:43,665][train_inner][INFO] - {"epoch": 44, "update": 43.001, "loss": "3.049", "ntokens": "3202.84", "nsentences": "44.2", "prob_perplexity": "172.926", "code_perplexity": "169.74", "temp": "0.837", "loss_0": "2.933", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50705", "wps": "9895.9", "ups": "3.09", "wpb": "3202.8", "bsz": "44.2", "num_updates": "174400", "lr": "5.71139e-05", "gnorm": "0.782", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "32870"} [2023-11-02 02:43:19,358][train_inner][INFO] - {"epoch": 44, "update": 43.05, "loss": "3.019", "ntokens": "3196.08", "nsentences": "43.76", "prob_perplexity": "173.256", "code_perplexity": "170.193", "temp": "0.836", "loss_0": "2.902", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51121", "wps": "17910", "ups": "5.6", "wpb": "3196.1", "bsz": "43.8", "num_updates": "174600", "lr": "5.70633e-05", "gnorm": "0.779", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "32906"} [2023-11-02 02:43:55,362][train_inner][INFO] - {"epoch": 44, "update": 43.1, "loss": "3.05", "ntokens": "3239.48", "nsentences": "44.12", "prob_perplexity": "173.189", "code_perplexity": "170.003", "temp": "0.835", "loss_0": "2.934", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50647", "wps": "17996.1", "ups": "5.56", "wpb": "3239.5", "bsz": "44.1", "num_updates": "174800", "lr": "5.70127e-05", "gnorm": "0.773", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "32942"} [2023-11-02 02:44:31,390][train_inner][INFO] - {"epoch": 44, "update": 43.149, "loss": "3.056", "ntokens": "3206.68", "nsentences": "42.64", "prob_perplexity": "173.758", "code_perplexity": "170.64", "temp": "0.834", "loss_0": "2.94", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50361", "wps": "17802.1", "ups": "5.55", "wpb": "3206.7", "bsz": "42.6", "num_updates": "175000", "lr": "5.6962e-05", "gnorm": "0.792", "loss_scale": "4", "train_wall": "35", "gb_free": "13", "wall": "32978"} [2023-11-02 02:44:31,392][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 02:44:31,393][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:44:31,411][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 51 [2023-11-02 02:44:57,069][valid][INFO] - {"epoch": 44, "valid_loss": "2.868", "valid_ntokens": "3177.61", "valid_nsentences": "44.1685", "valid_prob_perplexity": "173.993", "valid_code_perplexity": "171.117", "valid_temp": "0.834", "valid_loss_0": "2.752", "valid_loss_1": "0.105", "valid_loss_2": "0.011", "valid_accuracy": "0.54221", "valid_wps": "56178.2", "valid_wpb": "3177.6", "valid_bsz": "44.2", "valid_num_updates": "175000", "valid_best_loss": "2.845"} [2023-11-02 02:44:57,071][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 44 @ 175000 updates [2023-11-02 02:44:57,072][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_44_175000.pt [2023-11-02 02:44:58,426][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_44_175000.pt [2023-11-02 02:44:59,409][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_44_175000.pt (epoch 44 @ 175000 updates, score 2.868) (writing took 2.338623831048608 seconds) [2023-11-02 02:45:35,722][train_inner][INFO] - {"epoch": 44, "update": 43.198, "loss": "2.992", "ntokens": "3196.84", "nsentences": "45.28", "prob_perplexity": "174.126", "code_perplexity": "171.058", "temp": "0.833", "loss_0": "2.876", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51756", "wps": "9938.9", "ups": "3.11", "wpb": "3196.8", "bsz": "45.3", "num_updates": "175200", "lr": "5.69114e-05", "gnorm": "0.772", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "33042"} [2023-11-02 02:46:12,028][train_inner][INFO] - {"epoch": 44, "update": 43.248, "loss": "3.012", "ntokens": "3206.68", "nsentences": "45.28", "prob_perplexity": "173.868", "code_perplexity": "170.734", "temp": "0.832", "loss_0": "2.896", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51335", "wps": "17665.9", "ups": "5.51", "wpb": "3206.7", "bsz": "45.3", "num_updates": "175400", "lr": "5.68608e-05", "gnorm": "0.777", "loss_scale": "4", "train_wall": "36", "gb_free": "13.1", "wall": "33078"} [2023-11-02 02:46:47,881][train_inner][INFO] - {"epoch": 44, "update": 43.297, "loss": "2.978", "ntokens": "3164.44", "nsentences": "45.72", "prob_perplexity": "173.369", "code_perplexity": "170.214", "temp": "0.832", "loss_0": "2.862", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.5205", "wps": "17653.1", "ups": "5.58", "wpb": "3164.4", "bsz": "45.7", "num_updates": "175600", "lr": "5.68101e-05", "gnorm": "0.776", "loss_scale": "4", "train_wall": "35", "gb_free": "14.7", "wall": "33114"} [2023-11-02 02:47:23,494][train_inner][INFO] - {"epoch": 44, "update": 43.346, "loss": "3.019", "ntokens": "3207.08", "nsentences": "44.28", "prob_perplexity": "174.375", "code_perplexity": "171.281", "temp": "0.831", "loss_0": "2.903", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51136", "wps": "18012.1", "ups": "5.62", "wpb": "3207.1", "bsz": "44.3", "num_updates": "175800", "lr": "5.67595e-05", "gnorm": "0.773", "loss_scale": "4", "train_wall": "35", "gb_free": "14.5", "wall": "33150"} [2023-11-02 02:47:59,393][train_inner][INFO] - {"epoch": 44, "update": 43.395, "loss": "3.026", "ntokens": "3158.68", "nsentences": "44.08", "prob_perplexity": "173.934", "code_perplexity": "170.815", "temp": "0.83", "loss_0": "2.91", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.5105", "wps": "17598.9", "ups": "5.57", "wpb": "3158.7", "bsz": "44.1", "num_updates": "176000", "lr": "5.67089e-05", "gnorm": "0.792", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "33186"} [2023-11-02 02:48:35,512][train_inner][INFO] - {"epoch": 44, "update": 43.445, "loss": "3.001", "ntokens": "3167.2", "nsentences": "43.16", "prob_perplexity": "174.331", "code_perplexity": "171.295", "temp": "0.829", "loss_0": "2.885", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51391", "wps": "17538.3", "ups": "5.54", "wpb": "3167.2", "bsz": "43.2", "num_updates": "176200", "lr": "5.66582e-05", "gnorm": "0.784", "loss_scale": "4", "train_wall": "35", "gb_free": "14.6", "wall": "33222"} [2023-11-02 02:49:12,159][train_inner][INFO] - {"epoch": 44, "update": 43.494, "loss": "3.011", "ntokens": "3222.72", "nsentences": "46.08", "prob_perplexity": "174.511", "code_perplexity": "171.383", "temp": "0.828", "loss_0": "2.895", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51388", "wps": "17589.3", "ups": "5.46", "wpb": "3222.7", "bsz": "46.1", "num_updates": "176400", "lr": "5.66076e-05", "gnorm": "0.776", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "33258"} [2023-11-02 02:49:48,625][train_inner][INFO] - {"epoch": 44, "update": 43.543, "loss": "3.006", "ntokens": "3190.48", "nsentences": "43.76", "prob_perplexity": "174.426", "code_perplexity": "171.226", "temp": "0.827", "loss_0": "2.891", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51378", "wps": "17499.5", "ups": "5.48", "wpb": "3190.5", "bsz": "43.8", "num_updates": "176600", "lr": "5.6557e-05", "gnorm": "0.79", "loss_scale": "4", "train_wall": "36", "gb_free": "12.6", "wall": "33295"} [2023-11-02 02:50:24,898][train_inner][INFO] - {"epoch": 44, "update": 43.593, "loss": "2.992", "ntokens": "3164.12", "nsentences": "43.84", "prob_perplexity": "175.719", "code_perplexity": "172.654", "temp": "0.827", "loss_0": "2.877", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51562", "wps": "17447.2", "ups": "5.51", "wpb": "3164.1", "bsz": "43.8", "num_updates": "176800", "lr": "5.65063e-05", "gnorm": "0.79", "loss_scale": "4", "train_wall": "36", "gb_free": "14.8", "wall": "33331"} [2023-11-02 02:51:01,137][train_inner][INFO] - {"epoch": 44, "update": 43.642, "loss": "2.947", "ntokens": "3203.04", "nsentences": "47.88", "prob_perplexity": "174.77", "code_perplexity": "171.674", "temp": "0.826", "loss_0": "2.832", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.52573", "wps": "17678.6", "ups": "5.52", "wpb": "3203", "bsz": "47.9", "num_updates": "177000", "lr": "5.64557e-05", "gnorm": "0.781", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "33367"} [2023-11-02 02:51:37,437][train_inner][INFO] - {"epoch": 44, "update": 43.691, "loss": "3.089", "ntokens": "3202.6", "nsentences": "41.88", "prob_perplexity": "174.395", "code_perplexity": "171.302", "temp": "0.825", "loss_0": "2.974", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.49805", "wps": "17646.3", "ups": "5.51", "wpb": "3202.6", "bsz": "41.9", "num_updates": "177200", "lr": "5.64051e-05", "gnorm": "0.785", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "33404"} [2023-11-02 02:52:13,708][train_inner][INFO] - {"epoch": 44, "update": 43.741, "loss": "3.018", "ntokens": "3196.4", "nsentences": "45.72", "prob_perplexity": "175.035", "code_perplexity": "171.956", "temp": "0.824", "loss_0": "2.903", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51412", "wps": "17626.1", "ups": "5.51", "wpb": "3196.4", "bsz": "45.7", "num_updates": "177400", "lr": "5.63544e-05", "gnorm": "0.776", "loss_scale": "4", "train_wall": "36", "gb_free": "12.5", "wall": "33440"} [2023-11-02 02:52:50,259][train_inner][INFO] - {"epoch": 44, "update": 43.79, "loss": "3.112", "ntokens": "3247.76", "nsentences": "41.52", "prob_perplexity": "175.04", "code_perplexity": "171.87", "temp": "0.823", "loss_0": "2.996", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.49397", "wps": "17772.2", "ups": "5.47", "wpb": "3247.8", "bsz": "41.5", "num_updates": "177600", "lr": "5.63038e-05", "gnorm": "0.777", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "33476"} [2023-11-02 02:53:26,866][train_inner][INFO] - {"epoch": 44, "update": 43.839, "loss": "3.06", "ntokens": "3210.2", "nsentences": "43.28", "prob_perplexity": "175.295", "code_perplexity": "172.221", "temp": "0.823", "loss_0": "2.945", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.50364", "wps": "17539.9", "ups": "5.46", "wpb": "3210.2", "bsz": "43.3", "num_updates": "177800", "lr": "5.62532e-05", "gnorm": "0.784", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "33513"} [2023-11-02 02:54:02,773][train_inner][INFO] - {"epoch": 44, "update": 43.889, "loss": "3.01", "ntokens": "3167.96", "nsentences": "42.88", "prob_perplexity": "175.648", "code_perplexity": "172.554", "temp": "0.822", "loss_0": "2.895", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51158", "wps": "17646.1", "ups": "5.57", "wpb": "3168", "bsz": "42.9", "num_updates": "178000", "lr": "5.62025e-05", "gnorm": "0.79", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "33549"} [2023-11-02 02:54:38,941][train_inner][INFO] - {"epoch": 44, "update": 43.938, "loss": "3.004", "ntokens": "3176", "nsentences": "44.2", "prob_perplexity": "175.97", "code_perplexity": "172.881", "temp": "0.821", "loss_0": "2.889", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51436", "wps": "17571.8", "ups": "5.53", "wpb": "3176", "bsz": "44.2", "num_updates": "178200", "lr": "5.61519e-05", "gnorm": "0.782", "loss_scale": "4", "train_wall": "36", "gb_free": "14.4", "wall": "33585"} [2023-11-02 02:55:15,254][train_inner][INFO] - {"epoch": 44, "update": 43.987, "loss": "3.016", "ntokens": "3192.88", "nsentences": "44.92", "prob_perplexity": "175.406", "code_perplexity": "172.314", "temp": "0.82", "loss_0": "2.901", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51318", "wps": "17586.4", "ups": "5.51", "wpb": "3192.9", "bsz": "44.9", "num_updates": "178400", "lr": "5.61013e-05", "gnorm": "0.784", "loss_scale": "4", "train_wall": "36", "gb_free": "16.1", "wall": "33621"} [2023-11-02 02:55:24,442][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 02:55:24,444][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:55:24,464][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 52 [2023-11-02 02:55:50,218][valid][INFO] - {"epoch": 44, "valid_loss": "2.854", "valid_ntokens": "3161.08", "valid_nsentences": "44.1685", "valid_prob_perplexity": "175.063", "valid_code_perplexity": "172.099", "valid_temp": "0.819", "valid_loss_0": "2.738", "valid_loss_1": "0.105", "valid_loss_2": "0.011", "valid_accuracy": "0.54422", "valid_wps": "55635.2", "valid_wpb": "3161.1", "valid_bsz": "44.2", "valid_num_updates": "178452", "valid_best_loss": "2.845"} [2023-11-02 02:55:50,221][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 44 @ 178452 updates [2023-11-02 02:55:50,223][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 02:55:51,642][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 02:55:51,691][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 44 @ 178452 updates, score 2.854) (writing took 1.4703601240180433 seconds) [2023-11-02 02:55:51,692][fairseq_cli.train][INFO] - end of epoch 44 (average epoch stats below) [2023-11-02 02:55:51,694][train][INFO] - {"epoch": 44, "train_loss": "3.021", "train_ntokens": "3195.97", "train_nsentences": "44.2682", "train_prob_perplexity": "174.528", "train_code_perplexity": "171.42", "train_temp": "0.828", "train_loss_0": "2.906", "train_loss_1": "0.105", "train_loss_2": "0.011", "train_accuracy": "0.51132", "train_wps": "16425.1", "train_ups": "5.14", "train_wpb": "3196", "train_bsz": "44.3", "train_num_updates": "178452", "train_lr": "5.60881e-05", "train_gnorm": "0.782", "train_loss_scale": "4", "train_train_wall": "721", "train_gb_free": "13", "train_wall": "33658"} [2023-11-02 02:55:51,697][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 02:55:51,717][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 45 [2023-11-02 02:55:51,889][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 02:55:51,932][fairseq.trainer][INFO] - begin training epoch 45 [2023-11-02 02:55:51,933][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 02:56:18,463][train_inner][INFO] - {"epoch": 45, "update": 44.036, "loss": "3.022", "ntokens": "3200.44", "nsentences": "44.84", "prob_perplexity": "176.084", "code_perplexity": "172.933", "temp": "0.819", "loss_0": "2.907", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51153", "wps": "10126.9", "ups": "3.16", "wpb": "3200.4", "bsz": "44.8", "num_updates": "178600", "lr": "5.60506e-05", "gnorm": "0.785", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "33685"} [2023-11-02 02:56:54,226][train_inner][INFO] - {"epoch": 45, "update": 44.086, "loss": "2.996", "ntokens": "3199.08", "nsentences": "45.28", "prob_perplexity": "176.348", "code_perplexity": "173.289", "temp": "0.818", "loss_0": "2.881", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.51624", "wps": "17891.3", "ups": "5.59", "wpb": "3199.1", "bsz": "45.3", "num_updates": "178800", "lr": "5.6e-05", "gnorm": "0.784", "loss_scale": "4", "train_wall": "35", "gb_free": "12.9", "wall": "33720"} [2023-11-02 02:57:30,508][train_inner][INFO] - {"epoch": 45, "update": 44.135, "loss": "3.017", "ntokens": "3214.12", "nsentences": "44.04", "prob_perplexity": "175.615", "code_perplexity": "172.628", "temp": "0.818", "loss_0": "2.902", "loss_1": "0.105", "loss_2": "0.011", "accuracy": "0.51165", "wps": "17718.9", "ups": "5.51", "wpb": "3214.1", "bsz": "44", "num_updates": "179000", "lr": "5.59494e-05", "gnorm": "0.785", "loss_scale": "4", "train_wall": "36", "gb_free": "15", "wall": "33757"} [2023-11-02 02:57:40,806][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 02:58:06,576][train_inner][INFO] - {"epoch": 45, "update": 44.185, "loss": "3.016", "ntokens": "3205.92", "nsentences": "43.92", "prob_perplexity": "177.443", "code_perplexity": "174.36", "temp": "0.817", "loss_0": "2.901", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.51183", "wps": "17778.4", "ups": "5.55", "wpb": "3205.9", "bsz": "43.9", "num_updates": "179200", "lr": "5.58987e-05", "gnorm": "0.784", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "33793"} [2023-11-02 02:58:42,894][train_inner][INFO] - {"epoch": 45, "update": 44.234, "loss": "2.995", "ntokens": "3165.08", "nsentences": "45.68", "prob_perplexity": "175.606", "code_perplexity": "172.602", "temp": "0.816", "loss_0": "2.88", "loss_1": "0.105", "loss_2": "0.01", "accuracy": "0.51675", "wps": "17430.9", "ups": "5.51", "wpb": "3165.1", "bsz": "45.7", "num_updates": "179400", "lr": "5.58481e-05", "gnorm": "0.786", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "33829"} [2023-11-02 02:59:19,592][train_inner][INFO] - {"epoch": 45, "update": 44.283, "loss": "2.95", "ntokens": "3200.2", "nsentences": "48", "prob_perplexity": "177.067", "code_perplexity": "174.031", "temp": "0.815", "loss_0": "2.835", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.52677", "wps": "17441.7", "ups": "5.45", "wpb": "3200.2", "bsz": "48", "num_updates": "179600", "lr": "5.57975e-05", "gnorm": "0.785", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "33866"} [2023-11-02 02:59:55,918][train_inner][INFO] - {"epoch": 45, "update": 44.333, "loss": "3.013", "ntokens": "3206.16", "nsentences": "45.36", "prob_perplexity": "176.429", "code_perplexity": "173.362", "temp": "0.814", "loss_0": "2.898", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.51402", "wps": "17652.9", "ups": "5.51", "wpb": "3206.2", "bsz": "45.4", "num_updates": "179800", "lr": "5.57468e-05", "gnorm": "0.784", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "33902"} [2023-11-02 03:00:32,205][train_inner][INFO] - {"epoch": 45, "update": 44.382, "loss": "3.035", "ntokens": "3169.4", "nsentences": "42.56", "prob_perplexity": "176.712", "code_perplexity": "173.757", "temp": "0.814", "loss_0": "2.92", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.50767", "wps": "17469.5", "ups": "5.51", "wpb": "3169.4", "bsz": "42.6", "num_updates": "180000", "lr": "5.56962e-05", "gnorm": "0.789", "loss_scale": "2", "train_wall": "36", "gb_free": "15.4", "wall": "33938"} [2023-11-02 03:01:08,764][train_inner][INFO] - {"epoch": 45, "update": 44.431, "loss": "2.989", "ntokens": "3182.48", "nsentences": "45.04", "prob_perplexity": "176.906", "code_perplexity": "173.808", "temp": "0.813", "loss_0": "2.874", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.51708", "wps": "17411.5", "ups": "5.47", "wpb": "3182.5", "bsz": "45", "num_updates": "180200", "lr": "5.56456e-05", "gnorm": "0.786", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "33975"} [2023-11-02 03:01:45,337][train_inner][INFO] - {"epoch": 45, "update": 44.481, "loss": "3.017", "ntokens": "3195.04", "nsentences": "43.36", "prob_perplexity": "177.078", "code_perplexity": "174.073", "temp": "0.812", "loss_0": "2.902", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.5111", "wps": "17473.2", "ups": "5.47", "wpb": "3195", "bsz": "43.4", "num_updates": "180400", "lr": "5.55949e-05", "gnorm": "0.778", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "34012"} [2023-11-02 03:02:21,600][train_inner][INFO] - {"epoch": 45, "update": 44.53, "loss": "3.085", "ntokens": "3213.6", "nsentences": "41.56", "prob_perplexity": "176.631", "code_perplexity": "173.553", "temp": "0.811", "loss_0": "2.97", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.4974", "wps": "17725", "ups": "5.52", "wpb": "3213.6", "bsz": "41.6", "num_updates": "180600", "lr": "5.55443e-05", "gnorm": "0.794", "loss_scale": "2", "train_wall": "36", "gb_free": "15.1", "wall": "34048"} [2023-11-02 03:02:57,895][train_inner][INFO] - {"epoch": 45, "update": 44.579, "loss": "3.038", "ntokens": "3178.96", "nsentences": "42.92", "prob_perplexity": "177.146", "code_perplexity": "174.028", "temp": "0.81", "loss_0": "2.923", "loss_1": "0.104", "loss_2": "0.011", "accuracy": "0.50656", "wps": "17518.4", "ups": "5.51", "wpb": "3179", "bsz": "42.9", "num_updates": "180800", "lr": "5.54937e-05", "gnorm": "0.788", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "34084"} [2023-11-02 03:03:34,578][train_inner][INFO] - {"epoch": 45, "update": 44.628, "loss": "2.993", "ntokens": "3179.92", "nsentences": "44.6", "prob_perplexity": "177.344", "code_perplexity": "174.381", "temp": "0.809", "loss_0": "2.878", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51652", "wps": "17338.6", "ups": "5.45", "wpb": "3179.9", "bsz": "44.6", "num_updates": "181000", "lr": "5.5443e-05", "gnorm": "0.779", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "34121"} [2023-11-02 03:04:11,288][train_inner][INFO] - {"epoch": 45, "update": 44.678, "loss": "3.041", "ntokens": "3198.28", "nsentences": "43.28", "prob_perplexity": "177.246", "code_perplexity": "174.257", "temp": "0.809", "loss_0": "2.926", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50698", "wps": "17425.3", "ups": "5.45", "wpb": "3198.3", "bsz": "43.3", "num_updates": "181200", "lr": "5.53924e-05", "gnorm": "0.787", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "34158"} [2023-11-02 03:04:47,297][train_inner][INFO] - {"epoch": 45, "update": 44.727, "loss": "3.057", "ntokens": "3207.36", "nsentences": "42.56", "prob_perplexity": "177.385", "code_perplexity": "174.457", "temp": "0.808", "loss_0": "2.942", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50415", "wps": "17815.5", "ups": "5.55", "wpb": "3207.4", "bsz": "42.6", "num_updates": "181400", "lr": "5.53418e-05", "gnorm": "0.794", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "34194"} [2023-11-02 03:05:23,826][train_inner][INFO] - {"epoch": 45, "update": 44.776, "loss": "3.06", "ntokens": "3266.6", "nsentences": "42.28", "prob_perplexity": "177.523", "code_perplexity": "174.573", "temp": "0.807", "loss_0": "2.946", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50164", "wps": "17886.1", "ups": "5.48", "wpb": "3266.6", "bsz": "42.3", "num_updates": "181600", "lr": "5.52911e-05", "gnorm": "0.784", "loss_scale": "2", "train_wall": "36", "gb_free": "15.1", "wall": "34230"} [2023-11-02 03:06:00,038][train_inner][INFO] - {"epoch": 45, "update": 44.826, "loss": "2.963", "ntokens": "3209.28", "nsentences": "46.2", "prob_perplexity": "177.162", "code_perplexity": "174.243", "temp": "0.806", "loss_0": "2.849", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52272", "wps": "17725.9", "ups": "5.52", "wpb": "3209.3", "bsz": "46.2", "num_updates": "181800", "lr": "5.52405e-05", "gnorm": "0.778", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "34266"} [2023-11-02 03:06:35,881][train_inner][INFO] - {"epoch": 45, "update": 44.875, "loss": "3.007", "ntokens": "3178.48", "nsentences": "45.92", "prob_perplexity": "176.935", "code_perplexity": "173.867", "temp": "0.805", "loss_0": "2.892", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51615", "wps": "17736.6", "ups": "5.58", "wpb": "3178.5", "bsz": "45.9", "num_updates": "182000", "lr": "5.51899e-05", "gnorm": "0.791", "loss_scale": "2", "train_wall": "35", "gb_free": "14.8", "wall": "34302"} [2023-11-02 03:07:11,664][train_inner][INFO] - {"epoch": 45, "update": 44.924, "loss": "2.977", "ntokens": "3161.28", "nsentences": "45", "prob_perplexity": "177.646", "code_perplexity": "174.664", "temp": "0.805", "loss_0": "2.863", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51995", "wps": "17670.4", "ups": "5.59", "wpb": "3161.3", "bsz": "45", "num_updates": "182200", "lr": "5.51392e-05", "gnorm": "0.801", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "34338"} [2023-11-02 03:07:47,727][train_inner][INFO] - {"epoch": 45, "update": 44.974, "loss": "3.076", "ntokens": "3203.72", "nsentences": "42.96", "prob_perplexity": "178.147", "code_perplexity": "175.098", "temp": "0.804", "loss_0": "2.962", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50102", "wps": "17776.9", "ups": "5.55", "wpb": "3203.7", "bsz": "43", "num_updates": "182400", "lr": "5.50886e-05", "gnorm": "0.789", "loss_scale": "2", "train_wall": "35", "gb_free": "15.1", "wall": "34374"} [2023-11-02 03:08:07,246][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 03:08:07,248][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:08:07,269][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 53 [2023-11-02 03:08:32,721][valid][INFO] - {"epoch": 45, "valid_loss": "2.842", "valid_ntokens": "3161.03", "valid_nsentences": "44.1685", "valid_prob_perplexity": "174.92", "valid_code_perplexity": "172.107", "valid_temp": "0.803", "valid_loss_0": "2.726", "valid_loss_1": "0.105", "valid_loss_2": "0.01", "valid_accuracy": "0.54626", "valid_wps": "56311.4", "valid_wpb": "3161", "valid_bsz": "44.2", "valid_num_updates": "182507", "valid_best_loss": "2.842"} [2023-11-02 03:08:32,723][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 45 @ 182507 updates [2023-11-02 03:08:32,725][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:08:34,150][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:08:35,124][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 45 @ 182507 updates, score 2.842) (writing took 2.4013711703009903 seconds) [2023-11-02 03:08:35,125][fairseq_cli.train][INFO] - end of epoch 45 (average epoch stats below) [2023-11-02 03:08:35,127][train][INFO] - {"epoch": 45, "train_loss": "3.014", "train_ntokens": "3195.89", "train_nsentences": "44.2772", "train_prob_perplexity": "176.984", "train_code_perplexity": "173.959", "train_temp": "0.811", "train_loss_0": "2.899", "train_loss_1": "0.104", "train_loss_2": "0.011", "train_accuracy": "0.51236", "train_wps": "16975.1", "train_ups": "5.31", "train_wpb": "3195.9", "train_bsz": "44.3", "train_num_updates": "182507", "train_lr": "5.50615e-05", "train_gnorm": "0.787", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "13.9", "train_wall": "34421"} [2023-11-02 03:08:35,129][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:08:35,163][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 46 [2023-11-02 03:08:35,370][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 03:08:35,412][fairseq.trainer][INFO] - begin training epoch 46 [2023-11-02 03:08:35,412][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 03:08:52,147][train_inner][INFO] - {"epoch": 46, "update": 45.023, "loss": "2.945", "ntokens": "3193.12", "nsentences": "44.92", "prob_perplexity": "178.604", "code_perplexity": "175.544", "temp": "0.803", "loss_0": "2.831", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52324", "wps": "9913.9", "ups": "3.1", "wpb": "3193.1", "bsz": "44.9", "num_updates": "182600", "lr": "5.5038e-05", "gnorm": "0.783", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "34438"} [2023-11-02 03:09:28,173][train_inner][INFO] - {"epoch": 46, "update": 45.072, "loss": "3.01", "ntokens": "3188.76", "nsentences": "42.2", "prob_perplexity": "177.882", "code_perplexity": "174.792", "temp": "0.802", "loss_0": "2.895", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51096", "wps": "17703.6", "ups": "5.55", "wpb": "3188.8", "bsz": "42.2", "num_updates": "182800", "lr": "5.49873e-05", "gnorm": "0.788", "loss_scale": "2", "train_wall": "35", "gb_free": "12.6", "wall": "34474"} [2023-11-02 03:10:04,088][train_inner][INFO] - {"epoch": 46, "update": 45.122, "loss": "2.982", "ntokens": "3200.24", "nsentences": "43.6", "prob_perplexity": "177.022", "code_perplexity": "173.981", "temp": "0.801", "loss_0": "2.867", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51589", "wps": "17821.9", "ups": "5.57", "wpb": "3200.2", "bsz": "43.6", "num_updates": "183000", "lr": "5.49367e-05", "gnorm": "0.783", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "34510"} [2023-11-02 03:10:39,780][train_inner][INFO] - {"epoch": 46, "update": 45.171, "loss": "3.064", "ntokens": "3237.68", "nsentences": "42.68", "prob_perplexity": "178.233", "code_perplexity": "175.199", "temp": "0.801", "loss_0": "2.95", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50173", "wps": "18143.9", "ups": "5.6", "wpb": "3237.7", "bsz": "42.7", "num_updates": "183200", "lr": "5.48861e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "35", "gb_free": "15.5", "wall": "34546"} [2023-11-02 03:11:15,321][train_inner][INFO] - {"epoch": 46, "update": 45.22, "loss": "2.964", "ntokens": "3136.16", "nsentences": "43.08", "prob_perplexity": "177.912", "code_perplexity": "174.913", "temp": "0.8", "loss_0": "2.85", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51977", "wps": "17649.1", "ups": "5.63", "wpb": "3136.2", "bsz": "43.1", "num_updates": "183400", "lr": "5.48354e-05", "gnorm": "0.838", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "34582"} [2023-11-02 03:11:51,269][train_inner][INFO] - {"epoch": 46, "update": 45.269, "loss": "3.025", "ntokens": "3194.68", "nsentences": "44", "prob_perplexity": "178.235", "code_perplexity": "175.357", "temp": "0.799", "loss_0": "2.91", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50952", "wps": "17774.9", "ups": "5.56", "wpb": "3194.7", "bsz": "44", "num_updates": "183600", "lr": "5.47848e-05", "gnorm": "0.801", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "34618"} [2023-11-02 03:12:27,981][train_inner][INFO] - {"epoch": 46, "update": 45.319, "loss": "2.98", "ntokens": "3171.92", "nsentences": "44.08", "prob_perplexity": "177.415", "code_perplexity": "174.458", "temp": "0.798", "loss_0": "2.866", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51731", "wps": "17281.4", "ups": "5.45", "wpb": "3171.9", "bsz": "44.1", "num_updates": "183800", "lr": "5.47342e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "36", "gb_free": "16.6", "wall": "34654"} [2023-11-02 03:13:04,261][train_inner][INFO] - {"epoch": 46, "update": 45.368, "loss": "3.002", "ntokens": "3180.12", "nsentences": "44.28", "prob_perplexity": "178.225", "code_perplexity": "175.338", "temp": "0.797", "loss_0": "2.887", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51439", "wps": "17531.8", "ups": "5.51", "wpb": "3180.1", "bsz": "44.3", "num_updates": "184000", "lr": "5.46835e-05", "gnorm": "0.786", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "34690"} [2023-11-02 03:13:40,229][train_inner][INFO] - {"epoch": 46, "update": 45.417, "loss": "3.042", "ntokens": "3156.12", "nsentences": "42.24", "prob_perplexity": "177.841", "code_perplexity": "174.863", "temp": "0.797", "loss_0": "2.927", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50663", "wps": "17550.6", "ups": "5.56", "wpb": "3156.1", "bsz": "42.2", "num_updates": "184200", "lr": "5.46329e-05", "gnorm": "0.803", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "34726"} [2023-11-02 03:14:16,724][train_inner][INFO] - {"epoch": 46, "update": 45.467, "loss": "2.935", "ntokens": "3177.68", "nsentences": "47.12", "prob_perplexity": "179.105", "code_perplexity": "176.243", "temp": "0.796", "loss_0": "2.821", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52756", "wps": "17415.4", "ups": "5.48", "wpb": "3177.7", "bsz": "47.1", "num_updates": "184400", "lr": "5.45823e-05", "gnorm": "0.782", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "34763"} [2023-11-02 03:14:53,524][train_inner][INFO] - {"epoch": 46, "update": 45.516, "loss": "2.958", "ntokens": "3160.68", "nsentences": "45.88", "prob_perplexity": "179.092", "code_perplexity": "176.168", "temp": "0.795", "loss_0": "2.844", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52371", "wps": "17178.6", "ups": "5.44", "wpb": "3160.7", "bsz": "45.9", "num_updates": "184600", "lr": "5.45316e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "34800"} [2023-11-02 03:15:30,312][train_inner][INFO] - {"epoch": 46, "update": 45.565, "loss": "2.979", "ntokens": "3212.84", "nsentences": "44.48", "prob_perplexity": "178.902", "code_perplexity": "175.984", "temp": "0.794", "loss_0": "2.865", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.5174", "wps": "17467.9", "ups": "5.44", "wpb": "3212.8", "bsz": "44.5", "num_updates": "184800", "lr": "5.4481e-05", "gnorm": "0.787", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "34837"} [2023-11-02 03:16:06,690][train_inner][INFO] - {"epoch": 46, "update": 45.615, "loss": "2.971", "ntokens": "3194.4", "nsentences": "45.16", "prob_perplexity": "180.426", "code_perplexity": "177.456", "temp": "0.793", "loss_0": "2.858", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51977", "wps": "17563.6", "ups": "5.5", "wpb": "3194.4", "bsz": "45.2", "num_updates": "185000", "lr": "5.44304e-05", "gnorm": "0.784", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "34873"} [2023-11-02 03:16:43,187][train_inner][INFO] - {"epoch": 46, "update": 45.664, "loss": "2.973", "ntokens": "3165.84", "nsentences": "43.32", "prob_perplexity": "179.14", "code_perplexity": "176.284", "temp": "0.793", "loss_0": "2.859", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51753", "wps": "17349.2", "ups": "5.48", "wpb": "3165.8", "bsz": "43.3", "num_updates": "185200", "lr": "5.43797e-05", "gnorm": "0.793", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "34909"} [2023-11-02 03:17:19,829][train_inner][INFO] - {"epoch": 46, "update": 45.713, "loss": "3.029", "ntokens": "3198.12", "nsentences": "43.52", "prob_perplexity": "179.025", "code_perplexity": "176.097", "temp": "0.792", "loss_0": "2.915", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50962", "wps": "17457.3", "ups": "5.46", "wpb": "3198.1", "bsz": "43.5", "num_updates": "185400", "lr": "5.43291e-05", "gnorm": "0.786", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "34946"} [2023-11-02 03:17:56,317][train_inner][INFO] - {"epoch": 46, "update": 45.763, "loss": "2.962", "ntokens": "3181.8", "nsentences": "44.48", "prob_perplexity": "179.099", "code_perplexity": "176.167", "temp": "0.791", "loss_0": "2.848", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.5214", "wps": "17441", "ups": "5.48", "wpb": "3181.8", "bsz": "44.5", "num_updates": "185600", "lr": "5.42785e-05", "gnorm": "0.783", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "34983"} [2023-11-02 03:18:32,462][train_inner][INFO] - {"epoch": 46, "update": 45.812, "loss": "2.998", "ntokens": "3212.52", "nsentences": "45.44", "prob_perplexity": "179.153", "code_perplexity": "176.183", "temp": "0.79", "loss_0": "2.884", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.5153", "wps": "17777", "ups": "5.53", "wpb": "3212.5", "bsz": "45.4", "num_updates": "185800", "lr": "5.42278e-05", "gnorm": "0.786", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "35019"} [2023-11-02 03:19:08,749][train_inner][INFO] - {"epoch": 46, "update": 45.861, "loss": "2.986", "ntokens": "3187.24", "nsentences": "44.64", "prob_perplexity": "179.107", "code_perplexity": "176.18", "temp": "0.79", "loss_0": "2.872", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51683", "wps": "17567.8", "ups": "5.51", "wpb": "3187.2", "bsz": "44.6", "num_updates": "186000", "lr": "5.41772e-05", "gnorm": "0.785", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "35055"} [2023-11-02 03:19:44,812][train_inner][INFO] - {"epoch": 46, "update": 45.911, "loss": "2.977", "ntokens": "3175.12", "nsentences": "44.44", "prob_perplexity": "179.656", "code_perplexity": "176.602", "temp": "0.789", "loss_0": "2.863", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51939", "wps": "17609.6", "ups": "5.55", "wpb": "3175.1", "bsz": "44.4", "num_updates": "186200", "lr": "5.41266e-05", "gnorm": "0.784", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "35091"} [2023-11-02 03:20:20,914][train_inner][INFO] - {"epoch": 46, "update": 45.96, "loss": "2.982", "ntokens": "3185.24", "nsentences": "45.16", "prob_perplexity": "178.909", "code_perplexity": "175.996", "temp": "0.788", "loss_0": "2.868", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.5185", "wps": "17647", "ups": "5.54", "wpb": "3185.2", "bsz": "45.2", "num_updates": "186400", "lr": "5.40759e-05", "gnorm": "0.794", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "35127"} [2023-11-02 03:20:50,477][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 03:20:50,479][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:20:50,498][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 54 [2023-11-02 03:21:16,177][valid][INFO] - {"epoch": 46, "valid_loss": "2.85", "valid_ntokens": "3168.71", "valid_nsentences": "44.1685", "valid_prob_perplexity": "177.389", "valid_code_perplexity": "174.654", "valid_temp": "0.787", "valid_loss_0": "2.736", "valid_loss_1": "0.104", "valid_loss_2": "0.01", "valid_accuracy": "0.54461", "valid_wps": "55998.5", "valid_wpb": "3168.7", "valid_bsz": "44.2", "valid_num_updates": "186563", "valid_best_loss": "2.842"} [2023-11-02 03:21:16,179][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 46 @ 186563 updates [2023-11-02 03:21:16,181][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 03:21:17,608][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 03:21:17,653][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 46 @ 186563 updates, score 2.85) (writing took 1.4742550570517778 seconds) [2023-11-02 03:21:17,654][fairseq_cli.train][INFO] - end of epoch 46 (average epoch stats below) [2023-11-02 03:21:17,672][train][INFO] - {"epoch": 46, "train_loss": "2.988", "train_ntokens": "3186.87", "train_nsentences": "44.2682", "train_prob_perplexity": "178.734", "train_code_perplexity": "175.781", "train_temp": "0.795", "train_loss_0": "2.874", "train_loss_1": "0.104", "train_loss_2": "0.01", "train_accuracy": "0.51636", "train_wps": "16951.4", "train_ups": "5.32", "train_wpb": "3186.9", "train_bsz": "44.3", "train_num_updates": "186563", "train_lr": "5.40347e-05", "train_gnorm": "0.791", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "15.3", "train_wall": "35184"} [2023-11-02 03:21:17,675][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:21:17,702][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 47 [2023-11-02 03:21:17,872][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 03:21:17,915][fairseq.trainer][INFO] - begin training epoch 47 [2023-11-02 03:21:17,916][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 03:21:24,870][train_inner][INFO] - {"epoch": 47, "update": 46.009, "loss": "2.958", "ntokens": "3207.12", "nsentences": "45.68", "prob_perplexity": "180.301", "code_perplexity": "177.408", "temp": "0.787", "loss_0": "2.844", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52278", "wps": "10029.5", "ups": "3.13", "wpb": "3207.1", "bsz": "45.7", "num_updates": "186600", "lr": "5.40253e-05", "gnorm": "0.783", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "35191"} [2023-11-02 03:22:00,912][train_inner][INFO] - {"epoch": 47, "update": 46.058, "loss": "2.992", "ntokens": "3231.64", "nsentences": "44.8", "prob_perplexity": "179.683", "code_perplexity": "176.727", "temp": "0.786", "loss_0": "2.879", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51576", "wps": "17934.2", "ups": "5.55", "wpb": "3231.6", "bsz": "44.8", "num_updates": "186800", "lr": "5.39747e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "35227"} [2023-11-02 03:22:36,825][train_inner][INFO] - {"epoch": 47, "update": 46.108, "loss": "3.041", "ntokens": "3215.76", "nsentences": "41.4", "prob_perplexity": "179.675", "code_perplexity": "176.712", "temp": "0.786", "loss_0": "2.927", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.5043", "wps": "17909.5", "ups": "5.57", "wpb": "3215.8", "bsz": "41.4", "num_updates": "187000", "lr": "5.39241e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "35263"} [2023-11-02 03:23:12,379][train_inner][INFO] - {"epoch": 47, "update": 46.157, "loss": "2.93", "ntokens": "3146.56", "nsentences": "44.64", "prob_perplexity": "180.065", "code_perplexity": "177.098", "temp": "0.785", "loss_0": "2.816", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52688", "wps": "17701.4", "ups": "5.63", "wpb": "3146.6", "bsz": "44.6", "num_updates": "187200", "lr": "5.38734e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "35299"} [2023-11-02 03:23:48,149][train_inner][INFO] - {"epoch": 47, "update": 46.206, "loss": "2.958", "ntokens": "3189.4", "nsentences": "44.8", "prob_perplexity": "180.303", "code_perplexity": "177.295", "temp": "0.784", "loss_0": "2.844", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52187", "wps": "17833.5", "ups": "5.59", "wpb": "3189.4", "bsz": "44.8", "num_updates": "187400", "lr": "5.38228e-05", "gnorm": "0.791", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "35334"} [2023-11-02 03:24:24,026][train_inner][INFO] - {"epoch": 47, "update": 46.256, "loss": "2.982", "ntokens": "3229.76", "nsentences": "46.96", "prob_perplexity": "179.889", "code_perplexity": "176.945", "temp": "0.783", "loss_0": "2.868", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51977", "wps": "18006.1", "ups": "5.58", "wpb": "3229.8", "bsz": "47", "num_updates": "187600", "lr": "5.37722e-05", "gnorm": "0.782", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "35370"} [2023-11-02 03:24:59,947][train_inner][INFO] - {"epoch": 47, "update": 46.305, "loss": "3.017", "ntokens": "3203.24", "nsentences": "43.12", "prob_perplexity": "180.001", "code_perplexity": "177.053", "temp": "0.782", "loss_0": "2.903", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51002", "wps": "17836", "ups": "5.57", "wpb": "3203.2", "bsz": "43.1", "num_updates": "187800", "lr": "5.37215e-05", "gnorm": "0.792", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "35406"} [2023-11-02 03:25:35,793][train_inner][INFO] - {"epoch": 47, "update": 46.354, "loss": "2.949", "ntokens": "3195.56", "nsentences": "44.44", "prob_perplexity": "180.408", "code_perplexity": "177.43", "temp": "0.782", "loss_0": "2.835", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52328", "wps": "17830.2", "ups": "5.58", "wpb": "3195.6", "bsz": "44.4", "num_updates": "188000", "lr": "5.36709e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "35442"} [2023-11-02 03:26:11,791][train_inner][INFO] - {"epoch": 47, "update": 46.404, "loss": "2.969", "ntokens": "3175.76", "nsentences": "45.36", "prob_perplexity": "180.013", "code_perplexity": "177.079", "temp": "0.781", "loss_0": "2.855", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52083", "wps": "17645.5", "ups": "5.56", "wpb": "3175.8", "bsz": "45.4", "num_updates": "188200", "lr": "5.36203e-05", "gnorm": "0.793", "loss_scale": "2", "train_wall": "35", "gb_free": "11.9", "wall": "35478"} [2023-11-02 03:26:48,225][train_inner][INFO] - {"epoch": 47, "update": 46.453, "loss": "2.967", "ntokens": "3179.52", "nsentences": "45.48", "prob_perplexity": "180.167", "code_perplexity": "177.255", "temp": "0.78", "loss_0": "2.853", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52185", "wps": "17454.8", "ups": "5.49", "wpb": "3179.5", "bsz": "45.5", "num_updates": "188400", "lr": "5.35696e-05", "gnorm": "0.787", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "35514"} [2023-11-02 03:27:24,725][train_inner][INFO] - {"epoch": 47, "update": 46.502, "loss": "2.98", "ntokens": "3184.8", "nsentences": "43.92", "prob_perplexity": "180.175", "code_perplexity": "177.278", "temp": "0.779", "loss_0": "2.867", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51719", "wps": "17451.8", "ups": "5.48", "wpb": "3184.8", "bsz": "43.9", "num_updates": "188600", "lr": "5.3519e-05", "gnorm": "0.793", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "35551"} [2023-11-02 03:28:01,315][train_inner][INFO] - {"epoch": 47, "update": 46.552, "loss": "2.963", "ntokens": "3181.24", "nsentences": "45.24", "prob_perplexity": "180.225", "code_perplexity": "177.29", "temp": "0.779", "loss_0": "2.85", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52139", "wps": "17389.8", "ups": "5.47", "wpb": "3181.2", "bsz": "45.2", "num_updates": "188800", "lr": "5.34684e-05", "gnorm": "0.783", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "35588"} [2023-11-02 03:28:37,343][train_inner][INFO] - {"epoch": 47, "update": 46.601, "loss": "2.973", "ntokens": "3140.52", "nsentences": "44.36", "prob_perplexity": "179.481", "code_perplexity": "176.596", "temp": "0.778", "loss_0": "2.859", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51999", "wps": "17435.7", "ups": "5.55", "wpb": "3140.5", "bsz": "44.4", "num_updates": "189000", "lr": "5.34177e-05", "gnorm": "0.798", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "35624"} [2023-11-02 03:29:12,981][train_inner][INFO] - {"epoch": 47, "update": 46.65, "loss": "2.961", "ntokens": "3182", "nsentences": "45.16", "prob_perplexity": "180.042", "code_perplexity": "177.172", "temp": "0.777", "loss_0": "2.848", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52209", "wps": "17858.4", "ups": "5.61", "wpb": "3182", "bsz": "45.2", "num_updates": "189200", "lr": "5.33671e-05", "gnorm": "0.794", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "35659"} [2023-11-02 03:29:48,720][train_inner][INFO] - {"epoch": 47, "update": 46.699, "loss": "2.935", "ntokens": "3189.68", "nsentences": "46.56", "prob_perplexity": "180.958", "code_perplexity": "178.033", "temp": "0.776", "loss_0": "2.822", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.52724", "wps": "17850.8", "ups": "5.6", "wpb": "3189.7", "bsz": "46.6", "num_updates": "189400", "lr": "5.33165e-05", "gnorm": "0.798", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "35695"} [2023-11-02 03:30:24,768][train_inner][INFO] - {"epoch": 47, "update": 46.749, "loss": "2.972", "ntokens": "3206.52", "nsentences": "46.04", "prob_perplexity": "180.742", "code_perplexity": "177.815", "temp": "0.775", "loss_0": "2.859", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.52131", "wps": "17791.4", "ups": "5.55", "wpb": "3206.5", "bsz": "46", "num_updates": "189600", "lr": "5.32658e-05", "gnorm": "0.794", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "35731"} [2023-11-02 03:31:01,136][train_inner][INFO] - {"epoch": 47, "update": 46.798, "loss": "3.08", "ntokens": "3199.84", "nsentences": "40.44", "prob_perplexity": "179.945", "code_perplexity": "177.041", "temp": "0.775", "loss_0": "2.966", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.49766", "wps": "17598", "ups": "5.5", "wpb": "3199.8", "bsz": "40.4", "num_updates": "189800", "lr": "5.32152e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "35767"} [2023-11-02 03:31:36,925][train_inner][INFO] - {"epoch": 47, "update": 46.847, "loss": "2.948", "ntokens": "3154.16", "nsentences": "44.24", "prob_perplexity": "179.418", "code_perplexity": "176.521", "temp": "0.774", "loss_0": "2.835", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.5229", "wps": "17627.7", "ups": "5.59", "wpb": "3154.2", "bsz": "44.2", "num_updates": "190000", "lr": "5.31646e-05", "gnorm": "0.812", "loss_scale": "2", "train_wall": "35", "gb_free": "15", "wall": "35803"} [2023-11-02 03:32:13,058][train_inner][INFO] - {"epoch": 47, "update": 46.897, "loss": "3.005", "ntokens": "3205.28", "nsentences": "42.72", "prob_perplexity": "180.387", "code_perplexity": "177.495", "temp": "0.773", "loss_0": "2.892", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.51032", "wps": "17742.4", "ups": "5.54", "wpb": "3205.3", "bsz": "42.7", "num_updates": "190200", "lr": "5.31139e-05", "gnorm": "0.837", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "35839"} [2023-11-02 03:32:49,211][train_inner][INFO] - {"epoch": 47, "update": 46.946, "loss": "3.039", "ntokens": "3239.72", "nsentences": "43.2", "prob_perplexity": "180.198", "code_perplexity": "177.292", "temp": "0.772", "loss_0": "2.926", "loss_1": "0.104", "loss_2": "0.01", "accuracy": "0.50546", "wps": "17923.1", "ups": "5.53", "wpb": "3239.7", "bsz": "43.2", "num_updates": "190400", "lr": "5.30633e-05", "gnorm": "0.803", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "35875"} [2023-11-02 03:33:25,259][train_inner][INFO] - {"epoch": 47, "update": 46.995, "loss": "2.989", "ntokens": "3186.88", "nsentences": "42.84", "prob_perplexity": "180.901", "code_perplexity": "177.954", "temp": "0.772", "loss_0": "2.876", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51526", "wps": "17682.4", "ups": "5.55", "wpb": "3186.9", "bsz": "42.8", "num_updates": "190600", "lr": "5.30127e-05", "gnorm": "0.794", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "35911"} [2023-11-02 03:33:28,691][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 03:33:28,693][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:33:28,712][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 55 [2023-11-02 03:33:54,188][valid][INFO] - {"epoch": 47, "valid_loss": "2.836", "valid_ntokens": "3174.31", "valid_nsentences": "44.1685", "valid_prob_perplexity": "178.756", "valid_code_perplexity": "175.884", "valid_temp": "0.771", "valid_loss_0": "2.722", "valid_loss_1": "0.104", "valid_loss_2": "0.01", "valid_accuracy": "0.54738", "valid_wps": "56483.1", "valid_wpb": "3174.3", "valid_bsz": "44.2", "valid_num_updates": "190619", "valid_best_loss": "2.836"} [2023-11-02 03:33:54,190][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 47 @ 190619 updates [2023-11-02 03:33:54,192][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:33:55,623][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:33:56,596][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 47 @ 190619 updates, score 2.836) (writing took 2.4054287914186716 seconds) [2023-11-02 03:33:56,597][fairseq_cli.train][INFO] - end of epoch 47 (average epoch stats below) [2023-11-02 03:33:56,599][train][INFO] - {"epoch": 47, "train_loss": "2.984", "train_ntokens": "3192.56", "train_nsentences": "44.2682", "train_prob_perplexity": "180.128", "train_code_perplexity": "177.199", "train_temp": "0.779", "train_loss_0": "2.87", "train_loss_1": "0.104", "train_loss_2": "0.01", "train_accuracy": "0.51707", "train_wps": "17062.3", "train_ups": "5.34", "train_wpb": "3192.6", "train_bsz": "44.3", "train_num_updates": "190619", "train_lr": "5.30078e-05", "train_gnorm": "0.796", "train_loss_scale": "2", "train_train_wall": "718", "train_gb_free": "14", "train_wall": "35943"} [2023-11-02 03:33:56,602][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:33:56,629][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 48 [2023-11-02 03:33:56,825][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 03:33:56,870][fairseq.trainer][INFO] - begin training epoch 48 [2023-11-02 03:33:56,871][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 03:34:29,366][train_inner][INFO] - {"epoch": 48, "update": 47.045, "loss": "3.001", "ntokens": "3187.24", "nsentences": "44.8", "prob_perplexity": "180.967", "code_perplexity": "178.057", "temp": "0.771", "loss_0": "2.888", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51494", "wps": "9943.8", "ups": "3.12", "wpb": "3187.2", "bsz": "44.8", "num_updates": "190800", "lr": "5.2962e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "35976"} [2023-11-02 03:35:04,835][train_inner][INFO] - {"epoch": 48, "update": 47.094, "loss": "2.965", "ntokens": "3203.44", "nsentences": "43.68", "prob_perplexity": "181.328", "code_perplexity": "178.457", "temp": "0.77", "loss_0": "2.852", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51862", "wps": "18064.6", "ups": "5.64", "wpb": "3203.4", "bsz": "43.7", "num_updates": "191000", "lr": "5.29114e-05", "gnorm": "0.823", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "36011"} [2023-11-02 03:35:40,723][train_inner][INFO] - {"epoch": 48, "update": 47.143, "loss": "2.923", "ntokens": "3165.6", "nsentences": "46.6", "prob_perplexity": "180.778", "code_perplexity": "177.893", "temp": "0.769", "loss_0": "2.809", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.53032", "wps": "17642.5", "ups": "5.57", "wpb": "3165.6", "bsz": "46.6", "num_updates": "191200", "lr": "5.28608e-05", "gnorm": "0.796", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "36047"} [2023-11-02 03:36:16,528][train_inner][INFO] - {"epoch": 48, "update": 47.193, "loss": "2.976", "ntokens": "3180.52", "nsentences": "44.16", "prob_perplexity": "180.887", "code_perplexity": "177.984", "temp": "0.768", "loss_0": "2.862", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51845", "wps": "17766.7", "ups": "5.59", "wpb": "3180.5", "bsz": "44.2", "num_updates": "191400", "lr": "5.28101e-05", "gnorm": "0.797", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "36083"} [2023-11-02 03:36:52,179][train_inner][INFO] - {"epoch": 48, "update": 47.242, "loss": "2.908", "ntokens": "3157.92", "nsentences": "46.8", "prob_perplexity": "181.522", "code_perplexity": "178.65", "temp": "0.768", "loss_0": "2.795", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.53238", "wps": "17717.1", "ups": "5.61", "wpb": "3157.9", "bsz": "46.8", "num_updates": "191600", "lr": "5.27595e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "36118"} [2023-11-02 03:37:28,060][train_inner][INFO] - {"epoch": 48, "update": 47.291, "loss": "2.946", "ntokens": "3190.28", "nsentences": "44.72", "prob_perplexity": "181.594", "code_perplexity": "178.636", "temp": "0.767", "loss_0": "2.833", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.52293", "wps": "17783.3", "ups": "5.57", "wpb": "3190.3", "bsz": "44.7", "num_updates": "191800", "lr": "5.27089e-05", "gnorm": "0.79", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "36154"} [2023-11-02 03:38:04,075][train_inner][INFO] - {"epoch": 48, "update": 47.34, "loss": "2.967", "ntokens": "3218.8", "nsentences": "43.84", "prob_perplexity": "182.068", "code_perplexity": "179.235", "temp": "0.766", "loss_0": "2.854", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51903", "wps": "17876.2", "ups": "5.55", "wpb": "3218.8", "bsz": "43.8", "num_updates": "192000", "lr": "5.26582e-05", "gnorm": "0.796", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "36190"} [2023-11-02 03:38:39,744][train_inner][INFO] - {"epoch": 48, "update": 47.39, "loss": "2.946", "ntokens": "3213.96", "nsentences": "44.08", "prob_perplexity": "181.708", "code_perplexity": "178.735", "temp": "0.765", "loss_0": "2.833", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.5218", "wps": "18021.8", "ups": "5.61", "wpb": "3214", "bsz": "44.1", "num_updates": "192200", "lr": "5.26076e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "15.2", "wall": "36226"} [2023-11-02 03:39:16,033][train_inner][INFO] - {"epoch": 48, "update": 47.439, "loss": "2.994", "ntokens": "3185.96", "nsentences": "43.68", "prob_perplexity": "181.347", "code_perplexity": "178.426", "temp": "0.765", "loss_0": "2.881", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51577", "wps": "17560.2", "ups": "5.51", "wpb": "3186", "bsz": "43.7", "num_updates": "192400", "lr": "5.2557e-05", "gnorm": "0.811", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "36262"} [2023-11-02 03:39:52,956][train_inner][INFO] - {"epoch": 48, "update": 47.488, "loss": "2.983", "ntokens": "3179.48", "nsentences": "43.72", "prob_perplexity": "182.541", "code_perplexity": "179.691", "temp": "0.764", "loss_0": "2.871", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51706", "wps": "17223.4", "ups": "5.42", "wpb": "3179.5", "bsz": "43.7", "num_updates": "192600", "lr": "5.25063e-05", "gnorm": "0.826", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "36299"} [2023-11-02 03:40:29,446][train_inner][INFO] - {"epoch": 48, "update": 47.538, "loss": "3.012", "ntokens": "3172.56", "nsentences": "43.44", "prob_perplexity": "181.473", "code_perplexity": "178.527", "temp": "0.763", "loss_0": "2.899", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51141", "wps": "17389.6", "ups": "5.48", "wpb": "3172.6", "bsz": "43.4", "num_updates": "192800", "lr": "5.24557e-05", "gnorm": "0.811", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "36336"} [2023-11-02 03:41:05,914][train_inner][INFO] - {"epoch": 48, "update": 47.587, "loss": "2.98", "ntokens": "3176.68", "nsentences": "42", "prob_perplexity": "181.149", "code_perplexity": "178.197", "temp": "0.762", "loss_0": "2.867", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.515", "wps": "17423", "ups": "5.48", "wpb": "3176.7", "bsz": "42", "num_updates": "193000", "lr": "5.24051e-05", "gnorm": "0.806", "loss_scale": "2", "train_wall": "36", "gb_free": "16", "wall": "36372"} [2023-11-02 03:41:42,417][train_inner][INFO] - {"epoch": 48, "update": 47.636, "loss": "2.967", "ntokens": "3201.28", "nsentences": "45", "prob_perplexity": "182.272", "code_perplexity": "179.303", "temp": "0.762", "loss_0": "2.854", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.52031", "wps": "17540.8", "ups": "5.48", "wpb": "3201.3", "bsz": "45", "num_updates": "193200", "lr": "5.23544e-05", "gnorm": "0.796", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "36409"} [2023-11-02 03:42:18,764][train_inner][INFO] - {"epoch": 48, "update": 47.686, "loss": "2.978", "ntokens": "3172.24", "nsentences": "42.56", "prob_perplexity": "181.989", "code_perplexity": "179.059", "temp": "0.761", "loss_0": "2.865", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51618", "wps": "17456.4", "ups": "5.5", "wpb": "3172.2", "bsz": "42.6", "num_updates": "193400", "lr": "5.23038e-05", "gnorm": "0.796", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "36445"} [2023-11-02 03:42:55,496][train_inner][INFO] - {"epoch": 48, "update": 47.735, "loss": "2.988", "ntokens": "3219.76", "nsentences": "43.36", "prob_perplexity": "181.929", "code_perplexity": "179.067", "temp": "0.76", "loss_0": "2.875", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51465", "wps": "17531.9", "ups": "5.45", "wpb": "3219.8", "bsz": "43.4", "num_updates": "193600", "lr": "5.22532e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "36", "gb_free": "16.9", "wall": "36482"} [2023-11-02 03:43:31,620][train_inner][INFO] - {"epoch": 48, "update": 47.784, "loss": "2.929", "ntokens": "3146.68", "nsentences": "46.68", "prob_perplexity": "181.719", "code_perplexity": "178.852", "temp": "0.759", "loss_0": "2.816", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.5288", "wps": "17423.2", "ups": "5.54", "wpb": "3146.7", "bsz": "46.7", "num_updates": "193800", "lr": "5.22025e-05", "gnorm": "0.805", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "36518"} [2023-11-02 03:44:08,214][train_inner][INFO] - {"epoch": 48, "update": 47.834, "loss": "2.939", "ntokens": "3193.04", "nsentences": "46", "prob_perplexity": "181.7", "code_perplexity": "178.849", "temp": "0.759", "loss_0": "2.826", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.52504", "wps": "17451.9", "ups": "5.47", "wpb": "3193", "bsz": "46", "num_updates": "194000", "lr": "5.21519e-05", "gnorm": "0.793", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "36554"} [2023-11-02 03:44:44,699][train_inner][INFO] - {"epoch": 48, "update": 47.883, "loss": "2.981", "ntokens": "3182.8", "nsentences": "44.28", "prob_perplexity": "181.842", "code_perplexity": "178.953", "temp": "0.758", "loss_0": "2.868", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.5182", "wps": "17448.7", "ups": "5.48", "wpb": "3182.8", "bsz": "44.3", "num_updates": "194200", "lr": "5.21013e-05", "gnorm": "0.802", "loss_scale": "2", "train_wall": "36", "gb_free": "15.6", "wall": "36591"} [2023-11-02 03:45:21,214][train_inner][INFO] - {"epoch": 48, "update": 47.932, "loss": "2.957", "ntokens": "3195.88", "nsentences": "44.4", "prob_perplexity": "181.883", "code_perplexity": "179.064", "temp": "0.757", "loss_0": "2.845", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52154", "wps": "17505.3", "ups": "5.48", "wpb": "3195.9", "bsz": "44.4", "num_updates": "194400", "lr": "5.20506e-05", "gnorm": "0.801", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "36627"} [2023-11-02 03:45:57,317][train_inner][INFO] - {"epoch": 48, "update": 47.982, "loss": "3.019", "ntokens": "3158.12", "nsentences": "42.12", "prob_perplexity": "181.846", "code_perplexity": "178.959", "temp": "0.756", "loss_0": "2.906", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.50969", "wps": "17496", "ups": "5.54", "wpb": "3158.1", "bsz": "42.1", "num_updates": "194600", "lr": "5.2e-05", "gnorm": "0.802", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "36664"} [2023-11-02 03:46:10,684][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 03:46:10,685][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:46:10,703][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 56 [2023-11-02 03:46:36,461][valid][INFO] - {"epoch": 48, "valid_loss": "2.819", "valid_ntokens": "3161.83", "valid_nsentences": "44.1685", "valid_prob_perplexity": "181.182", "valid_code_perplexity": "178.493", "valid_temp": "0.756", "valid_loss_0": "2.706", "valid_loss_1": "0.103", "valid_loss_2": "0.01", "valid_accuracy": "0.54969", "valid_wps": "55648.7", "valid_wpb": "3161.8", "valid_bsz": "44.2", "valid_num_updates": "194675", "valid_best_loss": "2.819"} [2023-11-02 03:46:36,463][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 48 @ 194675 updates [2023-11-02 03:46:36,465][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:46:37,888][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:46:38,870][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 48 @ 194675 updates, score 2.819) (writing took 2.407023528125137 seconds) [2023-11-02 03:46:38,871][fairseq_cli.train][INFO] - end of epoch 48 (average epoch stats below) [2023-11-02 03:46:38,873][train][INFO] - {"epoch": 48, "train_loss": "2.968", "train_ntokens": "3184.02", "train_nsentences": "44.2682", "train_prob_perplexity": "181.648", "train_code_perplexity": "178.75", "train_temp": "0.763", "train_loss_0": "2.855", "train_loss_1": "0.103", "train_loss_2": "0.01", "train_accuracy": "0.51961", "train_wps": "16942", "train_ups": "5.32", "train_wpb": "3184", "train_bsz": "44.3", "train_num_updates": "194675", "train_lr": "5.1981e-05", "train_gnorm": "0.802", "train_loss_scale": "2", "train_train_wall": "721", "train_gb_free": "14.8", "train_wall": "36705"} [2023-11-02 03:46:38,876][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:46:38,896][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 49 [2023-11-02 03:46:39,092][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 03:46:39,135][fairseq.trainer][INFO] - begin training epoch 49 [2023-11-02 03:46:39,136][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 03:47:01,614][train_inner][INFO] - {"epoch": 49, "update": 48.031, "loss": "2.96", "ntokens": "3173", "nsentences": "45.24", "prob_perplexity": "182.867", "code_perplexity": "179.989", "temp": "0.756", "loss_0": "2.847", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.52285", "wps": "9870.2", "ups": "3.11", "wpb": "3173", "bsz": "45.2", "num_updates": "194800", "lr": "5.19494e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "14.9", "wall": "36728"} [2023-11-02 03:47:37,376][train_inner][INFO] - {"epoch": 49, "update": 48.08, "loss": "2.949", "ntokens": "3194", "nsentences": "44.2", "prob_perplexity": "182.528", "code_perplexity": "179.73", "temp": "0.755", "loss_0": "2.837", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.5217", "wps": "17871.9", "ups": "5.6", "wpb": "3194", "bsz": "44.2", "num_updates": "195000", "lr": "5.18987e-05", "gnorm": "0.802", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "36764"} [2023-11-02 03:48:13,755][train_inner][INFO] - {"epoch": 49, "update": 48.129, "loss": "2.903", "ntokens": "3167.36", "nsentences": "46.36", "prob_perplexity": "182.874", "code_perplexity": "180.034", "temp": "0.754", "loss_0": "2.79", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.53213", "wps": "17414.5", "ups": "5.5", "wpb": "3167.4", "bsz": "46.4", "num_updates": "195200", "lr": "5.18481e-05", "gnorm": "0.795", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "36800"} [2023-11-02 03:48:49,328][train_inner][INFO] - {"epoch": 49, "update": 48.179, "loss": "2.895", "ntokens": "3144.76", "nsentences": "46.24", "prob_perplexity": "182.243", "code_perplexity": "179.391", "temp": "0.753", "loss_0": "2.782", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.53327", "wps": "17681.4", "ups": "5.62", "wpb": "3144.8", "bsz": "46.2", "num_updates": "195400", "lr": "5.17975e-05", "gnorm": "0.812", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "36836"} [2023-11-02 03:49:25,595][train_inner][INFO] - {"epoch": 49, "update": 48.228, "loss": "2.969", "ntokens": "3202.84", "nsentences": "44.16", "prob_perplexity": "183.482", "code_perplexity": "180.731", "temp": "0.753", "loss_0": "2.856", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51995", "wps": "17664.1", "ups": "5.52", "wpb": "3202.8", "bsz": "44.2", "num_updates": "195600", "lr": "5.17468e-05", "gnorm": "0.791", "loss_scale": "4", "train_wall": "36", "gb_free": "14.5", "wall": "36872"} [2023-11-02 03:50:01,624][train_inner][INFO] - {"epoch": 49, "update": 48.277, "loss": "2.998", "ntokens": "3185.8", "nsentences": "42.2", "prob_perplexity": "182.183", "code_perplexity": "179.363", "temp": "0.752", "loss_0": "2.885", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.513", "wps": "17685.7", "ups": "5.55", "wpb": "3185.8", "bsz": "42.2", "num_updates": "195800", "lr": "5.16962e-05", "gnorm": "0.797", "loss_scale": "4", "train_wall": "35", "gb_free": "13.7", "wall": "36908"} [2023-11-02 03:50:37,496][train_inner][INFO] - {"epoch": 49, "update": 48.327, "loss": "2.964", "ntokens": "3206.68", "nsentences": "43.92", "prob_perplexity": "182.429", "code_perplexity": "179.65", "temp": "0.751", "loss_0": "2.852", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51946", "wps": "17879.6", "ups": "5.58", "wpb": "3206.7", "bsz": "43.9", "num_updates": "196000", "lr": "5.16456e-05", "gnorm": "0.798", "loss_scale": "4", "train_wall": "35", "gb_free": "13.8", "wall": "36944"} [2023-11-02 03:51:14,052][train_inner][INFO] - {"epoch": 49, "update": 48.376, "loss": "3.007", "ntokens": "3246.92", "nsentences": "43.04", "prob_perplexity": "183.322", "code_perplexity": "180.534", "temp": "0.75", "loss_0": "2.895", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51146", "wps": "17765.1", "ups": "5.47", "wpb": "3246.9", "bsz": "43", "num_updates": "196200", "lr": "5.15949e-05", "gnorm": "0.797", "loss_scale": "4", "train_wall": "36", "gb_free": "13", "wall": "36980"} [2023-11-02 03:51:50,450][train_inner][INFO] - {"epoch": 49, "update": 48.425, "loss": "2.899", "ntokens": "3160.64", "nsentences": "46.16", "prob_perplexity": "183.056", "code_perplexity": "180.241", "temp": "0.749", "loss_0": "2.786", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.533", "wps": "17368.3", "ups": "5.5", "wpb": "3160.6", "bsz": "46.2", "num_updates": "196400", "lr": "5.15443e-05", "gnorm": "0.8", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "37017"} [2023-11-02 03:52:27,140][train_inner][INFO] - {"epoch": 49, "update": 48.475, "loss": "3", "ntokens": "3178.96", "nsentences": "43.16", "prob_perplexity": "182.353", "code_perplexity": "179.522", "temp": "0.749", "loss_0": "2.888", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51403", "wps": "17329.6", "ups": "5.45", "wpb": "3179", "bsz": "43.2", "num_updates": "196600", "lr": "5.14937e-05", "gnorm": "0.8", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "37053"} [2023-11-02 03:53:03,784][train_inner][INFO] - {"epoch": 49, "update": 48.524, "loss": "2.959", "ntokens": "3206.56", "nsentences": "46.56", "prob_perplexity": "183.109", "code_perplexity": "180.271", "temp": "0.748", "loss_0": "2.847", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.52336", "wps": "17502.5", "ups": "5.46", "wpb": "3206.6", "bsz": "46.6", "num_updates": "196800", "lr": "5.1443e-05", "gnorm": "0.797", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "37090"} [2023-11-02 03:53:40,511][train_inner][INFO] - {"epoch": 49, "update": 48.573, "loss": "2.982", "ntokens": "3186.24", "nsentences": "42.88", "prob_perplexity": "183.104", "code_perplexity": "180.162", "temp": "0.747", "loss_0": "2.869", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51537", "wps": "17351.8", "ups": "5.45", "wpb": "3186.2", "bsz": "42.9", "num_updates": "197000", "lr": "5.13924e-05", "gnorm": "0.806", "loss_scale": "4", "train_wall": "36", "gb_free": "14.4", "wall": "37127"} [2023-11-02 03:54:17,654][train_inner][INFO] - {"epoch": 49, "update": 48.623, "loss": "2.962", "ntokens": "3231.28", "nsentences": "44.2", "prob_perplexity": "184.067", "code_perplexity": "181.196", "temp": "0.747", "loss_0": "2.849", "loss_1": "0.103", "loss_2": "0.01", "accuracy": "0.51958", "wps": "17400.1", "ups": "5.38", "wpb": "3231.3", "bsz": "44.2", "num_updates": "197200", "lr": "5.13418e-05", "gnorm": "0.797", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "37164"} [2023-11-02 03:54:53,899][train_inner][INFO] - {"epoch": 49, "update": 48.672, "loss": "3.022", "ntokens": "3216.48", "nsentences": "43.64", "prob_perplexity": "184.203", "code_perplexity": "181.303", "temp": "0.746", "loss_0": "2.91", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51094", "wps": "17749.6", "ups": "5.52", "wpb": "3216.5", "bsz": "43.6", "num_updates": "197400", "lr": "5.12911e-05", "gnorm": "0.792", "loss_scale": "4", "train_wall": "36", "gb_free": "13.3", "wall": "37200"} [2023-11-02 03:55:30,683][train_inner][INFO] - {"epoch": 49, "update": 48.721, "loss": "2.968", "ntokens": "3195.92", "nsentences": "44.12", "prob_perplexity": "183.594", "code_perplexity": "180.713", "temp": "0.745", "loss_0": "2.856", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51964", "wps": "17377.7", "ups": "5.44", "wpb": "3195.9", "bsz": "44.1", "num_updates": "197600", "lr": "5.12405e-05", "gnorm": "0.803", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "37237"} [2023-11-02 03:56:01,683][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 03:56:07,269][train_inner][INFO] - {"epoch": 49, "update": 48.771, "loss": "2.984", "ntokens": "3173.44", "nsentences": "42.44", "prob_perplexity": "183.346", "code_perplexity": "180.432", "temp": "0.744", "loss_0": "2.871", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51533", "wps": "17348.8", "ups": "5.47", "wpb": "3173.4", "bsz": "42.4", "num_updates": "197800", "lr": "5.11899e-05", "gnorm": "0.823", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "37274"} [2023-11-02 03:56:43,667][train_inner][INFO] - {"epoch": 49, "update": 48.82, "loss": "2.95", "ntokens": "3177.56", "nsentences": "43.64", "prob_perplexity": "184.2", "code_perplexity": "181.293", "temp": "0.744", "loss_0": "2.838", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52154", "wps": "17461.3", "ups": "5.5", "wpb": "3177.6", "bsz": "43.6", "num_updates": "198000", "lr": "5.11392e-05", "gnorm": "0.8", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "37310"} [2023-11-02 03:57:19,863][train_inner][INFO] - {"epoch": 49, "update": 48.869, "loss": "2.993", "ntokens": "3184.52", "nsentences": "43.24", "prob_perplexity": "183.972", "code_perplexity": "181.148", "temp": "0.743", "loss_0": "2.881", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51449", "wps": "17597.1", "ups": "5.53", "wpb": "3184.5", "bsz": "43.2", "num_updates": "198200", "lr": "5.10886e-05", "gnorm": "0.803", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "37346"} [2023-11-02 03:57:56,148][train_inner][INFO] - {"epoch": 49, "update": 48.919, "loss": "2.904", "ntokens": "3225.32", "nsentences": "46.48", "prob_perplexity": "184.004", "code_perplexity": "181.264", "temp": "0.742", "loss_0": "2.792", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.53163", "wps": "17778.7", "ups": "5.51", "wpb": "3225.3", "bsz": "46.5", "num_updates": "198400", "lr": "5.1038e-05", "gnorm": "0.789", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "37382"} [2023-11-02 03:58:32,494][train_inner][INFO] - {"epoch": 49, "update": 48.968, "loss": "2.95", "ntokens": "3186", "nsentences": "44.64", "prob_perplexity": "184.326", "code_perplexity": "181.49", "temp": "0.741", "loss_0": "2.838", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52305", "wps": "17532.6", "ups": "5.5", "wpb": "3186", "bsz": "44.6", "num_updates": "198600", "lr": "5.09873e-05", "gnorm": "0.804", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "37419"} [2023-11-02 03:58:56,457][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 03:58:56,458][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:58:56,477][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 57 [2023-11-02 03:59:22,074][valid][INFO] - {"epoch": 49, "valid_loss": "2.809", "valid_ntokens": "3163.33", "valid_nsentences": "44.1685", "valid_prob_perplexity": "184.16", "valid_code_perplexity": "181.659", "valid_temp": "0.74", "valid_loss_0": "2.697", "valid_loss_1": "0.103", "valid_loss_2": "0.009", "valid_accuracy": "0.55085", "valid_wps": "56064.5", "valid_wpb": "3163.3", "valid_bsz": "44.2", "valid_num_updates": "198730", "valid_best_loss": "2.809"} [2023-11-02 03:59:22,076][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 49 @ 198730 updates [2023-11-02 03:59:22,078][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:59:23,520][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 03:59:24,503][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 49 @ 198730 updates, score 2.809) (writing took 2.4271366391330957 seconds) [2023-11-02 03:59:24,504][fairseq_cli.train][INFO] - end of epoch 49 (average epoch stats below) [2023-11-02 03:59:24,505][train][INFO] - {"epoch": 49, "train_loss": "2.963", "train_ntokens": "3193.17", "train_nsentences": "44.2575", "train_prob_perplexity": "183.292", "train_code_perplexity": "180.455", "train_temp": "0.748", "train_loss_0": "2.851", "train_loss_1": "0.103", "train_loss_2": "0.009", "train_accuracy": "0.52032", "train_wps": "16912", "train_ups": "5.3", "train_wpb": "3193.2", "train_bsz": "44.3", "train_num_updates": "198730", "train_lr": "5.09544e-05", "train_gnorm": "0.801", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "13.3", "train_wall": "37471"} [2023-11-02 03:59:24,508][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 03:59:24,526][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 50 [2023-11-02 03:59:24,697][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 03:59:24,743][fairseq.trainer][INFO] - begin training epoch 50 [2023-11-02 03:59:24,744][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 03:59:37,366][train_inner][INFO] - {"epoch": 50, "update": 49.017, "loss": "2.959", "ntokens": "3174.84", "nsentences": "43.08", "prob_perplexity": "184.211", "code_perplexity": "181.385", "temp": "0.741", "loss_0": "2.847", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.5198", "wps": "9788.3", "ups": "3.08", "wpb": "3174.8", "bsz": "43.1", "num_updates": "198800", "lr": "5.09367e-05", "gnorm": "0.801", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "37484"} [2023-11-02 04:00:12,854][train_inner][INFO] - {"epoch": 50, "update": 49.067, "loss": "2.903", "ntokens": "3212.92", "nsentences": "46.84", "prob_perplexity": "183.775", "code_perplexity": "181.002", "temp": "0.74", "loss_0": "2.791", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.53328", "wps": "18108.1", "ups": "5.64", "wpb": "3212.9", "bsz": "46.8", "num_updates": "199000", "lr": "5.08861e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "37519"} [2023-11-02 04:00:48,257][train_inner][INFO] - {"epoch": 50, "update": 49.116, "loss": "2.967", "ntokens": "3118.24", "nsentences": "43.72", "prob_perplexity": "183.707", "code_perplexity": "180.874", "temp": "0.739", "loss_0": "2.855", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52029", "wps": "17616.7", "ups": "5.65", "wpb": "3118.2", "bsz": "43.7", "num_updates": "199200", "lr": "5.08354e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "37554"} [2023-11-02 04:01:23,818][train_inner][INFO] - {"epoch": 50, "update": 49.165, "loss": "2.956", "ntokens": "3183.24", "nsentences": "43.6", "prob_perplexity": "184.07", "code_perplexity": "181.217", "temp": "0.738", "loss_0": "2.844", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52106", "wps": "17904", "ups": "5.62", "wpb": "3183.2", "bsz": "43.6", "num_updates": "199400", "lr": "5.07848e-05", "gnorm": "0.806", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "37590"} [2023-11-02 04:01:59,477][train_inner][INFO] - {"epoch": 50, "update": 49.214, "loss": "2.946", "ntokens": "3209.44", "nsentences": "45.76", "prob_perplexity": "184.906", "code_perplexity": "182.126", "temp": "0.738", "loss_0": "2.834", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52386", "wps": "18001.9", "ups": "5.61", "wpb": "3209.4", "bsz": "45.8", "num_updates": "199600", "lr": "5.07342e-05", "gnorm": "0.797", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "37626"} [2023-11-02 04:02:35,347][train_inner][INFO] - {"epoch": 50, "update": 49.264, "loss": "3.027", "ntokens": "3201.92", "nsentences": "43.16", "prob_perplexity": "184.065", "code_perplexity": "181.203", "temp": "0.737", "loss_0": "2.915", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.50896", "wps": "17854.3", "ups": "5.58", "wpb": "3201.9", "bsz": "43.2", "num_updates": "199800", "lr": "5.06835e-05", "gnorm": "0.807", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "37662"} [2023-11-02 04:03:11,043][train_inner][INFO] - {"epoch": 50, "update": 49.313, "loss": "2.965", "ntokens": "3193", "nsentences": "45.16", "prob_perplexity": "184.656", "code_perplexity": "181.889", "temp": "0.736", "loss_0": "2.853", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52102", "wps": "17890.9", "ups": "5.6", "wpb": "3193", "bsz": "45.2", "num_updates": "200000", "lr": "5.06329e-05", "gnorm": "0.803", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "37697"} [2023-11-02 04:03:11,044][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 04:03:11,046][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:03:11,065][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 58 [2023-11-02 04:03:36,784][valid][INFO] - {"epoch": 50, "valid_loss": "2.799", "valid_ntokens": "3157.8", "valid_nsentences": "44.1685", "valid_prob_perplexity": "184.222", "valid_code_perplexity": "181.689", "valid_temp": "0.736", "valid_loss_0": "2.688", "valid_loss_1": "0.103", "valid_loss_2": "0.009", "valid_accuracy": "0.55264", "valid_wps": "55704.3", "valid_wpb": "3157.8", "valid_bsz": "44.2", "valid_num_updates": "200000", "valid_best_loss": "2.799"} [2023-11-02 04:03:36,786][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 50 @ 200000 updates [2023-11-02 04:03:36,788][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_50_200000.pt [2023-11-02 04:03:38,136][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_50_200000.pt [2023-11-02 04:03:40,093][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_50_200000.pt (epoch 50 @ 200000 updates, score 2.799) (writing took 3.307486356701702 seconds) [2023-11-02 04:04:16,358][train_inner][INFO] - {"epoch": 50, "update": 49.362, "loss": "2.924", "ntokens": "3188.84", "nsentences": "46.28", "prob_perplexity": "185.218", "code_perplexity": "182.401", "temp": "0.735", "loss_0": "2.812", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.5289", "wps": "9764.8", "ups": "3.06", "wpb": "3188.8", "bsz": "46.3", "num_updates": "200200", "lr": "5.05823e-05", "gnorm": "0.802", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "37763"} [2023-11-02 04:04:53,118][train_inner][INFO] - {"epoch": 50, "update": 49.412, "loss": "2.962", "ntokens": "3246.24", "nsentences": "43.52", "prob_perplexity": "185.079", "code_perplexity": "182.358", "temp": "0.735", "loss_0": "2.85", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51892", "wps": "17662.6", "ups": "5.44", "wpb": "3246.2", "bsz": "43.5", "num_updates": "200400", "lr": "5.05316e-05", "gnorm": "0.797", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "37799"} [2023-11-02 04:05:28,997][train_inner][INFO] - {"epoch": 50, "update": 49.461, "loss": "2.918", "ntokens": "3180.76", "nsentences": "46.16", "prob_perplexity": "184.37", "code_perplexity": "181.54", "temp": "0.734", "loss_0": "2.806", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52977", "wps": "17731.8", "ups": "5.57", "wpb": "3180.8", "bsz": "46.2", "num_updates": "200600", "lr": "5.0481e-05", "gnorm": "0.805", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "37835"} [2023-11-02 04:06:04,930][train_inner][INFO] - {"epoch": 50, "update": 49.51, "loss": "2.969", "ntokens": "3161.8", "nsentences": "44.16", "prob_perplexity": "184.272", "code_perplexity": "181.459", "temp": "0.733", "loss_0": "2.857", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52006", "wps": "17599.6", "ups": "5.57", "wpb": "3161.8", "bsz": "44.2", "num_updates": "200800", "lr": "5.04304e-05", "gnorm": "0.809", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "37871"} [2023-11-02 04:06:41,595][train_inner][INFO] - {"epoch": 50, "update": 49.56, "loss": "2.93", "ntokens": "3143.88", "nsentences": "43.88", "prob_perplexity": "184.049", "code_perplexity": "181.164", "temp": "0.732", "loss_0": "2.818", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52574", "wps": "17150.3", "ups": "5.46", "wpb": "3143.9", "bsz": "43.9", "num_updates": "201000", "lr": "5.03797e-05", "gnorm": "0.809", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "37908"} [2023-11-02 04:07:18,108][train_inner][INFO] - {"epoch": 50, "update": 49.609, "loss": "3.008", "ntokens": "3225.6", "nsentences": "42.32", "prob_perplexity": "184.879", "code_perplexity": "181.999", "temp": "0.732", "loss_0": "2.896", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.50947", "wps": "17669.3", "ups": "5.48", "wpb": "3225.6", "bsz": "42.3", "num_updates": "201200", "lr": "5.03291e-05", "gnorm": "0.814", "loss_scale": "2", "train_wall": "36", "gb_free": "11.7", "wall": "37944"} [2023-11-02 04:07:54,633][train_inner][INFO] - {"epoch": 50, "update": 49.658, "loss": "2.985", "ntokens": "3211.12", "nsentences": "42.8", "prob_perplexity": "186.128", "code_perplexity": "183.308", "temp": "0.731", "loss_0": "2.874", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51472", "wps": "17584", "ups": "5.48", "wpb": "3211.1", "bsz": "42.8", "num_updates": "201400", "lr": "5.02785e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "37981"} [2023-11-02 04:08:31,766][train_inner][INFO] - {"epoch": 50, "update": 49.708, "loss": "2.985", "ntokens": "3222.28", "nsentences": "42.8", "prob_perplexity": "184.358", "code_perplexity": "181.596", "temp": "0.73", "loss_0": "2.873", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51505", "wps": "17356.5", "ups": "5.39", "wpb": "3222.3", "bsz": "42.8", "num_updates": "201600", "lr": "5.02278e-05", "gnorm": "0.808", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "38018"} [2023-11-02 04:09:08,030][train_inner][INFO] - {"epoch": 50, "update": 49.757, "loss": "2.948", "ntokens": "3205.52", "nsentences": "45.24", "prob_perplexity": "185.529", "code_perplexity": "182.677", "temp": "0.73", "loss_0": "2.837", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.5229", "wps": "17679.8", "ups": "5.52", "wpb": "3205.5", "bsz": "45.2", "num_updates": "201800", "lr": "5.01772e-05", "gnorm": "0.797", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "38054"} [2023-11-02 04:09:44,478][train_inner][INFO] - {"epoch": 50, "update": 49.806, "loss": "2.969", "ntokens": "3203.88", "nsentences": "42.56", "prob_perplexity": "185.175", "code_perplexity": "182.38", "temp": "0.729", "loss_0": "2.858", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51718", "wps": "17582", "ups": "5.49", "wpb": "3203.9", "bsz": "42.6", "num_updates": "202000", "lr": "5.01266e-05", "gnorm": "0.801", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "38091"} [2023-11-02 04:10:20,977][train_inner][INFO] - {"epoch": 50, "update": 49.856, "loss": "2.994", "ntokens": "3214.08", "nsentences": "43.28", "prob_perplexity": "185.373", "code_perplexity": "182.461", "temp": "0.728", "loss_0": "2.883", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51393", "wps": "17612.6", "ups": "5.48", "wpb": "3214.1", "bsz": "43.3", "num_updates": "202200", "lr": "5.00759e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "38127"} [2023-11-02 04:10:56,738][train_inner][INFO] - {"epoch": 50, "update": 49.905, "loss": "2.984", "ntokens": "3164.56", "nsentences": "43.76", "prob_perplexity": "184.599", "code_perplexity": "181.671", "temp": "0.727", "loss_0": "2.872", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.51791", "wps": "17699.4", "ups": "5.59", "wpb": "3164.6", "bsz": "43.8", "num_updates": "202400", "lr": "5.00253e-05", "gnorm": "0.814", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "38163"} [2023-11-02 04:11:32,656][train_inner][INFO] - {"epoch": 50, "update": 49.954, "loss": "2.954", "ntokens": "3191.64", "nsentences": "43.88", "prob_perplexity": "184.742", "code_perplexity": "181.862", "temp": "0.727", "loss_0": "2.842", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.52124", "wps": "17773", "ups": "5.57", "wpb": "3191.6", "bsz": "43.9", "num_updates": "202600", "lr": "4.99747e-05", "gnorm": "0.805", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "38199"} [2023-11-02 04:12:06,201][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 04:12:06,203][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:12:06,221][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 59 [2023-11-02 04:12:32,018][valid][INFO] - {"epoch": 50, "valid_loss": "2.801", "valid_ntokens": "3160.25", "valid_nsentences": "44.1685", "valid_prob_perplexity": "183.152", "valid_code_perplexity": "180.626", "valid_temp": "0.726", "valid_loss_0": "2.689", "valid_loss_1": "0.103", "valid_loss_2": "0.009", "valid_accuracy": "0.55322", "valid_wps": "55540.5", "valid_wpb": "3160.2", "valid_bsz": "44.2", "valid_num_updates": "202786", "valid_best_loss": "2.799"} [2023-11-02 04:12:32,020][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 50 @ 202786 updates [2023-11-02 04:12:32,022][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 04:12:33,524][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 04:12:33,575][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 50 @ 202786 updates, score 2.801) (writing took 1.5545232468284667 seconds) [2023-11-02 04:12:33,576][fairseq_cli.train][INFO] - end of epoch 50 (average epoch stats below) [2023-11-02 04:12:33,578][train][INFO] - {"epoch": 50, "train_loss": "2.958", "train_ntokens": "3193.18", "train_nsentences": "44.2682", "train_prob_perplexity": "184.679", "train_code_perplexity": "181.854", "train_temp": "0.733", "train_loss_0": "2.847", "train_loss_1": "0.103", "train_loss_2": "0.009", "train_accuracy": "0.52109", "train_wps": "16413.7", "train_ups": "5.14", "train_wpb": "3193.2", "train_bsz": "44.3", "train_num_updates": "202786", "train_lr": "4.99276e-05", "train_gnorm": "0.804", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "12.8", "train_wall": "38260"} [2023-11-02 04:12:33,581][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:12:33,600][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 51 [2023-11-02 04:12:33,775][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 04:12:33,820][fairseq.trainer][INFO] - begin training epoch 51 [2023-11-02 04:12:33,821][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 04:12:36,421][train_inner][INFO] - {"epoch": 51, "update": 50.003, "loss": "2.932", "ntokens": "3205.2", "nsentences": "45.28", "prob_perplexity": "184.312", "code_perplexity": "181.586", "temp": "0.726", "loss_0": "2.821", "loss_1": "0.103", "loss_2": "0.009", "accuracy": "0.5269", "wps": "10053.5", "ups": "3.14", "wpb": "3205.2", "bsz": "45.3", "num_updates": "202800", "lr": "4.99241e-05", "gnorm": "0.807", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "38263"} [2023-11-02 04:13:11,791][train_inner][INFO] - {"epoch": 51, "update": 50.053, "loss": "3.022", "ntokens": "3214.24", "nsentences": "42.8", "prob_perplexity": "185.31", "code_perplexity": "182.541", "temp": "0.725", "loss_0": "2.911", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.50963", "wps": "18175.9", "ups": "5.65", "wpb": "3214.2", "bsz": "42.8", "num_updates": "203000", "lr": "4.98734e-05", "gnorm": "0.81", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "38298"} [2023-11-02 04:13:47,906][train_inner][INFO] - {"epoch": 51, "update": 50.102, "loss": "2.943", "ntokens": "3174.76", "nsentences": "43.92", "prob_perplexity": "185.703", "code_perplexity": "182.813", "temp": "0.724", "loss_0": "2.832", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52343", "wps": "17583", "ups": "5.54", "wpb": "3174.8", "bsz": "43.9", "num_updates": "203200", "lr": "4.98228e-05", "gnorm": "0.812", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "38334"} [2023-11-02 04:14:23,526][train_inner][INFO] - {"epoch": 51, "update": 50.151, "loss": "2.956", "ntokens": "3178.88", "nsentences": "44.2", "prob_perplexity": "185.617", "code_perplexity": "182.835", "temp": "0.724", "loss_0": "2.845", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52174", "wps": "17858.1", "ups": "5.62", "wpb": "3178.9", "bsz": "44.2", "num_updates": "203400", "lr": "4.97722e-05", "gnorm": "0.805", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "38370"} [2023-11-02 04:14:59,491][train_inner][INFO] - {"epoch": 51, "update": 50.201, "loss": "2.967", "ntokens": "3206.04", "nsentences": "43.04", "prob_perplexity": "185.715", "code_perplexity": "182.841", "temp": "0.723", "loss_0": "2.855", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51786", "wps": "17830", "ups": "5.56", "wpb": "3206", "bsz": "43", "num_updates": "203600", "lr": "4.97215e-05", "gnorm": "0.811", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "38406"} [2023-11-02 04:15:35,379][train_inner][INFO] - {"epoch": 51, "update": 50.25, "loss": "2.97", "ntokens": "3196.92", "nsentences": "43.12", "prob_perplexity": "185.204", "code_perplexity": "182.363", "temp": "0.722", "loss_0": "2.859", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51739", "wps": "17817.1", "ups": "5.57", "wpb": "3196.9", "bsz": "43.1", "num_updates": "203800", "lr": "4.96709e-05", "gnorm": "0.825", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "38442"} [2023-11-02 04:16:11,950][train_inner][INFO] - {"epoch": 51, "update": 50.299, "loss": "2.935", "ntokens": "3185.8", "nsentences": "45.52", "prob_perplexity": "185.417", "code_perplexity": "182.555", "temp": "0.722", "loss_0": "2.823", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52605", "wps": "17423.7", "ups": "5.47", "wpb": "3185.8", "bsz": "45.5", "num_updates": "204000", "lr": "4.96203e-05", "gnorm": "0.798", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "38478"} [2023-11-02 04:16:47,844][train_inner][INFO] - {"epoch": 51, "update": 50.349, "loss": "3.021", "ntokens": "3238.28", "nsentences": "43", "prob_perplexity": "185.899", "code_perplexity": "183.088", "temp": "0.721", "loss_0": "2.91", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.50864", "wps": "18044.7", "ups": "5.57", "wpb": "3238.3", "bsz": "43", "num_updates": "204200", "lr": "4.95696e-05", "gnorm": "0.8", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "38514"} [2023-11-02 04:17:24,528][train_inner][INFO] - {"epoch": 51, "update": 50.398, "loss": "2.924", "ntokens": "3170.4", "nsentences": "45.52", "prob_perplexity": "185.864", "code_perplexity": "183.01", "temp": "0.72", "loss_0": "2.812", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.5286", "wps": "17285.7", "ups": "5.45", "wpb": "3170.4", "bsz": "45.5", "num_updates": "204400", "lr": "4.9519e-05", "gnorm": "0.815", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "38551"} [2023-11-02 04:18:01,277][train_inner][INFO] - {"epoch": 51, "update": 50.447, "loss": "2.92", "ntokens": "3187.2", "nsentences": "43.84", "prob_perplexity": "186.068", "code_perplexity": "183.305", "temp": "0.719", "loss_0": "2.809", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52769", "wps": "17346.9", "ups": "5.44", "wpb": "3187.2", "bsz": "43.8", "num_updates": "204600", "lr": "4.94684e-05", "gnorm": "0.804", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "38588"} [2023-11-02 04:18:37,205][train_inner][INFO] - {"epoch": 51, "update": 50.497, "loss": "2.957", "ntokens": "3224.72", "nsentences": "45.52", "prob_perplexity": "185.869", "code_perplexity": "183.005", "temp": "0.719", "loss_0": "2.846", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52162", "wps": "17952.4", "ups": "5.57", "wpb": "3224.7", "bsz": "45.5", "num_updates": "204800", "lr": "4.94177e-05", "gnorm": "0.8", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "38623"} [2023-11-02 04:19:13,560][train_inner][INFO] - {"epoch": 51, "update": 50.546, "loss": "2.955", "ntokens": "3180.92", "nsentences": "42.96", "prob_perplexity": "185.55", "code_perplexity": "182.724", "temp": "0.718", "loss_0": "2.844", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52068", "wps": "17500", "ups": "5.5", "wpb": "3180.9", "bsz": "43", "num_updates": "205000", "lr": "4.93671e-05", "gnorm": "0.811", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "38660"} [2023-11-02 04:19:49,979][train_inner][INFO] - {"epoch": 51, "update": 50.595, "loss": "2.958", "ntokens": "3186.6", "nsentences": "43.04", "prob_perplexity": "185.688", "code_perplexity": "182.812", "temp": "0.717", "loss_0": "2.846", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51979", "wps": "17501", "ups": "5.49", "wpb": "3186.6", "bsz": "43", "num_updates": "205200", "lr": "4.93165e-05", "gnorm": "0.819", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "38696"} [2023-11-02 04:20:26,573][train_inner][INFO] - {"epoch": 51, "update": 50.644, "loss": "2.947", "ntokens": "3240.04", "nsentences": "45.48", "prob_perplexity": "187.32", "code_perplexity": "184.451", "temp": "0.717", "loss_0": "2.836", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52391", "wps": "17708.8", "ups": "5.47", "wpb": "3240", "bsz": "45.5", "num_updates": "205400", "lr": "4.92658e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "38733"} [2023-11-02 04:21:03,154][train_inner][INFO] - {"epoch": 51, "update": 50.694, "loss": "2.915", "ntokens": "3183.72", "nsentences": "44.36", "prob_perplexity": "186.001", "code_perplexity": "183.143", "temp": "0.716", "loss_0": "2.804", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52825", "wps": "17407.3", "ups": "5.47", "wpb": "3183.7", "bsz": "44.4", "num_updates": "205600", "lr": "4.92152e-05", "gnorm": "0.808", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "38769"} [2023-11-02 04:21:38,814][train_inner][INFO] - {"epoch": 51, "update": 50.743, "loss": "2.888", "ntokens": "3160.48", "nsentences": "45.36", "prob_perplexity": "185.788", "code_perplexity": "182.985", "temp": "0.715", "loss_0": "2.777", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53364", "wps": "17726.6", "ups": "5.61", "wpb": "3160.5", "bsz": "45.4", "num_updates": "205800", "lr": "4.91646e-05", "gnorm": "0.806", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "38805"} [2023-11-02 04:22:14,683][train_inner][INFO] - {"epoch": 51, "update": 50.792, "loss": "2.927", "ntokens": "3215.48", "nsentences": "45.28", "prob_perplexity": "186.763", "code_perplexity": "183.954", "temp": "0.714", "loss_0": "2.816", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52729", "wps": "17930", "ups": "5.58", "wpb": "3215.5", "bsz": "45.3", "num_updates": "206000", "lr": "4.91139e-05", "gnorm": "0.802", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "38841"} [2023-11-02 04:22:50,705][train_inner][INFO] - {"epoch": 51, "update": 50.842, "loss": "2.968", "ntokens": "3179.4", "nsentences": "43.36", "prob_perplexity": "186.666", "code_perplexity": "183.855", "temp": "0.714", "loss_0": "2.857", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51869", "wps": "17654.1", "ups": "5.55", "wpb": "3179.4", "bsz": "43.4", "num_updates": "206200", "lr": "4.90633e-05", "gnorm": "0.804", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "38877"} [2023-11-02 04:23:26,984][train_inner][INFO] - {"epoch": 51, "update": 50.891, "loss": "2.944", "ntokens": "3219.4", "nsentences": "44.24", "prob_perplexity": "186.245", "code_perplexity": "183.452", "temp": "0.713", "loss_0": "2.833", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52284", "wps": "17748.9", "ups": "5.51", "wpb": "3219.4", "bsz": "44.2", "num_updates": "206400", "lr": "4.90127e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "38913"} [2023-11-02 04:24:02,928][train_inner][INFO] - {"epoch": 51, "update": 50.94, "loss": "2.932", "ntokens": "3176.24", "nsentences": "44.76", "prob_perplexity": "185.783", "code_perplexity": "182.988", "temp": "0.712", "loss_0": "2.821", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52641", "wps": "17674.4", "ups": "5.56", "wpb": "3176.2", "bsz": "44.8", "num_updates": "206600", "lr": "4.8962e-05", "gnorm": "0.804", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "38949"} [2023-11-02 04:24:39,246][train_inner][INFO] - {"epoch": 51, "update": 50.99, "loss": "2.94", "ntokens": "3209.4", "nsentences": "45.32", "prob_perplexity": "187.698", "code_perplexity": "184.837", "temp": "0.712", "loss_0": "2.829", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.525", "wps": "17674.8", "ups": "5.51", "wpb": "3209.4", "bsz": "45.3", "num_updates": "206800", "lr": "4.89114e-05", "gnorm": "0.806", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "38985"} [2023-11-02 04:24:46,628][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 04:24:46,629][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:24:46,647][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 60 [2023-11-02 04:25:12,134][valid][INFO] - {"epoch": 51, "valid_loss": "2.804", "valid_ntokens": "3178.61", "valid_nsentences": "44.1685", "valid_prob_perplexity": "185.345", "valid_code_perplexity": "182.759", "valid_temp": "0.711", "valid_loss_0": "2.693", "valid_loss_1": "0.102", "valid_loss_2": "0.009", "valid_accuracy": "0.55134", "valid_wps": "56558.2", "valid_wpb": "3178.6", "valid_bsz": "44.2", "valid_num_updates": "206842", "valid_best_loss": "2.799"} [2023-11-02 04:25:12,136][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 51 @ 206842 updates [2023-11-02 04:25:12,137][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 04:25:13,551][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 04:25:13,607][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 51 @ 206842 updates, score 2.804) (writing took 1.4708828520961106 seconds) [2023-11-02 04:25:13,607][fairseq_cli.train][INFO] - end of epoch 51 (average epoch stats below) [2023-11-02 04:25:13,609][train][INFO] - {"epoch": 51, "train_loss": "2.949", "train_ntokens": "3196.02", "train_nsentences": "44.2682", "train_prob_perplexity": "186.003", "train_code_perplexity": "183.174", "train_temp": "0.718", "train_loss_0": "2.838", "train_loss_1": "0.102", "train_loss_2": "0.009", "train_accuracy": "0.52251", "train_wps": "17056", "train_ups": "5.34", "train_wpb": "3196", "train_bsz": "44.3", "train_num_updates": "206842", "train_lr": "4.89008e-05", "train_gnorm": "0.808", "train_loss_scale": "2", "train_train_wall": "720", "train_gb_free": "13.1", "train_wall": "39020"} [2023-11-02 04:25:13,612][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:25:13,631][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 52 [2023-11-02 04:25:13,802][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 04:25:13,848][fairseq.trainer][INFO] - begin training epoch 52 [2023-11-02 04:25:13,849][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 04:25:41,970][train_inner][INFO] - {"epoch": 52, "update": 51.039, "loss": "2.966", "ntokens": "3184.04", "nsentences": "43.44", "prob_perplexity": "185.988", "code_perplexity": "183.227", "temp": "0.711", "loss_0": "2.855", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51886", "wps": "10152.9", "ups": "3.19", "wpb": "3184", "bsz": "43.4", "num_updates": "207000", "lr": "4.88608e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "35", "gb_free": "14.9", "wall": "39048"} [2023-11-02 04:26:18,329][train_inner][INFO] - {"epoch": 52, "update": 51.088, "loss": "2.902", "ntokens": "3186.76", "nsentences": "45.04", "prob_perplexity": "186.803", "code_perplexity": "183.955", "temp": "0.71", "loss_0": "2.791", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53057", "wps": "17531", "ups": "5.5", "wpb": "3186.8", "bsz": "45", "num_updates": "207200", "lr": "4.88101e-05", "gnorm": "0.809", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "39085"} [2023-11-02 04:26:54,353][train_inner][INFO] - {"epoch": 52, "update": 51.138, "loss": "2.929", "ntokens": "3190.92", "nsentences": "44.64", "prob_perplexity": "187.31", "code_perplexity": "184.425", "temp": "0.709", "loss_0": "2.818", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52624", "wps": "17716.4", "ups": "5.55", "wpb": "3190.9", "bsz": "44.6", "num_updates": "207400", "lr": "4.87595e-05", "gnorm": "0.799", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "39121"} [2023-11-02 04:27:30,323][train_inner][INFO] - {"epoch": 52, "update": 51.187, "loss": "2.879", "ntokens": "3158.2", "nsentences": "45.8", "prob_perplexity": "186.89", "code_perplexity": "184.009", "temp": "0.709", "loss_0": "2.768", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53613", "wps": "17561.1", "ups": "5.56", "wpb": "3158.2", "bsz": "45.8", "num_updates": "207600", "lr": "4.87089e-05", "gnorm": "0.8", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "39157"} [2023-11-02 04:28:06,144][train_inner][INFO] - {"epoch": 52, "update": 51.236, "loss": "2.946", "ntokens": "3194", "nsentences": "43.72", "prob_perplexity": "187.156", "code_perplexity": "184.371", "temp": "0.708", "loss_0": "2.835", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52267", "wps": "17842.6", "ups": "5.59", "wpb": "3194", "bsz": "43.7", "num_updates": "207800", "lr": "4.86582e-05", "gnorm": "0.81", "loss_scale": "2", "train_wall": "35", "gb_free": "12.4", "wall": "39192"} [2023-11-02 04:28:41,955][train_inner][INFO] - {"epoch": 52, "update": 51.286, "loss": "2.937", "ntokens": "3182.24", "nsentences": "43.88", "prob_perplexity": "186.064", "code_perplexity": "183.228", "temp": "0.707", "loss_0": "2.826", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52367", "wps": "17773.5", "ups": "5.59", "wpb": "3182.2", "bsz": "43.9", "num_updates": "208000", "lr": "4.86076e-05", "gnorm": "0.808", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "39228"} [2023-11-02 04:29:18,183][train_inner][INFO] - {"epoch": 52, "update": 51.335, "loss": "2.93", "ntokens": "3233.08", "nsentences": "44.64", "prob_perplexity": "186.272", "code_perplexity": "183.445", "temp": "0.707", "loss_0": "2.819", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.5251", "wps": "17849.4", "ups": "5.52", "wpb": "3233.1", "bsz": "44.6", "num_updates": "208200", "lr": "4.8557e-05", "gnorm": "0.811", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "39264"} [2023-11-02 04:29:54,471][train_inner][INFO] - {"epoch": 52, "update": 51.384, "loss": "2.992", "ntokens": "3193.92", "nsentences": "43", "prob_perplexity": "186.557", "code_perplexity": "183.669", "temp": "0.706", "loss_0": "2.881", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51444", "wps": "17604.5", "ups": "5.51", "wpb": "3193.9", "bsz": "43", "num_updates": "208400", "lr": "4.85063e-05", "gnorm": "0.815", "loss_scale": "2", "train_wall": "36", "gb_free": "15.6", "wall": "39301"} [2023-11-02 04:30:30,660][train_inner][INFO] - {"epoch": 52, "update": 51.433, "loss": "2.895", "ntokens": "3166.88", "nsentences": "46.32", "prob_perplexity": "186.556", "code_perplexity": "183.708", "temp": "0.705", "loss_0": "2.784", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53357", "wps": "17502.7", "ups": "5.53", "wpb": "3166.9", "bsz": "46.3", "num_updates": "208600", "lr": "4.84557e-05", "gnorm": "0.808", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "39337"} [2023-11-02 04:31:06,158][train_inner][INFO] - {"epoch": 52, "update": 51.483, "loss": "2.818", "ntokens": "3129.76", "nsentences": "48.8", "prob_perplexity": "187.35", "code_perplexity": "184.515", "temp": "0.704", "loss_0": "2.707", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.54895", "wps": "17635.1", "ups": "5.63", "wpb": "3129.8", "bsz": "48.8", "num_updates": "208800", "lr": "4.84051e-05", "gnorm": "0.804", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "39372"} [2023-11-02 04:31:42,211][train_inner][INFO] - {"epoch": 52, "update": 51.532, "loss": "2.926", "ntokens": "3158.36", "nsentences": "44.36", "prob_perplexity": "185.993", "code_perplexity": "183.155", "temp": "0.704", "loss_0": "2.815", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52696", "wps": "17521.7", "ups": "5.55", "wpb": "3158.4", "bsz": "44.4", "num_updates": "209000", "lr": "4.83544e-05", "gnorm": "0.816", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "39408"} [2023-11-02 04:32:18,268][train_inner][INFO] - {"epoch": 52, "update": 51.581, "loss": "3.051", "ntokens": "3248.96", "nsentences": "40", "prob_perplexity": "187.19", "code_perplexity": "184.346", "temp": "0.703", "loss_0": "2.941", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.50119", "wps": "18022.4", "ups": "5.55", "wpb": "3249", "bsz": "40", "num_updates": "209200", "lr": "4.83038e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "39444"} [2023-11-02 04:32:53,981][train_inner][INFO] - {"epoch": 52, "update": 51.631, "loss": "2.887", "ntokens": "3163.16", "nsentences": "46.28", "prob_perplexity": "187.112", "code_perplexity": "184.24", "temp": "0.702", "loss_0": "2.776", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53569", "wps": "17715.2", "ups": "5.6", "wpb": "3163.2", "bsz": "46.3", "num_updates": "209400", "lr": "4.82532e-05", "gnorm": "0.817", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "39480"} [2023-11-02 04:33:29,826][train_inner][INFO] - {"epoch": 52, "update": 51.68, "loss": "2.968", "ntokens": "3205.28", "nsentences": "44.28", "prob_perplexity": "187.384", "code_perplexity": "184.525", "temp": "0.702", "loss_0": "2.857", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52039", "wps": "17885.1", "ups": "5.58", "wpb": "3205.3", "bsz": "44.3", "num_updates": "209600", "lr": "4.82025e-05", "gnorm": "0.816", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "39516"} [2023-11-02 04:34:05,934][train_inner][INFO] - {"epoch": 52, "update": 51.729, "loss": "2.948", "ntokens": "3175.88", "nsentences": "43.8", "prob_perplexity": "187.706", "code_perplexity": "184.775", "temp": "0.701", "loss_0": "2.837", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52292", "wps": "17592.1", "ups": "5.54", "wpb": "3175.9", "bsz": "43.8", "num_updates": "209800", "lr": "4.81519e-05", "gnorm": "0.81", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "39552"} [2023-11-02 04:34:42,045][train_inner][INFO] - {"epoch": 52, "update": 51.779, "loss": "2.977", "ntokens": "3223.08", "nsentences": "44.24", "prob_perplexity": "188.018", "code_perplexity": "185.233", "temp": "0.7", "loss_0": "2.866", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51743", "wps": "17852", "ups": "5.54", "wpb": "3223.1", "bsz": "44.2", "num_updates": "210000", "lr": "4.81013e-05", "gnorm": "0.816", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "39588"} [2023-11-02 04:35:17,996][train_inner][INFO] - {"epoch": 52, "update": 51.828, "loss": "2.881", "ntokens": "3179", "nsentences": "45.4", "prob_perplexity": "187.726", "code_perplexity": "184.934", "temp": "0.7", "loss_0": "2.771", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53441", "wps": "17686.1", "ups": "5.56", "wpb": "3179", "bsz": "45.4", "num_updates": "210200", "lr": "4.80506e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "35", "gb_free": "11.5", "wall": "39624"} [2023-11-02 04:35:54,644][train_inner][INFO] - {"epoch": 52, "update": 51.877, "loss": "2.983", "ntokens": "3226.24", "nsentences": "44.36", "prob_perplexity": "187.082", "code_perplexity": "184.33", "temp": "0.699", "loss_0": "2.873", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51726", "wps": "17607.6", "ups": "5.46", "wpb": "3226.2", "bsz": "44.4", "num_updates": "210400", "lr": "4.8e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "39661"} [2023-11-02 04:36:31,413][train_inner][INFO] - {"epoch": 52, "update": 51.927, "loss": "2.861", "ntokens": "3195.04", "nsentences": "47.04", "prob_perplexity": "187.504", "code_perplexity": "184.775", "temp": "0.698", "loss_0": "2.75", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53922", "wps": "17380.2", "ups": "5.44", "wpb": "3195", "bsz": "47", "num_updates": "210600", "lr": "4.79494e-05", "gnorm": "0.809", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "39698"} [2023-11-02 04:37:08,545][train_inner][INFO] - {"epoch": 52, "update": 51.976, "loss": "3.041", "ntokens": "3233.48", "nsentences": "39.48", "prob_perplexity": "187.914", "code_perplexity": "185.049", "temp": "0.697", "loss_0": "2.931", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.50217", "wps": "17417.4", "ups": "5.39", "wpb": "3233.5", "bsz": "39.5", "num_updates": "210800", "lr": "4.78987e-05", "gnorm": "0.821", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "39735"} [2023-11-02 04:37:26,297][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 04:37:26,299][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:37:26,317][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 61 [2023-11-02 04:37:52,143][valid][INFO] - {"epoch": 52, "valid_loss": "2.814", "valid_ntokens": "3175.59", "valid_nsentences": "44.1685", "valid_prob_perplexity": "186.881", "valid_code_perplexity": "184.277", "valid_temp": "0.697", "valid_loss_0": "2.703", "valid_loss_1": "0.102", "valid_loss_2": "0.008", "valid_accuracy": "0.5498", "valid_wps": "55775", "valid_wpb": "3175.6", "valid_bsz": "44.2", "valid_num_updates": "210898", "valid_best_loss": "2.799"} [2023-11-02 04:37:52,145][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 52 @ 210898 updates [2023-11-02 04:37:52,148][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 04:37:53,585][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 04:37:53,641][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 52 @ 210898 updates, score 2.814) (writing took 1.495574225205928 seconds) [2023-11-02 04:37:53,642][fairseq_cli.train][INFO] - end of epoch 52 (average epoch stats below) [2023-11-02 04:37:53,644][train][INFO] - {"epoch": 52, "train_loss": "2.939", "train_ntokens": "3191.72", "train_nsentences": "44.2682", "train_prob_perplexity": "187.04", "train_code_perplexity": "184.206", "train_temp": "0.704", "train_loss_0": "2.828", "train_loss_1": "0.102", "train_loss_2": "0.009", "train_accuracy": "0.52419", "train_wps": "17033", "train_ups": "5.34", "train_wpb": "3191.7", "train_bsz": "44.3", "train_num_updates": "210898", "train_lr": "4.78739e-05", "train_gnorm": "0.811", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.7", "train_wall": "39780"} [2023-11-02 04:37:53,646][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:37:53,667][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 53 [2023-11-02 04:37:53,833][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 04:37:53,880][fairseq.trainer][INFO] - begin training epoch 53 [2023-11-02 04:37:53,881][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 04:38:12,033][train_inner][INFO] - {"epoch": 53, "update": 52.025, "loss": "2.986", "ntokens": "3184.2", "nsentences": "41", "prob_perplexity": "187.521", "code_perplexity": "184.715", "temp": "0.697", "loss_0": "2.875", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51292", "wps": "10031.1", "ups": "3.15", "wpb": "3184.2", "bsz": "41", "num_updates": "211000", "lr": "4.78481e-05", "gnorm": "0.827", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "39798"} [2023-11-02 04:38:47,813][train_inner][INFO] - {"epoch": 53, "update": 52.074, "loss": "2.961", "ntokens": "3226.72", "nsentences": "43.52", "prob_perplexity": "187.909", "code_perplexity": "185.096", "temp": "0.696", "loss_0": "2.851", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51899", "wps": "18038", "ups": "5.59", "wpb": "3226.7", "bsz": "43.5", "num_updates": "211200", "lr": "4.77975e-05", "gnorm": "0.819", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "39834"} [2023-11-02 04:39:23,291][train_inner][INFO] - {"epoch": 53, "update": 52.124, "loss": "2.91", "ntokens": "3194.2", "nsentences": "44.12", "prob_perplexity": "187.41", "code_perplexity": "184.559", "temp": "0.695", "loss_0": "2.8", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52738", "wps": "18007.3", "ups": "5.64", "wpb": "3194.2", "bsz": "44.1", "num_updates": "211400", "lr": "4.77468e-05", "gnorm": "0.816", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "39870"} [2023-11-02 04:39:59,240][train_inner][INFO] - {"epoch": 53, "update": 52.173, "loss": "2.989", "ntokens": "3243.16", "nsentences": "42.8", "prob_perplexity": "188.832", "code_perplexity": "185.972", "temp": "0.695", "loss_0": "2.878", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51419", "wps": "18044.4", "ups": "5.56", "wpb": "3243.2", "bsz": "42.8", "num_updates": "211600", "lr": "4.76962e-05", "gnorm": "0.808", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "39905"} [2023-11-02 04:40:35,330][train_inner][INFO] - {"epoch": 53, "update": 52.222, "loss": "2.861", "ntokens": "3151.8", "nsentences": "45.44", "prob_perplexity": "187.689", "code_perplexity": "184.881", "temp": "0.694", "loss_0": "2.751", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53804", "wps": "17467.2", "ups": "5.54", "wpb": "3151.8", "bsz": "45.4", "num_updates": "211800", "lr": "4.76456e-05", "gnorm": "0.816", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "39942"} [2023-11-02 04:41:11,324][train_inner][INFO] - {"epoch": 53, "update": 52.272, "loss": "2.88", "ntokens": "3174.96", "nsentences": "46.08", "prob_perplexity": "188.087", "code_perplexity": "185.311", "temp": "0.693", "loss_0": "2.77", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53627", "wps": "17643.1", "ups": "5.56", "wpb": "3175", "bsz": "46.1", "num_updates": "212000", "lr": "4.75949e-05", "gnorm": "0.809", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "39978"} [2023-11-02 04:41:47,433][train_inner][INFO] - {"epoch": 53, "update": 52.321, "loss": "2.946", "ntokens": "3177.48", "nsentences": "43.8", "prob_perplexity": "187.021", "code_perplexity": "184.289", "temp": "0.693", "loss_0": "2.835", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52191", "wps": "17608.5", "ups": "5.54", "wpb": "3177.5", "bsz": "43.8", "num_updates": "212200", "lr": "4.75443e-05", "gnorm": "0.827", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "40014"} [2023-11-02 04:42:23,385][train_inner][INFO] - {"epoch": 53, "update": 52.37, "loss": "2.848", "ntokens": "3161.88", "nsentences": "46.28", "prob_perplexity": "188.196", "code_perplexity": "185.484", "temp": "0.692", "loss_0": "2.737", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.54103", "wps": "17590.3", "ups": "5.56", "wpb": "3161.9", "bsz": "46.3", "num_updates": "212400", "lr": "4.74937e-05", "gnorm": "0.815", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "40050"} [2023-11-02 04:42:59,833][train_inner][INFO] - {"epoch": 53, "update": 52.42, "loss": "2.936", "ntokens": "3196.56", "nsentences": "45.08", "prob_perplexity": "188.182", "code_perplexity": "185.389", "temp": "0.691", "loss_0": "2.825", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52483", "wps": "17541.3", "ups": "5.49", "wpb": "3196.6", "bsz": "45.1", "num_updates": "212600", "lr": "4.7443e-05", "gnorm": "0.823", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "40086"} [2023-11-02 04:43:36,190][train_inner][INFO] - {"epoch": 53, "update": 52.469, "loss": "3", "ntokens": "3231.64", "nsentences": "42.24", "prob_perplexity": "189.06", "code_perplexity": "186.283", "temp": "0.69", "loss_0": "2.889", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51225", "wps": "17778.7", "ups": "5.5", "wpb": "3231.6", "bsz": "42.2", "num_updates": "212800", "lr": "4.73924e-05", "gnorm": "0.82", "loss_scale": "2", "train_wall": "36", "gb_free": "12.7", "wall": "40122"} [2023-11-02 04:44:13,263][train_inner][INFO] - {"epoch": 53, "update": 52.518, "loss": "2.977", "ntokens": "3218.8", "nsentences": "43.88", "prob_perplexity": "188.015", "code_perplexity": "185.194", "temp": "0.69", "loss_0": "2.866", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51786", "wps": "17365.7", "ups": "5.4", "wpb": "3218.8", "bsz": "43.9", "num_updates": "213000", "lr": "4.73418e-05", "gnorm": "0.812", "loss_scale": "2", "train_wall": "36", "gb_free": "12.4", "wall": "40159"} [2023-11-02 04:44:49,926][train_inner][INFO] - {"epoch": 53, "update": 52.568, "loss": "2.976", "ntokens": "3190.36", "nsentences": "42.68", "prob_perplexity": "188.155", "code_perplexity": "185.313", "temp": "0.689", "loss_0": "2.866", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51583", "wps": "17404.8", "ups": "5.46", "wpb": "3190.4", "bsz": "42.7", "num_updates": "213200", "lr": "4.72911e-05", "gnorm": "0.822", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "40196"} [2023-11-02 04:45:26,569][train_inner][INFO] - {"epoch": 53, "update": 52.617, "loss": "2.913", "ntokens": "3162.32", "nsentences": "44.68", "prob_perplexity": "187.713", "code_perplexity": "184.887", "temp": "0.688", "loss_0": "2.803", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52898", "wps": "17260.8", "ups": "5.46", "wpb": "3162.3", "bsz": "44.7", "num_updates": "213400", "lr": "4.72405e-05", "gnorm": "0.814", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "40233"} [2023-11-02 04:46:03,370][train_inner][INFO] - {"epoch": 53, "update": 52.666, "loss": "2.877", "ntokens": "3183.92", "nsentences": "45.84", "prob_perplexity": "188.106", "code_perplexity": "185.337", "temp": "0.688", "loss_0": "2.767", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53617", "wps": "17304.8", "ups": "5.44", "wpb": "3183.9", "bsz": "45.8", "num_updates": "213600", "lr": "4.71899e-05", "gnorm": "0.813", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "40270"} [2023-11-02 04:46:40,052][train_inner][INFO] - {"epoch": 53, "update": 52.715, "loss": "2.899", "ntokens": "3183.76", "nsentences": "46.08", "prob_perplexity": "187.663", "code_perplexity": "184.851", "temp": "0.687", "loss_0": "2.788", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53306", "wps": "17359.7", "ups": "5.45", "wpb": "3183.8", "bsz": "46.1", "num_updates": "213800", "lr": "4.71392e-05", "gnorm": "0.815", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "40306"} [2023-11-02 04:47:16,721][train_inner][INFO] - {"epoch": 53, "update": 52.765, "loss": "2.954", "ntokens": "3194.72", "nsentences": "44.04", "prob_perplexity": "187.821", "code_perplexity": "185.033", "temp": "0.686", "loss_0": "2.843", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52147", "wps": "17425.8", "ups": "5.45", "wpb": "3194.7", "bsz": "44", "num_updates": "214000", "lr": "4.70886e-05", "gnorm": "0.82", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "40343"} [2023-11-02 04:47:53,813][train_inner][INFO] - {"epoch": 53, "update": 52.814, "loss": "2.942", "ntokens": "3203.92", "nsentences": "43.36", "prob_perplexity": "188.502", "code_perplexity": "185.801", "temp": "0.686", "loss_0": "2.832", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52268", "wps": "17276.3", "ups": "5.39", "wpb": "3203.9", "bsz": "43.4", "num_updates": "214200", "lr": "4.7038e-05", "gnorm": "0.808", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "40380"} [2023-11-02 04:48:30,664][train_inner][INFO] - {"epoch": 53, "update": 52.863, "loss": "2.927", "ntokens": "3178.28", "nsentences": "44.36", "prob_perplexity": "187.92", "code_perplexity": "185.123", "temp": "0.685", "loss_0": "2.816", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52619", "wps": "17250.5", "ups": "5.43", "wpb": "3178.3", "bsz": "44.4", "num_updates": "214400", "lr": "4.69873e-05", "gnorm": "0.82", "loss_scale": "4", "train_wall": "36", "gb_free": "13.1", "wall": "40417"} [2023-11-02 04:49:07,904][train_inner][INFO] - {"epoch": 53, "update": 52.913, "loss": "2.985", "ntokens": "3248.16", "nsentences": "43.32", "prob_perplexity": "187.963", "code_perplexity": "185.207", "temp": "0.684", "loss_0": "2.874", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51475", "wps": "17445.7", "ups": "5.37", "wpb": "3248.2", "bsz": "43.3", "num_updates": "214600", "lr": "4.69367e-05", "gnorm": "0.816", "loss_scale": "4", "train_wall": "37", "gb_free": "13.2", "wall": "40454"} [2023-11-02 04:49:44,796][train_inner][INFO] - {"epoch": 53, "update": 52.962, "loss": "2.876", "ntokens": "3178.92", "nsentences": "43.88", "prob_perplexity": "188.227", "code_perplexity": "185.455", "temp": "0.684", "loss_0": "2.765", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53405", "wps": "17234.9", "ups": "5.42", "wpb": "3178.9", "bsz": "43.9", "num_updates": "214800", "lr": "4.68861e-05", "gnorm": "0.813", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "40491"} [2023-11-02 04:50:12,803][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 04:50:12,804][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:50:12,822][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 62 [2023-11-02 04:50:38,389][valid][INFO] - {"epoch": 53, "valid_loss": "2.792", "valid_ntokens": "3162.91", "valid_nsentences": "44.1685", "valid_prob_perplexity": "186.081", "valid_code_perplexity": "183.534", "valid_temp": "0.683", "valid_loss_0": "2.681", "valid_loss_1": "0.102", "valid_loss_2": "0.009", "valid_accuracy": "0.5535", "valid_wps": "56151.1", "valid_wpb": "3162.9", "valid_bsz": "44.2", "valid_num_updates": "214954", "valid_best_loss": "2.792"} [2023-11-02 04:50:38,390][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 53 @ 214954 updates [2023-11-02 04:50:38,392][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 04:50:39,814][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 04:50:40,762][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 53 @ 214954 updates, score 2.792) (writing took 2.3719723829999566 seconds) [2023-11-02 04:50:40,763][fairseq_cli.train][INFO] - end of epoch 53 (average epoch stats below) [2023-11-02 04:50:40,765][train][INFO] - {"epoch": 53, "train_loss": "2.928", "train_ntokens": "3193.85", "train_nsentences": "44.2682", "train_prob_perplexity": "188.029", "train_code_perplexity": "185.242", "train_temp": "0.69", "train_loss_0": "2.818", "train_loss_1": "0.102", "train_loss_2": "0.009", "train_accuracy": "0.52569", "train_wps": "16886.9", "train_ups": "5.29", "train_wpb": "3193.9", "train_bsz": "44.3", "train_num_updates": "214954", "train_lr": "4.68471e-05", "train_gnorm": "0.816", "train_loss_scale": "4", "train_train_wall": "726", "train_gb_free": "13.6", "train_wall": "40547"} [2023-11-02 04:50:40,768][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 04:50:40,791][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 54 [2023-11-02 04:50:40,990][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 04:50:41,039][fairseq.trainer][INFO] - begin training epoch 54 [2023-11-02 04:50:41,040][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 04:50:49,769][train_inner][INFO] - {"epoch": 54, "update": 53.011, "loss": "2.932", "ntokens": "3188.44", "nsentences": "43.44", "prob_perplexity": "188.095", "code_perplexity": "185.363", "temp": "0.683", "loss_0": "2.822", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52393", "wps": "9815", "ups": "3.08", "wpb": "3188.4", "bsz": "43.4", "num_updates": "215000", "lr": "4.68354e-05", "gnorm": "0.816", "loss_scale": "4", "train_wall": "36", "gb_free": "13.1", "wall": "40556"} [2023-11-02 04:51:25,907][train_inner][INFO] - {"epoch": 54, "update": 53.061, "loss": "2.91", "ntokens": "3184.68", "nsentences": "44.24", "prob_perplexity": "188.959", "code_perplexity": "186.158", "temp": "0.682", "loss_0": "2.8", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52907", "wps": "17626.1", "ups": "5.53", "wpb": "3184.7", "bsz": "44.2", "num_updates": "215200", "lr": "4.67848e-05", "gnorm": "0.817", "loss_scale": "4", "train_wall": "35", "gb_free": "13.7", "wall": "40592"} [2023-11-02 04:52:02,165][train_inner][INFO] - {"epoch": 54, "update": 53.11, "loss": "2.877", "ntokens": "3199.4", "nsentences": "47.48", "prob_perplexity": "188.749", "code_perplexity": "185.935", "temp": "0.682", "loss_0": "2.766", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53719", "wps": "17649.3", "ups": "5.52", "wpb": "3199.4", "bsz": "47.5", "num_updates": "215400", "lr": "4.67342e-05", "gnorm": "0.808", "loss_scale": "4", "train_wall": "36", "gb_free": "12.8", "wall": "40628"} [2023-11-02 04:52:38,354][train_inner][INFO] - {"epoch": 54, "update": 53.159, "loss": "2.935", "ntokens": "3209.36", "nsentences": "43", "prob_perplexity": "189.055", "code_perplexity": "186.247", "temp": "0.681", "loss_0": "2.825", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52244", "wps": "17737.5", "ups": "5.53", "wpb": "3209.4", "bsz": "43", "num_updates": "215600", "lr": "4.66835e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "40665"} [2023-11-02 04:53:14,804][train_inner][INFO] - {"epoch": 54, "update": 53.209, "loss": "2.906", "ntokens": "3198.2", "nsentences": "44.88", "prob_perplexity": "189.08", "code_perplexity": "186.254", "temp": "0.68", "loss_0": "2.796", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52964", "wps": "17549.3", "ups": "5.49", "wpb": "3198.2", "bsz": "44.9", "num_updates": "215800", "lr": "4.66329e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "40701"} [2023-11-02 04:53:50,874][train_inner][INFO] - {"epoch": 54, "update": 53.258, "loss": "2.958", "ntokens": "3176.12", "nsentences": "42.24", "prob_perplexity": "188.941", "code_perplexity": "186.177", "temp": "0.68", "loss_0": "2.848", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51871", "wps": "17612.4", "ups": "5.55", "wpb": "3176.1", "bsz": "42.2", "num_updates": "216000", "lr": "4.65823e-05", "gnorm": "0.822", "loss_scale": "4", "train_wall": "35", "gb_free": "14.7", "wall": "40737"} [2023-11-02 04:54:27,156][train_inner][INFO] - {"epoch": 54, "update": 53.307, "loss": "2.9", "ntokens": "3187.6", "nsentences": "43.32", "prob_perplexity": "188.698", "code_perplexity": "185.981", "temp": "0.679", "loss_0": "2.79", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52834", "wps": "17572", "ups": "5.51", "wpb": "3187.6", "bsz": "43.3", "num_updates": "216200", "lr": "4.65316e-05", "gnorm": "0.826", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "40773"} [2023-11-02 04:55:03,689][train_inner][INFO] - {"epoch": 54, "update": 53.357, "loss": "2.921", "ntokens": "3169.56", "nsentences": "44.64", "prob_perplexity": "188.156", "code_perplexity": "185.402", "temp": "0.678", "loss_0": "2.811", "loss_1": "0.102", "loss_2": "0.008", "accuracy": "0.52663", "wps": "17363.4", "ups": "5.48", "wpb": "3169.6", "bsz": "44.6", "num_updates": "216400", "lr": "4.6481e-05", "gnorm": "0.829", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "40810"} [2023-11-02 04:55:39,881][train_inner][INFO] - {"epoch": 54, "update": 53.406, "loss": "2.903", "ntokens": "3193.2", "nsentences": "44.44", "prob_perplexity": "188.507", "code_perplexity": "185.727", "temp": "0.677", "loss_0": "2.793", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52977", "wps": "17646.9", "ups": "5.53", "wpb": "3193.2", "bsz": "44.4", "num_updates": "216600", "lr": "4.64304e-05", "gnorm": "0.811", "loss_scale": "4", "train_wall": "36", "gb_free": "13.9", "wall": "40846"} [2023-11-02 04:56:15,783][train_inner][INFO] - {"epoch": 54, "update": 53.455, "loss": "2.908", "ntokens": "3175", "nsentences": "44", "prob_perplexity": "189.365", "code_perplexity": "186.523", "temp": "0.677", "loss_0": "2.798", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52951", "wps": "17688.1", "ups": "5.57", "wpb": "3175", "bsz": "44", "num_updates": "216800", "lr": "4.63797e-05", "gnorm": "0.825", "loss_scale": "4", "train_wall": "35", "gb_free": "13.3", "wall": "40882"} [2023-11-02 04:56:52,313][train_inner][INFO] - {"epoch": 54, "update": 53.504, "loss": "2.884", "ntokens": "3203", "nsentences": "45.72", "prob_perplexity": "189.27", "code_perplexity": "186.475", "temp": "0.676", "loss_0": "2.773", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.53392", "wps": "17537.2", "ups": "5.48", "wpb": "3203", "bsz": "45.7", "num_updates": "217000", "lr": "4.63291e-05", "gnorm": "0.81", "loss_scale": "4", "train_wall": "36", "gb_free": "12.9", "wall": "40919"} [2023-11-02 04:57:28,816][train_inner][INFO] - {"epoch": 54, "update": 53.554, "loss": "2.986", "ntokens": "3223.96", "nsentences": "43.08", "prob_perplexity": "188.77", "code_perplexity": "185.938", "temp": "0.675", "loss_0": "2.876", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51527", "wps": "17664.9", "ups": "5.48", "wpb": "3224", "bsz": "43.1", "num_updates": "217200", "lr": "4.62785e-05", "gnorm": "0.821", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "40955"} [2023-11-02 04:58:05,046][train_inner][INFO] - {"epoch": 54, "update": 53.603, "loss": "2.945", "ntokens": "3180", "nsentences": "42.36", "prob_perplexity": "188.727", "code_perplexity": "185.91", "temp": "0.675", "loss_0": "2.835", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52092", "wps": "17556.4", "ups": "5.52", "wpb": "3180", "bsz": "42.4", "num_updates": "217400", "lr": "4.62278e-05", "gnorm": "0.834", "loss_scale": "4", "train_wall": "36", "gb_free": "12.9", "wall": "40991"} [2023-11-02 04:58:41,796][train_inner][INFO] - {"epoch": 54, "update": 53.652, "loss": "2.887", "ntokens": "3170.64", "nsentences": "45.36", "prob_perplexity": "190.234", "code_perplexity": "187.475", "temp": "0.674", "loss_0": "2.777", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53386", "wps": "17255.8", "ups": "5.44", "wpb": "3170.6", "bsz": "45.4", "num_updates": "217600", "lr": "4.61772e-05", "gnorm": "0.816", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "41028"} [2023-11-02 04:59:17,918][train_inner][INFO] - {"epoch": 54, "update": 53.702, "loss": "2.909", "ntokens": "3183.04", "nsentences": "43.36", "prob_perplexity": "189.262", "code_perplexity": "186.543", "temp": "0.673", "loss_0": "2.799", "loss_1": "0.102", "loss_2": "0.008", "accuracy": "0.52787", "wps": "17624.9", "ups": "5.54", "wpb": "3183", "bsz": "43.4", "num_updates": "217800", "lr": "4.61266e-05", "gnorm": "0.824", "loss_scale": "4", "train_wall": "36", "gb_free": "14.3", "wall": "41064"} [2023-11-02 04:59:53,792][train_inner][INFO] - {"epoch": 54, "update": 53.751, "loss": "2.98", "ntokens": "3191.96", "nsentences": "42.4", "prob_perplexity": "188.48", "code_perplexity": "185.68", "temp": "0.673", "loss_0": "2.87", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.51634", "wps": "17796.8", "ups": "5.58", "wpb": "3192", "bsz": "42.4", "num_updates": "218000", "lr": "4.60759e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "35", "gb_free": "13.8", "wall": "41100"} [2023-11-02 05:00:30,503][train_inner][INFO] - {"epoch": 54, "update": 53.8, "loss": "2.945", "ntokens": "3225.92", "nsentences": "43.16", "prob_perplexity": "188.948", "code_perplexity": "186.186", "temp": "0.672", "loss_0": "2.835", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52129", "wps": "17575.5", "ups": "5.45", "wpb": "3225.9", "bsz": "43.2", "num_updates": "218200", "lr": "4.60253e-05", "gnorm": "0.822", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "41137"} [2023-11-02 05:01:06,855][train_inner][INFO] - {"epoch": 54, "update": 53.85, "loss": "2.924", "ntokens": "3195.16", "nsentences": "44.44", "prob_perplexity": "189.387", "code_perplexity": "186.581", "temp": "0.671", "loss_0": "2.814", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52667", "wps": "17580.6", "ups": "5.5", "wpb": "3195.2", "bsz": "44.4", "num_updates": "218400", "lr": "4.59747e-05", "gnorm": "0.825", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "41173"} [2023-11-02 05:01:43,105][train_inner][INFO] - {"epoch": 54, "update": 53.899, "loss": "2.803", "ntokens": "3214", "nsentences": "50", "prob_perplexity": "190.556", "code_perplexity": "187.72", "temp": "0.671", "loss_0": "2.693", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.55099", "wps": "17733.4", "ups": "5.52", "wpb": "3214", "bsz": "50", "num_updates": "218600", "lr": "4.59241e-05", "gnorm": "0.8", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "41209"} [2023-11-02 05:02:19,069][train_inner][INFO] - {"epoch": 54, "update": 53.948, "loss": "2.903", "ntokens": "3204.72", "nsentences": "44.36", "prob_perplexity": "189.136", "code_perplexity": "186.303", "temp": "0.67", "loss_0": "2.793", "loss_1": "0.102", "loss_2": "0.009", "accuracy": "0.52926", "wps": "17822.7", "ups": "5.56", "wpb": "3204.7", "bsz": "44.4", "num_updates": "218800", "lr": "4.58734e-05", "gnorm": "0.823", "loss_scale": "4", "train_wall": "35", "gb_free": "14.3", "wall": "41245"} [2023-11-02 05:02:55,062][train_inner][INFO] - {"epoch": 54, "update": 53.998, "loss": "2.901", "ntokens": "3164.44", "nsentences": "44.88", "prob_perplexity": "189.615", "code_perplexity": "186.759", "temp": "0.669", "loss_0": "2.791", "loss_1": "0.101", "loss_2": "0.009", "accuracy": "0.53122", "wps": "17584.7", "ups": "5.56", "wpb": "3164.4", "bsz": "44.9", "num_updates": "219000", "lr": "4.58228e-05", "gnorm": "0.82", "loss_scale": "4", "train_wall": "35", "gb_free": "12.8", "wall": "41281"} [2023-11-02 05:02:56,806][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 05:02:56,808][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:02:56,827][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 63 [2023-11-02 05:03:22,202][valid][INFO] - {"epoch": 54, "valid_loss": "2.761", "valid_ntokens": "3156.95", "valid_nsentences": "44.1685", "valid_prob_perplexity": "188.326", "valid_code_perplexity": "185.541", "valid_temp": "0.669", "valid_loss_0": "2.65", "valid_loss_1": "0.102", "valid_loss_2": "0.009", "valid_accuracy": "0.55868", "valid_wps": "56466.5", "valid_wpb": "3156.9", "valid_bsz": "44.2", "valid_num_updates": "219010", "valid_best_loss": "2.761"} [2023-11-02 05:03:22,204][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 54 @ 219010 updates [2023-11-02 05:03:22,206][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 05:03:23,641][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 05:03:24,625][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 54 @ 219010 updates, score 2.761) (writing took 2.421055030077696 seconds) [2023-11-02 05:03:24,626][fairseq_cli.train][INFO] - end of epoch 54 (average epoch stats below) [2023-11-02 05:03:24,628][train][INFO] - {"epoch": 54, "train_loss": "2.917", "train_ntokens": "3192.94", "train_nsentences": "44.2682", "train_prob_perplexity": "189.076", "train_code_perplexity": "186.28", "train_temp": "0.676", "train_loss_0": "2.807", "train_loss_1": "0.102", "train_loss_2": "0.009", "train_accuracy": "0.52742", "train_wps": "16954.1", "train_ups": "5.31", "train_wpb": "3192.9", "train_bsz": "44.3", "train_num_updates": "219010", "train_lr": "4.58203e-05", "train_gnorm": "0.821", "train_loss_scale": "4", "train_train_wall": "723", "train_gb_free": "15.1", "train_wall": "41311"} [2023-11-02 05:03:24,631][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:03:24,651][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 55 [2023-11-02 05:03:24,846][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 05:03:24,894][fairseq.trainer][INFO] - begin training epoch 55 [2023-11-02 05:03:24,895][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 05:03:58,943][train_inner][INFO] - {"epoch": 55, "update": 54.047, "loss": "2.902", "ntokens": "3152.84", "nsentences": "44.12", "prob_perplexity": "189.918", "code_perplexity": "187.022", "temp": "0.669", "loss_0": "2.792", "loss_1": "0.101", "loss_2": "0.009", "accuracy": "0.52976", "wps": "9871.4", "ups": "3.13", "wpb": "3152.8", "bsz": "44.1", "num_updates": "219200", "lr": "4.57722e-05", "gnorm": "0.829", "loss_scale": "4", "train_wall": "35", "gb_free": "13.1", "wall": "41345"} [2023-11-02 05:04:35,179][train_inner][INFO] - {"epoch": 55, "update": 54.096, "loss": "2.946", "ntokens": "3238.8", "nsentences": "44.48", "prob_perplexity": "190.15", "code_perplexity": "187.369", "temp": "0.668", "loss_0": "2.836", "loss_1": "0.101", "loss_2": "0.009", "accuracy": "0.5223", "wps": "17876.9", "ups": "5.52", "wpb": "3238.8", "bsz": "44.5", "num_updates": "219400", "lr": "4.57215e-05", "gnorm": "0.811", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "41381"} [2023-11-02 05:05:11,100][train_inner][INFO] - {"epoch": 55, "update": 54.145, "loss": "2.932", "ntokens": "3208.36", "nsentences": "43.56", "prob_perplexity": "189.897", "code_perplexity": "187.101", "temp": "0.667", "loss_0": "2.822", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52388", "wps": "17864.8", "ups": "5.57", "wpb": "3208.4", "bsz": "43.6", "num_updates": "219600", "lr": "4.56709e-05", "gnorm": "0.824", "loss_scale": "4", "train_wall": "35", "gb_free": "13.4", "wall": "41417"} [2023-11-02 05:05:47,072][train_inner][INFO] - {"epoch": 55, "update": 54.195, "loss": "2.987", "ntokens": "3239.88", "nsentences": "41.28", "prob_perplexity": "189.854", "code_perplexity": "187.109", "temp": "0.667", "loss_0": "2.877", "loss_1": "0.101", "loss_2": "0.009", "accuracy": "0.51284", "wps": "18014.9", "ups": "5.56", "wpb": "3239.9", "bsz": "41.3", "num_updates": "219800", "lr": "4.56203e-05", "gnorm": "0.817", "loss_scale": "4", "train_wall": "35", "gb_free": "15.9", "wall": "41453"} [2023-11-02 05:06:23,429][train_inner][INFO] - {"epoch": 55, "update": 54.244, "loss": "2.838", "ntokens": "3162.4", "nsentences": "46.96", "prob_perplexity": "189.482", "code_perplexity": "186.669", "temp": "0.666", "loss_0": "2.729", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54305", "wps": "17397.2", "ups": "5.5", "wpb": "3162.4", "bsz": "47", "num_updates": "220000", "lr": "4.55696e-05", "gnorm": "0.824", "loss_scale": "4", "train_wall": "36", "gb_free": "16.1", "wall": "41490"} [2023-11-02 05:06:59,502][train_inner][INFO] - {"epoch": 55, "update": 54.293, "loss": "2.825", "ntokens": "3137.16", "nsentences": "46", "prob_perplexity": "189.533", "code_perplexity": "186.759", "temp": "0.665", "loss_0": "2.715", "loss_1": "0.102", "loss_2": "0.008", "accuracy": "0.54491", "wps": "17394.3", "ups": "5.54", "wpb": "3137.2", "bsz": "46", "num_updates": "220200", "lr": "4.5519e-05", "gnorm": "0.82", "loss_scale": "4", "train_wall": "35", "gb_free": "14.3", "wall": "41526"} [2023-11-02 05:07:35,563][train_inner][INFO] - {"epoch": 55, "update": 54.343, "loss": "2.96", "ntokens": "3183.6", "nsentences": "41.84", "prob_perplexity": "189.189", "code_perplexity": "186.451", "temp": "0.665", "loss_0": "2.85", "loss_1": "0.102", "loss_2": "0.008", "accuracy": "0.51909", "wps": "17658.1", "ups": "5.55", "wpb": "3183.6", "bsz": "41.8", "num_updates": "220400", "lr": "4.54684e-05", "gnorm": "0.825", "loss_scale": "4", "train_wall": "35", "gb_free": "15.5", "wall": "41562"} [2023-11-02 05:08:12,239][train_inner][INFO] - {"epoch": 55, "update": 54.392, "loss": "2.868", "ntokens": "3208.48", "nsentences": "45.96", "prob_perplexity": "190.133", "code_perplexity": "187.383", "temp": "0.664", "loss_0": "2.758", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53662", "wps": "17497.4", "ups": "5.45", "wpb": "3208.5", "bsz": "46", "num_updates": "220600", "lr": "4.54177e-05", "gnorm": "0.828", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "41598"} [2023-11-02 05:08:47,881][train_inner][INFO] - {"epoch": 55, "update": 54.441, "loss": "2.876", "ntokens": "3146.2", "nsentences": "44.36", "prob_perplexity": "189.44", "code_perplexity": "186.661", "temp": "0.663", "loss_0": "2.766", "loss_1": "0.102", "loss_2": "0.008", "accuracy": "0.5343", "wps": "17655.2", "ups": "5.61", "wpb": "3146.2", "bsz": "44.4", "num_updates": "220800", "lr": "4.53671e-05", "gnorm": "0.832", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "41634"} [2023-11-02 05:09:24,162][train_inner][INFO] - {"epoch": 55, "update": 54.491, "loss": "2.938", "ntokens": "3169.88", "nsentences": "44.32", "prob_perplexity": "189.099", "code_perplexity": "186.306", "temp": "0.663", "loss_0": "2.828", "loss_1": "0.102", "loss_2": "0.008", "accuracy": "0.52516", "wps": "17475.2", "ups": "5.51", "wpb": "3169.9", "bsz": "44.3", "num_updates": "221000", "lr": "4.53165e-05", "gnorm": "0.839", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "41670"} [2023-11-02 05:10:00,892][train_inner][INFO] - {"epoch": 55, "update": 54.54, "loss": "2.883", "ntokens": "3169.28", "nsentences": "44.56", "prob_perplexity": "190.025", "code_perplexity": "187.268", "temp": "0.662", "loss_0": "2.773", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53316", "wps": "17258.3", "ups": "5.45", "wpb": "3169.3", "bsz": "44.6", "num_updates": "221200", "lr": "4.52658e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "41707"} [2023-11-02 05:10:37,246][train_inner][INFO] - {"epoch": 55, "update": 54.589, "loss": "2.972", "ntokens": "3233.4", "nsentences": "43.4", "prob_perplexity": "189.734", "code_perplexity": "186.904", "temp": "0.661", "loss_0": "2.862", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51716", "wps": "17789.2", "ups": "5.5", "wpb": "3233.4", "bsz": "43.4", "num_updates": "221400", "lr": "4.52152e-05", "gnorm": "0.819", "loss_scale": "4", "train_wall": "36", "gb_free": "14.9", "wall": "41743"} [2023-11-02 05:11:13,453][train_inner][INFO] - {"epoch": 55, "update": 54.639, "loss": "2.931", "ntokens": "3198.92", "nsentences": "41.96", "prob_perplexity": "189.725", "code_perplexity": "187.003", "temp": "0.661", "loss_0": "2.821", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52324", "wps": "17671.4", "ups": "5.52", "wpb": "3198.9", "bsz": "42", "num_updates": "221600", "lr": "4.51646e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "36", "gb_free": "14.7", "wall": "41780"} [2023-11-02 05:11:49,788][train_inner][INFO] - {"epoch": 55, "update": 54.688, "loss": "2.864", "ntokens": "3143.52", "nsentences": "45.08", "prob_perplexity": "190.163", "code_perplexity": "187.295", "temp": "0.66", "loss_0": "2.754", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53839", "wps": "17303.8", "ups": "5.5", "wpb": "3143.5", "bsz": "45.1", "num_updates": "221800", "lr": "4.51139e-05", "gnorm": "0.833", "loss_scale": "4", "train_wall": "36", "gb_free": "14.7", "wall": "41816"} [2023-11-02 05:12:26,343][train_inner][INFO] - {"epoch": 55, "update": 54.737, "loss": "2.863", "ntokens": "3188.92", "nsentences": "45.56", "prob_perplexity": "190.982", "code_perplexity": "188.22", "temp": "0.659", "loss_0": "2.753", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53829", "wps": "17448.4", "ups": "5.47", "wpb": "3188.9", "bsz": "45.6", "num_updates": "222000", "lr": "4.50633e-05", "gnorm": "0.818", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "41853"} [2023-11-02 05:13:02,764][train_inner][INFO] - {"epoch": 55, "update": 54.786, "loss": "2.884", "ntokens": "3212.48", "nsentences": "46.12", "prob_perplexity": "190.173", "code_perplexity": "187.355", "temp": "0.659", "loss_0": "2.774", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53486", "wps": "17642.4", "ups": "5.49", "wpb": "3212.5", "bsz": "46.1", "num_updates": "222200", "lr": "4.50127e-05", "gnorm": "0.816", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "41889"} [2023-11-02 05:13:39,372][train_inner][INFO] - {"epoch": 55, "update": 54.836, "loss": "2.925", "ntokens": "3211.36", "nsentences": "43.88", "prob_perplexity": "191.155", "code_perplexity": "188.387", "temp": "0.658", "loss_0": "2.815", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52625", "wps": "17545.4", "ups": "5.46", "wpb": "3211.4", "bsz": "43.9", "num_updates": "222400", "lr": "4.4962e-05", "gnorm": "0.822", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "41926"} [2023-11-02 05:14:15,790][train_inner][INFO] - {"epoch": 55, "update": 54.885, "loss": "2.952", "ntokens": "3216.24", "nsentences": "42.48", "prob_perplexity": "190.531", "code_perplexity": "187.764", "temp": "0.657", "loss_0": "2.842", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52012", "wps": "17664", "ups": "5.49", "wpb": "3216.2", "bsz": "42.5", "num_updates": "222600", "lr": "4.49114e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "36", "gb_free": "12.5", "wall": "41962"} [2023-11-02 05:14:52,071][train_inner][INFO] - {"epoch": 55, "update": 54.934, "loss": "2.979", "ntokens": "3170.72", "nsentences": "39.96", "prob_perplexity": "189.964", "code_perplexity": "187.153", "temp": "0.657", "loss_0": "2.869", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51397", "wps": "17479.6", "ups": "5.51", "wpb": "3170.7", "bsz": "40", "num_updates": "222800", "lr": "4.48608e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "36", "gb_free": "15.1", "wall": "41998"} [2023-11-02 05:15:28,212][train_inner][INFO] - {"epoch": 55, "update": 54.984, "loss": "2.84", "ntokens": "3194.24", "nsentences": "48.92", "prob_perplexity": "190.375", "code_perplexity": "187.57", "temp": "0.656", "loss_0": "2.731", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54478", "wps": "17677.6", "ups": "5.53", "wpb": "3194.2", "bsz": "48.9", "num_updates": "223000", "lr": "4.48101e-05", "gnorm": "0.824", "loss_scale": "4", "train_wall": "36", "gb_free": "14.4", "wall": "42034"} [2023-11-02 05:15:40,094][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 05:15:40,095][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:15:40,115][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 64 [2023-11-02 05:16:05,907][valid][INFO] - {"epoch": 55, "valid_loss": "2.768", "valid_ntokens": "3161.92", "valid_nsentences": "44.1685", "valid_prob_perplexity": "191.263", "valid_code_perplexity": "188.648", "valid_temp": "0.656", "valid_loss_0": "2.659", "valid_loss_1": "0.101", "valid_loss_2": "0.008", "valid_accuracy": "0.55749", "valid_wps": "55551.5", "valid_wpb": "3161.9", "valid_bsz": "44.2", "valid_num_updates": "223066", "valid_best_loss": "2.761"} [2023-11-02 05:16:05,909][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 55 @ 223066 updates [2023-11-02 05:16:05,911][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 05:16:07,386][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 05:16:07,437][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 55 @ 223066 updates, score 2.768) (writing took 1.5277310879901052 seconds) [2023-11-02 05:16:07,438][fairseq_cli.train][INFO] - end of epoch 55 (average epoch stats below) [2023-11-02 05:16:07,440][train][INFO] - {"epoch": 55, "train_loss": "2.908", "train_ntokens": "3187.97", "train_nsentences": "44.2682", "train_prob_perplexity": "189.977", "train_code_perplexity": "187.189", "train_temp": "0.662", "train_loss_0": "2.798", "train_loss_1": "0.101", "train_loss_2": "0.008", "train_accuracy": "0.52926", "train_wps": "16951", "train_ups": "5.32", "train_wpb": "3188", "train_bsz": "44.3", "train_num_updates": "223066", "train_lr": "4.47934e-05", "train_gnorm": "0.825", "train_loss_scale": "4", "train_train_wall": "722", "train_gb_free": "13.3", "train_wall": "42074"} [2023-11-02 05:16:07,443][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:16:07,464][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 56 [2023-11-02 05:16:07,639][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 05:16:07,690][fairseq.trainer][INFO] - begin training epoch 56 [2023-11-02 05:16:07,691][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 05:16:32,089][train_inner][INFO] - {"epoch": 56, "update": 55.033, "loss": "2.957", "ntokens": "3182.16", "nsentences": "42.76", "prob_perplexity": "190.633", "code_perplexity": "187.859", "temp": "0.656", "loss_0": "2.848", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52007", "wps": "9963.7", "ups": "3.13", "wpb": "3182.2", "bsz": "42.8", "num_updates": "223200", "lr": "4.47595e-05", "gnorm": "0.828", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "42098"} [2023-11-02 05:17:08,344][train_inner][INFO] - {"epoch": 56, "update": 55.082, "loss": "2.901", "ntokens": "3184.44", "nsentences": "44.04", "prob_perplexity": "190.492", "code_perplexity": "187.699", "temp": "0.655", "loss_0": "2.792", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53024", "wps": "17568.2", "ups": "5.52", "wpb": "3184.4", "bsz": "44", "num_updates": "223400", "lr": "4.47089e-05", "gnorm": "0.83", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "42135"} [2023-11-02 05:17:44,437][train_inner][INFO] - {"epoch": 56, "update": 55.132, "loss": "2.839", "ntokens": "3170.92", "nsentences": "47.28", "prob_perplexity": "190.194", "code_perplexity": "187.413", "temp": "0.654", "loss_0": "2.729", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54374", "wps": "17572.1", "ups": "5.54", "wpb": "3170.9", "bsz": "47.3", "num_updates": "223600", "lr": "4.46582e-05", "gnorm": "0.823", "loss_scale": "4", "train_wall": "35", "gb_free": "13.9", "wall": "42171"} [2023-11-02 05:18:20,469][train_inner][INFO] - {"epoch": 56, "update": 55.181, "loss": "2.866", "ntokens": "3167.48", "nsentences": "45.04", "prob_perplexity": "189.623", "code_perplexity": "186.76", "temp": "0.654", "loss_0": "2.757", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53662", "wps": "17582.5", "ups": "5.55", "wpb": "3167.5", "bsz": "45", "num_updates": "223800", "lr": "4.46076e-05", "gnorm": "0.837", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "42207"} [2023-11-02 05:18:56,516][train_inner][INFO] - {"epoch": 56, "update": 55.23, "loss": "2.893", "ntokens": "3167.16", "nsentences": "43.88", "prob_perplexity": "190.446", "code_perplexity": "187.593", "temp": "0.653", "loss_0": "2.783", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53179", "wps": "17573.7", "ups": "5.55", "wpb": "3167.2", "bsz": "43.9", "num_updates": "224000", "lr": "4.4557e-05", "gnorm": "0.839", "loss_scale": "4", "train_wall": "35", "gb_free": "14.9", "wall": "42243"} [2023-11-02 05:19:31,886][train_inner][INFO] - {"epoch": 56, "update": 55.28, "loss": "2.859", "ntokens": "3153.32", "nsentences": "45.28", "prob_perplexity": "190.887", "code_perplexity": "188.08", "temp": "0.652", "loss_0": "2.749", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53907", "wps": "17831.4", "ups": "5.65", "wpb": "3153.3", "bsz": "45.3", "num_updates": "224200", "lr": "4.45063e-05", "gnorm": "0.829", "loss_scale": "4", "train_wall": "35", "gb_free": "13.2", "wall": "42278"} [2023-11-02 05:20:07,547][train_inner][INFO] - {"epoch": 56, "update": 55.329, "loss": "2.951", "ntokens": "3180.08", "nsentences": "40.64", "prob_perplexity": "190.645", "code_perplexity": "187.798", "temp": "0.652", "loss_0": "2.841", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51875", "wps": "17836.1", "ups": "5.61", "wpb": "3180.1", "bsz": "40.6", "num_updates": "224400", "lr": "4.44557e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "35", "gb_free": "13.4", "wall": "42314"} [2023-11-02 05:20:44,014][train_inner][INFO] - {"epoch": 56, "update": 55.378, "loss": "2.915", "ntokens": "3237.68", "nsentences": "43.84", "prob_perplexity": "191.348", "code_perplexity": "188.596", "temp": "0.651", "loss_0": "2.806", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52712", "wps": "17757.9", "ups": "5.48", "wpb": "3237.7", "bsz": "43.8", "num_updates": "224600", "lr": "4.44051e-05", "gnorm": "0.823", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "42350"} [2023-11-02 05:21:19,989][train_inner][INFO] - {"epoch": 56, "update": 55.428, "loss": "2.876", "ntokens": "3149.4", "nsentences": "44.84", "prob_perplexity": "190.191", "code_perplexity": "187.421", "temp": "0.65", "loss_0": "2.766", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53532", "wps": "17510.2", "ups": "5.56", "wpb": "3149.4", "bsz": "44.8", "num_updates": "224800", "lr": "4.43544e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "35", "gb_free": "13.6", "wall": "42386"} [2023-11-02 05:21:56,563][train_inner][INFO] - {"epoch": 56, "update": 55.477, "loss": "2.916", "ntokens": "3200.16", "nsentences": "43.6", "prob_perplexity": "190.796", "code_perplexity": "188.049", "temp": "0.65", "loss_0": "2.807", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52774", "wps": "17508.2", "ups": "5.47", "wpb": "3200.2", "bsz": "43.6", "num_updates": "225000", "lr": "4.43038e-05", "gnorm": "0.828", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "42423"} [2023-11-02 05:21:56,564][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 05:21:56,566][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:21:56,590][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 65 [2023-11-02 05:22:22,250][valid][INFO] - {"epoch": 56, "valid_loss": "2.77", "valid_ntokens": "3176.51", "valid_nsentences": "44.1685", "valid_prob_perplexity": "190.063", "valid_code_perplexity": "187.411", "valid_temp": "0.649", "valid_loss_0": "2.661", "valid_loss_1": "0.101", "valid_loss_2": "0.008", "valid_accuracy": "0.55678", "valid_wps": "56196.7", "valid_wpb": "3176.5", "valid_bsz": "44.2", "valid_num_updates": "225000", "valid_best_loss": "2.761"} [2023-11-02 05:22:22,252][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 56 @ 225000 updates [2023-11-02 05:22:22,254][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_56_225000.pt [2023-11-02 05:22:23,651][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_56_225000.pt [2023-11-02 05:22:24,659][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_56_225000.pt (epoch 56 @ 225000 updates, score 2.77) (writing took 2.406890123616904 seconds) [2023-11-02 05:23:01,402][train_inner][INFO] - {"epoch": 56, "update": 55.526, "loss": "2.929", "ntokens": "3187.76", "nsentences": "42.64", "prob_perplexity": "191.177", "code_perplexity": "188.285", "temp": "0.649", "loss_0": "2.819", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52488", "wps": "9833.2", "ups": "3.08", "wpb": "3187.8", "bsz": "42.6", "num_updates": "225200", "lr": "4.42532e-05", "gnorm": "0.841", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "42488"} [2023-11-02 05:23:37,511][train_inner][INFO] - {"epoch": 56, "update": 55.575, "loss": "2.924", "ntokens": "3172.48", "nsentences": "43.36", "prob_perplexity": "190.6", "code_perplexity": "187.733", "temp": "0.648", "loss_0": "2.814", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52704", "wps": "17572.7", "ups": "5.54", "wpb": "3172.5", "bsz": "43.4", "num_updates": "225400", "lr": "4.42025e-05", "gnorm": "0.838", "loss_scale": "4", "train_wall": "35", "gb_free": "13.7", "wall": "42524"} [2023-11-02 05:24:13,678][train_inner][INFO] - {"epoch": 56, "update": 55.625, "loss": "2.938", "ntokens": "3197.52", "nsentences": "43.96", "prob_perplexity": "191.711", "code_perplexity": "188.938", "temp": "0.648", "loss_0": "2.828", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52448", "wps": "17683", "ups": "5.53", "wpb": "3197.5", "bsz": "44", "num_updates": "225600", "lr": "4.41519e-05", "gnorm": "0.822", "loss_scale": "4", "train_wall": "36", "gb_free": "12.7", "wall": "42560"} [2023-11-02 05:24:49,743][train_inner][INFO] - {"epoch": 56, "update": 55.674, "loss": "2.881", "ntokens": "3178.4", "nsentences": "43.92", "prob_perplexity": "190.729", "code_perplexity": "187.938", "temp": "0.647", "loss_0": "2.772", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53317", "wps": "17627", "ups": "5.55", "wpb": "3178.4", "bsz": "43.9", "num_updates": "225800", "lr": "4.41013e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "35", "gb_free": "14.3", "wall": "42596"} [2023-11-02 05:25:26,099][train_inner][INFO] - {"epoch": 56, "update": 55.723, "loss": "2.889", "ntokens": "3166.36", "nsentences": "45.36", "prob_perplexity": "191.109", "code_perplexity": "188.297", "temp": "0.646", "loss_0": "2.78", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53457", "wps": "17419.9", "ups": "5.5", "wpb": "3166.4", "bsz": "45.4", "num_updates": "226000", "lr": "4.40506e-05", "gnorm": "0.828", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "42632"} [2023-11-02 05:26:03,537][train_inner][INFO] - {"epoch": 56, "update": 55.773, "loss": "2.94", "ntokens": "3239.32", "nsentences": "42.76", "prob_perplexity": "191.73", "code_perplexity": "188.96", "temp": "0.646", "loss_0": "2.831", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52201", "wps": "17306", "ups": "5.34", "wpb": "3239.3", "bsz": "42.8", "num_updates": "226200", "lr": "4.4e-05", "gnorm": "0.826", "loss_scale": "4", "train_wall": "37", "gb_free": "11.8", "wall": "42670"} [2023-11-02 05:26:39,358][train_inner][INFO] - {"epoch": 56, "update": 55.822, "loss": "2.901", "ntokens": "3217.72", "nsentences": "44", "prob_perplexity": "192.557", "code_perplexity": "189.696", "temp": "0.645", "loss_0": "2.792", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52963", "wps": "17966.4", "ups": "5.58", "wpb": "3217.7", "bsz": "44", "num_updates": "226400", "lr": "4.39494e-05", "gnorm": "0.824", "loss_scale": "4", "train_wall": "35", "gb_free": "12.9", "wall": "42706"} [2023-11-02 05:27:15,773][train_inner][INFO] - {"epoch": 56, "update": 55.871, "loss": "2.875", "ntokens": "3180", "nsentences": "43.92", "prob_perplexity": "191.792", "code_perplexity": "189.017", "temp": "0.644", "loss_0": "2.766", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53395", "wps": "17466.6", "ups": "5.49", "wpb": "3180", "bsz": "43.9", "num_updates": "226600", "lr": "4.38987e-05", "gnorm": "0.831", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "42742"} [2023-11-02 05:27:52,267][train_inner][INFO] - {"epoch": 56, "update": 55.921, "loss": "2.891", "ntokens": "3262.8", "nsentences": "46.44", "prob_perplexity": "192.008", "code_perplexity": "189.275", "temp": "0.644", "loss_0": "2.782", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53247", "wps": "17882.3", "ups": "5.48", "wpb": "3262.8", "bsz": "46.4", "num_updates": "226800", "lr": "4.38481e-05", "gnorm": "0.818", "loss_scale": "4", "train_wall": "36", "gb_free": "14.7", "wall": "42778"} [2023-11-02 05:28:28,981][train_inner][INFO] - {"epoch": 56, "update": 55.97, "loss": "2.846", "ntokens": "3169.68", "nsentences": "45.2", "prob_perplexity": "190.962", "code_perplexity": "188.242", "temp": "0.643", "loss_0": "2.737", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54002", "wps": "17267.9", "ups": "5.45", "wpb": "3169.7", "bsz": "45.2", "num_updates": "227000", "lr": "4.37975e-05", "gnorm": "0.85", "loss_scale": "4", "train_wall": "36", "gb_free": "14.3", "wall": "42815"} [2023-11-02 05:28:50,948][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 05:28:50,950][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:28:50,968][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 66 [2023-11-02 05:29:16,546][valid][INFO] - {"epoch": 56, "valid_loss": "2.754", "valid_ntokens": "3158.05", "valid_nsentences": "44.1685", "valid_prob_perplexity": "191.31", "valid_code_perplexity": "188.655", "valid_temp": "0.642", "valid_loss_0": "2.645", "valid_loss_1": "0.101", "valid_loss_2": "0.008", "valid_accuracy": "0.55943", "valid_wps": "56035.4", "valid_wpb": "3158", "valid_bsz": "44.2", "valid_num_updates": "227122", "valid_best_loss": "2.754"} [2023-11-02 05:29:16,548][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 56 @ 227122 updates [2023-11-02 05:29:16,550][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 05:29:17,971][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 05:29:18,969][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 56 @ 227122 updates, score 2.754) (writing took 2.4205240039154887 seconds) [2023-11-02 05:29:18,969][fairseq_cli.train][INFO] - end of epoch 56 (average epoch stats below) [2023-11-02 05:29:18,972][train][INFO] - {"epoch": 56, "train_loss": "2.897", "train_ntokens": "3189.34", "train_nsentences": "44.2682", "train_prob_perplexity": "191.018", "train_code_perplexity": "188.219", "train_temp": "0.649", "train_loss_0": "2.788", "train_loss_1": "0.101", "train_loss_2": "0.008", "train_accuracy": "0.53113", "train_wps": "16343", "train_ups": "5.12", "train_wpb": "3189.3", "train_bsz": "44.3", "train_num_updates": "227122", "train_lr": "4.37666e-05", "train_gnorm": "0.831", "train_loss_scale": "4", "train_train_wall": "722", "train_gb_free": "13.2", "train_wall": "42865"} [2023-11-02 05:29:18,975][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:29:18,993][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 57 [2023-11-02 05:29:19,196][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 05:29:19,244][fairseq.trainer][INFO] - begin training epoch 57 [2023-11-02 05:29:19,245][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 05:29:33,557][train_inner][INFO] - {"epoch": 57, "update": 56.019, "loss": "2.832", "ntokens": "3198.16", "nsentences": "48.04", "prob_perplexity": "191.659", "code_perplexity": "188.904", "temp": "0.643", "loss_0": "2.723", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54572", "wps": "9905.6", "ups": "3.1", "wpb": "3198.2", "bsz": "48", "num_updates": "227200", "lr": "4.37468e-05", "gnorm": "0.82", "loss_scale": "4", "train_wall": "36", "gb_free": "14.3", "wall": "42880"} [2023-11-02 05:30:09,814][train_inner][INFO] - {"epoch": 57, "update": 56.069, "loss": "2.856", "ntokens": "3213.84", "nsentences": "46.76", "prob_perplexity": "192.394", "code_perplexity": "189.746", "temp": "0.642", "loss_0": "2.747", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54047", "wps": "17729.2", "ups": "5.52", "wpb": "3213.8", "bsz": "46.8", "num_updates": "227400", "lr": "4.36962e-05", "gnorm": "0.825", "loss_scale": "4", "train_wall": "36", "gb_free": "14.3", "wall": "42916"} [2023-11-02 05:30:45,752][train_inner][INFO] - {"epoch": 57, "update": 56.118, "loss": "2.889", "ntokens": "3175.36", "nsentences": "44.4", "prob_perplexity": "191.22", "code_perplexity": "188.407", "temp": "0.641", "loss_0": "2.78", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53218", "wps": "17672.3", "ups": "5.57", "wpb": "3175.4", "bsz": "44.4", "num_updates": "227600", "lr": "4.36456e-05", "gnorm": "0.837", "loss_scale": "4", "train_wall": "35", "gb_free": "13.1", "wall": "42952"} [2023-11-02 05:31:22,518][train_inner][INFO] - {"epoch": 57, "update": 56.167, "loss": "2.92", "ntokens": "3205.84", "nsentences": "42.24", "prob_perplexity": "190.967", "code_perplexity": "188.148", "temp": "0.641", "loss_0": "2.811", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52431", "wps": "17440.3", "ups": "5.44", "wpb": "3205.8", "bsz": "42.2", "num_updates": "227800", "lr": "4.35949e-05", "gnorm": "0.829", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "42989"} [2023-11-02 05:31:58,580][train_inner][INFO] - {"epoch": 57, "update": 56.216, "loss": "2.884", "ntokens": "3173.8", "nsentences": "44.24", "prob_perplexity": "191.148", "code_perplexity": "188.389", "temp": "0.64", "loss_0": "2.774", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53354", "wps": "17603.2", "ups": "5.55", "wpb": "3173.8", "bsz": "44.2", "num_updates": "228000", "lr": "4.35443e-05", "gnorm": "0.84", "loss_scale": "4", "train_wall": "35", "gb_free": "14.9", "wall": "43025"} [2023-11-02 05:32:34,724][train_inner][INFO] - {"epoch": 57, "update": 56.266, "loss": "2.891", "ntokens": "3185.12", "nsentences": "44.24", "prob_perplexity": "191.759", "code_perplexity": "188.924", "temp": "0.639", "loss_0": "2.781", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53209", "wps": "17625.5", "ups": "5.53", "wpb": "3185.1", "bsz": "44.2", "num_updates": "228200", "lr": "4.34937e-05", "gnorm": "0.833", "loss_scale": "4", "train_wall": "35", "gb_free": "13.6", "wall": "43061"} [2023-11-02 05:33:10,908][train_inner][INFO] - {"epoch": 57, "update": 56.315, "loss": "2.928", "ntokens": "3210.76", "nsentences": "43.56", "prob_perplexity": "191.809", "code_perplexity": "188.959", "temp": "0.639", "loss_0": "2.818", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52559", "wps": "17748.1", "ups": "5.53", "wpb": "3210.8", "bsz": "43.6", "num_updates": "228400", "lr": "4.3443e-05", "gnorm": "0.834", "loss_scale": "4", "train_wall": "36", "gb_free": "16.5", "wall": "43097"} [2023-11-02 05:33:47,352][train_inner][INFO] - {"epoch": 57, "update": 56.364, "loss": "2.941", "ntokens": "3209.76", "nsentences": "41.84", "prob_perplexity": "191.583", "code_perplexity": "188.733", "temp": "0.638", "loss_0": "2.831", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52112", "wps": "17616", "ups": "5.49", "wpb": "3209.8", "bsz": "41.8", "num_updates": "228600", "lr": "4.33924e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "36", "gb_free": "13", "wall": "43134"} [2023-11-02 05:34:23,598][train_inner][INFO] - {"epoch": 57, "update": 56.414, "loss": "2.896", "ntokens": "3203.2", "nsentences": "45", "prob_perplexity": "191.614", "code_perplexity": "188.824", "temp": "0.637", "loss_0": "2.787", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53213", "wps": "17675.9", "ups": "5.52", "wpb": "3203.2", "bsz": "45", "num_updates": "228800", "lr": "4.33418e-05", "gnorm": "0.833", "loss_scale": "4", "train_wall": "36", "gb_free": "14.1", "wall": "43170"} [2023-11-02 05:34:59,876][train_inner][INFO] - {"epoch": 57, "update": 56.463, "loss": "2.937", "ntokens": "3190.84", "nsentences": "43.36", "prob_perplexity": "191.842", "code_perplexity": "188.999", "temp": "0.637", "loss_0": "2.828", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52435", "wps": "17591.6", "ups": "5.51", "wpb": "3190.8", "bsz": "43.4", "num_updates": "229000", "lr": "4.32911e-05", "gnorm": "0.838", "loss_scale": "4", "train_wall": "36", "gb_free": "13.1", "wall": "43206"} [2023-11-02 05:35:36,410][train_inner][INFO] - {"epoch": 57, "update": 56.512, "loss": "2.948", "ntokens": "3211.2", "nsentences": "41.24", "prob_perplexity": "192.192", "code_perplexity": "189.378", "temp": "0.636", "loss_0": "2.839", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51909", "wps": "17587.5", "ups": "5.48", "wpb": "3211.2", "bsz": "41.2", "num_updates": "229200", "lr": "4.32405e-05", "gnorm": "0.839", "loss_scale": "4", "train_wall": "36", "gb_free": "12.8", "wall": "43243"} [2023-11-02 05:36:12,340][train_inner][INFO] - {"epoch": 57, "update": 56.562, "loss": "2.903", "ntokens": "3186.28", "nsentences": "43.48", "prob_perplexity": "192.168", "code_perplexity": "189.356", "temp": "0.635", "loss_0": "2.794", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52993", "wps": "17737.3", "ups": "5.57", "wpb": "3186.3", "bsz": "43.5", "num_updates": "229400", "lr": "4.31899e-05", "gnorm": "0.839", "loss_scale": "4", "train_wall": "35", "gb_free": "13.8", "wall": "43279"} [2023-11-02 05:36:48,154][train_inner][INFO] - {"epoch": 57, "update": 56.611, "loss": "2.814", "ntokens": "3163.36", "nsentences": "47.2", "prob_perplexity": "192.198", "code_perplexity": "189.247", "temp": "0.635", "loss_0": "2.705", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54822", "wps": "17666.5", "ups": "5.58", "wpb": "3163.4", "bsz": "47.2", "num_updates": "229600", "lr": "4.31392e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "35", "gb_free": "13.7", "wall": "43314"} [2023-11-02 05:37:24,124][train_inner][INFO] - {"epoch": 57, "update": 56.66, "loss": "2.876", "ntokens": "3186", "nsentences": "45.2", "prob_perplexity": "192.516", "code_perplexity": "189.691", "temp": "0.634", "loss_0": "2.767", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.5351", "wps": "17715.7", "ups": "5.56", "wpb": "3186", "bsz": "45.2", "num_updates": "229800", "lr": "4.30886e-05", "gnorm": "0.83", "loss_scale": "4", "train_wall": "35", "gb_free": "13", "wall": "43350"} [2023-11-02 05:38:00,504][train_inner][INFO] - {"epoch": 57, "update": 56.71, "loss": "2.848", "ntokens": "3188", "nsentences": "47.16", "prob_perplexity": "193.271", "code_perplexity": "190.443", "temp": "0.634", "loss_0": "2.739", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54256", "wps": "17527.2", "ups": "5.5", "wpb": "3188", "bsz": "47.2", "num_updates": "230000", "lr": "4.3038e-05", "gnorm": "0.827", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "43387"} [2023-11-02 05:38:36,917][train_inner][INFO] - {"epoch": 57, "update": 56.759, "loss": "2.891", "ntokens": "3196", "nsentences": "43.52", "prob_perplexity": "192.625", "code_perplexity": "189.823", "temp": "0.633", "loss_0": "2.782", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53041", "wps": "17555.1", "ups": "5.49", "wpb": "3196", "bsz": "43.5", "num_updates": "230200", "lr": "4.29873e-05", "gnorm": "0.839", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "43423"} [2023-11-02 05:39:13,438][train_inner][INFO] - {"epoch": 57, "update": 56.808, "loss": "2.976", "ntokens": "3204.08", "nsentences": "40.88", "prob_perplexity": "191.431", "code_perplexity": "188.686", "temp": "0.632", "loss_0": "2.867", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51406", "wps": "17547.7", "ups": "5.48", "wpb": "3204.1", "bsz": "40.9", "num_updates": "230400", "lr": "4.29367e-05", "gnorm": "0.845", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "43460"} [2023-11-02 05:39:49,873][train_inner][INFO] - {"epoch": 57, "update": 56.857, "loss": "2.875", "ntokens": "3181.84", "nsentences": "46.08", "prob_perplexity": "191.909", "code_perplexity": "189.067", "temp": "0.632", "loss_0": "2.766", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53671", "wps": "17466.8", "ups": "5.49", "wpb": "3181.8", "bsz": "46.1", "num_updates": "230600", "lr": "4.28861e-05", "gnorm": "0.835", "loss_scale": "8", "train_wall": "36", "gb_free": "12.5", "wall": "43496"} [2023-11-02 05:40:03,322][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 4.0 [2023-11-02 05:40:26,262][train_inner][INFO] - {"epoch": 57, "update": 56.907, "loss": "2.94", "ntokens": "3175.24", "nsentences": "41.92", "prob_perplexity": "191.782", "code_perplexity": "188.968", "temp": "0.631", "loss_0": "2.83", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52201", "wps": "17452.5", "ups": "5.5", "wpb": "3175.2", "bsz": "41.9", "num_updates": "230800", "lr": "4.28354e-05", "gnorm": "0.847", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "43532"} [2023-11-02 05:41:02,926][train_inner][INFO] - {"epoch": 57, "update": 56.956, "loss": "2.926", "ntokens": "3203.96", "nsentences": "44.08", "prob_perplexity": "192.823", "code_perplexity": "190.054", "temp": "0.63", "loss_0": "2.817", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52616", "wps": "17478.5", "ups": "5.46", "wpb": "3204", "bsz": "44.1", "num_updates": "231000", "lr": "4.27848e-05", "gnorm": "0.834", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "43569"} [2023-11-02 05:41:34,905][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 05:41:34,907][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:41:34,926][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 67 [2023-11-02 05:42:00,676][valid][INFO] - {"epoch": 57, "valid_loss": "2.73", "valid_ntokens": "3158.72", "valid_nsentences": "44.1685", "valid_prob_perplexity": "191.359", "valid_code_perplexity": "188.778", "valid_temp": "0.63", "valid_loss_0": "2.621", "valid_loss_1": "0.101", "valid_loss_2": "0.008", "valid_accuracy": "0.56405", "valid_wps": "55645.5", "valid_wpb": "3158.7", "valid_bsz": "44.2", "valid_num_updates": "231177", "valid_best_loss": "2.73"} [2023-11-02 05:42:00,678][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 57 @ 231177 updates [2023-11-02 05:42:00,680][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 05:42:02,068][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 05:42:03,065][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 57 @ 231177 updates, score 2.73) (writing took 2.3870128109119833 seconds) [2023-11-02 05:42:03,066][fairseq_cli.train][INFO] - end of epoch 57 (average epoch stats below) [2023-11-02 05:42:03,069][train][INFO] - {"epoch": 57, "train_loss": "2.897", "train_ntokens": "3192.39", "train_nsentences": "44.2772", "train_prob_perplexity": "191.965", "train_code_perplexity": "189.158", "train_temp": "0.636", "train_loss_0": "2.788", "train_loss_1": "0.101", "train_loss_2": "0.008", "train_accuracy": "0.53112", "train_wps": "16941.8", "train_ups": "5.31", "train_wpb": "3192.4", "train_bsz": "44.3", "train_num_updates": "231177", "train_lr": "4.274e-05", "train_gnorm": "0.835", "train_loss_scale": "4", "train_train_wall": "723", "train_gb_free": "14.5", "train_wall": "43629"} [2023-11-02 05:42:03,072][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:42:03,092][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 58 [2023-11-02 05:42:03,270][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 05:42:03,321][fairseq.trainer][INFO] - begin training epoch 58 [2023-11-02 05:42:03,322][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 05:42:07,517][train_inner][INFO] - {"epoch": 58, "update": 57.006, "loss": "2.82", "ntokens": "3173.12", "nsentences": "48.16", "prob_perplexity": "192.501", "code_perplexity": "189.747", "temp": "0.63", "loss_0": "2.711", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54865", "wps": "9825.7", "ups": "3.1", "wpb": "3173.1", "bsz": "48.2", "num_updates": "231200", "lr": "4.27342e-05", "gnorm": "0.825", "loss_scale": "4", "train_wall": "35", "gb_free": "15.2", "wall": "43634"} [2023-11-02 05:42:43,785][train_inner][INFO] - {"epoch": 58, "update": 57.055, "loss": "2.93", "ntokens": "3196.12", "nsentences": "43.8", "prob_perplexity": "192.279", "code_perplexity": "189.471", "temp": "0.629", "loss_0": "2.821", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52426", "wps": "17625.8", "ups": "5.51", "wpb": "3196.1", "bsz": "43.8", "num_updates": "231400", "lr": "4.26835e-05", "gnorm": "0.836", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "43670"} [2023-11-02 05:43:19,758][train_inner][INFO] - {"epoch": 58, "update": 57.104, "loss": "2.866", "ntokens": "3147.96", "nsentences": "43.36", "prob_perplexity": "192.529", "code_perplexity": "189.701", "temp": "0.629", "loss_0": "2.757", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53483", "wps": "17502.8", "ups": "5.56", "wpb": "3148", "bsz": "43.4", "num_updates": "231600", "lr": "4.26329e-05", "gnorm": "0.842", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "43706"} [2023-11-02 05:43:56,243][train_inner][INFO] - {"epoch": 58, "update": 57.154, "loss": "2.948", "ntokens": "3238.2", "nsentences": "41.96", "prob_perplexity": "191.9", "code_perplexity": "189.074", "temp": "0.628", "loss_0": "2.839", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51994", "wps": "17751.8", "ups": "5.48", "wpb": "3238.2", "bsz": "42", "num_updates": "231800", "lr": "4.25823e-05", "gnorm": "0.833", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "43742"} [2023-11-02 05:44:32,134][train_inner][INFO] - {"epoch": 58, "update": 57.203, "loss": "2.909", "ntokens": "3165.48", "nsentences": "43.28", "prob_perplexity": "191.828", "code_perplexity": "188.98", "temp": "0.627", "loss_0": "2.8", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52734", "wps": "17640.7", "ups": "5.57", "wpb": "3165.5", "bsz": "43.3", "num_updates": "232000", "lr": "4.25316e-05", "gnorm": "0.852", "loss_scale": "4", "train_wall": "35", "gb_free": "15.4", "wall": "43778"} [2023-11-02 05:45:08,239][train_inner][INFO] - {"epoch": 58, "update": 57.252, "loss": "2.952", "ntokens": "3191.48", "nsentences": "42.4", "prob_perplexity": "193.001", "code_perplexity": "190.21", "temp": "0.627", "loss_0": "2.843", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52005", "wps": "17680", "ups": "5.54", "wpb": "3191.5", "bsz": "42.4", "num_updates": "232200", "lr": "4.2481e-05", "gnorm": "0.91", "loss_scale": "4", "train_wall": "35", "gb_free": "13.6", "wall": "43814"} [2023-11-02 05:45:44,517][train_inner][INFO] - {"epoch": 58, "update": 57.302, "loss": "2.86", "ntokens": "3220.36", "nsentences": "45.96", "prob_perplexity": "193.589", "code_perplexity": "190.793", "temp": "0.626", "loss_0": "2.751", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.5382", "wps": "17754.9", "ups": "5.51", "wpb": "3220.4", "bsz": "46", "num_updates": "232400", "lr": "4.24304e-05", "gnorm": "0.843", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "43851"} [2023-11-02 05:46:20,829][train_inner][INFO] - {"epoch": 58, "update": 57.351, "loss": "2.957", "ntokens": "3193.16", "nsentences": "42.2", "prob_perplexity": "192.283", "code_perplexity": "189.418", "temp": "0.625", "loss_0": "2.847", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.51977", "wps": "17588.2", "ups": "5.51", "wpb": "3193.2", "bsz": "42.2", "num_updates": "232600", "lr": "4.23797e-05", "gnorm": "0.848", "loss_scale": "4", "train_wall": "36", "gb_free": "13.3", "wall": "43887"} [2023-11-02 05:46:57,617][train_inner][INFO] - {"epoch": 58, "update": 57.4, "loss": "2.844", "ntokens": "3193.2", "nsentences": "45.28", "prob_perplexity": "194.062", "code_perplexity": "191.192", "temp": "0.625", "loss_0": "2.735", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54095", "wps": "17360.8", "ups": "5.44", "wpb": "3193.2", "bsz": "45.3", "num_updates": "232800", "lr": "4.23291e-05", "gnorm": "0.86", "loss_scale": "4", "train_wall": "36", "gb_free": "13.3", "wall": "43924"} [2023-11-02 05:47:34,368][train_inner][INFO] - {"epoch": 58, "update": 57.449, "loss": "2.931", "ntokens": "3221.2", "nsentences": "42.44", "prob_perplexity": "192.997", "code_perplexity": "190.216", "temp": "0.624", "loss_0": "2.822", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52281", "wps": "17531.3", "ups": "5.44", "wpb": "3221.2", "bsz": "42.4", "num_updates": "233000", "lr": "4.22785e-05", "gnorm": "0.842", "loss_scale": "4", "train_wall": "36", "gb_free": "14.3", "wall": "43961"} [2023-11-02 05:48:11,330][train_inner][INFO] - {"epoch": 58, "update": 57.499, "loss": "2.907", "ntokens": "3177.56", "nsentences": "43.12", "prob_perplexity": "192.745", "code_perplexity": "189.942", "temp": "0.624", "loss_0": "2.798", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52775", "wps": "17194.4", "ups": "5.41", "wpb": "3177.6", "bsz": "43.1", "num_updates": "233200", "lr": "4.22278e-05", "gnorm": "0.844", "loss_scale": "4", "train_wall": "36", "gb_free": "13.4", "wall": "43998"} [2023-11-02 05:48:47,153][train_inner][INFO] - {"epoch": 58, "update": 57.548, "loss": "2.763", "ntokens": "3126.56", "nsentences": "49.48", "prob_perplexity": "193.27", "code_perplexity": "190.442", "temp": "0.623", "loss_0": "2.654", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.55897", "wps": "17465.1", "ups": "5.59", "wpb": "3126.6", "bsz": "49.5", "num_updates": "233400", "lr": "4.21772e-05", "gnorm": "0.832", "loss_scale": "4", "train_wall": "35", "gb_free": "12.9", "wall": "44033"} [2023-11-02 05:49:23,746][train_inner][INFO] - {"epoch": 58, "update": 57.597, "loss": "2.893", "ntokens": "3223.04", "nsentences": "45.72", "prob_perplexity": "193.229", "code_perplexity": "190.474", "temp": "0.622", "loss_0": "2.784", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53277", "wps": "17616.4", "ups": "5.47", "wpb": "3223", "bsz": "45.7", "num_updates": "233600", "lr": "4.21266e-05", "gnorm": "0.839", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "44070"} [2023-11-02 05:50:00,154][train_inner][INFO] - {"epoch": 58, "update": 57.647, "loss": "2.87", "ntokens": "3205.68", "nsentences": "45", "prob_perplexity": "193.652", "code_perplexity": "190.876", "temp": "0.622", "loss_0": "2.762", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53572", "wps": "17610.9", "ups": "5.49", "wpb": "3205.7", "bsz": "45", "num_updates": "233800", "lr": "4.20759e-05", "gnorm": "0.834", "loss_scale": "4", "train_wall": "36", "gb_free": "14", "wall": "44106"} [2023-11-02 05:50:35,870][train_inner][INFO] - {"epoch": 58, "update": 57.696, "loss": "2.862", "ntokens": "3196.2", "nsentences": "44.72", "prob_perplexity": "193.105", "code_perplexity": "190.408", "temp": "0.621", "loss_0": "2.754", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53731", "wps": "17899.2", "ups": "5.6", "wpb": "3196.2", "bsz": "44.7", "num_updates": "234000", "lr": "4.20253e-05", "gnorm": "0.842", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "44142"} [2023-11-02 05:50:59,725][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 05:51:12,289][train_inner][INFO] - {"epoch": 58, "update": 57.746, "loss": "2.914", "ntokens": "3203.4", "nsentences": "43.48", "prob_perplexity": "193.714", "code_perplexity": "190.895", "temp": "0.62", "loss_0": "2.805", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52718", "wps": "17592.5", "ups": "5.49", "wpb": "3203.4", "bsz": "43.5", "num_updates": "234200", "lr": "4.19747e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "44179"} [2023-11-02 05:51:48,576][train_inner][INFO] - {"epoch": 58, "update": 57.795, "loss": "2.893", "ntokens": "3203.16", "nsentences": "44.56", "prob_perplexity": "193.111", "code_perplexity": "190.372", "temp": "0.62", "loss_0": "2.784", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.5318", "wps": "17655.6", "ups": "5.51", "wpb": "3203.2", "bsz": "44.6", "num_updates": "234400", "lr": "4.19241e-05", "gnorm": "0.843", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "44215"} [2023-11-02 05:52:24,651][train_inner][INFO] - {"epoch": 58, "update": 57.844, "loss": "2.855", "ntokens": "3173.16", "nsentences": "44.88", "prob_perplexity": "193.538", "code_perplexity": "190.73", "temp": "0.619", "loss_0": "2.746", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53862", "wps": "17593.4", "ups": "5.54", "wpb": "3173.2", "bsz": "44.9", "num_updates": "234600", "lr": "4.18734e-05", "gnorm": "0.841", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "44251"} [2023-11-02 05:53:01,125][train_inner][INFO] - {"epoch": 58, "update": 57.893, "loss": "2.829", "ntokens": "3174.48", "nsentences": "46.68", "prob_perplexity": "194.069", "code_perplexity": "191.274", "temp": "0.619", "loss_0": "2.72", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54433", "wps": "17407.7", "ups": "5.48", "wpb": "3174.5", "bsz": "46.7", "num_updates": "234800", "lr": "4.18228e-05", "gnorm": "0.833", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "44287"} [2023-11-02 05:53:38,197][train_inner][INFO] - {"epoch": 58, "update": 57.943, "loss": "2.918", "ntokens": "3237.44", "nsentences": "43.96", "prob_perplexity": "193.268", "code_perplexity": "190.497", "temp": "0.618", "loss_0": "2.81", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52704", "wps": "17466.7", "ups": "5.4", "wpb": "3237.4", "bsz": "44", "num_updates": "235000", "lr": "4.17722e-05", "gnorm": "0.848", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "44324"} [2023-11-02 05:54:14,946][train_inner][INFO] - {"epoch": 58, "update": 57.992, "loss": "2.887", "ntokens": "3194.84", "nsentences": "43.68", "prob_perplexity": "193.867", "code_perplexity": "191.063", "temp": "0.617", "loss_0": "2.778", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53159", "wps": "17388.6", "ups": "5.44", "wpb": "3194.8", "bsz": "43.7", "num_updates": "235200", "lr": "4.17215e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "44361"} [2023-11-02 05:54:20,700][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 05:54:20,702][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:54:20,723][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 68 [2023-11-02 05:54:46,483][valid][INFO] - {"epoch": 58, "valid_loss": "2.732", "valid_ntokens": "3162.25", "valid_nsentences": "44.1685", "valid_prob_perplexity": "193.068", "valid_code_perplexity": "190.41", "valid_temp": "0.617", "valid_loss_0": "2.624", "valid_loss_1": "0.101", "valid_loss_2": "0.008", "valid_accuracy": "0.5629", "valid_wps": "55715.3", "valid_wpb": "3162.3", "valid_bsz": "44.2", "valid_num_updates": "235232", "valid_best_loss": "2.73"} [2023-11-02 05:54:46,485][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 58 @ 235232 updates [2023-11-02 05:54:46,487][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 05:54:47,912][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 05:54:47,958][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 58 @ 235232 updates, score 2.732) (writing took 1.473493603989482 seconds) [2023-11-02 05:54:47,959][fairseq_cli.train][INFO] - end of epoch 58 (average epoch stats below) [2023-11-02 05:54:47,961][train][INFO] - {"epoch": 58, "train_loss": "2.89", "train_ntokens": "3194.6", "train_nsentences": "44.2752", "train_prob_perplexity": "193.107", "train_code_perplexity": "190.308", "train_temp": "0.623", "train_loss_0": "2.781", "train_loss_1": "0.101", "train_loss_2": "0.008", "train_accuracy": "0.53191", "train_wps": "16935.9", "train_ups": "5.3", "train_wpb": "3194.6", "train_bsz": "44.3", "train_num_updates": "235232", "train_lr": "4.17134e-05", "train_gnorm": "0.845", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "13.6", "train_wall": "44394"} [2023-11-02 05:54:47,964][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 05:54:47,982][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 59 [2023-11-02 05:54:48,154][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 05:54:48,205][fairseq.trainer][INFO] - begin training epoch 59 [2023-11-02 05:54:48,206][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 05:55:18,941][train_inner][INFO] - {"epoch": 59, "update": 58.041, "loss": "2.915", "ntokens": "3203.28", "nsentences": "42.76", "prob_perplexity": "193.122", "code_perplexity": "190.302", "temp": "0.617", "loss_0": "2.806", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52587", "wps": "10011.3", "ups": "3.13", "wpb": "3203.3", "bsz": "42.8", "num_updates": "235400", "lr": "4.16709e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "44425"} [2023-11-02 05:55:54,841][train_inner][INFO] - {"epoch": 59, "update": 58.091, "loss": "2.806", "ntokens": "3189.6", "nsentences": "45.72", "prob_perplexity": "193.96", "code_perplexity": "191.115", "temp": "0.616", "loss_0": "2.698", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54802", "wps": "17770.2", "ups": "5.57", "wpb": "3189.6", "bsz": "45.7", "num_updates": "235600", "lr": "4.16203e-05", "gnorm": "0.827", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "44461"} [2023-11-02 05:56:30,418][train_inner][INFO] - {"epoch": 59, "update": 58.14, "loss": "2.859", "ntokens": "3166.96", "nsentences": "46.04", "prob_perplexity": "193.637", "code_perplexity": "190.78", "temp": "0.615", "loss_0": "2.75", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53919", "wps": "17804.8", "ups": "5.62", "wpb": "3167", "bsz": "46", "num_updates": "235800", "lr": "4.15696e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "44497"} [2023-11-02 05:57:06,015][train_inner][INFO] - {"epoch": 59, "update": 58.189, "loss": "2.84", "ntokens": "3185.6", "nsentences": "46.44", "prob_perplexity": "193.913", "code_perplexity": "191.11", "temp": "0.615", "loss_0": "2.731", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54243", "wps": "17899.3", "ups": "5.62", "wpb": "3185.6", "bsz": "46.4", "num_updates": "236000", "lr": "4.1519e-05", "gnorm": "0.834", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "44532"} [2023-11-02 05:57:41,907][train_inner][INFO] - {"epoch": 59, "update": 58.239, "loss": "2.837", "ntokens": "3176.2", "nsentences": "46.4", "prob_perplexity": "193.365", "code_perplexity": "190.585", "temp": "0.614", "loss_0": "2.728", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.54274", "wps": "17700.3", "ups": "5.57", "wpb": "3176.2", "bsz": "46.4", "num_updates": "236200", "lr": "4.14684e-05", "gnorm": "0.85", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "44568"} [2023-11-02 05:58:17,318][train_inner][INFO] - {"epoch": 59, "update": 58.288, "loss": "2.866", "ntokens": "3201.52", "nsentences": "44.76", "prob_perplexity": "193.89", "code_perplexity": "191.058", "temp": "0.614", "loss_0": "2.758", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53631", "wps": "18083.1", "ups": "5.65", "wpb": "3201.5", "bsz": "44.8", "num_updates": "236400", "lr": "4.14177e-05", "gnorm": "0.834", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "44604"} [2023-11-02 05:58:53,266][train_inner][INFO] - {"epoch": 59, "update": 58.337, "loss": "2.897", "ntokens": "3167.44", "nsentences": "42.28", "prob_perplexity": "193.245", "code_perplexity": "190.395", "temp": "0.613", "loss_0": "2.788", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.52889", "wps": "17623.2", "ups": "5.56", "wpb": "3167.4", "bsz": "42.3", "num_updates": "236600", "lr": "4.13671e-05", "gnorm": "0.845", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "44639"} [2023-11-02 05:59:28,692][train_inner][INFO] - {"epoch": 59, "update": 58.387, "loss": "2.877", "ntokens": "3198.44", "nsentences": "44.48", "prob_perplexity": "193.95", "code_perplexity": "191.148", "temp": "0.612", "loss_0": "2.768", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53397", "wps": "18058.1", "ups": "5.65", "wpb": "3198.4", "bsz": "44.5", "num_updates": "236800", "lr": "4.13165e-05", "gnorm": "0.842", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "44675"} [2023-11-02 06:00:04,757][train_inner][INFO] - {"epoch": 59, "update": 58.436, "loss": "2.898", "ntokens": "3226.72", "nsentences": "43.8", "prob_perplexity": "195.215", "code_perplexity": "192.383", "temp": "0.612", "loss_0": "2.79", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53024", "wps": "17894.7", "ups": "5.55", "wpb": "3226.7", "bsz": "43.8", "num_updates": "237000", "lr": "4.12658e-05", "gnorm": "0.827", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "44711"} [2023-11-02 06:00:40,799][train_inner][INFO] - {"epoch": 59, "update": 58.485, "loss": "2.877", "ntokens": "3208.16", "nsentences": "44.76", "prob_perplexity": "194.158", "code_perplexity": "191.39", "temp": "0.611", "loss_0": "2.768", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53491", "wps": "17803.8", "ups": "5.55", "wpb": "3208.2", "bsz": "44.8", "num_updates": "237200", "lr": "4.12152e-05", "gnorm": "0.838", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "44747"} [2023-11-02 06:01:16,951][train_inner][INFO] - {"epoch": 59, "update": 58.535, "loss": "2.925", "ntokens": "3233.24", "nsentences": "42.4", "prob_perplexity": "194.724", "code_perplexity": "191.926", "temp": "0.611", "loss_0": "2.816", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5234", "wps": "17888.2", "ups": "5.53", "wpb": "3233.2", "bsz": "42.4", "num_updates": "237400", "lr": "4.11646e-05", "gnorm": "0.85", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "44783"} [2023-11-02 06:01:53,140][train_inner][INFO] - {"epoch": 59, "update": 58.584, "loss": "2.898", "ntokens": "3175.6", "nsentences": "43.88", "prob_perplexity": "193.861", "code_perplexity": "191.033", "temp": "0.61", "loss_0": "2.79", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53099", "wps": "17551.2", "ups": "5.53", "wpb": "3175.6", "bsz": "43.9", "num_updates": "237600", "lr": "4.11139e-05", "gnorm": "0.846", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "44819"} [2023-11-02 06:02:29,534][train_inner][INFO] - {"epoch": 59, "update": 58.633, "loss": "2.898", "ntokens": "3180.64", "nsentences": "42.84", "prob_perplexity": "193.412", "code_perplexity": "190.581", "temp": "0.609", "loss_0": "2.79", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.5301", "wps": "17491.9", "ups": "5.5", "wpb": "3180.6", "bsz": "42.8", "num_updates": "237800", "lr": "4.10633e-05", "gnorm": "0.846", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "44856"} [2023-11-02 06:03:05,898][train_inner][INFO] - {"epoch": 59, "update": 58.682, "loss": "2.851", "ntokens": "3174.6", "nsentences": "46.24", "prob_perplexity": "193.762", "code_perplexity": "190.925", "temp": "0.609", "loss_0": "2.742", "loss_1": "0.101", "loss_2": "0.008", "accuracy": "0.53994", "wps": "17461.4", "ups": "5.5", "wpb": "3174.6", "bsz": "46.2", "num_updates": "238000", "lr": "4.10127e-05", "gnorm": "0.849", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "44892"} [2023-11-02 06:03:42,026][train_inner][INFO] - {"epoch": 59, "update": 58.732, "loss": "2.937", "ntokens": "3258.72", "nsentences": "42.28", "prob_perplexity": "195.403", "code_perplexity": "192.611", "temp": "0.608", "loss_0": "2.829", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52138", "wps": "18041.3", "ups": "5.54", "wpb": "3258.7", "bsz": "42.3", "num_updates": "238200", "lr": "4.0962e-05", "gnorm": "0.838", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "44928"} [2023-11-02 06:04:18,474][train_inner][INFO] - {"epoch": 59, "update": 58.781, "loss": "2.858", "ntokens": "3169.04", "nsentences": "46.6", "prob_perplexity": "194.822", "code_perplexity": "191.992", "temp": "0.608", "loss_0": "2.75", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54045", "wps": "17390.7", "ups": "5.49", "wpb": "3169", "bsz": "46.6", "num_updates": "238400", "lr": "4.09114e-05", "gnorm": "0.841", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "44965"} [2023-11-02 06:04:54,405][train_inner][INFO] - {"epoch": 59, "update": 58.83, "loss": "2.863", "ntokens": "3160.56", "nsentences": "43.88", "prob_perplexity": "194.255", "code_perplexity": "191.424", "temp": "0.607", "loss_0": "2.755", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53692", "wps": "17593.3", "ups": "5.57", "wpb": "3160.6", "bsz": "43.9", "num_updates": "238600", "lr": "4.08608e-05", "gnorm": "0.848", "loss_scale": "2", "train_wall": "35", "gb_free": "15", "wall": "45001"} [2023-11-02 06:05:31,107][train_inner][INFO] - {"epoch": 59, "update": 58.88, "loss": "2.932", "ntokens": "3234.28", "nsentences": "43", "prob_perplexity": "194.448", "code_perplexity": "191.683", "temp": "0.606", "loss_0": "2.823", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52426", "wps": "17625.8", "ups": "5.45", "wpb": "3234.3", "bsz": "43", "num_updates": "238800", "lr": "4.08101e-05", "gnorm": "0.841", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "45037"} [2023-11-02 06:06:07,558][train_inner][INFO] - {"epoch": 59, "update": 58.929, "loss": "2.882", "ntokens": "3170.48", "nsentences": "42.56", "prob_perplexity": "194.736", "code_perplexity": "191.886", "temp": "0.606", "loss_0": "2.774", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53187", "wps": "17396.8", "ups": "5.49", "wpb": "3170.5", "bsz": "42.6", "num_updates": "239000", "lr": "4.07595e-05", "gnorm": "0.853", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "45074"} [2023-11-02 06:06:44,531][train_inner][INFO] - {"epoch": 59, "update": 58.978, "loss": "2.87", "ntokens": "3183.16", "nsentences": "42.84", "prob_perplexity": "194.589", "code_perplexity": "191.757", "temp": "0.605", "loss_0": "2.762", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53394", "wps": "17220.1", "ups": "5.41", "wpb": "3183.2", "bsz": "42.8", "num_updates": "239200", "lr": "4.07089e-05", "gnorm": "0.858", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "45111"} [2023-11-02 06:07:00,541][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 06:07:00,543][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:07:00,561][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 69 [2023-11-02 06:07:26,490][valid][INFO] - {"epoch": 59, "valid_loss": "2.736", "valid_ntokens": "3161.63", "valid_nsentences": "44.1685", "valid_prob_perplexity": "193.545", "valid_code_perplexity": "190.888", "valid_temp": "0.605", "valid_loss_0": "2.627", "valid_loss_1": "0.101", "valid_loss_2": "0.008", "valid_accuracy": "0.56219", "valid_wps": "55352.2", "valid_wpb": "3161.6", "valid_bsz": "44.2", "valid_num_updates": "239288", "valid_best_loss": "2.73"} [2023-11-02 06:07:26,493][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 59 @ 239288 updates [2023-11-02 06:07:26,494][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 06:07:27,922][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 06:07:27,979][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 59 @ 239288 updates, score 2.736) (writing took 1.4865271509625018 seconds) [2023-11-02 06:07:27,980][fairseq_cli.train][INFO] - end of epoch 59 (average epoch stats below) [2023-11-02 06:07:27,982][train][INFO] - {"epoch": 59, "train_loss": "2.878", "train_ntokens": "3192.53", "train_nsentences": "44.2682", "train_prob_perplexity": "194.129", "train_code_perplexity": "191.309", "train_temp": "0.611", "train_loss_0": "2.77", "train_loss_1": "0.1", "train_loss_2": "0.008", "train_accuracy": "0.53406", "train_wps": "17037.6", "train_ups": "5.34", "train_wpb": "3192.5", "train_bsz": "44.3", "train_num_updates": "239288", "train_lr": "4.06866e-05", "train_gnorm": "0.843", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "13.1", "train_wall": "45154"} [2023-11-02 06:07:27,986][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:07:28,005][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 60 [2023-11-02 06:07:28,184][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 06:07:28,258][fairseq.trainer][INFO] - begin training epoch 60 [2023-11-02 06:07:28,259][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 06:07:48,682][train_inner][INFO] - {"epoch": 60, "update": 59.028, "loss": "2.825", "ntokens": "3202.24", "nsentences": "45.24", "prob_perplexity": "194.63", "code_perplexity": "191.861", "temp": "0.605", "loss_0": "2.717", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54382", "wps": "9983.8", "ups": "3.12", "wpb": "3202.2", "bsz": "45.2", "num_updates": "239400", "lr": "4.06582e-05", "gnorm": "0.835", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "45175"} [2023-11-02 06:08:24,368][train_inner][INFO] - {"epoch": 60, "update": 59.077, "loss": "2.837", "ntokens": "3195.32", "nsentences": "44.8", "prob_perplexity": "194.621", "code_perplexity": "191.855", "temp": "0.604", "loss_0": "2.728", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54102", "wps": "17908.9", "ups": "5.6", "wpb": "3195.3", "bsz": "44.8", "num_updates": "239600", "lr": "4.06076e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "45211"} [2023-11-02 06:09:00,466][train_inner][INFO] - {"epoch": 60, "update": 59.126, "loss": "2.903", "ntokens": "3205.96", "nsentences": "43.36", "prob_perplexity": "195.272", "code_perplexity": "192.448", "temp": "0.603", "loss_0": "2.795", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52889", "wps": "17763.6", "ups": "5.54", "wpb": "3206", "bsz": "43.4", "num_updates": "239800", "lr": "4.0557e-05", "gnorm": "0.855", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "45247"} [2023-11-02 06:09:36,733][train_inner][INFO] - {"epoch": 60, "update": 59.176, "loss": "2.909", "ntokens": "3197.88", "nsentences": "43.08", "prob_perplexity": "194.7", "code_perplexity": "191.871", "temp": "0.603", "loss_0": "2.801", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52683", "wps": "17636.4", "ups": "5.52", "wpb": "3197.9", "bsz": "43.1", "num_updates": "240000", "lr": "4.05063e-05", "gnorm": "0.843", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "45283"} [2023-11-02 06:10:12,562][train_inner][INFO] - {"epoch": 60, "update": 59.225, "loss": "2.838", "ntokens": "3141.76", "nsentences": "45.4", "prob_perplexity": "194.528", "code_perplexity": "191.696", "temp": "0.602", "loss_0": "2.729", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54263", "wps": "17538.3", "ups": "5.58", "wpb": "3141.8", "bsz": "45.4", "num_updates": "240200", "lr": "4.04557e-05", "gnorm": "0.852", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "45319"} [2023-11-02 06:10:48,210][train_inner][INFO] - {"epoch": 60, "update": 59.274, "loss": "2.906", "ntokens": "3194.2", "nsentences": "42.24", "prob_perplexity": "194.741", "code_perplexity": "191.924", "temp": "0.601", "loss_0": "2.797", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52723", "wps": "17922.1", "ups": "5.61", "wpb": "3194.2", "bsz": "42.2", "num_updates": "240400", "lr": "4.04051e-05", "gnorm": "0.849", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "45354"} [2023-11-02 06:11:24,238][train_inner][INFO] - {"epoch": 60, "update": 59.323, "loss": "2.857", "ntokens": "3177.64", "nsentences": "44", "prob_perplexity": "194.535", "code_perplexity": "191.737", "temp": "0.601", "loss_0": "2.749", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53759", "wps": "17640.8", "ups": "5.55", "wpb": "3177.6", "bsz": "44", "num_updates": "240600", "lr": "4.03544e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "45390"} [2023-11-02 06:12:00,666][train_inner][INFO] - {"epoch": 60, "update": 59.373, "loss": "2.781", "ntokens": "3187.76", "nsentences": "49.52", "prob_perplexity": "195.614", "code_perplexity": "192.82", "temp": "0.6", "loss_0": "2.672", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5548", "wps": "17502.9", "ups": "5.49", "wpb": "3187.8", "bsz": "49.5", "num_updates": "240800", "lr": "4.03038e-05", "gnorm": "0.836", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "45427"} [2023-11-02 06:12:36,808][train_inner][INFO] - {"epoch": 60, "update": 59.422, "loss": "2.842", "ntokens": "3177.76", "nsentences": "45.72", "prob_perplexity": "194.847", "code_perplexity": "192.008", "temp": "0.6", "loss_0": "2.734", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54159", "wps": "17585.7", "ups": "5.53", "wpb": "3177.8", "bsz": "45.7", "num_updates": "241000", "lr": "4.02532e-05", "gnorm": "0.842", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "45463"} [2023-11-02 06:13:13,500][train_inner][INFO] - {"epoch": 60, "update": 59.471, "loss": "2.867", "ntokens": "3238.92", "nsentences": "45.2", "prob_perplexity": "194.681", "code_perplexity": "191.869", "temp": "0.599", "loss_0": "2.759", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53603", "wps": "17655.8", "ups": "5.45", "wpb": "3238.9", "bsz": "45.2", "num_updates": "241200", "lr": "4.02025e-05", "gnorm": "0.834", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "45500"} [2023-11-02 06:13:49,875][train_inner][INFO] - {"epoch": 60, "update": 59.521, "loss": "2.879", "ntokens": "3177.28", "nsentences": "42.4", "prob_perplexity": "195.042", "code_perplexity": "192.27", "temp": "0.598", "loss_0": "2.771", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5317", "wps": "17470.7", "ups": "5.5", "wpb": "3177.3", "bsz": "42.4", "num_updates": "241400", "lr": "4.01519e-05", "gnorm": "0.856", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "45536"} [2023-11-02 06:14:26,119][train_inner][INFO] - {"epoch": 60, "update": 59.57, "loss": "2.9", "ntokens": "3171.28", "nsentences": "42.32", "prob_perplexity": "194.241", "code_perplexity": "191.424", "temp": "0.598", "loss_0": "2.792", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5287", "wps": "17500.5", "ups": "5.52", "wpb": "3171.3", "bsz": "42.3", "num_updates": "241600", "lr": "4.01013e-05", "gnorm": "0.849", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "45572"} [2023-11-02 06:15:02,423][train_inner][INFO] - {"epoch": 60, "update": 59.619, "loss": "2.849", "ntokens": "3229.28", "nsentences": "45.24", "prob_perplexity": "195.935", "code_perplexity": "193.186", "temp": "0.597", "loss_0": "2.741", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53799", "wps": "17791.6", "ups": "5.51", "wpb": "3229.3", "bsz": "45.2", "num_updates": "241800", "lr": "4.00506e-05", "gnorm": "0.846", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "45609"} [2023-11-02 06:15:38,152][train_inner][INFO] - {"epoch": 60, "update": 59.669, "loss": "2.895", "ntokens": "3160.76", "nsentences": "41.64", "prob_perplexity": "193.924", "code_perplexity": "191.107", "temp": "0.597", "loss_0": "2.787", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52816", "wps": "17701.8", "ups": "5.6", "wpb": "3160.8", "bsz": "41.6", "num_updates": "242000", "lr": "4e-05", "gnorm": "0.862", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "45644"} [2023-11-02 06:16:14,735][train_inner][INFO] - {"epoch": 60, "update": 59.718, "loss": "2.874", "ntokens": "3200.6", "nsentences": "45.12", "prob_perplexity": "195.287", "code_perplexity": "192.519", "temp": "0.596", "loss_0": "2.766", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53578", "wps": "17499.2", "ups": "5.47", "wpb": "3200.6", "bsz": "45.1", "num_updates": "242200", "lr": "3.99494e-05", "gnorm": "0.844", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "45681"} [2023-11-02 06:16:50,649][train_inner][INFO] - {"epoch": 60, "update": 59.767, "loss": "2.8", "ntokens": "3138.72", "nsentences": "47.24", "prob_perplexity": "194.506", "code_perplexity": "191.678", "temp": "0.596", "loss_0": "2.692", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.55068", "wps": "17479.7", "ups": "5.57", "wpb": "3138.7", "bsz": "47.2", "num_updates": "242400", "lr": "3.98987e-05", "gnorm": "0.852", "loss_scale": "2", "train_wall": "35", "gb_free": "17.1", "wall": "45717"} [2023-11-02 06:17:27,753][train_inner][INFO] - {"epoch": 60, "update": 59.817, "loss": "2.868", "ntokens": "3195.12", "nsentences": "44.28", "prob_perplexity": "195.322", "code_perplexity": "192.502", "temp": "0.595", "loss_0": "2.76", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53624", "wps": "17223.9", "ups": "5.39", "wpb": "3195.1", "bsz": "44.3", "num_updates": "242600", "lr": "3.98481e-05", "gnorm": "0.846", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "45754"} [2023-11-02 06:18:04,473][train_inner][INFO] - {"epoch": 60, "update": 59.866, "loss": "2.897", "ntokens": "3214.32", "nsentences": "43.56", "prob_perplexity": "195.067", "code_perplexity": "192.207", "temp": "0.594", "loss_0": "2.789", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53015", "wps": "17508.4", "ups": "5.45", "wpb": "3214.3", "bsz": "43.6", "num_updates": "242800", "lr": "3.97975e-05", "gnorm": "0.859", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "45791"} [2023-11-02 06:18:41,420][train_inner][INFO] - {"epoch": 60, "update": 59.915, "loss": "2.904", "ntokens": "3180.32", "nsentences": "41.2", "prob_perplexity": "194.82", "code_perplexity": "191.851", "temp": "0.594", "loss_0": "2.796", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52626", "wps": "17216.4", "ups": "5.41", "wpb": "3180.3", "bsz": "41.2", "num_updates": "243000", "lr": "3.97468e-05", "gnorm": "0.854", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "45828"} [2023-11-02 06:19:18,022][train_inner][INFO] - {"epoch": 60, "update": 59.964, "loss": "2.924", "ntokens": "3235.28", "nsentences": "43.04", "prob_perplexity": "196.229", "code_perplexity": "193.382", "temp": "0.593", "loss_0": "2.817", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5251", "wps": "17679.4", "ups": "5.46", "wpb": "3235.3", "bsz": "43", "num_updates": "243200", "lr": "3.96962e-05", "gnorm": "0.848", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "45864"} [2023-11-02 06:19:42,969][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2023-11-02 06:19:43,824][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 06:19:43,825][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:19:43,848][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 70 [2023-11-02 06:20:09,180][valid][INFO] - {"epoch": 60, "valid_loss": "2.749", "valid_ntokens": "3168.55", "valid_nsentences": "44.1685", "valid_prob_perplexity": "194.898", "valid_code_perplexity": "192.234", "valid_temp": "0.592", "valid_loss_0": "2.641", "valid_loss_1": "0.1", "valid_loss_2": "0.008", "valid_accuracy": "0.56008", "valid_wps": "56751.7", "valid_wpb": "3168.5", "valid_bsz": "44.2", "valid_num_updates": "243343", "valid_best_loss": "2.73"} [2023-11-02 06:20:09,183][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 60 @ 243343 updates [2023-11-02 06:20:09,184][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 06:20:10,620][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 06:20:10,676][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 60 @ 243343 updates, score 2.749) (writing took 1.493141177110374 seconds) [2023-11-02 06:20:10,676][fairseq_cli.train][INFO] - end of epoch 60 (average epoch stats below) [2023-11-02 06:20:10,679][train][INFO] - {"epoch": 60, "train_loss": "2.869", "train_ntokens": "3192.16", "train_nsentences": "44.2673", "train_prob_perplexity": "194.985", "train_code_perplexity": "192.166", "train_temp": "0.598", "train_loss_0": "2.761", "train_loss_1": "0.1", "train_loss_2": "0.008", "train_accuracy": "0.53543", "train_wps": "16971.7", "train_ups": "5.32", "train_wpb": "3192.2", "train_bsz": "44.3", "train_num_updates": "243343", "train_lr": "3.966e-05", "train_gnorm": "0.848", "train_loss_scale": "1", "train_train_wall": "723", "train_gb_free": "13.6", "train_wall": "45917"} [2023-11-02 06:20:10,681][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:20:10,701][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 61 [2023-11-02 06:20:10,871][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 06:20:10,921][fairseq.trainer][INFO] - begin training epoch 61 [2023-11-02 06:20:10,922][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 06:20:21,245][train_inner][INFO] - {"epoch": 61, "update": 60.014, "loss": "2.912", "ntokens": "3216.84", "nsentences": "44.72", "prob_perplexity": "195.409", "code_perplexity": "192.556", "temp": "0.593", "loss_0": "2.804", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52926", "wps": "10176.5", "ups": "3.16", "wpb": "3216.8", "bsz": "44.7", "num_updates": "243400", "lr": "3.96456e-05", "gnorm": "0.853", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "45927"} [2023-11-02 06:20:57,444][train_inner][INFO] - {"epoch": 61, "update": 60.063, "loss": "2.906", "ntokens": "3218.52", "nsentences": "42.56", "prob_perplexity": "195.56", "code_perplexity": "192.754", "temp": "0.592", "loss_0": "2.798", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52791", "wps": "17783.3", "ups": "5.53", "wpb": "3218.5", "bsz": "42.6", "num_updates": "243600", "lr": "3.95949e-05", "gnorm": "0.847", "loss_scale": "1", "train_wall": "36", "gb_free": "14.2", "wall": "45964"} [2023-11-02 06:21:33,016][train_inner][INFO] - {"epoch": 61, "update": 60.113, "loss": "2.897", "ntokens": "3178.4", "nsentences": "42.04", "prob_perplexity": "195.68", "code_perplexity": "192.856", "temp": "0.591", "loss_0": "2.789", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52914", "wps": "17871.6", "ups": "5.62", "wpb": "3178.4", "bsz": "42", "num_updates": "243800", "lr": "3.95443e-05", "gnorm": "0.853", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "45999"} [2023-11-02 06:22:09,057][train_inner][INFO] - {"epoch": 61, "update": 60.162, "loss": "2.937", "ntokens": "3216.72", "nsentences": "42.2", "prob_perplexity": "196.393", "code_perplexity": "193.55", "temp": "0.591", "loss_0": "2.829", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5221", "wps": "17851.3", "ups": "5.55", "wpb": "3216.7", "bsz": "42.2", "num_updates": "244000", "lr": "3.94937e-05", "gnorm": "0.843", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "46035"} [2023-11-02 06:22:45,047][train_inner][INFO] - {"epoch": 61, "update": 60.211, "loss": "2.854", "ntokens": "3233.96", "nsentences": "46.8", "prob_perplexity": "196.768", "code_perplexity": "193.914", "temp": "0.59", "loss_0": "2.747", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54023", "wps": "17972.8", "ups": "5.56", "wpb": "3234", "bsz": "46.8", "num_updates": "244200", "lr": "3.9443e-05", "gnorm": "0.837", "loss_scale": "1", "train_wall": "35", "gb_free": "13.1", "wall": "46071"} [2023-11-02 06:23:21,045][train_inner][INFO] - {"epoch": 61, "update": 60.261, "loss": "2.928", "ntokens": "3215.08", "nsentences": "42.56", "prob_perplexity": "195.745", "code_perplexity": "192.96", "temp": "0.59", "loss_0": "2.821", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52309", "wps": "17863.8", "ups": "5.56", "wpb": "3215.1", "bsz": "42.6", "num_updates": "244400", "lr": "3.93924e-05", "gnorm": "0.856", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "46107"} [2023-11-02 06:23:57,499][train_inner][INFO] - {"epoch": 61, "update": 60.31, "loss": "2.849", "ntokens": "3228.56", "nsentences": "46.2", "prob_perplexity": "196.192", "code_perplexity": "193.4", "temp": "0.589", "loss_0": "2.741", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54018", "wps": "17714.1", "ups": "5.49", "wpb": "3228.6", "bsz": "46.2", "num_updates": "244600", "lr": "3.93418e-05", "gnorm": "0.841", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "46144"} [2023-11-02 06:24:33,839][train_inner][INFO] - {"epoch": 61, "update": 60.359, "loss": "2.813", "ntokens": "3222.52", "nsentences": "46.24", "prob_perplexity": "196.063", "code_perplexity": "193.244", "temp": "0.588", "loss_0": "2.705", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54613", "wps": "17736.3", "ups": "5.5", "wpb": "3222.5", "bsz": "46.2", "num_updates": "244800", "lr": "3.92911e-05", "gnorm": "0.837", "loss_scale": "1", "train_wall": "36", "gb_free": "13.1", "wall": "46180"} [2023-11-02 06:25:10,249][train_inner][INFO] - {"epoch": 61, "update": 60.409, "loss": "2.889", "ntokens": "3165.6", "nsentences": "42.2", "prob_perplexity": "195.216", "code_perplexity": "192.379", "temp": "0.588", "loss_0": "2.781", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52966", "wps": "17389.9", "ups": "5.49", "wpb": "3165.6", "bsz": "42.2", "num_updates": "245000", "lr": "3.92405e-05", "gnorm": "0.861", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "46216"} [2023-11-02 06:25:46,581][train_inner][INFO] - {"epoch": 61, "update": 60.458, "loss": "2.86", "ntokens": "3150", "nsentences": "43.72", "prob_perplexity": "194.745", "code_perplexity": "191.881", "temp": "0.587", "loss_0": "2.752", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53714", "wps": "17340.8", "ups": "5.51", "wpb": "3150", "bsz": "43.7", "num_updates": "245200", "lr": "3.91899e-05", "gnorm": "0.861", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "46253"} [2023-11-02 06:26:23,070][train_inner][INFO] - {"epoch": 61, "update": 60.507, "loss": "2.913", "ntokens": "3203.24", "nsentences": "42.84", "prob_perplexity": "195.155", "code_perplexity": "192.322", "temp": "0.587", "loss_0": "2.806", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52749", "wps": "17558.7", "ups": "5.48", "wpb": "3203.2", "bsz": "42.8", "num_updates": "245400", "lr": "3.91392e-05", "gnorm": "0.854", "loss_scale": "1", "train_wall": "36", "gb_free": "12.8", "wall": "46289"} [2023-11-02 06:26:58,939][train_inner][INFO] - {"epoch": 61, "update": 60.556, "loss": "2.843", "ntokens": "3171.12", "nsentences": "44.56", "prob_perplexity": "196.194", "code_perplexity": "193.406", "temp": "0.586", "loss_0": "2.735", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54022", "wps": "17682.3", "ups": "5.58", "wpb": "3171.1", "bsz": "44.6", "num_updates": "245600", "lr": "3.90886e-05", "gnorm": "0.859", "loss_scale": "1", "train_wall": "35", "gb_free": "14.3", "wall": "46325"} [2023-11-02 06:27:35,278][train_inner][INFO] - {"epoch": 61, "update": 60.606, "loss": "2.874", "ntokens": "3180.92", "nsentences": "43.8", "prob_perplexity": "195.937", "code_perplexity": "193.142", "temp": "0.585", "loss_0": "2.766", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53424", "wps": "17508", "ups": "5.5", "wpb": "3180.9", "bsz": "43.8", "num_updates": "245800", "lr": "3.9038e-05", "gnorm": "0.857", "loss_scale": "1", "train_wall": "36", "gb_free": "12.5", "wall": "46362"} [2023-11-02 06:28:12,281][train_inner][INFO] - {"epoch": 61, "update": 60.655, "loss": "2.845", "ntokens": "3204.48", "nsentences": "44.84", "prob_perplexity": "196.403", "code_perplexity": "193.599", "temp": "0.585", "loss_0": "2.738", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53953", "wps": "17321.3", "ups": "5.41", "wpb": "3204.5", "bsz": "44.8", "num_updates": "246000", "lr": "3.89873e-05", "gnorm": "0.848", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "46399"} [2023-11-02 06:28:48,981][train_inner][INFO] - {"epoch": 61, "update": 60.704, "loss": "2.846", "ntokens": "3159.32", "nsentences": "44.12", "prob_perplexity": "195.405", "code_perplexity": "192.617", "temp": "0.584", "loss_0": "2.738", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53971", "wps": "17217.8", "ups": "5.45", "wpb": "3159.3", "bsz": "44.1", "num_updates": "246200", "lr": "3.89367e-05", "gnorm": "0.857", "loss_scale": "1", "train_wall": "36", "gb_free": "14.5", "wall": "46435"} [2023-11-02 06:29:26,014][train_inner][INFO] - {"epoch": 61, "update": 60.754, "loss": "2.879", "ntokens": "3187.36", "nsentences": "44.28", "prob_perplexity": "195.934", "code_perplexity": "193.138", "temp": "0.584", "loss_0": "2.771", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53462", "wps": "17222.6", "ups": "5.4", "wpb": "3187.4", "bsz": "44.3", "num_updates": "246400", "lr": "3.88861e-05", "gnorm": "0.849", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "46472"} [2023-11-02 06:30:02,367][train_inner][INFO] - {"epoch": 61, "update": 60.803, "loss": "2.911", "ntokens": "3186", "nsentences": "42.96", "prob_perplexity": "196.213", "code_perplexity": "193.458", "temp": "0.583", "loss_0": "2.803", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52799", "wps": "17529.4", "ups": "5.5", "wpb": "3186", "bsz": "43", "num_updates": "246600", "lr": "3.88354e-05", "gnorm": "0.855", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "46509"} [2023-11-02 06:30:38,551][train_inner][INFO] - {"epoch": 61, "update": 60.852, "loss": "2.807", "ntokens": "3166.64", "nsentences": "46.48", "prob_perplexity": "195.495", "code_perplexity": "192.66", "temp": "0.583", "loss_0": "2.699", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54741", "wps": "17503.9", "ups": "5.53", "wpb": "3166.6", "bsz": "46.5", "num_updates": "246800", "lr": "3.87848e-05", "gnorm": "0.85", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "46545"} [2023-11-02 06:31:14,780][train_inner][INFO] - {"epoch": 61, "update": 60.902, "loss": "2.801", "ntokens": "3157.52", "nsentences": "47.04", "prob_perplexity": "195.967", "code_perplexity": "193.155", "temp": "0.582", "loss_0": "2.694", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54897", "wps": "17432.2", "ups": "5.52", "wpb": "3157.5", "bsz": "47", "num_updates": "247000", "lr": "3.87342e-05", "gnorm": "0.858", "loss_scale": "1", "train_wall": "36", "gb_free": "13.2", "wall": "46581"} [2023-11-02 06:31:51,392][train_inner][INFO] - {"epoch": 61, "update": 60.951, "loss": "2.869", "ntokens": "3206.6", "nsentences": "43.6", "prob_perplexity": "196.2", "code_perplexity": "193.364", "temp": "0.581", "loss_0": "2.762", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53308", "wps": "17517.3", "ups": "5.46", "wpb": "3206.6", "bsz": "43.6", "num_updates": "247200", "lr": "3.86835e-05", "gnorm": "0.863", "loss_scale": "1", "train_wall": "36", "gb_free": "14.2", "wall": "46618"} [2023-11-02 06:32:27,517][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 06:32:27,518][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:32:27,538][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 71 [2023-11-02 06:32:53,141][valid][INFO] - {"epoch": 61, "valid_loss": "2.719", "valid_ntokens": "3162.59", "valid_nsentences": "44.1685", "valid_prob_perplexity": "197.223", "valid_code_perplexity": "194.529", "valid_temp": "0.581", "valid_loss_0": "2.612", "valid_loss_1": "0.1", "valid_loss_2": "0.008", "valid_accuracy": "0.5647", "valid_wps": "56039.8", "valid_wpb": "3162.6", "valid_bsz": "44.2", "valid_num_updates": "247399", "valid_best_loss": "2.719"} [2023-11-02 06:32:53,143][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 61 @ 247399 updates [2023-11-02 06:32:53,145][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 06:32:54,574][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 06:32:55,549][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 61 @ 247399 updates, score 2.719) (writing took 2.4055217830464244 seconds) [2023-11-02 06:32:55,550][fairseq_cli.train][INFO] - end of epoch 61 (average epoch stats below) [2023-11-02 06:32:55,552][train][INFO] - {"epoch": 61, "train_loss": "2.869", "train_ntokens": "3191.54", "train_nsentences": "44.2682", "train_prob_perplexity": "195.906", "train_code_perplexity": "193.09", "train_temp": "0.586", "train_loss_0": "2.761", "train_loss_1": "0.1", "train_loss_2": "0.008", "train_accuracy": "0.53545", "train_wps": "16924.3", "train_ups": "5.3", "train_wpb": "3191.5", "train_bsz": "44.3", "train_num_updates": "247399", "train_lr": "3.86332e-05", "train_gnorm": "0.852", "train_loss_scale": "1", "train_train_wall": "724", "train_gb_free": "14", "train_wall": "46682"} [2023-11-02 06:32:55,554][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:32:55,574][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 62 [2023-11-02 06:32:55,744][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 06:32:55,795][fairseq.trainer][INFO] - begin training epoch 62 [2023-11-02 06:32:55,796][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 06:32:56,103][train_inner][INFO] - {"epoch": 62, "update": 61.0, "loss": "2.805", "ntokens": "3170.44", "nsentences": "47.76", "prob_perplexity": "197.426", "code_perplexity": "194.543", "temp": "0.581", "loss_0": "2.698", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54944", "wps": "9799.2", "ups": "3.09", "wpb": "3170.4", "bsz": "47.8", "num_updates": "247400", "lr": "3.86329e-05", "gnorm": "0.848", "loss_scale": "1", "train_wall": "36", "gb_free": "12.9", "wall": "46682"} [2023-11-02 06:33:31,945][train_inner][INFO] - {"epoch": 62, "update": 61.05, "loss": "2.85", "ntokens": "3178.84", "nsentences": "45.8", "prob_perplexity": "197.241", "code_perplexity": "194.437", "temp": "0.58", "loss_0": "2.743", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54052", "wps": "17738.8", "ups": "5.58", "wpb": "3178.8", "bsz": "45.8", "num_updates": "247600", "lr": "3.85823e-05", "gnorm": "0.851", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "46718"} [2023-11-02 06:34:07,596][train_inner][INFO] - {"epoch": 62, "update": 61.099, "loss": "2.849", "ntokens": "3184.64", "nsentences": "44.16", "prob_perplexity": "197.13", "code_perplexity": "194.402", "temp": "0.58", "loss_0": "2.742", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53826", "wps": "17866.9", "ups": "5.61", "wpb": "3184.6", "bsz": "44.2", "num_updates": "247800", "lr": "3.85316e-05", "gnorm": "0.855", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "46754"} [2023-11-02 06:34:43,368][train_inner][INFO] - {"epoch": 62, "update": 61.148, "loss": "2.8", "ntokens": "3204.2", "nsentences": "49.24", "prob_perplexity": "196.853", "code_perplexity": "194.027", "temp": "0.579", "loss_0": "2.692", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.55203", "wps": "17916.1", "ups": "5.59", "wpb": "3204.2", "bsz": "49.2", "num_updates": "248000", "lr": "3.8481e-05", "gnorm": "0.844", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "46790"} [2023-11-02 06:35:19,844][train_inner][INFO] - {"epoch": 62, "update": 61.197, "loss": "2.901", "ntokens": "3248.52", "nsentences": "43.24", "prob_perplexity": "196.734", "code_perplexity": "193.925", "temp": "0.578", "loss_0": "2.793", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52842", "wps": "17812.8", "ups": "5.48", "wpb": "3248.5", "bsz": "43.2", "num_updates": "248200", "lr": "3.84304e-05", "gnorm": "0.856", "loss_scale": "1", "train_wall": "36", "gb_free": "13.9", "wall": "46826"} [2023-11-02 06:35:55,609][train_inner][INFO] - {"epoch": 62, "update": 61.247, "loss": "2.893", "ntokens": "3167.96", "nsentences": "41.96", "prob_perplexity": "196.271", "code_perplexity": "193.428", "temp": "0.578", "loss_0": "2.786", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53007", "wps": "17716.3", "ups": "5.59", "wpb": "3168", "bsz": "42", "num_updates": "248400", "lr": "3.83797e-05", "gnorm": "0.865", "loss_scale": "1", "train_wall": "35", "gb_free": "14", "wall": "46862"} [2023-11-02 06:36:31,906][train_inner][INFO] - {"epoch": 62, "update": 61.296, "loss": "2.81", "ntokens": "3155.2", "nsentences": "46.76", "prob_perplexity": "197.139", "code_perplexity": "194.313", "temp": "0.577", "loss_0": "2.702", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54786", "wps": "17386.4", "ups": "5.51", "wpb": "3155.2", "bsz": "46.8", "num_updates": "248600", "lr": "3.83291e-05", "gnorm": "0.85", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "46898"} [2023-11-02 06:37:08,434][train_inner][INFO] - {"epoch": 62, "update": 61.345, "loss": "2.97", "ntokens": "3231", "nsentences": "42.12", "prob_perplexity": "196.838", "code_perplexity": "194.008", "temp": "0.577", "loss_0": "2.862", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5166", "wps": "17692", "ups": "5.48", "wpb": "3231", "bsz": "42.1", "num_updates": "248800", "lr": "3.82785e-05", "gnorm": "0.858", "loss_scale": "1", "train_wall": "36", "gb_free": "13.9", "wall": "46935"} [2023-11-02 06:37:44,896][train_inner][INFO] - {"epoch": 62, "update": 61.395, "loss": "2.898", "ntokens": "3198.8", "nsentences": "43.12", "prob_perplexity": "197.144", "code_perplexity": "194.314", "temp": "0.576", "loss_0": "2.791", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52929", "wps": "17547", "ups": "5.49", "wpb": "3198.8", "bsz": "43.1", "num_updates": "249000", "lr": "3.82278e-05", "gnorm": "0.855", "loss_scale": "1", "train_wall": "36", "gb_free": "12.8", "wall": "46971"} [2023-11-02 06:38:21,162][train_inner][INFO] - {"epoch": 62, "update": 61.444, "loss": "2.853", "ntokens": "3151.96", "nsentences": "42.32", "prob_perplexity": "196.071", "code_perplexity": "193.258", "temp": "0.576", "loss_0": "2.745", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53696", "wps": "17383.3", "ups": "5.52", "wpb": "3152", "bsz": "42.3", "num_updates": "249200", "lr": "3.81772e-05", "gnorm": "0.868", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "47007"} [2023-11-02 06:38:57,296][train_inner][INFO] - {"epoch": 62, "update": 61.493, "loss": "2.874", "ntokens": "3174.96", "nsentences": "42.52", "prob_perplexity": "195.752", "code_perplexity": "192.949", "temp": "0.575", "loss_0": "2.766", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53354", "wps": "17574.3", "ups": "5.54", "wpb": "3175", "bsz": "42.5", "num_updates": "249400", "lr": "3.81266e-05", "gnorm": "0.86", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "47044"} [2023-11-02 06:39:33,472][train_inner][INFO] - {"epoch": 62, "update": 61.543, "loss": "2.864", "ntokens": "3202.52", "nsentences": "42.96", "prob_perplexity": "197.991", "code_perplexity": "195.169", "temp": "0.574", "loss_0": "2.757", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53373", "wps": "17706.4", "ups": "5.53", "wpb": "3202.5", "bsz": "43", "num_updates": "249600", "lr": "3.80759e-05", "gnorm": "0.854", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "47080"} [2023-11-02 06:40:09,445][train_inner][INFO] - {"epoch": 62, "update": 61.592, "loss": "2.84", "ntokens": "3173.4", "nsentences": "44.12", "prob_perplexity": "197.191", "code_perplexity": "194.391", "temp": "0.574", "loss_0": "2.732", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54127", "wps": "17644.4", "ups": "5.56", "wpb": "3173.4", "bsz": "44.1", "num_updates": "249800", "lr": "3.80253e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "35", "gb_free": "14.1", "wall": "47116"} [2023-11-02 06:40:46,072][train_inner][INFO] - {"epoch": 62, "update": 61.641, "loss": "2.858", "ntokens": "3202.4", "nsentences": "44.16", "prob_perplexity": "196.937", "code_perplexity": "194.138", "temp": "0.573", "loss_0": "2.75", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53744", "wps": "17487.4", "ups": "5.46", "wpb": "3202.4", "bsz": "44.2", "num_updates": "250000", "lr": "3.79747e-05", "gnorm": "0.861", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "47152"} [2023-11-02 06:40:46,074][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 06:40:46,076][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:40:46,095][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 72 [2023-11-02 06:41:11,590][valid][INFO] - {"epoch": 62, "valid_loss": "2.736", "valid_ntokens": "3169.79", "valid_nsentences": "44.1685", "valid_prob_perplexity": "194.621", "valid_code_perplexity": "191.987", "valid_temp": "0.573", "valid_loss_0": "2.628", "valid_loss_1": "0.1", "valid_loss_2": "0.008", "valid_accuracy": "0.56208", "valid_wps": "56374.6", "valid_wpb": "3169.8", "valid_bsz": "44.2", "valid_num_updates": "250000", "valid_best_loss": "2.719"} [2023-11-02 06:41:11,592][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 62 @ 250000 updates [2023-11-02 06:41:11,594][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_62_250000.pt [2023-11-02 06:41:13,009][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_62_250000.pt [2023-11-02 06:41:13,968][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_62_250000.pt (epoch 62 @ 250000 updates, score 2.736) (writing took 2.375435099005699 seconds) [2023-11-02 06:41:51,030][train_inner][INFO] - {"epoch": 62, "update": 61.691, "loss": "2.904", "ntokens": "3206", "nsentences": "42.36", "prob_perplexity": "196.864", "code_perplexity": "194.13", "temp": "0.573", "loss_0": "2.796", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.52783", "wps": "9871.4", "ups": "3.08", "wpb": "3206", "bsz": "42.4", "num_updates": "250200", "lr": "3.79241e-05", "gnorm": "0.868", "loss_scale": "1", "train_wall": "36", "gb_free": "15.1", "wall": "47217"} [2023-11-02 06:42:27,873][train_inner][INFO] - {"epoch": 62, "update": 61.74, "loss": "2.855", "ntokens": "3181.64", "nsentences": "43.76", "prob_perplexity": "197.4", "code_perplexity": "194.668", "temp": "0.572", "loss_0": "2.748", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53716", "wps": "17272.5", "ups": "5.43", "wpb": "3181.6", "bsz": "43.8", "num_updates": "250400", "lr": "3.78734e-05", "gnorm": "0.87", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "47254"} [2023-11-02 06:43:03,956][train_inner][INFO] - {"epoch": 62, "update": 61.789, "loss": "2.85", "ntokens": "3201.72", "nsentences": "44.84", "prob_perplexity": "197.317", "code_perplexity": "194.508", "temp": "0.572", "loss_0": "2.743", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53848", "wps": "17747.4", "ups": "5.54", "wpb": "3201.7", "bsz": "44.8", "num_updates": "250600", "lr": "3.78228e-05", "gnorm": "0.865", "loss_scale": "1", "train_wall": "35", "gb_free": "14.3", "wall": "47290"} [2023-11-02 06:43:40,225][train_inner][INFO] - {"epoch": 62, "update": 61.839, "loss": "2.857", "ntokens": "3209.92", "nsentences": "45.88", "prob_perplexity": "198.036", "code_perplexity": "195.274", "temp": "0.571", "loss_0": "2.75", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53912", "wps": "17701.5", "ups": "5.51", "wpb": "3209.9", "bsz": "45.9", "num_updates": "250800", "lr": "3.77722e-05", "gnorm": "0.848", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "47326"} [2023-11-02 06:44:16,792][train_inner][INFO] - {"epoch": 62, "update": 61.888, "loss": "2.841", "ntokens": "3189.56", "nsentences": "44.64", "prob_perplexity": "197.857", "code_perplexity": "195.12", "temp": "0.57", "loss_0": "2.734", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54013", "wps": "17446.1", "ups": "5.47", "wpb": "3189.6", "bsz": "44.6", "num_updates": "251000", "lr": "3.77215e-05", "gnorm": "0.848", "loss_scale": "1", "train_wall": "36", "gb_free": "14.7", "wall": "47363"} [2023-11-02 06:44:52,803][train_inner][INFO] - {"epoch": 62, "update": 61.937, "loss": "2.862", "ntokens": "3191.08", "nsentences": "45.4", "prob_perplexity": "197.022", "code_perplexity": "194.278", "temp": "0.57", "loss_0": "2.754", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53921", "wps": "17724.1", "ups": "5.55", "wpb": "3191.1", "bsz": "45.4", "num_updates": "251200", "lr": "3.76709e-05", "gnorm": "0.854", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "47399"} [2023-11-02 06:45:28,656][train_inner][INFO] - {"epoch": 62, "update": 61.986, "loss": "2.806", "ntokens": "3183.64", "nsentences": "45.56", "prob_perplexity": "196.625", "code_perplexity": "193.866", "temp": "0.569", "loss_0": "2.698", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54744", "wps": "17760.5", "ups": "5.58", "wpb": "3183.6", "bsz": "45.6", "num_updates": "251400", "lr": "3.76203e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "35", "gb_free": "14.1", "wall": "47435"} [2023-11-02 06:45:38,597][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 06:45:38,599][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:45:38,616][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 73 [2023-11-02 06:46:04,491][valid][INFO] - {"epoch": 62, "valid_loss": "2.702", "valid_ntokens": "3144.75", "valid_nsentences": "44.1685", "valid_prob_perplexity": "196.417", "valid_code_perplexity": "193.768", "valid_temp": "0.569", "valid_loss_0": "2.594", "valid_loss_1": "0.1", "valid_loss_2": "0.008", "valid_accuracy": "0.56889", "valid_wps": "55170.5", "valid_wpb": "3144.7", "valid_bsz": "44.2", "valid_num_updates": "251455", "valid_best_loss": "2.702"} [2023-11-02 06:46:04,493][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 62 @ 251455 updates [2023-11-02 06:46:04,495][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 06:46:05,950][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 06:46:06,964][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 62 @ 251455 updates, score 2.702) (writing took 2.4710901700891554 seconds) [2023-11-02 06:46:06,965][fairseq_cli.train][INFO] - end of epoch 62 (average epoch stats below) [2023-11-02 06:46:06,968][train][INFO] - {"epoch": 62, "train_loss": "2.862", "train_ntokens": "3193.16", "train_nsentences": "44.2682", "train_prob_perplexity": "197.033", "train_code_perplexity": "194.243", "train_temp": "0.575", "train_loss_0": "2.754", "train_loss_1": "0.1", "train_loss_2": "0.008", "train_accuracy": "0.53675", "train_wps": "16365", "train_ups": "5.13", "train_wpb": "3193.2", "train_bsz": "44.3", "train_num_updates": "251455", "train_lr": "3.76063e-05", "train_gnorm": "0.858", "train_loss_scale": "1", "train_train_wall": "722", "train_gb_free": "14.4", "train_wall": "47473"} [2023-11-02 06:46:06,970][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:46:06,991][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 63 [2023-11-02 06:46:07,173][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 06:46:07,225][fairseq.trainer][INFO] - begin training epoch 63 [2023-11-02 06:46:07,226][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 06:46:33,677][train_inner][INFO] - {"epoch": 63, "update": 62.036, "loss": "2.866", "ntokens": "3224.04", "nsentences": "44.4", "prob_perplexity": "196.514", "code_perplexity": "193.777", "temp": "0.569", "loss_0": "2.758", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53607", "wps": "9917.2", "ups": "3.08", "wpb": "3224", "bsz": "44.4", "num_updates": "251600", "lr": "3.75696e-05", "gnorm": "0.868", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "47500"} [2023-11-02 06:47:09,527][train_inner][INFO] - {"epoch": 63, "update": 62.085, "loss": "2.877", "ntokens": "3197.36", "nsentences": "43.2", "prob_perplexity": "196.245", "code_perplexity": "193.468", "temp": "0.568", "loss_0": "2.77", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53233", "wps": "17838.4", "ups": "5.58", "wpb": "3197.4", "bsz": "43.2", "num_updates": "251800", "lr": "3.7519e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "47536"} [2023-11-02 06:47:45,169][train_inner][INFO] - {"epoch": 63, "update": 62.134, "loss": "2.877", "ntokens": "3182.84", "nsentences": "41.84", "prob_perplexity": "197.656", "code_perplexity": "194.779", "temp": "0.568", "loss_0": "2.77", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53171", "wps": "17861.6", "ups": "5.61", "wpb": "3182.8", "bsz": "41.8", "num_updates": "252000", "lr": "3.74684e-05", "gnorm": "0.863", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "47571"} [2023-11-02 06:48:21,226][train_inner][INFO] - {"epoch": 63, "update": 62.184, "loss": "2.869", "ntokens": "3163.04", "nsentences": "43.2", "prob_perplexity": "197.203", "code_perplexity": "194.267", "temp": "0.567", "loss_0": "2.761", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53431", "wps": "17545.7", "ups": "5.55", "wpb": "3163", "bsz": "43.2", "num_updates": "252200", "lr": "3.74177e-05", "gnorm": "0.866", "loss_scale": "1", "train_wall": "35", "gb_free": "12.4", "wall": "47607"} [2023-11-02 06:48:56,680][train_inner][INFO] - {"epoch": 63, "update": 62.233, "loss": "2.885", "ntokens": "3165.2", "nsentences": "43.76", "prob_perplexity": "197.71", "code_perplexity": "194.964", "temp": "0.566", "loss_0": "2.777", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53254", "wps": "17856.2", "ups": "5.64", "wpb": "3165.2", "bsz": "43.8", "num_updates": "252400", "lr": "3.73671e-05", "gnorm": "0.862", "loss_scale": "1", "train_wall": "35", "gb_free": "12.8", "wall": "47643"} [2023-11-02 06:49:32,411][train_inner][INFO] - {"epoch": 63, "update": 62.282, "loss": "2.836", "ntokens": "3186.88", "nsentences": "45.16", "prob_perplexity": "196.867", "code_perplexity": "194.095", "temp": "0.566", "loss_0": "2.728", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54203", "wps": "17839.9", "ups": "5.6", "wpb": "3186.9", "bsz": "45.2", "num_updates": "252600", "lr": "3.73165e-05", "gnorm": "0.86", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "47679"} [2023-11-02 06:50:08,544][train_inner][INFO] - {"epoch": 63, "update": 62.332, "loss": "2.831", "ntokens": "3214.6", "nsentences": "45.72", "prob_perplexity": "197.749", "code_perplexity": "194.934", "temp": "0.565", "loss_0": "2.724", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54267", "wps": "17794.2", "ups": "5.54", "wpb": "3214.6", "bsz": "45.7", "num_updates": "252800", "lr": "3.72658e-05", "gnorm": "0.856", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "47715"} [2023-11-02 06:50:44,976][train_inner][INFO] - {"epoch": 63, "update": 62.381, "loss": "2.823", "ntokens": "3207.52", "nsentences": "46.72", "prob_perplexity": "197.825", "code_perplexity": "195.093", "temp": "0.565", "loss_0": "2.715", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54456", "wps": "17609.4", "ups": "5.49", "wpb": "3207.5", "bsz": "46.7", "num_updates": "253000", "lr": "3.72152e-05", "gnorm": "0.857", "loss_scale": "1", "train_wall": "36", "gb_free": "13.2", "wall": "47751"} [2023-11-02 06:51:21,288][train_inner][INFO] - {"epoch": 63, "update": 62.43, "loss": "2.809", "ntokens": "3197", "nsentences": "46.6", "prob_perplexity": "198.53", "code_perplexity": "195.783", "temp": "0.564", "loss_0": "2.701", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54772", "wps": "17609.8", "ups": "5.51", "wpb": "3197", "bsz": "46.6", "num_updates": "253200", "lr": "3.71646e-05", "gnorm": "0.851", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "47788"} [2023-11-02 06:51:57,674][train_inner][INFO] - {"epoch": 63, "update": 62.48, "loss": "2.793", "ntokens": "3186.64", "nsentences": "45.28", "prob_perplexity": "197.96", "code_perplexity": "195.186", "temp": "0.564", "loss_0": "2.686", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54901", "wps": "17516.7", "ups": "5.5", "wpb": "3186.6", "bsz": "45.3", "num_updates": "253400", "lr": "3.71139e-05", "gnorm": "0.87", "loss_scale": "1", "train_wall": "36", "gb_free": "13.1", "wall": "47824"} [2023-11-02 06:52:33,937][train_inner][INFO] - {"epoch": 63, "update": 62.529, "loss": "2.822", "ntokens": "3173.88", "nsentences": "46.04", "prob_perplexity": "198.089", "code_perplexity": "195.325", "temp": "0.563", "loss_0": "2.715", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54484", "wps": "17506", "ups": "5.52", "wpb": "3173.9", "bsz": "46", "num_updates": "253600", "lr": "3.70633e-05", "gnorm": "0.863", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "47860"} [2023-11-02 06:53:10,642][train_inner][INFO] - {"epoch": 63, "update": 62.578, "loss": "2.895", "ntokens": "3188.68", "nsentences": "43", "prob_perplexity": "197.135", "code_perplexity": "194.383", "temp": "0.563", "loss_0": "2.787", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53025", "wps": "17375.9", "ups": "5.45", "wpb": "3188.7", "bsz": "43", "num_updates": "253800", "lr": "3.70127e-05", "gnorm": "0.868", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "47897"} [2023-11-02 06:53:47,289][train_inner][INFO] - {"epoch": 63, "update": 62.627, "loss": "2.868", "ntokens": "3233.48", "nsentences": "43.12", "prob_perplexity": "198.234", "code_perplexity": "195.471", "temp": "0.562", "loss_0": "2.761", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53355", "wps": "17648.1", "ups": "5.46", "wpb": "3233.5", "bsz": "43.1", "num_updates": "254000", "lr": "3.6962e-05", "gnorm": "0.858", "loss_scale": "1", "train_wall": "36", "gb_free": "13.1", "wall": "47934"} [2023-11-02 06:54:23,788][train_inner][INFO] - {"epoch": 63, "update": 62.677, "loss": "2.844", "ntokens": "3178.84", "nsentences": "44.4", "prob_perplexity": "197.598", "code_perplexity": "194.877", "temp": "0.561", "loss_0": "2.737", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53899", "wps": "17419.6", "ups": "5.48", "wpb": "3178.8", "bsz": "44.4", "num_updates": "254200", "lr": "3.69114e-05", "gnorm": "0.867", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "47970"} [2023-11-02 06:55:00,223][train_inner][INFO] - {"epoch": 63, "update": 62.726, "loss": "2.854", "ntokens": "3218.8", "nsentences": "44.88", "prob_perplexity": "196.775", "code_perplexity": "194", "temp": "0.561", "loss_0": "2.746", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5392", "wps": "17670.2", "ups": "5.49", "wpb": "3218.8", "bsz": "44.9", "num_updates": "254400", "lr": "3.68608e-05", "gnorm": "0.861", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "48006"} [2023-11-02 06:55:36,762][train_inner][INFO] - {"epoch": 63, "update": 62.775, "loss": "2.858", "ntokens": "3182.32", "nsentences": "44.32", "prob_perplexity": "197.579", "code_perplexity": "194.898", "temp": "0.56", "loss_0": "2.751", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53808", "wps": "17419.4", "ups": "5.47", "wpb": "3182.3", "bsz": "44.3", "num_updates": "254600", "lr": "3.68101e-05", "gnorm": "0.869", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "48043"} [2023-11-02 06:56:13,895][train_inner][INFO] - {"epoch": 63, "update": 62.825, "loss": "2.921", "ntokens": "3212.92", "nsentences": "42.2", "prob_perplexity": "198.582", "code_perplexity": "195.878", "temp": "0.56", "loss_0": "2.814", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.52451", "wps": "17313.3", "ups": "5.39", "wpb": "3212.9", "bsz": "42.2", "num_updates": "254800", "lr": "3.67595e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "36", "gb_free": "15.3", "wall": "48080"} [2023-11-02 06:56:50,038][train_inner][INFO] - {"epoch": 63, "update": 62.874, "loss": "2.858", "ntokens": "3185.2", "nsentences": "44.28", "prob_perplexity": "198.089", "code_perplexity": "195.308", "temp": "0.559", "loss_0": "2.75", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53835", "wps": "17627", "ups": "5.53", "wpb": "3185.2", "bsz": "44.3", "num_updates": "255000", "lr": "3.67089e-05", "gnorm": "0.86", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "48116"} [2023-11-02 06:57:26,605][train_inner][INFO] - {"epoch": 63, "update": 62.923, "loss": "2.851", "ntokens": "3220.84", "nsentences": "44.76", "prob_perplexity": "196.911", "code_perplexity": "194.226", "temp": "0.559", "loss_0": "2.743", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.5386", "wps": "17617.1", "ups": "5.47", "wpb": "3220.8", "bsz": "44.8", "num_updates": "255200", "lr": "3.66582e-05", "gnorm": "0.868", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "48153"} [2023-11-02 06:58:02,701][train_inner][INFO] - {"epoch": 63, "update": 62.973, "loss": "2.837", "ntokens": "3194.4", "nsentences": "43.76", "prob_perplexity": "198.169", "code_perplexity": "195.458", "temp": "0.558", "loss_0": "2.73", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53975", "wps": "17700.3", "ups": "5.54", "wpb": "3194.4", "bsz": "43.8", "num_updates": "255400", "lr": "3.66076e-05", "gnorm": "0.867", "loss_scale": "1", "train_wall": "35", "gb_free": "14.7", "wall": "48189"} [2023-11-02 06:58:22,645][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 06:58:22,647][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:58:22,667][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 74 [2023-11-02 06:58:48,256][valid][INFO] - {"epoch": 63, "valid_loss": "2.717", "valid_ntokens": "3159.31", "valid_nsentences": "44.1685", "valid_prob_perplexity": "199.097", "valid_code_perplexity": "196.446", "valid_temp": "0.557", "valid_loss_0": "2.61", "valid_loss_1": "0.099", "valid_loss_2": "0.008", "valid_accuracy": "0.56572", "valid_wps": "56056", "valid_wpb": "3159.3", "valid_bsz": "44.2", "valid_num_updates": "255511", "valid_best_loss": "2.702"} [2023-11-02 06:58:48,257][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 63 @ 255511 updates [2023-11-02 06:58:48,259][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 06:58:49,727][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 06:58:49,776][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 63 @ 255511 updates, score 2.717) (writing took 1.5185437761247158 seconds) [2023-11-02 06:58:49,777][fairseq_cli.train][INFO] - end of epoch 63 (average epoch stats below) [2023-11-02 06:58:49,779][train][INFO] - {"epoch": 63, "train_loss": "2.854", "train_ntokens": "3192.91", "train_nsentences": "44.2682", "train_prob_perplexity": "197.61", "train_code_perplexity": "194.846", "train_temp": "0.563", "train_loss_0": "2.747", "train_loss_1": "0.1", "train_loss_2": "0.008", "train_accuracy": "0.53788", "train_wps": "16977.3", "train_ups": "5.32", "train_wpb": "3192.9", "train_bsz": "44.3", "train_num_updates": "255511", "train_lr": "3.65795e-05", "train_gnorm": "0.863", "train_loss_scale": "1", "train_train_wall": "722", "train_gb_free": "13.9", "train_wall": "48236"} [2023-11-02 06:58:49,781][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 06:58:49,799][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 64 [2023-11-02 06:58:49,966][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 06:58:50,026][fairseq.trainer][INFO] - begin training epoch 64 [2023-11-02 06:58:50,028][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 06:59:05,984][train_inner][INFO] - {"epoch": 64, "update": 63.022, "loss": "2.866", "ntokens": "3150.48", "nsentences": "42.2", "prob_perplexity": "198.954", "code_perplexity": "196.23", "temp": "0.557", "loss_0": "2.759", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53446", "wps": "9957.2", "ups": "3.16", "wpb": "3150.5", "bsz": "42.2", "num_updates": "255600", "lr": "3.6557e-05", "gnorm": "0.869", "loss_scale": "1", "train_wall": "35", "gb_free": "15.8", "wall": "48252"} [2023-11-02 06:59:41,834][train_inner][INFO] - {"epoch": 64, "update": 63.071, "loss": "2.864", "ntokens": "3165.68", "nsentences": "41.04", "prob_perplexity": "196.897", "code_perplexity": "194.145", "temp": "0.557", "loss_0": "2.756", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53197", "wps": "17661.7", "ups": "5.58", "wpb": "3165.7", "bsz": "41", "num_updates": "255800", "lr": "3.65063e-05", "gnorm": "0.873", "loss_scale": "1", "train_wall": "35", "gb_free": "15.9", "wall": "48288"} [2023-11-02 07:00:17,625][train_inner][INFO] - {"epoch": 64, "update": 63.121, "loss": "2.89", "ntokens": "3201.36", "nsentences": "43.88", "prob_perplexity": "197.646", "code_perplexity": "194.951", "temp": "0.556", "loss_0": "2.782", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53186", "wps": "17890.6", "ups": "5.59", "wpb": "3201.4", "bsz": "43.9", "num_updates": "256000", "lr": "3.64557e-05", "gnorm": "0.868", "loss_scale": "1", "train_wall": "35", "gb_free": "15.2", "wall": "48324"} [2023-11-02 07:00:53,618][train_inner][INFO] - {"epoch": 64, "update": 63.17, "loss": "2.866", "ntokens": "3209.64", "nsentences": "44.64", "prob_perplexity": "198.544", "code_perplexity": "195.822", "temp": "0.556", "loss_0": "2.759", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53714", "wps": "17835.9", "ups": "5.56", "wpb": "3209.6", "bsz": "44.6", "num_updates": "256200", "lr": "3.64051e-05", "gnorm": "0.871", "loss_scale": "1", "train_wall": "35", "gb_free": "14.1", "wall": "48360"} [2023-11-02 07:01:29,230][train_inner][INFO] - {"epoch": 64, "update": 63.219, "loss": "2.822", "ntokens": "3168.04", "nsentences": "45.88", "prob_perplexity": "197.998", "code_perplexity": "195.222", "temp": "0.555", "loss_0": "2.714", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54591", "wps": "17793.2", "ups": "5.62", "wpb": "3168", "bsz": "45.9", "num_updates": "256400", "lr": "3.63544e-05", "gnorm": "0.874", "loss_scale": "1", "train_wall": "35", "gb_free": "15.1", "wall": "48395"} [2023-11-02 07:02:05,243][train_inner][INFO] - {"epoch": 64, "update": 63.268, "loss": "2.859", "ntokens": "3219", "nsentences": "45.88", "prob_perplexity": "198.243", "code_perplexity": "195.537", "temp": "0.555", "loss_0": "2.752", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53853", "wps": "17878.1", "ups": "5.55", "wpb": "3219", "bsz": "45.9", "num_updates": "256600", "lr": "3.63038e-05", "gnorm": "0.85", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "48431"} [2023-11-02 07:02:41,748][train_inner][INFO] - {"epoch": 64, "update": 63.318, "loss": "2.801", "ntokens": "3173.56", "nsentences": "45.08", "prob_perplexity": "197.655", "code_perplexity": "194.98", "temp": "0.554", "loss_0": "2.694", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54866", "wps": "17388", "ups": "5.48", "wpb": "3173.6", "bsz": "45.1", "num_updates": "256800", "lr": "3.62532e-05", "gnorm": "0.859", "loss_scale": "1", "train_wall": "36", "gb_free": "12", "wall": "48468"} [2023-11-02 07:03:18,075][train_inner][INFO] - {"epoch": 64, "update": 63.367, "loss": "2.839", "ntokens": "3206.88", "nsentences": "44.56", "prob_perplexity": "196.784", "code_perplexity": "194.073", "temp": "0.554", "loss_0": "2.732", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.54076", "wps": "17656.9", "ups": "5.51", "wpb": "3206.9", "bsz": "44.6", "num_updates": "257000", "lr": "3.62025e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "36", "gb_free": "13.9", "wall": "48504"} [2023-11-02 07:03:54,725][train_inner][INFO] - {"epoch": 64, "update": 63.416, "loss": "2.855", "ntokens": "3218.2", "nsentences": "43.56", "prob_perplexity": "198.465", "code_perplexity": "195.806", "temp": "0.553", "loss_0": "2.748", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53661", "wps": "17562.7", "ups": "5.46", "wpb": "3218.2", "bsz": "43.6", "num_updates": "257200", "lr": "3.61519e-05", "gnorm": "0.862", "loss_scale": "1", "train_wall": "36", "gb_free": "14.6", "wall": "48541"} [2023-11-02 07:04:31,190][train_inner][INFO] - {"epoch": 64, "update": 63.466, "loss": "2.952", "ntokens": "3201.76", "nsentences": "40", "prob_perplexity": "197.168", "code_perplexity": "194.562", "temp": "0.552", "loss_0": "2.845", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.51727", "wps": "17561.8", "ups": "5.49", "wpb": "3201.8", "bsz": "40", "num_updates": "257400", "lr": "3.61013e-05", "gnorm": "0.874", "loss_scale": "1", "train_wall": "36", "gb_free": "15.4", "wall": "48577"} [2023-11-02 07:05:07,048][train_inner][INFO] - {"epoch": 64, "update": 63.515, "loss": "2.823", "ntokens": "3157.16", "nsentences": "43.16", "prob_perplexity": "199.106", "code_perplexity": "196.403", "temp": "0.552", "loss_0": "2.716", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.5411", "wps": "17610.2", "ups": "5.58", "wpb": "3157.2", "bsz": "43.2", "num_updates": "257600", "lr": "3.60506e-05", "gnorm": "0.863", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "48613"} [2023-11-02 07:05:43,536][train_inner][INFO] - {"epoch": 64, "update": 63.564, "loss": "2.84", "ntokens": "3192.64", "nsentences": "44.24", "prob_perplexity": "199.009", "code_perplexity": "196.3", "temp": "0.551", "loss_0": "2.733", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53925", "wps": "17501", "ups": "5.48", "wpb": "3192.6", "bsz": "44.2", "num_updates": "257800", "lr": "3.6e-05", "gnorm": "0.906", "loss_scale": "1", "train_wall": "36", "gb_free": "12.5", "wall": "48650"} [2023-11-02 07:06:19,629][train_inner][INFO] - {"epoch": 64, "update": 63.614, "loss": "2.803", "ntokens": "3203.48", "nsentences": "45.8", "prob_perplexity": "198.845", "code_perplexity": "196.134", "temp": "0.551", "loss_0": "2.696", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54757", "wps": "17752.3", "ups": "5.54", "wpb": "3203.5", "bsz": "45.8", "num_updates": "258000", "lr": "3.59494e-05", "gnorm": "0.872", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "48686"} [2023-11-02 07:06:55,525][train_inner][INFO] - {"epoch": 64, "update": 63.663, "loss": "2.841", "ntokens": "3200.24", "nsentences": "45.56", "prob_perplexity": "199.68", "code_perplexity": "196.935", "temp": "0.55", "loss_0": "2.734", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54167", "wps": "17831.3", "ups": "5.57", "wpb": "3200.2", "bsz": "45.6", "num_updates": "258200", "lr": "3.58987e-05", "gnorm": "0.865", "loss_scale": "1", "train_wall": "35", "gb_free": "14.3", "wall": "48722"} [2023-11-02 07:07:32,126][train_inner][INFO] - {"epoch": 64, "update": 63.712, "loss": "2.851", "ntokens": "3185.08", "nsentences": "44.36", "prob_perplexity": "197.746", "code_perplexity": "195.057", "temp": "0.55", "loss_0": "2.744", "loss_1": "0.1", "loss_2": "0.008", "accuracy": "0.53892", "wps": "17405.7", "ups": "5.46", "wpb": "3185.1", "bsz": "44.4", "num_updates": "258400", "lr": "3.58481e-05", "gnorm": "0.884", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "48758"} [2023-11-02 07:08:08,362][train_inner][INFO] - {"epoch": 64, "update": 63.762, "loss": "2.846", "ntokens": "3184.12", "nsentences": "43.04", "prob_perplexity": "199.267", "code_perplexity": "196.572", "temp": "0.549", "loss_0": "2.739", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53773", "wps": "17575.2", "ups": "5.52", "wpb": "3184.1", "bsz": "43", "num_updates": "258600", "lr": "3.57975e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "36", "gb_free": "12.2", "wall": "48795"} [2023-11-02 07:08:44,123][train_inner][INFO] - {"epoch": 64, "update": 63.811, "loss": "2.767", "ntokens": "3147.08", "nsentences": "47", "prob_perplexity": "198.732", "code_perplexity": "196.011", "temp": "0.549", "loss_0": "2.66", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.55537", "wps": "17601.9", "ups": "5.59", "wpb": "3147.1", "bsz": "47", "num_updates": "258800", "lr": "3.57468e-05", "gnorm": "0.863", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "48830"} [2023-11-02 07:09:19,882][train_inner][INFO] - {"epoch": 64, "update": 63.86, "loss": "2.83", "ntokens": "3189.16", "nsentences": "46.24", "prob_perplexity": "198.506", "code_perplexity": "195.757", "temp": "0.548", "loss_0": "2.723", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54411", "wps": "17838.2", "ups": "5.59", "wpb": "3189.2", "bsz": "46.2", "num_updates": "259000", "lr": "3.56962e-05", "gnorm": "0.864", "loss_scale": "1", "train_wall": "35", "gb_free": "13.1", "wall": "48866"} [2023-11-02 07:09:55,929][train_inner][INFO] - {"epoch": 64, "update": 63.91, "loss": "2.825", "ntokens": "3204.84", "nsentences": "45.36", "prob_perplexity": "199.371", "code_perplexity": "196.622", "temp": "0.548", "loss_0": "2.718", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54281", "wps": "17782.1", "ups": "5.55", "wpb": "3204.8", "bsz": "45.4", "num_updates": "259200", "lr": "3.56456e-05", "gnorm": "0.855", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "48902"} [2023-11-02 07:10:32,367][train_inner][INFO] - {"epoch": 64, "update": 63.959, "loss": "2.871", "ntokens": "3174.32", "nsentences": "42.56", "prob_perplexity": "199.302", "code_perplexity": "196.598", "temp": "0.547", "loss_0": "2.764", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53303", "wps": "17424.1", "ups": "5.49", "wpb": "3174.3", "bsz": "42.6", "num_updates": "259400", "lr": "3.55949e-05", "gnorm": "0.87", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "48939"} [2023-11-02 07:11:02,566][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 07:11:02,568][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:11:02,588][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 75 [2023-11-02 07:11:28,241][valid][INFO] - {"epoch": 64, "valid_loss": "2.712", "valid_ntokens": "3165.62", "valid_nsentences": "44.1685", "valid_prob_perplexity": "197.804", "valid_code_perplexity": "195.205", "valid_temp": "0.546", "valid_loss_0": "2.605", "valid_loss_1": "0.1", "valid_loss_2": "0.008", "valid_accuracy": "0.56558", "valid_wps": "56046.7", "valid_wpb": "3165.6", "valid_bsz": "44.2", "valid_num_updates": "259567", "valid_best_loss": "2.702"} [2023-11-02 07:11:28,243][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 64 @ 259567 updates [2023-11-02 07:11:28,245][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 07:11:29,692][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 07:11:29,749][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 64 @ 259567 updates, score 2.712) (writing took 1.5058758002705872 seconds) [2023-11-02 07:11:29,750][fairseq_cli.train][INFO] - end of epoch 64 (average epoch stats below) [2023-11-02 07:11:29,752][train][INFO] - {"epoch": 64, "train_loss": "2.845", "train_ntokens": "3189.68", "train_nsentences": "44.2682", "train_prob_perplexity": "198.379", "train_code_perplexity": "195.67", "train_temp": "0.552", "train_loss_0": "2.738", "train_loss_1": "0.1", "train_loss_2": "0.008", "train_accuracy": "0.53939", "train_wps": "17023.5", "train_ups": "5.34", "train_wpb": "3189.7", "train_bsz": "44.3", "train_num_updates": "259567", "train_lr": "3.55527e-05", "train_gnorm": "0.869", "train_loss_scale": "1", "train_train_wall": "720", "train_gb_free": "13.2", "train_wall": "48996"} [2023-11-02 07:11:29,755][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:11:29,772][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 65 [2023-11-02 07:11:29,941][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 07:11:29,995][fairseq.trainer][INFO] - begin training epoch 65 [2023-11-02 07:11:29,996][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 07:11:36,135][train_inner][INFO] - {"epoch": 65, "update": 64.008, "loss": "2.843", "ntokens": "3201.12", "nsentences": "45.04", "prob_perplexity": "198.549", "code_perplexity": "195.848", "temp": "0.546", "loss_0": "2.736", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54158", "wps": "10040.2", "ups": "3.14", "wpb": "3201.1", "bsz": "45", "num_updates": "259600", "lr": "3.55443e-05", "gnorm": "0.87", "loss_scale": "1", "train_wall": "36", "gb_free": "12.4", "wall": "49002"} [2023-11-02 07:12:11,976][train_inner][INFO] - {"epoch": 65, "update": 64.057, "loss": "2.831", "ntokens": "3208.32", "nsentences": "44.12", "prob_perplexity": "198.022", "code_perplexity": "195.346", "temp": "0.546", "loss_0": "2.724", "loss_1": "0.1", "loss_2": "0.007", "accuracy": "0.54086", "wps": "17904.4", "ups": "5.58", "wpb": "3208.3", "bsz": "44.1", "num_updates": "259800", "lr": "3.54937e-05", "gnorm": "0.87", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "49038"} [2023-11-02 07:12:48,012][train_inner][INFO] - {"epoch": 65, "update": 64.107, "loss": "2.828", "ntokens": "3214.2", "nsentences": "45.8", "prob_perplexity": "198.756", "code_perplexity": "196.056", "temp": "0.545", "loss_0": "2.721", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54305", "wps": "17840.2", "ups": "5.55", "wpb": "3214.2", "bsz": "45.8", "num_updates": "260000", "lr": "3.5443e-05", "gnorm": "0.866", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "49074"} [2023-11-02 07:13:23,919][train_inner][INFO] - {"epoch": 65, "update": 64.156, "loss": "2.877", "ntokens": "3228.8", "nsentences": "44.52", "prob_perplexity": "199.289", "code_perplexity": "196.542", "temp": "0.545", "loss_0": "2.77", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53326", "wps": "17985", "ups": "5.57", "wpb": "3228.8", "bsz": "44.5", "num_updates": "260200", "lr": "3.53924e-05", "gnorm": "0.863", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "49110"} [2023-11-02 07:13:59,945][train_inner][INFO] - {"epoch": 65, "update": 64.205, "loss": "2.832", "ntokens": "3206", "nsentences": "44.96", "prob_perplexity": "199.323", "code_perplexity": "196.583", "temp": "0.544", "loss_0": "2.725", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54188", "wps": "17799.3", "ups": "5.55", "wpb": "3206", "bsz": "45", "num_updates": "260400", "lr": "3.53418e-05", "gnorm": "0.87", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "49146"} [2023-11-02 07:14:36,139][train_inner][INFO] - {"epoch": 65, "update": 64.255, "loss": "2.795", "ntokens": "3148.48", "nsentences": "45.08", "prob_perplexity": "199.088", "code_perplexity": "196.395", "temp": "0.544", "loss_0": "2.688", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54858", "wps": "17399", "ups": "5.53", "wpb": "3148.5", "bsz": "45.1", "num_updates": "260600", "lr": "3.52911e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "49182"} [2023-11-02 07:15:12,440][train_inner][INFO] - {"epoch": 65, "update": 64.304, "loss": "2.94", "ntokens": "3200", "nsentences": "42.04", "prob_perplexity": "198.972", "code_perplexity": "196.236", "temp": "0.543", "loss_0": "2.833", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.52246", "wps": "17631.3", "ups": "5.51", "wpb": "3200", "bsz": "42", "num_updates": "260800", "lr": "3.52405e-05", "gnorm": "0.874", "loss_scale": "2", "train_wall": "36", "gb_free": "16.6", "wall": "49219"} [2023-11-02 07:15:49,134][train_inner][INFO] - {"epoch": 65, "update": 64.353, "loss": "2.886", "ntokens": "3251.2", "nsentences": "43.64", "prob_perplexity": "200.064", "code_perplexity": "197.389", "temp": "0.543", "loss_0": "2.779", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53193", "wps": "17722", "ups": "5.45", "wpb": "3251.2", "bsz": "43.6", "num_updates": "261000", "lr": "3.51899e-05", "gnorm": "0.864", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "49255"} [2023-11-02 07:16:25,253][train_inner][INFO] - {"epoch": 65, "update": 64.403, "loss": "2.867", "ntokens": "3189.8", "nsentences": "43.24", "prob_perplexity": "198.537", "code_perplexity": "195.88", "temp": "0.542", "loss_0": "2.76", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53418", "wps": "17663.8", "ups": "5.54", "wpb": "3189.8", "bsz": "43.2", "num_updates": "261200", "lr": "3.51392e-05", "gnorm": "0.872", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "49291"} [2023-11-02 07:17:01,912][train_inner][INFO] - {"epoch": 65, "update": 64.452, "loss": "2.81", "ntokens": "3180.6", "nsentences": "43.24", "prob_perplexity": "199.496", "code_perplexity": "196.792", "temp": "0.542", "loss_0": "2.703", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54359", "wps": "17353.4", "ups": "5.46", "wpb": "3180.6", "bsz": "43.2", "num_updates": "261400", "lr": "3.50886e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "49328"} [2023-11-02 07:17:37,914][train_inner][INFO] - {"epoch": 65, "update": 64.501, "loss": "2.861", "ntokens": "3174.32", "nsentences": "43.76", "prob_perplexity": "200.074", "code_perplexity": "197.342", "temp": "0.541", "loss_0": "2.755", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.5369", "wps": "17635.4", "ups": "5.56", "wpb": "3174.3", "bsz": "43.8", "num_updates": "261600", "lr": "3.5038e-05", "gnorm": "0.87", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "49364"} [2023-11-02 07:18:14,102][train_inner][INFO] - {"epoch": 65, "update": 64.551, "loss": "2.868", "ntokens": "3228.96", "nsentences": "42.96", "prob_perplexity": "199.193", "code_perplexity": "196.461", "temp": "0.54", "loss_0": "2.761", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53325", "wps": "17846.4", "ups": "5.53", "wpb": "3229", "bsz": "43", "num_updates": "261800", "lr": "3.49873e-05", "gnorm": "0.871", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "49400"} [2023-11-02 07:18:50,669][train_inner][INFO] - {"epoch": 65, "update": 64.6, "loss": "2.859", "ntokens": "3214.96", "nsentences": "43.24", "prob_perplexity": "200.127", "code_perplexity": "197.403", "temp": "0.54", "loss_0": "2.752", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53625", "wps": "17585", "ups": "5.47", "wpb": "3215", "bsz": "43.2", "num_updates": "262000", "lr": "3.49367e-05", "gnorm": "0.871", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "49437"} [2023-11-02 07:19:26,784][train_inner][INFO] - {"epoch": 65, "update": 64.649, "loss": "2.811", "ntokens": "3203.4", "nsentences": "45.8", "prob_perplexity": "199.568", "code_perplexity": "196.898", "temp": "0.539", "loss_0": "2.704", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54639", "wps": "17740.8", "ups": "5.54", "wpb": "3203.4", "bsz": "45.8", "num_updates": "262200", "lr": "3.48861e-05", "gnorm": "0.866", "loss_scale": "2", "train_wall": "35", "gb_free": "12.3", "wall": "49473"} [2023-11-02 07:20:02,773][train_inner][INFO] - {"epoch": 65, "update": 64.698, "loss": "2.795", "ntokens": "3156.44", "nsentences": "45.8", "prob_perplexity": "198.889", "code_perplexity": "196.159", "temp": "0.539", "loss_0": "2.688", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54926", "wps": "17542.5", "ups": "5.56", "wpb": "3156.4", "bsz": "45.8", "num_updates": "262400", "lr": "3.48354e-05", "gnorm": "0.879", "loss_scale": "2", "train_wall": "35", "gb_free": "15.9", "wall": "49509"} [2023-11-02 07:20:39,676][train_inner][INFO] - {"epoch": 65, "update": 64.748, "loss": "2.814", "ntokens": "3155.2", "nsentences": "43.76", "prob_perplexity": "198.873", "code_perplexity": "196.171", "temp": "0.538", "loss_0": "2.707", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.54398", "wps": "17100.7", "ups": "5.42", "wpb": "3155.2", "bsz": "43.8", "num_updates": "262600", "lr": "3.47848e-05", "gnorm": "0.883", "loss_scale": "2", "train_wall": "36", "gb_free": "15.1", "wall": "49546"} [2023-11-02 07:21:16,103][train_inner][INFO] - {"epoch": 65, "update": 64.797, "loss": "2.841", "ntokens": "3161.8", "nsentences": "44.52", "prob_perplexity": "199.295", "code_perplexity": "196.616", "temp": "0.538", "loss_0": "2.734", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54127", "wps": "17360.6", "ups": "5.49", "wpb": "3161.8", "bsz": "44.5", "num_updates": "262800", "lr": "3.47342e-05", "gnorm": "0.872", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "49582"} [2023-11-02 07:21:52,381][train_inner][INFO] - {"epoch": 65, "update": 64.846, "loss": "2.85", "ntokens": "3160.84", "nsentences": "41.6", "prob_perplexity": "198.39", "code_perplexity": "195.661", "temp": "0.537", "loss_0": "2.743", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53596", "wps": "17426.8", "ups": "5.51", "wpb": "3160.8", "bsz": "41.6", "num_updates": "263000", "lr": "3.46835e-05", "gnorm": "0.882", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "49619"} [2023-11-02 07:22:28,657][train_inner][INFO] - {"epoch": 65, "update": 64.896, "loss": "2.852", "ntokens": "3208.24", "nsentences": "44.12", "prob_perplexity": "199.973", "code_perplexity": "197.262", "temp": "0.537", "loss_0": "2.746", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53823", "wps": "17688.9", "ups": "5.51", "wpb": "3208.2", "bsz": "44.1", "num_updates": "263200", "lr": "3.46329e-05", "gnorm": "0.867", "loss_scale": "2", "train_wall": "36", "gb_free": "12.7", "wall": "49655"} [2023-11-02 07:23:05,022][train_inner][INFO] - {"epoch": 65, "update": 64.945, "loss": "2.769", "ntokens": "3163.8", "nsentences": "46.32", "prob_perplexity": "199.712", "code_perplexity": "196.969", "temp": "0.536", "loss_0": "2.663", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55423", "wps": "17410.5", "ups": "5.5", "wpb": "3163.8", "bsz": "46.3", "num_updates": "263400", "lr": "3.45823e-05", "gnorm": "0.87", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "49691"} [2023-11-02 07:23:40,931][train_inner][INFO] - {"epoch": 65, "update": 64.994, "loss": "2.767", "ntokens": "3192.64", "nsentences": "47.68", "prob_perplexity": "200.215", "code_perplexity": "197.548", "temp": "0.536", "loss_0": "2.661", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55583", "wps": "17782.9", "ups": "5.57", "wpb": "3192.6", "bsz": "47.7", "num_updates": "263600", "lr": "3.45316e-05", "gnorm": "0.867", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "49727"} [2023-11-02 07:23:45,114][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 07:23:45,116][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:23:45,135][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 76 [2023-11-02 07:24:10,986][valid][INFO] - {"epoch": 65, "valid_loss": "2.71", "valid_ntokens": "3160.44", "valid_nsentences": "44.1685", "valid_prob_perplexity": "199.187", "valid_code_perplexity": "196.616", "valid_temp": "0.535", "valid_loss_0": "2.603", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.56607", "valid_wps": "55500.5", "valid_wpb": "3160.4", "valid_bsz": "44.2", "valid_num_updates": "263623", "valid_best_loss": "2.702"} [2023-11-02 07:24:10,988][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 65 @ 263623 updates [2023-11-02 07:24:10,990][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 07:24:12,422][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 07:24:12,481][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 65 @ 263623 updates, score 2.71) (writing took 1.4931017220951617 seconds) [2023-11-02 07:24:12,482][fairseq_cli.train][INFO] - end of epoch 65 (average epoch stats below) [2023-11-02 07:24:12,484][train][INFO] - {"epoch": 65, "train_loss": "2.839", "train_ntokens": "3192.47", "train_nsentences": "44.2682", "train_prob_perplexity": "199.287", "train_code_perplexity": "196.582", "train_temp": "0.541", "train_loss_0": "2.732", "train_loss_1": "0.099", "train_loss_2": "0.007", "train_accuracy": "0.54035", "train_wps": "16976.8", "train_ups": "5.32", "train_wpb": "3192.5", "train_bsz": "44.3", "train_num_updates": "263623", "train_lr": "3.45258e-05", "train_gnorm": "0.871", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "14.4", "train_wall": "49759"} [2023-11-02 07:24:12,487][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:24:12,507][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 66 [2023-11-02 07:24:12,676][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 07:24:12,731][fairseq.trainer][INFO] - begin training epoch 66 [2023-11-02 07:24:12,732][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 07:24:44,742][train_inner][INFO] - {"epoch": 66, "update": 65.044, "loss": "2.791", "ntokens": "3191.88", "nsentences": "46.52", "prob_perplexity": "200.209", "code_perplexity": "197.523", "temp": "0.535", "loss_0": "2.685", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55068", "wps": "10004.4", "ups": "3.13", "wpb": "3191.9", "bsz": "46.5", "num_updates": "263800", "lr": "3.4481e-05", "gnorm": "0.877", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "49791"} [2023-11-02 07:25:20,461][train_inner][INFO] - {"epoch": 66, "update": 65.093, "loss": "2.811", "ntokens": "3209.88", "nsentences": "44.72", "prob_perplexity": "198.701", "code_perplexity": "196.105", "temp": "0.535", "loss_0": "2.704", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54576", "wps": "17974.1", "ups": "5.6", "wpb": "3209.9", "bsz": "44.7", "num_updates": "264000", "lr": "3.44304e-05", "gnorm": "0.876", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "49827"} [2023-11-02 07:25:55,923][train_inner][INFO] - {"epoch": 66, "update": 65.142, "loss": "2.743", "ntokens": "3127.32", "nsentences": "45.32", "prob_perplexity": "198.384", "code_perplexity": "195.752", "temp": "0.534", "loss_0": "2.636", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55738", "wps": "17638.9", "ups": "5.64", "wpb": "3127.3", "bsz": "45.3", "num_updates": "264200", "lr": "3.43797e-05", "gnorm": "0.877", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "49862"} [2023-11-02 07:26:31,965][train_inner][INFO] - {"epoch": 66, "update": 65.192, "loss": "2.805", "ntokens": "3185.12", "nsentences": "46.36", "prob_perplexity": "199.792", "code_perplexity": "197.186", "temp": "0.533", "loss_0": "2.699", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54746", "wps": "17675.6", "ups": "5.55", "wpb": "3185.1", "bsz": "46.4", "num_updates": "264400", "lr": "3.43291e-05", "gnorm": "0.872", "loss_scale": "2", "train_wall": "35", "gb_free": "14.6", "wall": "49898"} [2023-11-02 07:27:07,391][train_inner][INFO] - {"epoch": 66, "update": 65.241, "loss": "2.892", "ntokens": "3200.88", "nsentences": "42.04", "prob_perplexity": "199.532", "code_perplexity": "196.886", "temp": "0.533", "loss_0": "2.785", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52928", "wps": "18071.7", "ups": "5.65", "wpb": "3200.9", "bsz": "42", "num_updates": "264600", "lr": "3.42785e-05", "gnorm": "0.88", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "49934"} [2023-11-02 07:27:43,552][train_inner][INFO] - {"epoch": 66, "update": 65.29, "loss": "2.837", "ntokens": "3201.8", "nsentences": "44.16", "prob_perplexity": "199.499", "code_perplexity": "196.848", "temp": "0.532", "loss_0": "2.73", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54111", "wps": "17710", "ups": "5.53", "wpb": "3201.8", "bsz": "44.2", "num_updates": "264800", "lr": "3.42278e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "49970"} [2023-11-02 07:28:20,096][train_inner][INFO] - {"epoch": 66, "update": 65.339, "loss": "2.862", "ntokens": "3215.24", "nsentences": "42.92", "prob_perplexity": "199.644", "code_perplexity": "196.986", "temp": "0.532", "loss_0": "2.756", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.5349", "wps": "17597.5", "ups": "5.47", "wpb": "3215.2", "bsz": "42.9", "num_updates": "265000", "lr": "3.41772e-05", "gnorm": "0.877", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "50006"} [2023-11-02 07:28:56,287][train_inner][INFO] - {"epoch": 66, "update": 65.389, "loss": "2.899", "ntokens": "3189.32", "nsentences": "40.12", "prob_perplexity": "199.028", "code_perplexity": "196.389", "temp": "0.531", "loss_0": "2.792", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52634", "wps": "17626.3", "ups": "5.53", "wpb": "3189.3", "bsz": "40.1", "num_updates": "265200", "lr": "3.41266e-05", "gnorm": "0.879", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "50043"} [2023-11-02 07:29:32,930][train_inner][INFO] - {"epoch": 66, "update": 65.438, "loss": "2.79", "ntokens": "3164.8", "nsentences": "45.72", "prob_perplexity": "199.266", "code_perplexity": "196.552", "temp": "0.531", "loss_0": "2.683", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55038", "wps": "17275", "ups": "5.46", "wpb": "3164.8", "bsz": "45.7", "num_updates": "265400", "lr": "3.40759e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "50079"} [2023-11-02 07:30:09,523][train_inner][INFO] - {"epoch": 66, "update": 65.487, "loss": "2.829", "ntokens": "3157.92", "nsentences": "43.12", "prob_perplexity": "198.951", "code_perplexity": "196.277", "temp": "0.53", "loss_0": "2.722", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54156", "wps": "17260.5", "ups": "5.47", "wpb": "3157.9", "bsz": "43.1", "num_updates": "265600", "lr": "3.40253e-05", "gnorm": "0.886", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "50116"} [2023-11-02 07:30:46,058][train_inner][INFO] - {"epoch": 66, "update": 65.537, "loss": "2.791", "ntokens": "3162.8", "nsentences": "44.24", "prob_perplexity": "199.911", "code_perplexity": "197.224", "temp": "0.53", "loss_0": "2.685", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54877", "wps": "17314.9", "ups": "5.47", "wpb": "3162.8", "bsz": "44.2", "num_updates": "265800", "lr": "3.39747e-05", "gnorm": "0.875", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "50152"} [2023-11-02 07:31:22,643][train_inner][INFO] - {"epoch": 66, "update": 65.586, "loss": "2.863", "ntokens": "3191.48", "nsentences": "44.64", "prob_perplexity": "199.884", "code_perplexity": "197.237", "temp": "0.529", "loss_0": "2.757", "loss_1": "0.099", "loss_2": "0.008", "accuracy": "0.53641", "wps": "17448.2", "ups": "5.47", "wpb": "3191.5", "bsz": "44.6", "num_updates": "266000", "lr": "3.39241e-05", "gnorm": "0.884", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "50189"} [2023-11-02 07:31:59,128][train_inner][INFO] - {"epoch": 66, "update": 65.635, "loss": "2.875", "ntokens": "3205.76", "nsentences": "43.56", "prob_perplexity": "200.615", "code_perplexity": "197.944", "temp": "0.529", "loss_0": "2.769", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53345", "wps": "17574.1", "ups": "5.48", "wpb": "3205.8", "bsz": "43.6", "num_updates": "266200", "lr": "3.38734e-05", "gnorm": "0.876", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "50225"} [2023-11-02 07:32:35,938][train_inner][INFO] - {"epoch": 66, "update": 65.685, "loss": "2.792", "ntokens": "3167.72", "nsentences": "45.72", "prob_perplexity": "199.972", "code_perplexity": "197.326", "temp": "0.528", "loss_0": "2.686", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54967", "wps": "17212", "ups": "5.43", "wpb": "3167.7", "bsz": "45.7", "num_updates": "266400", "lr": "3.38228e-05", "gnorm": "0.879", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "50262"} [2023-11-02 07:33:12,749][train_inner][INFO] - {"epoch": 66, "update": 65.734, "loss": "2.858", "ntokens": "3205.68", "nsentences": "42.12", "prob_perplexity": "200.432", "code_perplexity": "197.756", "temp": "0.528", "loss_0": "2.752", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53418", "wps": "17418.1", "ups": "5.43", "wpb": "3205.7", "bsz": "42.1", "num_updates": "266600", "lr": "3.37722e-05", "gnorm": "0.877", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "50299"} [2023-11-02 07:33:49,772][train_inner][INFO] - {"epoch": 66, "update": 65.783, "loss": "2.801", "ntokens": "3205.32", "nsentences": "47.96", "prob_perplexity": "199.439", "code_perplexity": "196.77", "temp": "0.527", "loss_0": "2.695", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55048", "wps": "17316.6", "ups": "5.4", "wpb": "3205.3", "bsz": "48", "num_updates": "266800", "lr": "3.37215e-05", "gnorm": "0.866", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "50336"} [2023-11-02 07:34:26,039][train_inner][INFO] - {"epoch": 66, "update": 65.833, "loss": "2.799", "ntokens": "3184.76", "nsentences": "45.88", "prob_perplexity": "199.934", "code_perplexity": "197.357", "temp": "0.527", "loss_0": "2.692", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54971", "wps": "17563.5", "ups": "5.51", "wpb": "3184.8", "bsz": "45.9", "num_updates": "267000", "lr": "3.36709e-05", "gnorm": "0.861", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "50372"} [2023-11-02 07:35:02,217][train_inner][INFO] - {"epoch": 66, "update": 65.882, "loss": "2.9", "ntokens": "3208.76", "nsentences": "41.88", "prob_perplexity": "200.66", "code_perplexity": "197.988", "temp": "0.526", "loss_0": "2.793", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52855", "wps": "17739.9", "ups": "5.53", "wpb": "3208.8", "bsz": "41.9", "num_updates": "267200", "lr": "3.36203e-05", "gnorm": "0.872", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "50408"} [2023-11-02 07:35:38,757][train_inner][INFO] - {"epoch": 66, "update": 65.931, "loss": "2.805", "ntokens": "3195.2", "nsentences": "44.28", "prob_perplexity": "200.193", "code_perplexity": "197.577", "temp": "0.526", "loss_0": "2.698", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54622", "wps": "17490.4", "ups": "5.47", "wpb": "3195.2", "bsz": "44.3", "num_updates": "267400", "lr": "3.35696e-05", "gnorm": "0.87", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "50445"} [2023-11-02 07:36:15,224][train_inner][INFO] - {"epoch": 66, "update": 65.981, "loss": "2.812", "ntokens": "3174.56", "nsentences": "43.44", "prob_perplexity": "199.956", "code_perplexity": "197.332", "temp": "0.525", "loss_0": "2.706", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54451", "wps": "17411.6", "ups": "5.48", "wpb": "3174.6", "bsz": "43.4", "num_updates": "267600", "lr": "3.3519e-05", "gnorm": "0.891", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "50481"} [2023-11-02 07:36:29,611][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 07:36:29,612][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:36:29,632][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 77 [2023-11-02 07:36:55,155][valid][INFO] - {"epoch": 66, "valid_loss": "2.685", "valid_ntokens": "3148.12", "valid_nsentences": "44.1685", "valid_prob_perplexity": "199.036", "valid_code_perplexity": "196.579", "valid_temp": "0.525", "valid_loss_0": "2.578", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.5699", "valid_wps": "55953.5", "valid_wpb": "3148.1", "valid_bsz": "44.2", "valid_num_updates": "267679", "valid_best_loss": "2.685"} [2023-11-02 07:36:55,157][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 66 @ 267679 updates [2023-11-02 07:36:55,159][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 07:36:56,620][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 07:36:57,698][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 66 @ 267679 updates, score 2.685) (writing took 2.5407896139658988 seconds) [2023-11-02 07:36:57,699][fairseq_cli.train][INFO] - end of epoch 66 (average epoch stats below) [2023-11-02 07:36:57,701][train][INFO] - {"epoch": 66, "train_loss": "2.83", "train_ntokens": "3188.91", "train_nsentences": "44.2682", "train_prob_perplexity": "199.727", "train_code_perplexity": "197.077", "train_temp": "0.53", "train_loss_0": "2.723", "train_loss_1": "0.099", "train_loss_2": "0.007", "train_accuracy": "0.54207", "train_wps": "16902.7", "train_ups": "5.3", "train_wpb": "3188.9", "train_bsz": "44.3", "train_num_updates": "267679", "train_lr": "3.3499e-05", "train_gnorm": "0.876", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "13.9", "train_wall": "50524"} [2023-11-02 07:36:57,704][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:36:57,726][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 67 [2023-11-02 07:36:57,895][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 07:36:57,948][fairseq.trainer][INFO] - begin training epoch 67 [2023-11-02 07:36:57,949][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 07:37:19,682][train_inner][INFO] - {"epoch": 67, "update": 66.03, "loss": "2.856", "ntokens": "3181.08", "nsentences": "44.56", "prob_perplexity": "199.746", "code_perplexity": "197.164", "temp": "0.524", "loss_0": "2.749", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53765", "wps": "9870.5", "ups": "3.1", "wpb": "3181.1", "bsz": "44.6", "num_updates": "267800", "lr": "3.34684e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "35", "gb_free": "14.8", "wall": "50546"} [2023-11-02 07:37:55,294][train_inner][INFO] - {"epoch": 67, "update": 66.079, "loss": "2.845", "ntokens": "3216.96", "nsentences": "43.44", "prob_perplexity": "200.479", "code_perplexity": "197.886", "temp": "0.524", "loss_0": "2.739", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53773", "wps": "18068", "ups": "5.62", "wpb": "3217", "bsz": "43.4", "num_updates": "268000", "lr": "3.34177e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "35", "gb_free": "15.2", "wall": "50582"} [2023-11-02 07:38:30,565][train_inner][INFO] - {"epoch": 67, "update": 66.128, "loss": "2.782", "ntokens": "3155.04", "nsentences": "45.24", "prob_perplexity": "200.12", "code_perplexity": "197.446", "temp": "0.523", "loss_0": "2.676", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55128", "wps": "17891.5", "ups": "5.67", "wpb": "3155", "bsz": "45.2", "num_updates": "268200", "lr": "3.33671e-05", "gnorm": "0.876", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "50617"} [2023-11-02 07:39:06,407][train_inner][INFO] - {"epoch": 67, "update": 66.178, "loss": "2.867", "ntokens": "3181.64", "nsentences": "43.68", "prob_perplexity": "199.666", "code_perplexity": "197.016", "temp": "0.523", "loss_0": "2.761", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53583", "wps": "17754.9", "ups": "5.58", "wpb": "3181.6", "bsz": "43.7", "num_updates": "268400", "lr": "3.33165e-05", "gnorm": "0.89", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "50653"} [2023-11-02 07:39:42,411][train_inner][INFO] - {"epoch": 67, "update": 66.227, "loss": "2.88", "ntokens": "3206.92", "nsentences": "43.16", "prob_perplexity": "199.924", "code_perplexity": "197.239", "temp": "0.522", "loss_0": "2.773", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53222", "wps": "17815.5", "ups": "5.56", "wpb": "3206.9", "bsz": "43.2", "num_updates": "268600", "lr": "3.32658e-05", "gnorm": "0.897", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "50689"} [2023-11-02 07:40:18,232][train_inner][INFO] - {"epoch": 67, "update": 66.276, "loss": "2.794", "ntokens": "3191.96", "nsentences": "45.08", "prob_perplexity": "200.33", "code_perplexity": "197.602", "temp": "0.522", "loss_0": "2.687", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54841", "wps": "17822.8", "ups": "5.58", "wpb": "3192", "bsz": "45.1", "num_updates": "268800", "lr": "3.32152e-05", "gnorm": "0.884", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "50724"} [2023-11-02 07:40:54,016][train_inner][INFO] - {"epoch": 67, "update": 66.326, "loss": "2.84", "ntokens": "3211.76", "nsentences": "44.6", "prob_perplexity": "202.057", "code_perplexity": "199.35", "temp": "0.521", "loss_0": "2.734", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53988", "wps": "17951.9", "ups": "5.59", "wpb": "3211.8", "bsz": "44.6", "num_updates": "269000", "lr": "3.31646e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "50760"} [2023-11-02 07:41:30,662][train_inner][INFO] - {"epoch": 67, "update": 66.375, "loss": "2.822", "ntokens": "3196.2", "nsentences": "44.12", "prob_perplexity": "200.763", "code_perplexity": "198.043", "temp": "0.521", "loss_0": "2.716", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54281", "wps": "17444.8", "ups": "5.46", "wpb": "3196.2", "bsz": "44.1", "num_updates": "269200", "lr": "3.31139e-05", "gnorm": "0.88", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "50797"} [2023-11-02 07:42:07,184][train_inner][INFO] - {"epoch": 67, "update": 66.424, "loss": "2.838", "ntokens": "3227.6", "nsentences": "44.36", "prob_perplexity": "200.501", "code_perplexity": "197.777", "temp": "0.52", "loss_0": "2.731", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.5408", "wps": "17675.6", "ups": "5.48", "wpb": "3227.6", "bsz": "44.4", "num_updates": "269400", "lr": "3.30633e-05", "gnorm": "0.871", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "50833"} [2023-11-02 07:42:43,469][train_inner][INFO] - {"epoch": 67, "update": 66.474, "loss": "2.87", "ntokens": "3181.4", "nsentences": "43.52", "prob_perplexity": "200.301", "code_perplexity": "197.574", "temp": "0.52", "loss_0": "2.764", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.535", "wps": "17537", "ups": "5.51", "wpb": "3181.4", "bsz": "43.5", "num_updates": "269600", "lr": "3.30127e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "50870"} [2023-11-02 07:43:20,285][train_inner][INFO] - {"epoch": 67, "update": 66.523, "loss": "2.91", "ntokens": "3203.6", "nsentences": "40.28", "prob_perplexity": "201.371", "code_perplexity": "198.723", "temp": "0.519", "loss_0": "2.803", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52507", "wps": "17404.3", "ups": "5.43", "wpb": "3203.6", "bsz": "40.3", "num_updates": "269800", "lr": "3.2962e-05", "gnorm": "0.888", "loss_scale": "2", "train_wall": "36", "gb_free": "16", "wall": "50907"} [2023-11-02 07:43:56,253][train_inner][INFO] - {"epoch": 67, "update": 66.572, "loss": "2.817", "ntokens": "3203.88", "nsentences": "44.28", "prob_perplexity": "200.657", "code_perplexity": "198.034", "temp": "0.519", "loss_0": "2.71", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54433", "wps": "17816.1", "ups": "5.56", "wpb": "3203.9", "bsz": "44.3", "num_updates": "270000", "lr": "3.29114e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "50942"} [2023-11-02 07:44:32,375][train_inner][INFO] - {"epoch": 67, "update": 66.622, "loss": "2.779", "ntokens": "3180.16", "nsentences": "46.28", "prob_perplexity": "201.359", "code_perplexity": "198.684", "temp": "0.518", "loss_0": "2.672", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.5516", "wps": "17609.1", "ups": "5.54", "wpb": "3180.2", "bsz": "46.3", "num_updates": "270200", "lr": "3.28608e-05", "gnorm": "0.872", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "50979"} [2023-11-02 07:45:09,164][train_inner][INFO] - {"epoch": 67, "update": 66.671, "loss": "2.819", "ntokens": "3213.88", "nsentences": "45.04", "prob_perplexity": "201.433", "code_perplexity": "198.846", "temp": "0.518", "loss_0": "2.713", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54385", "wps": "17473.2", "ups": "5.44", "wpb": "3213.9", "bsz": "45", "num_updates": "270400", "lr": "3.28101e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "51015"} [2023-11-02 07:45:45,306][train_inner][INFO] - {"epoch": 67, "update": 66.72, "loss": "2.798", "ntokens": "3177.48", "nsentences": "44.6", "prob_perplexity": "201.584", "code_perplexity": "199.001", "temp": "0.517", "loss_0": "2.692", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54769", "wps": "17584.4", "ups": "5.53", "wpb": "3177.5", "bsz": "44.6", "num_updates": "270600", "lr": "3.27595e-05", "gnorm": "0.876", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "51052"} [2023-11-02 07:46:21,893][train_inner][INFO] - {"epoch": 67, "update": 66.769, "loss": "2.842", "ntokens": "3198.88", "nsentences": "44.84", "prob_perplexity": "200.726", "code_perplexity": "198.069", "temp": "0.517", "loss_0": "2.736", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54035", "wps": "17487.6", "ups": "5.47", "wpb": "3198.9", "bsz": "44.8", "num_updates": "270800", "lr": "3.27089e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "51088"} [2023-11-02 07:46:58,450][train_inner][INFO] - {"epoch": 67, "update": 66.819, "loss": "2.79", "ntokens": "3172.84", "nsentences": "44.68", "prob_perplexity": "200.578", "code_perplexity": "197.917", "temp": "0.516", "loss_0": "2.684", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54972", "wps": "17359.3", "ups": "5.47", "wpb": "3172.8", "bsz": "44.7", "num_updates": "271000", "lr": "3.26582e-05", "gnorm": "0.876", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "51125"} [2023-11-02 07:47:35,187][train_inner][INFO] - {"epoch": 67, "update": 66.868, "loss": "2.758", "ntokens": "3173.76", "nsentences": "46.16", "prob_perplexity": "201.224", "code_perplexity": "198.547", "temp": "0.516", "loss_0": "2.652", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55614", "wps": "17279.3", "ups": "5.44", "wpb": "3173.8", "bsz": "46.2", "num_updates": "271200", "lr": "3.26076e-05", "gnorm": "0.886", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "51161"} [2023-11-02 07:48:11,997][train_inner][INFO] - {"epoch": 67, "update": 66.917, "loss": "2.795", "ntokens": "3172.44", "nsentences": "44.24", "prob_perplexity": "201.218", "code_perplexity": "198.519", "temp": "0.515", "loss_0": "2.689", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54756", "wps": "17238", "ups": "5.43", "wpb": "3172.4", "bsz": "44.2", "num_updates": "271400", "lr": "3.2557e-05", "gnorm": "0.884", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "51198"} [2023-11-02 07:48:48,445][train_inner][INFO] - {"epoch": 67, "update": 66.967, "loss": "2.791", "ntokens": "3169.56", "nsentences": "44.96", "prob_perplexity": "200.713", "code_perplexity": "198.116", "temp": "0.515", "loss_0": "2.685", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54983", "wps": "17393", "ups": "5.49", "wpb": "3169.6", "bsz": "45", "num_updates": "271600", "lr": "3.25063e-05", "gnorm": "0.888", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "51235"} [2023-11-02 07:49:12,963][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 07:49:12,965][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:49:12,981][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 78 [2023-11-02 07:49:38,693][valid][INFO] - {"epoch": 67, "valid_loss": "2.725", "valid_ntokens": "3168.21", "valid_nsentences": "44.1685", "valid_prob_perplexity": "198.96", "valid_code_perplexity": "196.444", "valid_temp": "0.514", "valid_loss_0": "2.618", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.56376", "valid_wps": "55907.3", "valid_wpb": "3168.2", "valid_bsz": "44.2", "valid_num_updates": "271735", "valid_best_loss": "2.685"} [2023-11-02 07:49:38,695][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 67 @ 271735 updates [2023-11-02 07:49:38,697][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 07:49:40,155][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 07:49:40,204][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 67 @ 271735 updates, score 2.725) (writing took 1.5090638138353825 seconds) [2023-11-02 07:49:40,205][fairseq_cli.train][INFO] - end of epoch 67 (average epoch stats below) [2023-11-02 07:49:40,220][train][INFO] - {"epoch": 67, "train_loss": "2.824", "train_ntokens": "3190.19", "train_nsentences": "44.2682", "train_prob_perplexity": "200.724", "train_code_perplexity": "198.064", "train_temp": "0.519", "train_loss_0": "2.718", "train_loss_1": "0.099", "train_loss_2": "0.007", "train_accuracy": "0.54285", "train_wps": "16969.6", "train_ups": "5.32", "train_wpb": "3190.2", "train_bsz": "44.3", "train_num_updates": "271735", "train_lr": "3.24722e-05", "train_gnorm": "0.883", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "13.8", "train_wall": "51286"} [2023-11-02 07:49:40,223][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:49:40,245][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 68 [2023-11-02 07:49:40,423][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 07:49:40,480][fairseq.trainer][INFO] - begin training epoch 68 [2023-11-02 07:49:40,481][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 07:49:52,336][train_inner][INFO] - {"epoch": 68, "update": 67.016, "loss": "2.895", "ntokens": "3205.24", "nsentences": "40.24", "prob_perplexity": "200.138", "code_perplexity": "197.497", "temp": "0.514", "loss_0": "2.789", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52648", "wps": "10033.8", "ups": "3.13", "wpb": "3205.2", "bsz": "40.2", "num_updates": "271800", "lr": "3.24557e-05", "gnorm": "0.892", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "51299"} [2023-11-02 07:50:27,992][train_inner][INFO] - {"epoch": 68, "update": 67.065, "loss": "2.819", "ntokens": "3219.8", "nsentences": "44.84", "prob_perplexity": "200.006", "code_perplexity": "197.377", "temp": "0.514", "loss_0": "2.713", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54417", "wps": "18061.4", "ups": "5.61", "wpb": "3219.8", "bsz": "44.8", "num_updates": "272000", "lr": "3.24051e-05", "gnorm": "0.878", "loss_scale": "2", "train_wall": "35", "gb_free": "14.9", "wall": "51334"} [2023-11-02 07:51:03,487][train_inner][INFO] - {"epoch": 68, "update": 67.115, "loss": "2.849", "ntokens": "3164.04", "nsentences": "43.16", "prob_perplexity": "201.278", "code_perplexity": "198.621", "temp": "0.513", "loss_0": "2.743", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53885", "wps": "17829.2", "ups": "5.63", "wpb": "3164", "bsz": "43.2", "num_updates": "272200", "lr": "3.23544e-05", "gnorm": "0.89", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "51370"} [2023-11-02 07:51:39,367][train_inner][INFO] - {"epoch": 68, "update": 67.164, "loss": "2.893", "ntokens": "3203.64", "nsentences": "42.84", "prob_perplexity": "201.454", "code_perplexity": "198.821", "temp": "0.513", "loss_0": "2.787", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52998", "wps": "17858.7", "ups": "5.57", "wpb": "3203.6", "bsz": "42.8", "num_updates": "272400", "lr": "3.23038e-05", "gnorm": "0.894", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "51406"} [2023-11-02 07:52:15,485][train_inner][INFO] - {"epoch": 68, "update": 67.213, "loss": "2.843", "ntokens": "3191.24", "nsentences": "44.24", "prob_perplexity": "200.967", "code_perplexity": "198.296", "temp": "0.512", "loss_0": "2.737", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.5385", "wps": "17672.3", "ups": "5.54", "wpb": "3191.2", "bsz": "44.2", "num_updates": "272600", "lr": "3.22532e-05", "gnorm": "0.898", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "51442"} [2023-11-02 07:52:52,038][train_inner][INFO] - {"epoch": 68, "update": 67.263, "loss": "2.835", "ntokens": "3210.68", "nsentences": "43.04", "prob_perplexity": "201.952", "code_perplexity": "199.382", "temp": "0.512", "loss_0": "2.729", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53867", "wps": "17568.5", "ups": "5.47", "wpb": "3210.7", "bsz": "43", "num_updates": "272800", "lr": "3.22025e-05", "gnorm": "0.883", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "51478"} [2023-11-02 07:53:28,542][train_inner][INFO] - {"epoch": 68, "update": 67.312, "loss": "2.762", "ntokens": "3221.88", "nsentences": "45.84", "prob_perplexity": "201.997", "code_perplexity": "199.342", "temp": "0.511", "loss_0": "2.656", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55418", "wps": "17653.2", "ups": "5.48", "wpb": "3221.9", "bsz": "45.8", "num_updates": "273000", "lr": "3.21519e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "51515"} [2023-11-02 07:54:04,725][train_inner][INFO] - {"epoch": 68, "update": 67.361, "loss": "2.78", "ntokens": "3148.52", "nsentences": "45.6", "prob_perplexity": "201.249", "code_perplexity": "198.644", "temp": "0.511", "loss_0": "2.674", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55155", "wps": "17404.8", "ups": "5.53", "wpb": "3148.5", "bsz": "45.6", "num_updates": "273200", "lr": "3.21013e-05", "gnorm": "0.883", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "51551"} [2023-11-02 07:54:41,838][train_inner][INFO] - {"epoch": 68, "update": 67.411, "loss": "2.874", "ntokens": "3248.44", "nsentences": "44.36", "prob_perplexity": "202.062", "code_perplexity": "199.438", "temp": "0.51", "loss_0": "2.768", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53424", "wps": "17507", "ups": "5.39", "wpb": "3248.4", "bsz": "44.4", "num_updates": "273400", "lr": "3.20506e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "51588"} [2023-11-02 07:55:18,436][train_inner][INFO] - {"epoch": 68, "update": 67.46, "loss": "2.784", "ntokens": "3156.88", "nsentences": "44.92", "prob_perplexity": "200.466", "code_perplexity": "197.869", "temp": "0.509", "loss_0": "2.677", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54975", "wps": "17252.7", "ups": "5.47", "wpb": "3156.9", "bsz": "44.9", "num_updates": "273600", "lr": "3.2e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "36", "gb_free": "12.4", "wall": "51625"} [2023-11-02 07:55:54,761][train_inner][INFO] - {"epoch": 68, "update": 67.509, "loss": "2.775", "ntokens": "3187.16", "nsentences": "46.56", "prob_perplexity": "201.485", "code_perplexity": "198.873", "temp": "0.509", "loss_0": "2.669", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55415", "wps": "17548.8", "ups": "5.51", "wpb": "3187.2", "bsz": "46.6", "num_updates": "273800", "lr": "3.19494e-05", "gnorm": "0.875", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "51661"} [2023-11-02 07:56:31,521][train_inner][INFO] - {"epoch": 68, "update": 67.558, "loss": "2.846", "ntokens": "3236.12", "nsentences": "42.8", "prob_perplexity": "201.413", "code_perplexity": "198.826", "temp": "0.508", "loss_0": "2.74", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53808", "wps": "17608.3", "ups": "5.44", "wpb": "3236.1", "bsz": "42.8", "num_updates": "274000", "lr": "3.18987e-05", "gnorm": "0.872", "loss_scale": "2", "train_wall": "36", "gb_free": "12.2", "wall": "51698"} [2023-11-02 07:57:07,208][train_inner][INFO] - {"epoch": 68, "update": 67.608, "loss": "2.831", "ntokens": "3137", "nsentences": "41.8", "prob_perplexity": "201.308", "code_perplexity": "198.684", "temp": "0.508", "loss_0": "2.725", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.5401", "wps": "17581.9", "ups": "5.6", "wpb": "3137", "bsz": "41.8", "num_updates": "274200", "lr": "3.18481e-05", "gnorm": "0.899", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "51733"} [2023-11-02 07:57:43,104][train_inner][INFO] - {"epoch": 68, "update": 67.657, "loss": "2.806", "ntokens": "3199.12", "nsentences": "44.68", "prob_perplexity": "201.493", "code_perplexity": "198.913", "temp": "0.507", "loss_0": "2.7", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54548", "wps": "17825.1", "ups": "5.57", "wpb": "3199.1", "bsz": "44.7", "num_updates": "274400", "lr": "3.17975e-05", "gnorm": "0.883", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "51769"} [2023-11-02 07:58:19,044][train_inner][INFO] - {"epoch": 68, "update": 67.706, "loss": "2.798", "ntokens": "3168.12", "nsentences": "44.68", "prob_perplexity": "200.612", "code_perplexity": "197.979", "temp": "0.507", "loss_0": "2.692", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54752", "wps": "17631.3", "ups": "5.57", "wpb": "3168.1", "bsz": "44.7", "num_updates": "274600", "lr": "3.17468e-05", "gnorm": "0.891", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "51805"} [2023-11-02 07:58:55,451][train_inner][INFO] - {"epoch": 68, "update": 67.756, "loss": "2.872", "ntokens": "3189.04", "nsentences": "41.92", "prob_perplexity": "202.474", "code_perplexity": "199.803", "temp": "0.506", "loss_0": "2.766", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53212", "wps": "17520.3", "ups": "5.49", "wpb": "3189", "bsz": "41.9", "num_updates": "274800", "lr": "3.16962e-05", "gnorm": "0.899", "loss_scale": "2", "train_wall": "36", "gb_free": "15.7", "wall": "51842"} [2023-11-02 07:59:32,106][train_inner][INFO] - {"epoch": 68, "update": 67.805, "loss": "2.816", "ntokens": "3197.36", "nsentences": "45.56", "prob_perplexity": "201.064", "code_perplexity": "198.441", "temp": "0.506", "loss_0": "2.71", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54546", "wps": "17446.7", "ups": "5.46", "wpb": "3197.4", "bsz": "45.6", "num_updates": "275000", "lr": "3.16456e-05", "gnorm": "0.882", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "51878"} [2023-11-02 07:59:32,107][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 07:59:32,109][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 07:59:32,127][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 79 [2023-11-02 07:59:57,516][valid][INFO] - {"epoch": 68, "valid_loss": "2.723", "valid_ntokens": "3192.83", "valid_nsentences": "44.1685", "valid_prob_perplexity": "200.073", "valid_code_perplexity": "197.608", "valid_temp": "0.506", "valid_loss_0": "2.617", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.56418", "valid_wps": "57134.6", "valid_wpb": "3192.8", "valid_bsz": "44.2", "valid_num_updates": "275000", "valid_best_loss": "2.685"} [2023-11-02 07:59:57,518][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 68 @ 275000 updates [2023-11-02 07:59:57,520][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_68_275000.pt [2023-11-02 07:59:58,871][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_68_275000.pt [2023-11-02 07:59:59,843][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_68_275000.pt (epoch 68 @ 275000 updates, score 2.723) (writing took 2.32449861895293 seconds) [2023-11-02 08:00:36,259][train_inner][INFO] - {"epoch": 68, "update": 67.854, "loss": "2.774", "ntokens": "3151.96", "nsentences": "45.92", "prob_perplexity": "201.212", "code_perplexity": "198.605", "temp": "0.505", "loss_0": "2.668", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55412", "wps": "9826.7", "ups": "3.12", "wpb": "3152", "bsz": "45.9", "num_updates": "275200", "lr": "3.15949e-05", "gnorm": "0.874", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "51942"} [2023-11-02 08:01:12,796][train_inner][INFO] - {"epoch": 68, "update": 67.904, "loss": "2.836", "ntokens": "3210.24", "nsentences": "43.2", "prob_perplexity": "202.192", "code_perplexity": "199.601", "temp": "0.505", "loss_0": "2.73", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54076", "wps": "17573.3", "ups": "5.47", "wpb": "3210.2", "bsz": "43.2", "num_updates": "275400", "lr": "3.15443e-05", "gnorm": "0.888", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "51979"} [2023-11-02 08:01:49,659][train_inner][INFO] - {"epoch": 68, "update": 67.953, "loss": "2.813", "ntokens": "3219", "nsentences": "43.92", "prob_perplexity": "201.572", "code_perplexity": "198.981", "temp": "0.504", "loss_0": "2.707", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54451", "wps": "17465.8", "ups": "5.43", "wpb": "3219", "bsz": "43.9", "num_updates": "275600", "lr": "3.14937e-05", "gnorm": "0.889", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "52016"} [2023-11-02 08:02:24,225][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 08:02:24,227][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:02:24,245][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 80 [2023-11-02 08:02:49,885][valid][INFO] - {"epoch": 68, "valid_loss": "2.697", "valid_ntokens": "3163.64", "valid_nsentences": "44.1685", "valid_prob_perplexity": "202.244", "valid_code_perplexity": "199.708", "valid_temp": "0.504", "valid_loss_0": "2.591", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.56803", "valid_wps": "56026.3", "valid_wpb": "3163.6", "valid_bsz": "44.2", "valid_num_updates": "275791", "valid_best_loss": "2.685"} [2023-11-02 08:02:49,887][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 68 @ 275791 updates [2023-11-02 08:02:49,889][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 08:02:51,341][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 08:02:51,388][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 68 @ 275791 updates, score 2.697) (writing took 1.5008762367069721 seconds) [2023-11-02 08:02:51,388][fairseq_cli.train][INFO] - end of epoch 68 (average epoch stats below) [2023-11-02 08:02:51,391][train][INFO] - {"epoch": 68, "train_loss": "2.821", "train_ntokens": "3193.22", "train_nsentences": "44.2682", "train_prob_perplexity": "201.348", "train_code_perplexity": "198.73", "train_temp": "0.509", "train_loss_0": "2.715", "train_loss_1": "0.099", "train_loss_2": "0.007", "train_accuracy": "0.54334", "train_wps": "16370.4", "train_ups": "5.13", "train_wpb": "3193.2", "train_bsz": "44.3", "train_num_updates": "275791", "train_lr": "3.14453e-05", "train_gnorm": "0.886", "train_loss_scale": "2", "train_train_wall": "723", "train_gb_free": "13.2", "train_wall": "52078"} [2023-11-02 08:02:51,393][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:02:51,414][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 69 [2023-11-02 08:02:51,579][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 08:02:51,636][fairseq.trainer][INFO] - begin training epoch 69 [2023-11-02 08:02:51,637][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 08:02:53,315][train_inner][INFO] - {"epoch": 69, "update": 68.002, "loss": "2.799", "ntokens": "3206.64", "nsentences": "47.08", "prob_perplexity": "201.274", "code_perplexity": "198.679", "temp": "0.504", "loss_0": "2.693", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55001", "wps": "10077.8", "ups": "3.14", "wpb": "3206.6", "bsz": "47.1", "num_updates": "275800", "lr": "3.1443e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "52080"} [2023-11-02 08:03:29,143][train_inner][INFO] - {"epoch": 69, "update": 68.052, "loss": "2.85", "ntokens": "3217.16", "nsentences": "43.92", "prob_perplexity": "202.669", "code_perplexity": "200.017", "temp": "0.503", "loss_0": "2.744", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53845", "wps": "17960.2", "ups": "5.58", "wpb": "3217.2", "bsz": "43.9", "num_updates": "276000", "lr": "3.13924e-05", "gnorm": "0.873", "loss_scale": "2", "train_wall": "35", "gb_free": "15.3", "wall": "52115"} [2023-11-02 08:04:05,031][train_inner][INFO] - {"epoch": 69, "update": 68.101, "loss": "2.81", "ntokens": "3203.48", "nsentences": "44.76", "prob_perplexity": "201.956", "code_perplexity": "199.304", "temp": "0.503", "loss_0": "2.705", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54457", "wps": "17854.2", "ups": "5.57", "wpb": "3203.5", "bsz": "44.8", "num_updates": "276200", "lr": "3.13418e-05", "gnorm": "0.883", "loss_scale": "4", "train_wall": "35", "gb_free": "13.1", "wall": "52151"} [2023-11-02 08:04:40,897][train_inner][INFO] - {"epoch": 69, "update": 68.15, "loss": "2.806", "ntokens": "3179.68", "nsentences": "44.52", "prob_perplexity": "201.032", "code_perplexity": "198.444", "temp": "0.502", "loss_0": "2.7", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54669", "wps": "17731.8", "ups": "5.58", "wpb": "3179.7", "bsz": "44.5", "num_updates": "276400", "lr": "3.12911e-05", "gnorm": "0.9", "loss_scale": "4", "train_wall": "35", "gb_free": "13.2", "wall": "52187"} [2023-11-02 08:05:17,246][train_inner][INFO] - {"epoch": 69, "update": 68.199, "loss": "2.842", "ntokens": "3229.4", "nsentences": "44.24", "prob_perplexity": "201.953", "code_perplexity": "199.376", "temp": "0.502", "loss_0": "2.736", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53823", "wps": "17770", "ups": "5.5", "wpb": "3229.4", "bsz": "44.2", "num_updates": "276600", "lr": "3.12405e-05", "gnorm": "0.887", "loss_scale": "4", "train_wall": "36", "gb_free": "15.1", "wall": "52223"} [2023-11-02 08:05:53,533][train_inner][INFO] - {"epoch": 69, "update": 68.249, "loss": "2.826", "ntokens": "3223.64", "nsentences": "44.6", "prob_perplexity": "201.975", "code_perplexity": "199.373", "temp": "0.501", "loss_0": "2.72", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54181", "wps": "17768.6", "ups": "5.51", "wpb": "3223.6", "bsz": "44.6", "num_updates": "276800", "lr": "3.11899e-05", "gnorm": "0.889", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "52260"} [2023-11-02 08:06:30,341][train_inner][INFO] - {"epoch": 69, "update": 68.298, "loss": "2.777", "ntokens": "3166.96", "nsentences": "43.8", "prob_perplexity": "200.974", "code_perplexity": "198.295", "temp": "0.501", "loss_0": "2.671", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54987", "wps": "17208.8", "ups": "5.43", "wpb": "3167", "bsz": "43.8", "num_updates": "277000", "lr": "3.11392e-05", "gnorm": "0.893", "loss_scale": "4", "train_wall": "36", "gb_free": "13.8", "wall": "52297"} [2023-11-02 08:07:06,669][train_inner][INFO] - {"epoch": 69, "update": 68.347, "loss": "2.811", "ntokens": "3169.36", "nsentences": "44.8", "prob_perplexity": "201.396", "code_perplexity": "198.817", "temp": "0.5", "loss_0": "2.705", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54625", "wps": "17449.7", "ups": "5.51", "wpb": "3169.4", "bsz": "44.8", "num_updates": "277200", "lr": "3.10886e-05", "gnorm": "0.889", "loss_scale": "4", "train_wall": "36", "gb_free": "14.8", "wall": "52333"} [2023-11-02 08:07:42,898][train_inner][INFO] - {"epoch": 69, "update": 68.397, "loss": "2.764", "ntokens": "3198.68", "nsentences": "45.8", "prob_perplexity": "201.843", "code_perplexity": "199.242", "temp": "0.5", "loss_0": "2.658", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55378", "wps": "17659.1", "ups": "5.52", "wpb": "3198.7", "bsz": "45.8", "num_updates": "277400", "lr": "3.1038e-05", "gnorm": "0.886", "loss_scale": "4", "train_wall": "36", "gb_free": "14.2", "wall": "52369"} [2023-11-02 08:08:19,264][train_inner][INFO] - {"epoch": 69, "update": 68.446, "loss": "2.789", "ntokens": "3175.6", "nsentences": "45.32", "prob_perplexity": "201.224", "code_perplexity": "198.545", "temp": "0.5", "loss_0": "2.683", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54921", "wps": "17465.5", "ups": "5.5", "wpb": "3175.6", "bsz": "45.3", "num_updates": "277600", "lr": "3.09873e-05", "gnorm": "0.887", "loss_scale": "4", "train_wall": "36", "gb_free": "15.5", "wall": "52405"} [2023-11-02 08:08:55,136][train_inner][INFO] - {"epoch": 69, "update": 68.495, "loss": "2.774", "ntokens": "3157.44", "nsentences": "44.36", "prob_perplexity": "202.427", "code_perplexity": "199.895", "temp": "0.5", "loss_0": "2.668", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55169", "wps": "17605.2", "ups": "5.58", "wpb": "3157.4", "bsz": "44.4", "num_updates": "277800", "lr": "3.09367e-05", "gnorm": "0.891", "loss_scale": "4", "train_wall": "35", "gb_free": "14.7", "wall": "52441"} [2023-11-02 08:09:31,638][train_inner][INFO] - {"epoch": 69, "update": 68.545, "loss": "2.823", "ntokens": "3212.4", "nsentences": "44.6", "prob_perplexity": "200.519", "code_perplexity": "197.982", "temp": "0.5", "loss_0": "2.717", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54273", "wps": "17602.5", "ups": "5.48", "wpb": "3212.4", "bsz": "44.6", "num_updates": "278000", "lr": "3.08861e-05", "gnorm": "0.905", "loss_scale": "4", "train_wall": "36", "gb_free": "13.1", "wall": "52478"} [2023-11-02 08:10:07,241][train_inner][INFO] - {"epoch": 69, "update": 68.594, "loss": "2.841", "ntokens": "3190.2", "nsentences": "43.32", "prob_perplexity": "201.436", "code_perplexity": "198.811", "temp": "0.5", "loss_0": "2.735", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53959", "wps": "17922.3", "ups": "5.62", "wpb": "3190.2", "bsz": "43.3", "num_updates": "278200", "lr": "3.08354e-05", "gnorm": "0.895", "loss_scale": "4", "train_wall": "35", "gb_free": "13.2", "wall": "52513"} [2023-11-02 08:10:43,752][train_inner][INFO] - {"epoch": 69, "update": 68.643, "loss": "2.831", "ntokens": "3185.36", "nsentences": "42.72", "prob_perplexity": "201.772", "code_perplexity": "199.166", "temp": "0.5", "loss_0": "2.725", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54021", "wps": "17450", "ups": "5.48", "wpb": "3185.4", "bsz": "42.7", "num_updates": "278400", "lr": "3.07848e-05", "gnorm": "0.899", "loss_scale": "4", "train_wall": "36", "gb_free": "12.8", "wall": "52550"} [2023-11-02 08:11:04,994][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 08:11:20,501][train_inner][INFO] - {"epoch": 69, "update": 68.693, "loss": "2.916", "ntokens": "3233.56", "nsentences": "41.92", "prob_perplexity": "201.816", "code_perplexity": "199.216", "temp": "0.5", "loss_0": "2.81", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.52552", "wps": "17598.7", "ups": "5.44", "wpb": "3233.6", "bsz": "41.9", "num_updates": "278600", "lr": "3.07342e-05", "gnorm": "0.896", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "52587"} [2023-11-02 08:11:56,740][train_inner][INFO] - {"epoch": 69, "update": 68.742, "loss": "2.846", "ntokens": "3167.36", "nsentences": "41.4", "prob_perplexity": "201.78", "code_perplexity": "199.166", "temp": "0.5", "loss_0": "2.74", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53736", "wps": "17481.4", "ups": "5.52", "wpb": "3167.4", "bsz": "41.4", "num_updates": "278800", "lr": "3.06835e-05", "gnorm": "0.894", "loss_scale": "2", "train_wall": "36", "gb_free": "15.1", "wall": "52623"} [2023-11-02 08:12:33,128][train_inner][INFO] - {"epoch": 69, "update": 68.791, "loss": "2.79", "ntokens": "3186.68", "nsentences": "44.44", "prob_perplexity": "202.11", "code_perplexity": "199.544", "temp": "0.5", "loss_0": "2.684", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54813", "wps": "17516.2", "ups": "5.5", "wpb": "3186.7", "bsz": "44.4", "num_updates": "279000", "lr": "3.06329e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "52659"} [2023-11-02 08:13:09,460][train_inner][INFO] - {"epoch": 69, "update": 68.841, "loss": "2.774", "ntokens": "3159.88", "nsentences": "44.2", "prob_perplexity": "201.631", "code_perplexity": "199.011", "temp": "0.5", "loss_0": "2.668", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55119", "wps": "17395.3", "ups": "5.51", "wpb": "3159.9", "bsz": "44.2", "num_updates": "279200", "lr": "3.05823e-05", "gnorm": "0.895", "loss_scale": "2", "train_wall": "36", "gb_free": "16.7", "wall": "52696"} [2023-11-02 08:13:45,602][train_inner][INFO] - {"epoch": 69, "update": 68.89, "loss": "2.786", "ntokens": "3203.48", "nsentences": "46.68", "prob_perplexity": "202.734", "code_perplexity": "200.122", "temp": "0.5", "loss_0": "2.68", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55187", "wps": "17729", "ups": "5.53", "wpb": "3203.5", "bsz": "46.7", "num_updates": "279400", "lr": "3.05316e-05", "gnorm": "0.884", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "52732"} [2023-11-02 08:14:21,720][train_inner][INFO] - {"epoch": 69, "update": 68.939, "loss": "2.838", "ntokens": "3211.92", "nsentences": "43.24", "prob_perplexity": "201.975", "code_perplexity": "199.425", "temp": "0.5", "loss_0": "2.732", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53947", "wps": "17787.4", "ups": "5.54", "wpb": "3211.9", "bsz": "43.2", "num_updates": "279600", "lr": "3.0481e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "52768"} [2023-11-02 08:14:57,672][train_inner][INFO] - {"epoch": 69, "update": 68.989, "loss": "2.745", "ntokens": "3172", "nsentences": "46.32", "prob_perplexity": "202.469", "code_perplexity": "199.901", "temp": "0.5", "loss_0": "2.64", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55703", "wps": "17646.6", "ups": "5.56", "wpb": "3172", "bsz": "46.3", "num_updates": "279800", "lr": "3.04304e-05", "gnorm": "0.891", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "52804"} [2023-11-02 08:15:05,894][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 08:15:05,896][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:15:05,917][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 81 [2023-11-02 08:15:31,189][valid][INFO] - {"epoch": 69, "valid_loss": "2.686", "valid_ntokens": "3167.41", "valid_nsentences": "44.1685", "valid_prob_perplexity": "201.816", "valid_code_perplexity": "199.362", "valid_temp": "0.5", "valid_loss_0": "2.58", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.5697", "valid_wps": "56905.1", "valid_wpb": "3167.4", "valid_bsz": "44.2", "valid_num_updates": "279846", "valid_best_loss": "2.685"} [2023-11-02 08:15:31,191][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 69 @ 279846 updates [2023-11-02 08:15:31,194][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 08:15:32,618][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 08:15:32,678][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 69 @ 279846 updates, score 2.686) (writing took 1.4863067995756865 seconds) [2023-11-02 08:15:32,678][fairseq_cli.train][INFO] - end of epoch 69 (average epoch stats below) [2023-11-02 08:15:32,681][train][INFO] - {"epoch": 69, "train_loss": "2.812", "train_ntokens": "3192.28", "train_nsentences": "44.2732", "train_prob_perplexity": "201.785", "train_code_perplexity": "199.181", "train_temp": "0.501", "train_loss_0": "2.706", "train_loss_1": "0.099", "train_loss_2": "0.007", "train_accuracy": "0.54469", "train_wps": "17003.7", "train_ups": "5.33", "train_wpb": "3192.3", "train_bsz": "44.3", "train_num_updates": "279846", "train_lr": "3.04187e-05", "train_gnorm": "0.891", "train_loss_scale": "2", "train_train_wall": "721", "train_gb_free": "13.3", "train_wall": "52839"} [2023-11-02 08:15:32,684][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:15:32,703][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 70 [2023-11-02 08:15:32,869][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 08:15:32,924][fairseq.trainer][INFO] - begin training epoch 70 [2023-11-02 08:15:32,924][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 08:16:00,240][train_inner][INFO] - {"epoch": 70, "update": 69.038, "loss": "2.871", "ntokens": "3177.8", "nsentences": "42.8", "prob_perplexity": "201.611", "code_perplexity": "198.943", "temp": "0.5", "loss_0": "2.765", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53431", "wps": "10158.4", "ups": "3.2", "wpb": "3177.8", "bsz": "42.8", "num_updates": "280000", "lr": "3.03797e-05", "gnorm": "0.9", "loss_scale": "2", "train_wall": "35", "gb_free": "15", "wall": "52866"} [2023-11-02 08:16:35,898][train_inner][INFO] - {"epoch": 70, "update": 69.087, "loss": "2.84", "ntokens": "3180.08", "nsentences": "43.24", "prob_perplexity": "202.416", "code_perplexity": "199.911", "temp": "0.5", "loss_0": "2.734", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53921", "wps": "17837.7", "ups": "5.61", "wpb": "3180.1", "bsz": "43.2", "num_updates": "280200", "lr": "3.03291e-05", "gnorm": "0.894", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "52902"} [2023-11-02 08:17:11,214][train_inner][INFO] - {"epoch": 70, "update": 69.137, "loss": "2.748", "ntokens": "3177.76", "nsentences": "46.96", "prob_perplexity": "203.071", "code_perplexity": "200.495", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5574", "wps": "17997.4", "ups": "5.66", "wpb": "3177.8", "bsz": "47", "num_updates": "280400", "lr": "3.02785e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "52937"} [2023-11-02 08:17:46,845][train_inner][INFO] - {"epoch": 70, "update": 69.186, "loss": "2.773", "ntokens": "3189.68", "nsentences": "44.92", "prob_perplexity": "203.366", "code_perplexity": "200.783", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55225", "wps": "17905.4", "ups": "5.61", "wpb": "3189.7", "bsz": "44.9", "num_updates": "280600", "lr": "3.02278e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "52973"} [2023-11-02 08:18:23,841][train_inner][INFO] - {"epoch": 70, "update": 69.235, "loss": "2.842", "ntokens": "3219.44", "nsentences": "42.24", "prob_perplexity": "202.399", "code_perplexity": "199.804", "temp": "0.5", "loss_0": "2.736", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.53846", "wps": "17405", "ups": "5.41", "wpb": "3219.4", "bsz": "42.2", "num_updates": "280800", "lr": "3.01772e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "53010"} [2023-11-02 08:19:00,467][train_inner][INFO] - {"epoch": 70, "update": 69.285, "loss": "2.802", "ntokens": "3147.88", "nsentences": "43.2", "prob_perplexity": "201.634", "code_perplexity": "199.031", "temp": "0.5", "loss_0": "2.696", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54516", "wps": "17190.6", "ups": "5.46", "wpb": "3147.9", "bsz": "43.2", "num_updates": "281000", "lr": "3.01266e-05", "gnorm": "0.912", "loss_scale": "2", "train_wall": "36", "gb_free": "12.5", "wall": "53047"} [2023-11-02 08:19:36,452][train_inner][INFO] - {"epoch": 70, "update": 69.334, "loss": "2.794", "ntokens": "3161.36", "nsentences": "44.76", "prob_perplexity": "203.472", "code_perplexity": "200.901", "temp": "0.5", "loss_0": "2.689", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54894", "wps": "17571.7", "ups": "5.56", "wpb": "3161.4", "bsz": "44.8", "num_updates": "281200", "lr": "3.00759e-05", "gnorm": "0.896", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "53083"} [2023-11-02 08:20:12,909][train_inner][INFO] - {"epoch": 70, "update": 69.383, "loss": "2.844", "ntokens": "3200.72", "nsentences": "42.8", "prob_perplexity": "202.743", "code_perplexity": "200.121", "temp": "0.5", "loss_0": "2.738", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53773", "wps": "17560.1", "ups": "5.49", "wpb": "3200.7", "bsz": "42.8", "num_updates": "281400", "lr": "3.00253e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "53119"} [2023-11-02 08:20:48,963][train_inner][INFO] - {"epoch": 70, "update": 69.432, "loss": "2.759", "ntokens": "3167", "nsentences": "45.88", "prob_perplexity": "201.942", "code_perplexity": "199.356", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55545", "wps": "17568.9", "ups": "5.55", "wpb": "3167", "bsz": "45.9", "num_updates": "281600", "lr": "2.99747e-05", "gnorm": "0.883", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "53155"} [2023-11-02 08:21:24,826][train_inner][INFO] - {"epoch": 70, "update": 69.482, "loss": "2.816", "ntokens": "3134.6", "nsentences": "43.12", "prob_perplexity": "202.299", "code_perplexity": "199.59", "temp": "0.5", "loss_0": "2.71", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54354", "wps": "17481.9", "ups": "5.58", "wpb": "3134.6", "bsz": "43.1", "num_updates": "281800", "lr": "2.99241e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "53191"} [2023-11-02 08:22:00,971][train_inner][INFO] - {"epoch": 70, "update": 69.531, "loss": "2.795", "ntokens": "3171.4", "nsentences": "43.4", "prob_perplexity": "202.348", "code_perplexity": "199.732", "temp": "0.5", "loss_0": "2.689", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54708", "wps": "17549.7", "ups": "5.53", "wpb": "3171.4", "bsz": "43.4", "num_updates": "282000", "lr": "2.98734e-05", "gnorm": "0.885", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "53227"} [2023-11-02 08:22:37,814][train_inner][INFO] - {"epoch": 70, "update": 69.58, "loss": "2.823", "ntokens": "3199.4", "nsentences": "43.08", "prob_perplexity": "202.773", "code_perplexity": "200.271", "temp": "0.5", "loss_0": "2.718", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54243", "wps": "17369.1", "ups": "5.43", "wpb": "3199.4", "bsz": "43.1", "num_updates": "282200", "lr": "2.98228e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "36", "gb_free": "17", "wall": "53264"} [2023-11-02 08:23:14,123][train_inner][INFO] - {"epoch": 70, "update": 69.63, "loss": "2.8", "ntokens": "3160.96", "nsentences": "43.6", "prob_perplexity": "203.034", "code_perplexity": "200.485", "temp": "0.5", "loss_0": "2.694", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54679", "wps": "17412.2", "ups": "5.51", "wpb": "3161", "bsz": "43.6", "num_updates": "282400", "lr": "2.97722e-05", "gnorm": "0.904", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "53300"} [2023-11-02 08:23:50,530][train_inner][INFO] - {"epoch": 70, "update": 69.679, "loss": "2.812", "ntokens": "3201.04", "nsentences": "44.96", "prob_perplexity": "203.199", "code_perplexity": "200.619", "temp": "0.5", "loss_0": "2.707", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54581", "wps": "17585.8", "ups": "5.49", "wpb": "3201", "bsz": "45", "num_updates": "282600", "lr": "2.97215e-05", "gnorm": "0.896", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "53337"} [2023-11-02 08:24:26,893][train_inner][INFO] - {"epoch": 70, "update": 69.728, "loss": "2.759", "ntokens": "3182.64", "nsentences": "45.4", "prob_perplexity": "202.066", "code_perplexity": "199.512", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55466", "wps": "17505.9", "ups": "5.5", "wpb": "3182.6", "bsz": "45.4", "num_updates": "282800", "lr": "2.96709e-05", "gnorm": "0.892", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "53373"} [2023-11-02 08:25:02,700][train_inner][INFO] - {"epoch": 70, "update": 69.778, "loss": "2.796", "ntokens": "3212.68", "nsentences": "46.2", "prob_perplexity": "202.649", "code_perplexity": "200.138", "temp": "0.5", "loss_0": "2.691", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54952", "wps": "17945.7", "ups": "5.59", "wpb": "3212.7", "bsz": "46.2", "num_updates": "283000", "lr": "2.96203e-05", "gnorm": "0.878", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "53409"} [2023-11-02 08:25:38,990][train_inner][INFO] - {"epoch": 70, "update": 69.827, "loss": "2.735", "ntokens": "3183.8", "nsentences": "46.08", "prob_perplexity": "202.377", "code_perplexity": "199.867", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.55912", "wps": "17547.2", "ups": "5.51", "wpb": "3183.8", "bsz": "46.1", "num_updates": "283200", "lr": "2.95696e-05", "gnorm": "0.879", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "53445"} [2023-11-02 08:26:14,942][train_inner][INFO] - {"epoch": 70, "update": 69.876, "loss": "2.786", "ntokens": "3179.04", "nsentences": "43.88", "prob_perplexity": "201.774", "code_perplexity": "199.251", "temp": "0.5", "loss_0": "2.68", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54829", "wps": "17686.4", "ups": "5.56", "wpb": "3179", "bsz": "43.9", "num_updates": "283400", "lr": "2.9519e-05", "gnorm": "0.904", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "53481"} [2023-11-02 08:26:51,544][train_inner][INFO] - {"epoch": 70, "update": 69.926, "loss": "2.78", "ntokens": "3217.88", "nsentences": "44.88", "prob_perplexity": "203.551", "code_perplexity": "201.07", "temp": "0.5", "loss_0": "2.675", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55043", "wps": "17583.9", "ups": "5.46", "wpb": "3217.9", "bsz": "44.9", "num_updates": "283600", "lr": "2.94684e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "53518"} [2023-11-02 08:27:27,672][train_inner][INFO] - {"epoch": 70, "update": 69.975, "loss": "2.775", "ntokens": "3175.68", "nsentences": "44.6", "prob_perplexity": "203.001", "code_perplexity": "200.516", "temp": "0.5", "loss_0": "2.669", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5509", "wps": "17581.4", "ups": "5.54", "wpb": "3175.7", "bsz": "44.6", "num_updates": "283800", "lr": "2.94177e-05", "gnorm": "0.907", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "53554"} [2023-11-02 08:27:46,129][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 08:27:46,130][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:27:46,149][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 82 [2023-11-02 08:28:11,787][valid][INFO] - {"epoch": 70, "valid_loss": "2.671", "valid_ntokens": "3160.82", "valid_nsentences": "44.1685", "valid_prob_perplexity": "202.717", "valid_code_perplexity": "200.264", "valid_temp": "0.5", "valid_loss_0": "2.565", "valid_loss_1": "0.099", "valid_loss_2": "0.007", "valid_accuracy": "0.57221", "valid_wps": "55916.7", "valid_wpb": "3160.8", "valid_bsz": "44.2", "valid_num_updates": "283902", "valid_best_loss": "2.671"} [2023-11-02 08:28:11,789][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 70 @ 283902 updates [2023-11-02 08:28:11,791][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 08:28:13,251][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 08:28:14,220][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 70 @ 283902 updates, score 2.671) (writing took 2.4308750783093274 seconds) [2023-11-02 08:28:14,221][fairseq_cli.train][INFO] - end of epoch 70 (average epoch stats below) [2023-11-02 08:28:14,223][train][INFO] - {"epoch": 70, "train_loss": "2.797", "train_ntokens": "3182.2", "train_nsentences": "44.2682", "train_prob_perplexity": "202.627", "train_code_perplexity": "200.064", "train_temp": "0.5", "train_loss_0": "2.692", "train_loss_1": "0.099", "train_loss_2": "0.007", "train_accuracy": "0.54735", "train_wps": "16948.6", "train_ups": "5.33", "train_wpb": "3182.2", "train_bsz": "44.3", "train_num_updates": "283902", "train_lr": "2.93919e-05", "train_gnorm": "0.893", "train_loss_scale": "2", "train_train_wall": "720", "train_gb_free": "13.9", "train_wall": "53600"} [2023-11-02 08:28:14,226][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:28:14,247][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 71 [2023-11-02 08:28:14,428][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 08:28:14,487][fairseq.trainer][INFO] - begin training epoch 71 [2023-11-02 08:28:14,488][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 08:28:32,124][train_inner][INFO] - {"epoch": 71, "update": 70.024, "loss": "2.799", "ntokens": "3209.8", "nsentences": "44.84", "prob_perplexity": "203.195", "code_perplexity": "200.675", "temp": "0.5", "loss_0": "2.694", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54715", "wps": "9960.6", "ups": "3.1", "wpb": "3209.8", "bsz": "44.8", "num_updates": "284000", "lr": "2.93671e-05", "gnorm": "0.892", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "53618"} [2023-11-02 08:29:07,822][train_inner][INFO] - {"epoch": 71, "update": 70.073, "loss": "2.87", "ntokens": "3239.04", "nsentences": "43.56", "prob_perplexity": "203.169", "code_perplexity": "200.693", "temp": "0.5", "loss_0": "2.765", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53353", "wps": "18147.9", "ups": "5.6", "wpb": "3239", "bsz": "43.6", "num_updates": "284200", "lr": "2.93165e-05", "gnorm": "0.896", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "53654"} [2023-11-02 08:29:43,573][train_inner][INFO] - {"epoch": 71, "update": 70.123, "loss": "2.811", "ntokens": "3180.28", "nsentences": "42.84", "prob_perplexity": "202.027", "code_perplexity": "199.459", "temp": "0.5", "loss_0": "2.705", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54401", "wps": "17799.9", "ups": "5.6", "wpb": "3180.3", "bsz": "42.8", "num_updates": "284400", "lr": "2.92658e-05", "gnorm": "0.906", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "53690"} [2023-11-02 08:30:19,587][train_inner][INFO] - {"epoch": 71, "update": 70.172, "loss": "2.795", "ntokens": "3183.36", "nsentences": "44.24", "prob_perplexity": "203.807", "code_perplexity": "201.237", "temp": "0.5", "loss_0": "2.69", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54699", "wps": "17679.4", "ups": "5.55", "wpb": "3183.4", "bsz": "44.2", "num_updates": "284600", "lr": "2.92152e-05", "gnorm": "0.902", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "53726"} [2023-11-02 08:30:55,704][train_inner][INFO] - {"epoch": 71, "update": 70.221, "loss": "2.785", "ntokens": "3176.44", "nsentences": "45.32", "prob_perplexity": "201.702", "code_perplexity": "199.153", "temp": "0.5", "loss_0": "2.68", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54982", "wps": "17591.2", "ups": "5.54", "wpb": "3176.4", "bsz": "45.3", "num_updates": "284800", "lr": "2.91646e-05", "gnorm": "0.905", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "53762"} [2023-11-02 08:31:32,429][train_inner][INFO] - {"epoch": 71, "update": 70.271, "loss": "2.797", "ntokens": "3186.56", "nsentences": "43.48", "prob_perplexity": "203.05", "code_perplexity": "200.498", "temp": "0.5", "loss_0": "2.691", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54662", "wps": "17354.6", "ups": "5.45", "wpb": "3186.6", "bsz": "43.5", "num_updates": "285000", "lr": "2.91139e-05", "gnorm": "0.895", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "53799"} [2023-11-02 08:32:08,833][train_inner][INFO] - {"epoch": 71, "update": 70.32, "loss": "2.817", "ntokens": "3219.52", "nsentences": "44.52", "prob_perplexity": "203.99", "code_perplexity": "201.44", "temp": "0.5", "loss_0": "2.712", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54387", "wps": "17689", "ups": "5.49", "wpb": "3219.5", "bsz": "44.5", "num_updates": "285200", "lr": "2.90633e-05", "gnorm": "0.887", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "53835"} [2023-11-02 08:32:44,983][train_inner][INFO] - {"epoch": 71, "update": 70.369, "loss": "2.846", "ntokens": "3204.36", "nsentences": "44.28", "prob_perplexity": "202.826", "code_perplexity": "200.291", "temp": "0.5", "loss_0": "2.74", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53904", "wps": "17729", "ups": "5.53", "wpb": "3204.4", "bsz": "44.3", "num_updates": "285400", "lr": "2.90127e-05", "gnorm": "0.904", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "53871"} [2023-11-02 08:33:21,358][train_inner][INFO] - {"epoch": 71, "update": 70.419, "loss": "2.856", "ntokens": "3208.84", "nsentences": "42.48", "prob_perplexity": "202.871", "code_perplexity": "200.323", "temp": "0.5", "loss_0": "2.751", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53482", "wps": "17644.2", "ups": "5.5", "wpb": "3208.8", "bsz": "42.5", "num_updates": "285600", "lr": "2.8962e-05", "gnorm": "0.912", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "53908"} [2023-11-02 08:33:57,858][train_inner][INFO] - {"epoch": 71, "update": 70.468, "loss": "2.783", "ntokens": "3168.84", "nsentences": "45.04", "prob_perplexity": "203.309", "code_perplexity": "200.723", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55059", "wps": "17364.8", "ups": "5.48", "wpb": "3168.8", "bsz": "45", "num_updates": "285800", "lr": "2.89114e-05", "gnorm": "0.894", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "53944"} [2023-11-02 08:34:33,935][train_inner][INFO] - {"epoch": 71, "update": 70.517, "loss": "2.768", "ntokens": "3165.24", "nsentences": "43.32", "prob_perplexity": "203.075", "code_perplexity": "200.52", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55105", "wps": "17547.9", "ups": "5.54", "wpb": "3165.2", "bsz": "43.3", "num_updates": "286000", "lr": "2.88608e-05", "gnorm": "0.892", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "53980"} [2023-11-02 08:35:10,155][train_inner][INFO] - {"epoch": 71, "update": 70.567, "loss": "2.8", "ntokens": "3158.52", "nsentences": "42.44", "prob_perplexity": "201.995", "code_perplexity": "199.424", "temp": "0.5", "loss_0": "2.694", "loss_1": "0.099", "loss_2": "0.007", "accuracy": "0.54506", "wps": "17441.9", "ups": "5.52", "wpb": "3158.5", "bsz": "42.4", "num_updates": "286200", "lr": "2.88101e-05", "gnorm": "0.92", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "54016"} [2023-11-02 08:35:46,773][train_inner][INFO] - {"epoch": 71, "update": 70.616, "loss": "2.778", "ntokens": "3215.8", "nsentences": "45.2", "prob_perplexity": "203.864", "code_perplexity": "201.308", "temp": "0.5", "loss_0": "2.673", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55068", "wps": "17565.3", "ups": "5.46", "wpb": "3215.8", "bsz": "45.2", "num_updates": "286400", "lr": "2.87595e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "54053"} [2023-11-02 08:36:22,794][train_inner][INFO] - {"epoch": 71, "update": 70.665, "loss": "2.869", "ntokens": "3210.84", "nsentences": "42.36", "prob_perplexity": "203.848", "code_perplexity": "201.305", "temp": "0.5", "loss_0": "2.764", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53323", "wps": "17828.5", "ups": "5.55", "wpb": "3210.8", "bsz": "42.4", "num_updates": "286600", "lr": "2.87089e-05", "gnorm": "0.906", "loss_scale": "2", "train_wall": "35", "gb_free": "12", "wall": "54089"} [2023-11-02 08:36:59,059][train_inner][INFO] - {"epoch": 71, "update": 70.714, "loss": "2.863", "ntokens": "3233.32", "nsentences": "43.12", "prob_perplexity": "203.56", "code_perplexity": "201.019", "temp": "0.5", "loss_0": "2.757", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53448", "wps": "17832.5", "ups": "5.52", "wpb": "3233.3", "bsz": "43.1", "num_updates": "286800", "lr": "2.86582e-05", "gnorm": "0.894", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "54125"} [2023-11-02 08:37:35,511][train_inner][INFO] - {"epoch": 71, "update": 70.764, "loss": "2.809", "ntokens": "3178.72", "nsentences": "43.92", "prob_perplexity": "203.283", "code_perplexity": "200.72", "temp": "0.5", "loss_0": "2.704", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54507", "wps": "17442", "ups": "5.49", "wpb": "3178.7", "bsz": "43.9", "num_updates": "287000", "lr": "2.86076e-05", "gnorm": "0.898", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "54162"} [2023-11-02 08:38:11,745][train_inner][INFO] - {"epoch": 71, "update": 70.813, "loss": "2.765", "ntokens": "3192.8", "nsentences": "46.32", "prob_perplexity": "203.314", "code_perplexity": "200.759", "temp": "0.5", "loss_0": "2.659", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5541", "wps": "17624", "ups": "5.52", "wpb": "3192.8", "bsz": "46.3", "num_updates": "287200", "lr": "2.8557e-05", "gnorm": "0.9", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "54198"} [2023-11-02 08:38:47,730][train_inner][INFO] - {"epoch": 71, "update": 70.862, "loss": "2.782", "ntokens": "3193.04", "nsentences": "43.96", "prob_perplexity": "204.316", "code_perplexity": "201.718", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54871", "wps": "17747.5", "ups": "5.56", "wpb": "3193", "bsz": "44", "num_updates": "287400", "lr": "2.85063e-05", "gnorm": "0.903", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "54234"} [2023-11-02 08:39:24,111][train_inner][INFO] - {"epoch": 71, "update": 70.912, "loss": "2.777", "ntokens": "3193", "nsentences": "44.84", "prob_perplexity": "203.77", "code_perplexity": "201.218", "temp": "0.5", "loss_0": "2.672", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55117", "wps": "17554.3", "ups": "5.5", "wpb": "3193", "bsz": "44.8", "num_updates": "287600", "lr": "2.84557e-05", "gnorm": "0.897", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "54270"} [2023-11-02 08:40:00,580][train_inner][INFO] - {"epoch": 71, "update": 70.961, "loss": "2.82", "ntokens": "3222.68", "nsentences": "44.64", "prob_perplexity": "204.894", "code_perplexity": "202.39", "temp": "0.5", "loss_0": "2.715", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54333", "wps": "17674.4", "ups": "5.48", "wpb": "3222.7", "bsz": "44.6", "num_updates": "287800", "lr": "2.84051e-05", "gnorm": "0.905", "loss_scale": "2", "train_wall": "36", "gb_free": "15.1", "wall": "54307"} [2023-11-02 08:40:29,218][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 08:40:29,220][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:40:29,242][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 83 [2023-11-02 08:40:54,896][valid][INFO] - {"epoch": 71, "valid_loss": "2.682", "valid_ntokens": "3159.1", "valid_nsentences": "44.1685", "valid_prob_perplexity": "204.6", "valid_code_perplexity": "202.124", "valid_temp": "0.5", "valid_loss_0": "2.577", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.5702", "valid_wps": "55897.2", "valid_wpb": "3159.1", "valid_bsz": "44.2", "valid_num_updates": "287958", "valid_best_loss": "2.671"} [2023-11-02 08:40:54,898][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 71 @ 287958 updates [2023-11-02 08:40:54,900][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 08:40:56,338][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 08:40:56,390][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 71 @ 287958 updates, score 2.682) (writing took 1.4920357731170952 seconds) [2023-11-02 08:40:56,391][fairseq_cli.train][INFO] - end of epoch 71 (average epoch stats below) [2023-11-02 08:40:56,393][train][INFO] - {"epoch": 71, "train_loss": "2.806", "train_ntokens": "3196.02", "train_nsentences": "44.2682", "train_prob_perplexity": "203.302", "train_code_perplexity": "200.75", "train_temp": "0.5", "train_loss_0": "2.701", "train_loss_1": "0.098", "train_loss_2": "0.007", "train_accuracy": "0.54544", "train_wps": "17008.1", "train_ups": "5.32", "train_wpb": "3196", "train_bsz": "44.3", "train_num_updates": "287958", "train_lr": "2.83651e-05", "train_gnorm": "0.9", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "15.2", "train_wall": "54363"} [2023-11-02 08:40:56,396][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:40:56,414][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 72 [2023-11-02 08:40:56,591][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 08:40:56,650][fairseq.trainer][INFO] - begin training epoch 72 [2023-11-02 08:40:56,651][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 08:41:04,152][train_inner][INFO] - {"epoch": 72, "update": 71.01, "loss": "2.726", "ntokens": "3175.88", "nsentences": "49.36", "prob_perplexity": "203.93", "code_perplexity": "201.387", "temp": "0.5", "loss_0": "2.621", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56441", "wps": "9991.8", "ups": "3.15", "wpb": "3175.9", "bsz": "49.4", "num_updates": "288000", "lr": "2.83544e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "54370"} [2023-11-02 08:41:40,103][train_inner][INFO] - {"epoch": 72, "update": 71.06, "loss": "2.816", "ntokens": "3199.88", "nsentences": "43.84", "prob_perplexity": "204.583", "code_perplexity": "202.051", "temp": "0.5", "loss_0": "2.711", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54346", "wps": "17802.5", "ups": "5.56", "wpb": "3199.9", "bsz": "43.8", "num_updates": "288200", "lr": "2.83038e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "54406"} [2023-11-02 08:42:16,137][train_inner][INFO] - {"epoch": 72, "update": 71.109, "loss": "2.795", "ntokens": "3175.88", "nsentences": "44.6", "prob_perplexity": "203.814", "code_perplexity": "201.286", "temp": "0.5", "loss_0": "2.69", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54825", "wps": "17628.5", "ups": "5.55", "wpb": "3175.9", "bsz": "44.6", "num_updates": "288400", "lr": "2.82532e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "35", "gb_free": "12.7", "wall": "54442"} [2023-11-02 08:42:51,849][train_inner][INFO] - {"epoch": 72, "update": 71.158, "loss": "2.847", "ntokens": "3187.96", "nsentences": "41.84", "prob_perplexity": "204.535", "code_perplexity": "201.982", "temp": "0.5", "loss_0": "2.742", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53667", "wps": "17854.6", "ups": "5.6", "wpb": "3188", "bsz": "41.8", "num_updates": "288600", "lr": "2.82025e-05", "gnorm": "0.906", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "54478"} [2023-11-02 08:43:27,878][train_inner][INFO] - {"epoch": 72, "update": 71.208, "loss": "2.79", "ntokens": "3225.32", "nsentences": "44.32", "prob_perplexity": "205.054", "code_perplexity": "202.5", "temp": "0.5", "loss_0": "2.685", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54818", "wps": "17913.1", "ups": "5.55", "wpb": "3225.3", "bsz": "44.3", "num_updates": "288800", "lr": "2.81519e-05", "gnorm": "0.889", "loss_scale": "2", "train_wall": "35", "gb_free": "15.7", "wall": "54514"} [2023-11-02 08:44:04,206][train_inner][INFO] - {"epoch": 72, "update": 71.257, "loss": "2.841", "ntokens": "3205.68", "nsentences": "44.6", "prob_perplexity": "204.648", "code_perplexity": "202.146", "temp": "0.5", "loss_0": "2.736", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54041", "wps": "17649.8", "ups": "5.51", "wpb": "3205.7", "bsz": "44.6", "num_updates": "289000", "lr": "2.81013e-05", "gnorm": "0.899", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "54550"} [2023-11-02 08:44:41,182][train_inner][INFO] - {"epoch": 72, "update": 71.306, "loss": "2.851", "ntokens": "3225.16", "nsentences": "43.88", "prob_perplexity": "203.669", "code_perplexity": "201.098", "temp": "0.5", "loss_0": "2.746", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53754", "wps": "17445.5", "ups": "5.41", "wpb": "3225.2", "bsz": "43.9", "num_updates": "289200", "lr": "2.80506e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "54587"} [2023-11-02 08:45:17,520][train_inner][INFO] - {"epoch": 72, "update": 71.356, "loss": "2.826", "ntokens": "3208.84", "nsentences": "44.44", "prob_perplexity": "203.824", "code_perplexity": "201.348", "temp": "0.5", "loss_0": "2.72", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54285", "wps": "17662.2", "ups": "5.5", "wpb": "3208.8", "bsz": "44.4", "num_updates": "289400", "lr": "2.8e-05", "gnorm": "0.909", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "54624"} [2023-11-02 08:45:53,894][train_inner][INFO] - {"epoch": 72, "update": 71.405, "loss": "2.774", "ntokens": "3159.12", "nsentences": "44.64", "prob_perplexity": "203.985", "code_perplexity": "201.372", "temp": "0.5", "loss_0": "2.669", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55021", "wps": "17371.3", "ups": "5.5", "wpb": "3159.1", "bsz": "44.6", "num_updates": "289600", "lr": "2.79494e-05", "gnorm": "0.9", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "54660"} [2023-11-02 08:46:30,105][train_inner][INFO] - {"epoch": 72, "update": 71.454, "loss": "2.734", "ntokens": "3171.96", "nsentences": "46.4", "prob_perplexity": "204.297", "code_perplexity": "201.753", "temp": "0.5", "loss_0": "2.628", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55947", "wps": "17520.5", "ups": "5.52", "wpb": "3172", "bsz": "46.4", "num_updates": "289800", "lr": "2.78987e-05", "gnorm": "0.897", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "54696"} [2023-11-02 08:47:06,860][train_inner][INFO] - {"epoch": 72, "update": 71.503, "loss": "2.848", "ntokens": "3235.76", "nsentences": "42.32", "prob_perplexity": "204.418", "code_perplexity": "201.868", "temp": "0.5", "loss_0": "2.743", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53633", "wps": "17608.2", "ups": "5.44", "wpb": "3235.8", "bsz": "42.3", "num_updates": "290000", "lr": "2.78481e-05", "gnorm": "0.897", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "54733"} [2023-11-02 08:47:42,751][train_inner][INFO] - {"epoch": 72, "update": 71.553, "loss": "2.791", "ntokens": "3207.76", "nsentences": "45.64", "prob_perplexity": "204.12", "code_perplexity": "201.618", "temp": "0.5", "loss_0": "2.685", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55004", "wps": "17875.8", "ups": "5.57", "wpb": "3207.8", "bsz": "45.6", "num_updates": "290200", "lr": "2.77975e-05", "gnorm": "0.907", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "54769"} [2023-11-02 08:48:19,473][train_inner][INFO] - {"epoch": 72, "update": 71.602, "loss": "2.8", "ntokens": "3183.64", "nsentences": "45.28", "prob_perplexity": "204.14", "code_perplexity": "201.583", "temp": "0.5", "loss_0": "2.695", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54856", "wps": "17340.5", "ups": "5.45", "wpb": "3183.6", "bsz": "45.3", "num_updates": "290400", "lr": "2.77468e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "54806"} [2023-11-02 08:48:55,972][train_inner][INFO] - {"epoch": 72, "update": 71.651, "loss": "2.846", "ntokens": "3184.16", "nsentences": "42.76", "prob_perplexity": "203.914", "code_perplexity": "201.335", "temp": "0.5", "loss_0": "2.741", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53773", "wps": "17448.8", "ups": "5.48", "wpb": "3184.2", "bsz": "42.8", "num_updates": "290600", "lr": "2.76962e-05", "gnorm": "0.912", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "54842"} [2023-11-02 08:49:32,461][train_inner][INFO] - {"epoch": 72, "update": 71.701, "loss": "2.781", "ntokens": "3186.28", "nsentences": "45.52", "prob_perplexity": "204.906", "code_perplexity": "202.413", "temp": "0.5", "loss_0": "2.676", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55174", "wps": "17465.1", "ups": "5.48", "wpb": "3186.3", "bsz": "45.5", "num_updates": "290800", "lr": "2.76456e-05", "gnorm": "0.895", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "54879"} [2023-11-02 08:50:09,277][train_inner][INFO] - {"epoch": 72, "update": 71.75, "loss": "2.786", "ntokens": "3194.44", "nsentences": "44.16", "prob_perplexity": "204.544", "code_perplexity": "202.027", "temp": "0.5", "loss_0": "2.681", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54928", "wps": "17354.6", "ups": "5.43", "wpb": "3194.4", "bsz": "44.2", "num_updates": "291000", "lr": "2.75949e-05", "gnorm": "0.89", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "54916"} [2023-11-02 08:50:45,776][train_inner][INFO] - {"epoch": 72, "update": 71.799, "loss": "2.732", "ntokens": "3155.64", "nsentences": "47.04", "prob_perplexity": "204.05", "code_perplexity": "201.531", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56091", "wps": "17292.7", "ups": "5.48", "wpb": "3155.6", "bsz": "47", "num_updates": "291200", "lr": "2.75443e-05", "gnorm": "0.9", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "54952"} [2023-11-02 08:51:22,705][train_inner][INFO] - {"epoch": 72, "update": 71.849, "loss": "2.845", "ntokens": "3212.92", "nsentences": "42.64", "prob_perplexity": "203.846", "code_perplexity": "201.333", "temp": "0.5", "loss_0": "2.74", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53711", "wps": "17401.4", "ups": "5.42", "wpb": "3212.9", "bsz": "42.6", "num_updates": "291400", "lr": "2.74937e-05", "gnorm": "0.91", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "54989"} [2023-11-02 08:51:58,394][train_inner][INFO] - {"epoch": 72, "update": 71.898, "loss": "2.787", "ntokens": "3161.16", "nsentences": "42.72", "prob_perplexity": "204.934", "code_perplexity": "202.384", "temp": "0.5", "loss_0": "2.682", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54695", "wps": "17716.5", "ups": "5.6", "wpb": "3161.2", "bsz": "42.7", "num_updates": "291600", "lr": "2.7443e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "55025"} [2023-11-02 08:52:35,199][train_inner][INFO] - {"epoch": 72, "update": 71.947, "loss": "2.802", "ntokens": "3205.48", "nsentences": "43.36", "prob_perplexity": "204.501", "code_perplexity": "201.904", "temp": "0.5", "loss_0": "2.697", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5449", "wps": "17419.6", "ups": "5.43", "wpb": "3205.5", "bsz": "43.4", "num_updates": "291800", "lr": "2.73924e-05", "gnorm": "0.9", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "55061"} [2023-11-02 08:53:11,129][train_inner][INFO] - {"epoch": 72, "update": 71.997, "loss": "2.767", "ntokens": "3186.68", "nsentences": "45.36", "prob_perplexity": "205.177", "code_perplexity": "202.64", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55344", "wps": "17739.4", "ups": "5.57", "wpb": "3186.7", "bsz": "45.4", "num_updates": "292000", "lr": "2.73418e-05", "gnorm": "0.893", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "55097"} [2023-11-02 08:53:13,560][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 08:53:13,561][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:53:13,581][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 84 [2023-11-02 08:53:38,946][valid][INFO] - {"epoch": 72, "valid_loss": "2.646", "valid_ntokens": "3150.72", "valid_nsentences": "44.1685", "valid_prob_perplexity": "203.065", "valid_code_perplexity": "200.628", "valid_temp": "0.5", "valid_loss_0": "2.541", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.57648", "valid_wps": "56407.6", "valid_wpb": "3150.7", "valid_bsz": "44.2", "valid_num_updates": "292014", "valid_best_loss": "2.646"} [2023-11-02 08:53:38,948][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 72 @ 292014 updates [2023-11-02 08:53:38,950][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 08:53:40,411][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 08:53:41,388][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 72 @ 292014 updates, score 2.646) (writing took 2.4402974476106465 seconds) [2023-11-02 08:53:41,389][fairseq_cli.train][INFO] - end of epoch 72 (average epoch stats below) [2023-11-02 08:53:41,391][train][INFO] - {"epoch": 72, "train_loss": "2.804", "train_ntokens": "3193.36", "train_nsentences": "44.2682", "train_prob_perplexity": "204.341", "train_code_perplexity": "201.802", "train_temp": "0.5", "train_loss_0": "2.699", "train_loss_1": "0.098", "train_loss_2": "0.007", "train_accuracy": "0.54609", "train_wps": "16931.1", "train_ups": "5.3", "train_wpb": "3193.4", "train_bsz": "44.3", "train_num_updates": "292014", "train_lr": "2.73382e-05", "train_gnorm": "0.903", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "13.1", "train_wall": "55128"} [2023-11-02 08:53:41,393][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 08:53:41,413][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 73 [2023-11-02 08:53:41,600][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 08:53:41,656][fairseq.trainer][INFO] - begin training epoch 73 [2023-11-02 08:53:41,657][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 08:54:15,219][train_inner][INFO] - {"epoch": 73, "update": 72.046, "loss": "2.821", "ntokens": "3192.24", "nsentences": "42.2", "prob_perplexity": "204.153", "code_perplexity": "201.602", "temp": "0.5", "loss_0": "2.716", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54067", "wps": "9962", "ups": "3.12", "wpb": "3192.2", "bsz": "42.2", "num_updates": "292200", "lr": "2.72911e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "55161"} [2023-11-02 08:54:51,075][train_inner][INFO] - {"epoch": 73, "update": 72.095, "loss": "2.855", "ntokens": "3209.56", "nsentences": "42.96", "prob_perplexity": "204.08", "code_perplexity": "201.517", "temp": "0.5", "loss_0": "2.75", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53692", "wps": "17903.5", "ups": "5.58", "wpb": "3209.6", "bsz": "43", "num_updates": "292400", "lr": "2.72405e-05", "gnorm": "0.913", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "55197"} [2023-11-02 08:55:26,812][train_inner][INFO] - {"epoch": 73, "update": 72.144, "loss": "2.767", "ntokens": "3176.6", "nsentences": "45.04", "prob_perplexity": "205.089", "code_perplexity": "202.525", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55264", "wps": "17778.6", "ups": "5.6", "wpb": "3176.6", "bsz": "45", "num_updates": "292600", "lr": "2.71899e-05", "gnorm": "0.912", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "55233"} [2023-11-02 08:56:03,393][train_inner][INFO] - {"epoch": 73, "update": 72.194, "loss": "2.818", "ntokens": "3207.6", "nsentences": "45.44", "prob_perplexity": "203.559", "code_perplexity": "200.977", "temp": "0.5", "loss_0": "2.713", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54458", "wps": "17538", "ups": "5.47", "wpb": "3207.6", "bsz": "45.4", "num_updates": "292800", "lr": "2.71392e-05", "gnorm": "0.906", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "55270"} [2023-11-02 08:56:39,630][train_inner][INFO] - {"epoch": 73, "update": 72.243, "loss": "2.819", "ntokens": "3203.52", "nsentences": "44", "prob_perplexity": "203.509", "code_perplexity": "200.966", "temp": "0.5", "loss_0": "2.714", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54366", "wps": "17689.3", "ups": "5.52", "wpb": "3203.5", "bsz": "44", "num_updates": "293000", "lr": "2.70886e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "55306"} [2023-11-02 08:57:15,633][train_inner][INFO] - {"epoch": 73, "update": 72.292, "loss": "2.772", "ntokens": "3185.32", "nsentences": "43.48", "prob_perplexity": "204.877", "code_perplexity": "202.267", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55087", "wps": "17696", "ups": "5.56", "wpb": "3185.3", "bsz": "43.5", "num_updates": "293200", "lr": "2.7038e-05", "gnorm": "0.909", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "55342"} [2023-11-02 08:57:51,886][train_inner][INFO] - {"epoch": 73, "update": 72.342, "loss": "2.731", "ntokens": "3171.64", "nsentences": "46.2", "prob_perplexity": "204.593", "code_perplexity": "202.04", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56022", "wps": "17498.6", "ups": "5.52", "wpb": "3171.6", "bsz": "46.2", "num_updates": "293400", "lr": "2.69873e-05", "gnorm": "0.898", "loss_scale": "2", "train_wall": "36", "gb_free": "12.1", "wall": "55378"} [2023-11-02 08:58:27,858][train_inner][INFO] - {"epoch": 73, "update": 72.391, "loss": "2.767", "ntokens": "3197.76", "nsentences": "45.48", "prob_perplexity": "203.821", "code_perplexity": "201.281", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5531", "wps": "17780.3", "ups": "5.56", "wpb": "3197.8", "bsz": "45.5", "num_updates": "293600", "lr": "2.69367e-05", "gnorm": "0.9", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "55414"} [2023-11-02 08:59:04,185][train_inner][INFO] - {"epoch": 73, "update": 72.44, "loss": "2.759", "ntokens": "3185.48", "nsentences": "45.76", "prob_perplexity": "203.989", "code_perplexity": "201.435", "temp": "0.5", "loss_0": "2.654", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55483", "wps": "17538.6", "ups": "5.51", "wpb": "3185.5", "bsz": "45.8", "num_updates": "293800", "lr": "2.68861e-05", "gnorm": "0.901", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "55450"} [2023-11-02 08:59:40,393][train_inner][INFO] - {"epoch": 73, "update": 72.49, "loss": "2.809", "ntokens": "3169.68", "nsentences": "42.72", "prob_perplexity": "204.862", "code_perplexity": "202.322", "temp": "0.5", "loss_0": "2.704", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5436", "wps": "17509.3", "ups": "5.52", "wpb": "3169.7", "bsz": "42.7", "num_updates": "294000", "lr": "2.68354e-05", "gnorm": "0.916", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "55487"} [2023-11-02 09:00:16,893][train_inner][INFO] - {"epoch": 73, "update": 72.539, "loss": "2.83", "ntokens": "3206.16", "nsentences": "43.96", "prob_perplexity": "205.298", "code_perplexity": "202.759", "temp": "0.5", "loss_0": "2.726", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54092", "wps": "17568.9", "ups": "5.48", "wpb": "3206.2", "bsz": "44", "num_updates": "294200", "lr": "2.67848e-05", "gnorm": "0.899", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "55523"} [2023-11-02 09:00:53,198][train_inner][INFO] - {"epoch": 73, "update": 72.588, "loss": "2.814", "ntokens": "3203.96", "nsentences": "43.08", "prob_perplexity": "204.994", "code_perplexity": "202.41", "temp": "0.5", "loss_0": "2.709", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.543", "wps": "17651.5", "ups": "5.51", "wpb": "3204", "bsz": "43.1", "num_updates": "294400", "lr": "2.67342e-05", "gnorm": "0.912", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "55559"} [2023-11-02 09:01:29,008][train_inner][INFO] - {"epoch": 73, "update": 72.638, "loss": "2.818", "ntokens": "3202.16", "nsentences": "43.76", "prob_perplexity": "205.542", "code_perplexity": "203.009", "temp": "0.5", "loss_0": "2.713", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54269", "wps": "17885.4", "ups": "5.59", "wpb": "3202.2", "bsz": "43.8", "num_updates": "294600", "lr": "2.66835e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "55595"} [2023-11-02 09:02:05,574][train_inner][INFO] - {"epoch": 73, "update": 72.687, "loss": "2.776", "ntokens": "3202.76", "nsentences": "46.32", "prob_perplexity": "204.461", "code_perplexity": "201.901", "temp": "0.5", "loss_0": "2.671", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55322", "wps": "17518.6", "ups": "5.47", "wpb": "3202.8", "bsz": "46.3", "num_updates": "294800", "lr": "2.66329e-05", "gnorm": "0.901", "loss_scale": "2", "train_wall": "36", "gb_free": "12", "wall": "55632"} [2023-11-02 09:02:41,579][train_inner][INFO] - {"epoch": 73, "update": 72.736, "loss": "2.823", "ntokens": "3209.52", "nsentences": "43.44", "prob_perplexity": "204.693", "code_perplexity": "202.102", "temp": "0.5", "loss_0": "2.719", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54208", "wps": "17829.4", "ups": "5.56", "wpb": "3209.5", "bsz": "43.4", "num_updates": "295000", "lr": "2.65823e-05", "gnorm": "0.906", "loss_scale": "4", "train_wall": "35", "gb_free": "14.1", "wall": "55668"} [2023-11-02 09:03:17,566][train_inner][INFO] - {"epoch": 73, "update": 72.786, "loss": "2.752", "ntokens": "3168.56", "nsentences": "44.04", "prob_perplexity": "204.365", "code_perplexity": "201.782", "temp": "0.5", "loss_0": "2.647", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55387", "wps": "17610.6", "ups": "5.56", "wpb": "3168.6", "bsz": "44", "num_updates": "295200", "lr": "2.65316e-05", "gnorm": "0.919", "loss_scale": "4", "train_wall": "35", "gb_free": "13.8", "wall": "55704"} [2023-11-02 09:03:54,307][train_inner][INFO] - {"epoch": 73, "update": 72.835, "loss": "2.737", "ntokens": "3188.88", "nsentences": "45.56", "prob_perplexity": "204.97", "code_perplexity": "202.416", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55889", "wps": "17359.7", "ups": "5.44", "wpb": "3188.9", "bsz": "45.6", "num_updates": "295400", "lr": "2.6481e-05", "gnorm": "0.911", "loss_scale": "4", "train_wall": "36", "gb_free": "13", "wall": "55741"} [2023-11-02 09:04:31,385][train_inner][INFO] - {"epoch": 73, "update": 72.884, "loss": "2.809", "ntokens": "3185.96", "nsentences": "42.52", "prob_perplexity": "204.446", "code_perplexity": "201.816", "temp": "0.5", "loss_0": "2.704", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54404", "wps": "17186.1", "ups": "5.39", "wpb": "3186", "bsz": "42.5", "num_updates": "295600", "lr": "2.64304e-05", "gnorm": "0.907", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "55778"} [2023-11-02 09:05:07,742][train_inner][INFO] - {"epoch": 73, "update": 72.933, "loss": "2.802", "ntokens": "3194.88", "nsentences": "44", "prob_perplexity": "205.123", "code_perplexity": "202.504", "temp": "0.5", "loss_0": "2.697", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54635", "wps": "17576.2", "ups": "5.5", "wpb": "3194.9", "bsz": "44", "num_updates": "295800", "lr": "2.63797e-05", "gnorm": "0.909", "loss_scale": "4", "train_wall": "36", "gb_free": "15.7", "wall": "55814"} [2023-11-02 09:05:44,198][train_inner][INFO] - {"epoch": 73, "update": 72.983, "loss": "2.791", "ntokens": "3188.28", "nsentences": "44.76", "prob_perplexity": "205.408", "code_perplexity": "202.799", "temp": "0.5", "loss_0": "2.686", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54837", "wps": "17492", "ups": "5.49", "wpb": "3188.3", "bsz": "44.8", "num_updates": "296000", "lr": "2.63291e-05", "gnorm": "0.902", "loss_scale": "4", "train_wall": "36", "gb_free": "13.5", "wall": "55850"} [2023-11-02 09:05:56,798][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 09:05:56,799][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:05:56,820][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 85 [2023-11-02 09:06:22,513][valid][INFO] - {"epoch": 73, "valid_loss": "2.671", "valid_ntokens": "3167.54", "valid_nsentences": "44.1685", "valid_prob_perplexity": "205.014", "valid_code_perplexity": "202.486", "valid_temp": "0.5", "valid_loss_0": "2.566", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.57183", "valid_wps": "55973.6", "valid_wpb": "3167.5", "valid_bsz": "44.2", "valid_num_updates": "296070", "valid_best_loss": "2.646"} [2023-11-02 09:06:22,515][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 73 @ 296070 updates [2023-11-02 09:06:22,517][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:06:23,969][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:06:24,017][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 73 @ 296070 updates, score 2.671) (writing took 1.5022141658701003 seconds) [2023-11-02 09:06:24,018][fairseq_cli.train][INFO] - end of epoch 73 (average epoch stats below) [2023-11-02 09:06:24,020][train][INFO] - {"epoch": 73, "train_loss": "2.793", "train_ntokens": "3193.06", "train_nsentences": "44.2682", "train_prob_perplexity": "204.63", "train_code_perplexity": "202.059", "train_temp": "0.5", "train_loss_0": "2.688", "train_loss_1": "0.098", "train_loss_2": "0.007", "train_accuracy": "0.54788", "train_wps": "16982.2", "train_ups": "5.32", "train_wpb": "3193.1", "train_bsz": "44.3", "train_num_updates": "296070", "train_lr": "2.63114e-05", "train_gnorm": "0.908", "train_loss_scale": "4", "train_train_wall": "722", "train_gb_free": "14.4", "train_wall": "55890"} [2023-11-02 09:06:24,022][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:06:24,041][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 74 [2023-11-02 09:06:24,218][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 09:06:24,277][fairseq.trainer][INFO] - begin training epoch 74 [2023-11-02 09:06:24,277][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 09:06:32,453][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 09:06:47,925][train_inner][INFO] - {"epoch": 74, "update": 73.032, "loss": "2.788", "ntokens": "3219.44", "nsentences": "44.44", "prob_perplexity": "205.798", "code_perplexity": "203.247", "temp": "0.5", "loss_0": "2.683", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54899", "wps": "10104.2", "ups": "3.14", "wpb": "3219.4", "bsz": "44.4", "num_updates": "296200", "lr": "2.62785e-05", "gnorm": "0.895", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "55914"} [2023-11-02 09:07:23,874][train_inner][INFO] - {"epoch": 74, "update": 73.082, "loss": "2.755", "ntokens": "3161.16", "nsentences": "45.28", "prob_perplexity": "205.029", "code_perplexity": "202.416", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55613", "wps": "17588.3", "ups": "5.56", "wpb": "3161.2", "bsz": "45.3", "num_updates": "296400", "lr": "2.62278e-05", "gnorm": "0.905", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "55950"} [2023-11-02 09:07:59,986][train_inner][INFO] - {"epoch": 74, "update": 73.131, "loss": "2.817", "ntokens": "3205.8", "nsentences": "43.56", "prob_perplexity": "206.365", "code_perplexity": "203.717", "temp": "0.5", "loss_0": "2.713", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54398", "wps": "17755.6", "ups": "5.54", "wpb": "3205.8", "bsz": "43.6", "num_updates": "296600", "lr": "2.61772e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "35", "gb_free": "15.1", "wall": "55986"} [2023-11-02 09:08:36,060][train_inner][INFO] - {"epoch": 74, "update": 73.18, "loss": "2.843", "ntokens": "3215.96", "nsentences": "44", "prob_perplexity": "206.053", "code_perplexity": "203.424", "temp": "0.5", "loss_0": "2.738", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53975", "wps": "17831.2", "ups": "5.54", "wpb": "3216", "bsz": "44", "num_updates": "296800", "lr": "2.61266e-05", "gnorm": "0.904", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "56022"} [2023-11-02 09:09:12,083][train_inner][INFO] - {"epoch": 74, "update": 73.23, "loss": "2.782", "ntokens": "3194.28", "nsentences": "44.56", "prob_perplexity": "205.62", "code_perplexity": "203.027", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5506", "wps": "17735.9", "ups": "5.55", "wpb": "3194.3", "bsz": "44.6", "num_updates": "297000", "lr": "2.60759e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "35", "gb_free": "15", "wall": "56058"} [2023-11-02 09:09:48,221][train_inner][INFO] - {"epoch": 74, "update": 73.279, "loss": "2.705", "ntokens": "3153.4", "nsentences": "46.28", "prob_perplexity": "205.545", "code_perplexity": "202.924", "temp": "0.5", "loss_0": "2.601", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56454", "wps": "17453", "ups": "5.53", "wpb": "3153.4", "bsz": "46.3", "num_updates": "297200", "lr": "2.60253e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "56094"} [2023-11-02 09:10:24,300][train_inner][INFO] - {"epoch": 74, "update": 73.328, "loss": "2.841", "ntokens": "3237.92", "nsentences": "42.44", "prob_perplexity": "205.757", "code_perplexity": "203.217", "temp": "0.5", "loss_0": "2.736", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5371", "wps": "17957.4", "ups": "5.55", "wpb": "3237.9", "bsz": "42.4", "num_updates": "297400", "lr": "2.59747e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "56131"} [2023-11-02 09:11:00,946][train_inner][INFO] - {"epoch": 74, "update": 73.377, "loss": "2.811", "ntokens": "3192.88", "nsentences": "42.48", "prob_perplexity": "204.507", "code_perplexity": "201.855", "temp": "0.5", "loss_0": "2.706", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54268", "wps": "17426.5", "ups": "5.46", "wpb": "3192.9", "bsz": "42.5", "num_updates": "297600", "lr": "2.59241e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "56167"} [2023-11-02 09:11:37,172][train_inner][INFO] - {"epoch": 74, "update": 73.427, "loss": "2.777", "ntokens": "3173", "nsentences": "43", "prob_perplexity": "205.095", "code_perplexity": "202.486", "temp": "0.5", "loss_0": "2.672", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55023", "wps": "17519.3", "ups": "5.52", "wpb": "3173", "bsz": "43", "num_updates": "297800", "lr": "2.58734e-05", "gnorm": "0.909", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "56203"} [2023-11-02 09:12:13,648][train_inner][INFO] - {"epoch": 74, "update": 73.476, "loss": "2.812", "ntokens": "3179.84", "nsentences": "42.92", "prob_perplexity": "206.36", "code_perplexity": "203.75", "temp": "0.5", "loss_0": "2.708", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54344", "wps": "17436.1", "ups": "5.48", "wpb": "3179.8", "bsz": "42.9", "num_updates": "298000", "lr": "2.58228e-05", "gnorm": "0.91", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "56240"} [2023-11-02 09:12:49,850][train_inner][INFO] - {"epoch": 74, "update": 73.525, "loss": "2.793", "ntokens": "3169.08", "nsentences": "43.44", "prob_perplexity": "205.034", "code_perplexity": "202.414", "temp": "0.5", "loss_0": "2.689", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54718", "wps": "17508.5", "ups": "5.52", "wpb": "3169.1", "bsz": "43.4", "num_updates": "298200", "lr": "2.57722e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "56276"} [2023-11-02 09:13:26,197][train_inner][INFO] - {"epoch": 74, "update": 73.575, "loss": "2.747", "ntokens": "3213.4", "nsentences": "45.76", "prob_perplexity": "205.812", "code_perplexity": "203.317", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55644", "wps": "17683.1", "ups": "5.5", "wpb": "3213.4", "bsz": "45.8", "num_updates": "298400", "lr": "2.57215e-05", "gnorm": "0.902", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "56312"} [2023-11-02 09:14:02,901][train_inner][INFO] - {"epoch": 74, "update": 73.624, "loss": "2.855", "ntokens": "3222.48", "nsentences": "40.12", "prob_perplexity": "205.968", "code_perplexity": "203.399", "temp": "0.5", "loss_0": "2.751", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53319", "wps": "17560.5", "ups": "5.45", "wpb": "3222.5", "bsz": "40.1", "num_updates": "298600", "lr": "2.56709e-05", "gnorm": "0.92", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "56349"} [2023-11-02 09:14:38,978][train_inner][INFO] - {"epoch": 74, "update": 73.673, "loss": "2.784", "ntokens": "3168.96", "nsentences": "44.28", "prob_perplexity": "204.888", "code_perplexity": "202.325", "temp": "0.5", "loss_0": "2.679", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5494", "wps": "17568.8", "ups": "5.54", "wpb": "3169", "bsz": "44.3", "num_updates": "298800", "lr": "2.56203e-05", "gnorm": "0.918", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "56385"} [2023-11-02 09:15:15,185][train_inner][INFO] - {"epoch": 74, "update": 73.723, "loss": "2.77", "ntokens": "3216.8", "nsentences": "46.92", "prob_perplexity": "207.221", "code_perplexity": "204.586", "temp": "0.5", "loss_0": "2.665", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55366", "wps": "17770.1", "ups": "5.52", "wpb": "3216.8", "bsz": "46.9", "num_updates": "299000", "lr": "2.55696e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "56421"} [2023-11-02 09:15:51,770][train_inner][INFO] - {"epoch": 74, "update": 73.772, "loss": "2.77", "ntokens": "3187.08", "nsentences": "45.68", "prob_perplexity": "206.244", "code_perplexity": "203.639", "temp": "0.5", "loss_0": "2.665", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55333", "wps": "17423.8", "ups": "5.47", "wpb": "3187.1", "bsz": "45.7", "num_updates": "299200", "lr": "2.5519e-05", "gnorm": "0.909", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "56458"} [2023-11-02 09:16:28,504][train_inner][INFO] - {"epoch": 74, "update": 73.821, "loss": "2.83", "ntokens": "3217.04", "nsentences": "41.8", "prob_perplexity": "205.251", "code_perplexity": "202.658", "temp": "0.5", "loss_0": "2.725", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53857", "wps": "17516.6", "ups": "5.44", "wpb": "3217", "bsz": "41.8", "num_updates": "299400", "lr": "2.54684e-05", "gnorm": "0.916", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "56495"} [2023-11-02 09:17:05,260][train_inner][INFO] - {"epoch": 74, "update": 73.871, "loss": "2.701", "ntokens": "3173.56", "nsentences": "47.76", "prob_perplexity": "206.084", "code_perplexity": "203.461", "temp": "0.5", "loss_0": "2.597", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56687", "wps": "17269.4", "ups": "5.44", "wpb": "3173.6", "bsz": "47.8", "num_updates": "299600", "lr": "2.54177e-05", "gnorm": "0.901", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "56531"} [2023-11-02 09:17:41,889][train_inner][INFO] - {"epoch": 74, "update": 73.92, "loss": "2.796", "ntokens": "3164.44", "nsentences": "45.52", "prob_perplexity": "206.112", "code_perplexity": "203.516", "temp": "0.5", "loss_0": "2.692", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.5489", "wps": "17279.5", "ups": "5.46", "wpb": "3164.4", "bsz": "45.5", "num_updates": "299800", "lr": "2.53671e-05", "gnorm": "0.913", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "56568"} [2023-11-02 09:18:18,697][train_inner][INFO] - {"epoch": 74, "update": 73.969, "loss": "2.768", "ntokens": "3173.76", "nsentences": "46.72", "prob_perplexity": "206.006", "code_perplexity": "203.458", "temp": "0.5", "loss_0": "2.663", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55514", "wps": "17245.9", "ups": "5.43", "wpb": "3173.8", "bsz": "46.7", "num_updates": "300000", "lr": "2.53165e-05", "gnorm": "0.915", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "56605"} [2023-11-02 09:18:18,699][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 09:18:18,700][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:18:18,722][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 86 [2023-11-02 09:18:44,638][valid][INFO] - {"epoch": 74, "valid_loss": "2.687", "valid_ntokens": "3169.74", "valid_nsentences": "44.1685", "valid_prob_perplexity": "205.013", "valid_code_perplexity": "202.536", "valid_temp": "0.5", "valid_loss_0": "2.582", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.56997", "valid_wps": "55492.8", "valid_wpb": "3169.7", "valid_bsz": "44.2", "valid_num_updates": "300000", "valid_best_loss": "2.646"} [2023-11-02 09:18:44,640][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 74 @ 300000 updates [2023-11-02 09:18:44,642][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_74_300000.pt [2023-11-02 09:18:46,011][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_74_300000.pt [2023-11-02 09:18:46,991][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_74_300000.pt (epoch 74 @ 300000 updates, score 2.687) (writing took 2.3509600339457393 seconds) [2023-11-02 09:19:10,168][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 09:19:10,169][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:19:10,187][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 87 [2023-11-02 09:19:36,022][valid][INFO] - {"epoch": 74, "valid_loss": "2.676", "valid_ntokens": "3174.83", "valid_nsentences": "44.1685", "valid_prob_perplexity": "205.173", "valid_code_perplexity": "202.752", "valid_temp": "0.5", "valid_loss_0": "2.572", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.57101", "valid_wps": "55800.9", "valid_wpb": "3174.8", "valid_bsz": "44.2", "valid_num_updates": "300125", "valid_best_loss": "2.646"} [2023-11-02 09:19:36,024][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 74 @ 300125 updates [2023-11-02 09:19:36,026][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:19:37,449][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:19:37,497][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 74 @ 300125 updates, score 2.676) (writing took 1.4730949839577079 seconds) [2023-11-02 09:19:37,498][fairseq_cli.train][INFO] - end of epoch 74 (average epoch stats below) [2023-11-02 09:19:37,500][train][INFO] - {"epoch": 74, "train_loss": "2.789", "train_ntokens": "3191.91", "train_nsentences": "44.2614", "train_prob_perplexity": "205.697", "train_code_perplexity": "203.102", "train_temp": "0.5", "train_loss_0": "2.685", "train_loss_1": "0.098", "train_loss_2": "0.007", "train_accuracy": "0.54855", "train_wps": "16312", "train_ups": "5.11", "train_wpb": "3191.9", "train_bsz": "44.3", "train_num_updates": "300125", "train_lr": "2.52848e-05", "train_gnorm": "0.911", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "14.3", "train_wall": "56684"} [2023-11-02 09:19:37,503][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:19:37,523][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 75 [2023-11-02 09:19:37,692][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 09:19:37,749][fairseq.trainer][INFO] - begin training epoch 75 [2023-11-02 09:19:37,750][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 09:19:51,323][train_inner][INFO] - {"epoch": 75, "update": 74.018, "loss": "2.824", "ntokens": "3190", "nsentences": "42.08", "prob_perplexity": "205.249", "code_perplexity": "202.684", "temp": "0.5", "loss_0": "2.719", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54029", "wps": "6888.1", "ups": "2.16", "wpb": "3190", "bsz": "42.1", "num_updates": "300200", "lr": "2.52658e-05", "gnorm": "0.926", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "56698"} [2023-11-02 09:20:27,157][train_inner][INFO] - {"epoch": 75, "update": 74.068, "loss": "2.775", "ntokens": "3220.44", "nsentences": "45.12", "prob_perplexity": "205.737", "code_perplexity": "203.144", "temp": "0.5", "loss_0": "2.67", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55126", "wps": "17975.3", "ups": "5.58", "wpb": "3220.4", "bsz": "45.1", "num_updates": "300400", "lr": "2.52152e-05", "gnorm": "0.906", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "56733"} [2023-11-02 09:21:02,945][train_inner][INFO] - {"epoch": 75, "update": 74.117, "loss": "2.817", "ntokens": "3182.12", "nsentences": "43.68", "prob_perplexity": "205.411", "code_perplexity": "202.844", "temp": "0.5", "loss_0": "2.713", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54407", "wps": "17783.9", "ups": "5.59", "wpb": "3182.1", "bsz": "43.7", "num_updates": "300600", "lr": "2.51646e-05", "gnorm": "0.922", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "56769"} [2023-11-02 09:21:39,066][train_inner][INFO] - {"epoch": 75, "update": 74.166, "loss": "2.719", "ntokens": "3202.76", "nsentences": "46.2", "prob_perplexity": "206.363", "code_perplexity": "203.813", "temp": "0.5", "loss_0": "2.614", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56186", "wps": "17734.5", "ups": "5.54", "wpb": "3202.8", "bsz": "46.2", "num_updates": "300800", "lr": "2.51139e-05", "gnorm": "0.899", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "56805"} [2023-11-02 09:22:15,172][train_inner][INFO] - {"epoch": 75, "update": 74.216, "loss": "2.705", "ntokens": "3159.48", "nsentences": "47.4", "prob_perplexity": "206.6", "code_perplexity": "204.039", "temp": "0.5", "loss_0": "2.6", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56601", "wps": "17502.3", "ups": "5.54", "wpb": "3159.5", "bsz": "47.4", "num_updates": "301000", "lr": "2.50633e-05", "gnorm": "0.91", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "56841"} [2023-11-02 09:22:51,321][train_inner][INFO] - {"epoch": 75, "update": 74.265, "loss": "2.755", "ntokens": "3169.72", "nsentences": "43.12", "prob_perplexity": "205.625", "code_perplexity": "203.071", "temp": "0.5", "loss_0": "2.651", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55257", "wps": "17538.1", "ups": "5.53", "wpb": "3169.7", "bsz": "43.1", "num_updates": "301200", "lr": "2.50127e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "56878"} [2023-11-02 09:23:27,584][train_inner][INFO] - {"epoch": 75, "update": 74.314, "loss": "2.716", "ntokens": "3186", "nsentences": "45.88", "prob_perplexity": "205.835", "code_perplexity": "203.241", "temp": "0.5", "loss_0": "2.612", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.56179", "wps": "17580.2", "ups": "5.52", "wpb": "3186", "bsz": "45.9", "num_updates": "301400", "lr": "2.4962e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "56914"} [2023-11-02 09:24:04,295][train_inner][INFO] - {"epoch": 75, "update": 74.364, "loss": "2.761", "ntokens": "3186.2", "nsentences": "45.36", "prob_perplexity": "205.606", "code_perplexity": "202.989", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55394", "wps": "17359.2", "ups": "5.45", "wpb": "3186.2", "bsz": "45.4", "num_updates": "301600", "lr": "2.49114e-05", "gnorm": "0.909", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "56951"} [2023-11-02 09:24:40,853][train_inner][INFO] - {"epoch": 75, "update": 74.413, "loss": "2.761", "ntokens": "3216.8", "nsentences": "45.16", "prob_perplexity": "206.059", "code_perplexity": "203.532", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55366", "wps": "17599.4", "ups": "5.47", "wpb": "3216.8", "bsz": "45.2", "num_updates": "301800", "lr": "2.48608e-05", "gnorm": "0.91", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "56987"} [2023-11-02 09:25:16,930][train_inner][INFO] - {"epoch": 75, "update": 74.462, "loss": "2.813", "ntokens": "3199.16", "nsentences": "42.56", "prob_perplexity": "206.366", "code_perplexity": "203.844", "temp": "0.5", "loss_0": "2.709", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54205", "wps": "17736.2", "ups": "5.54", "wpb": "3199.2", "bsz": "42.6", "num_updates": "302000", "lr": "2.48101e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "57023"} [2023-11-02 09:25:53,178][train_inner][INFO] - {"epoch": 75, "update": 74.512, "loss": "2.766", "ntokens": "3186.52", "nsentences": "46.64", "prob_perplexity": "205.775", "code_perplexity": "203.267", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55494", "wps": "17583", "ups": "5.52", "wpb": "3186.5", "bsz": "46.6", "num_updates": "302200", "lr": "2.47595e-05", "gnorm": "0.911", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "57059"} [2023-11-02 09:26:29,394][train_inner][INFO] - {"epoch": 75, "update": 74.561, "loss": "2.761", "ntokens": "3169.32", "nsentences": "44.72", "prob_perplexity": "205.802", "code_perplexity": "203.317", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55401", "wps": "17503", "ups": "5.52", "wpb": "3169.3", "bsz": "44.7", "num_updates": "302400", "lr": "2.47089e-05", "gnorm": "0.922", "loss_scale": "2", "train_wall": "36", "gb_free": "14.8", "wall": "57096"} [2023-11-02 09:27:06,014][train_inner][INFO] - {"epoch": 75, "update": 74.61, "loss": "2.784", "ntokens": "3200.16", "nsentences": "44.36", "prob_perplexity": "206.14", "code_perplexity": "203.658", "temp": "0.5", "loss_0": "2.68", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54947", "wps": "17478.9", "ups": "5.46", "wpb": "3200.2", "bsz": "44.4", "num_updates": "302600", "lr": "2.46582e-05", "gnorm": "0.915", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "57132"} [2023-11-02 09:27:42,084][train_inner][INFO] - {"epoch": 75, "update": 74.66, "loss": "2.811", "ntokens": "3172.2", "nsentences": "42.84", "prob_perplexity": "205.769", "code_perplexity": "203.151", "temp": "0.5", "loss_0": "2.707", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54437", "wps": "17590.5", "ups": "5.55", "wpb": "3172.2", "bsz": "42.8", "num_updates": "302800", "lr": "2.46076e-05", "gnorm": "0.919", "loss_scale": "2", "train_wall": "35", "gb_free": "15.5", "wall": "57168"} [2023-11-02 09:28:18,025][train_inner][INFO] - {"epoch": 75, "update": 74.709, "loss": "2.755", "ntokens": "3165.64", "nsentences": "43.6", "prob_perplexity": "207.319", "code_perplexity": "204.77", "temp": "0.5", "loss_0": "2.651", "loss_1": "0.097", "loss_2": "0.007", "accuracy": "0.55424", "wps": "17617", "ups": "5.57", "wpb": "3165.6", "bsz": "43.6", "num_updates": "303000", "lr": "2.4557e-05", "gnorm": "0.918", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "57204"} [2023-11-02 09:28:53,967][train_inner][INFO] - {"epoch": 75, "update": 74.758, "loss": "2.785", "ntokens": "3173.72", "nsentences": "43.16", "prob_perplexity": "206.07", "code_perplexity": "203.49", "temp": "0.5", "loss_0": "2.681", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54744", "wps": "17661.1", "ups": "5.56", "wpb": "3173.7", "bsz": "43.2", "num_updates": "303200", "lr": "2.45063e-05", "gnorm": "0.913", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "57240"} [2023-11-02 09:29:30,496][train_inner][INFO] - {"epoch": 75, "update": 74.807, "loss": "2.8", "ntokens": "3161.04", "nsentences": "43.6", "prob_perplexity": "207.199", "code_perplexity": "204.597", "temp": "0.5", "loss_0": "2.696", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54681", "wps": "17308.4", "ups": "5.48", "wpb": "3161", "bsz": "43.6", "num_updates": "303400", "lr": "2.44557e-05", "gnorm": "0.916", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "57277"} [2023-11-02 09:30:06,841][train_inner][INFO] - {"epoch": 75, "update": 74.857, "loss": "2.809", "ntokens": "3185.64", "nsentences": "43.36", "prob_perplexity": "206.76", "code_perplexity": "204.224", "temp": "0.5", "loss_0": "2.705", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54463", "wps": "17530.9", "ups": "5.5", "wpb": "3185.6", "bsz": "43.4", "num_updates": "303600", "lr": "2.44051e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "57313"} [2023-11-02 09:30:42,909][train_inner][INFO] - {"epoch": 75, "update": 74.906, "loss": "2.829", "ntokens": "3210.04", "nsentences": "42.52", "prob_perplexity": "207.082", "code_perplexity": "204.492", "temp": "0.5", "loss_0": "2.725", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53992", "wps": "17800.8", "ups": "5.55", "wpb": "3210", "bsz": "42.5", "num_updates": "303800", "lr": "2.43544e-05", "gnorm": "0.913", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "57349"} [2023-11-02 09:31:18,863][train_inner][INFO] - {"epoch": 75, "update": 74.955, "loss": "2.836", "ntokens": "3194.08", "nsentences": "42.36", "prob_perplexity": "206.545", "code_perplexity": "203.948", "temp": "0.5", "loss_0": "2.732", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.53851", "wps": "17768.5", "ups": "5.56", "wpb": "3194.1", "bsz": "42.4", "num_updates": "304000", "lr": "2.43038e-05", "gnorm": "0.926", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "57385"} [2023-11-02 09:31:51,175][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 09:31:51,176][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:31:51,196][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 88 [2023-11-02 09:32:16,858][valid][INFO] - {"epoch": 75, "valid_loss": "2.641", "valid_ntokens": "3145.47", "valid_nsentences": "44.1685", "valid_prob_perplexity": "206.142", "valid_code_perplexity": "203.689", "valid_temp": "0.5", "valid_loss_0": "2.536", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.57724", "valid_wps": "55587.9", "valid_wpb": "3145.5", "valid_bsz": "44.2", "valid_num_updates": "304181", "valid_best_loss": "2.641"} [2023-11-02 09:32:16,860][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 75 @ 304181 updates [2023-11-02 09:32:16,862][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 09:32:18,300][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 09:32:19,281][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 75 @ 304181 updates, score 2.641) (writing took 2.4214616222307086 seconds) [2023-11-02 09:32:19,282][fairseq_cli.train][INFO] - end of epoch 75 (average epoch stats below) [2023-11-02 09:32:19,285][train][INFO] - {"epoch": 75, "train_loss": "2.777", "train_ntokens": "3186.4", "train_nsentences": "44.2682", "train_prob_perplexity": "206.205", "train_code_perplexity": "203.644", "train_temp": "0.5", "train_loss_0": "2.672", "train_loss_1": "0.098", "train_loss_2": "0.007", "train_accuracy": "0.55055", "train_wps": "16965.6", "train_ups": "5.32", "train_wpb": "3186.4", "train_bsz": "44.3", "train_num_updates": "304181", "train_lr": "2.4258e-05", "train_gnorm": "0.916", "train_loss_scale": "2", "train_train_wall": "720", "train_gb_free": "13.2", "train_wall": "57446"} [2023-11-02 09:32:19,287][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:32:19,322][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 76 [2023-11-02 09:32:19,541][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 09:32:19,601][fairseq.trainer][INFO] - begin training epoch 76 [2023-11-02 09:32:19,602][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 09:32:23,199][train_inner][INFO] - {"epoch": 76, "update": 75.005, "loss": "2.758", "ntokens": "3189.92", "nsentences": "45.08", "prob_perplexity": "206.212", "code_perplexity": "203.602", "temp": "0.5", "loss_0": "2.654", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55388", "wps": "9916.8", "ups": "3.11", "wpb": "3189.9", "bsz": "45.1", "num_updates": "304200", "lr": "2.42532e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "35", "gb_free": "12.6", "wall": "57449"} [2023-11-02 09:32:59,103][train_inner][INFO] - {"epoch": 76, "update": 75.054, "loss": "2.811", "ntokens": "3207.92", "nsentences": "43", "prob_perplexity": "207", "code_perplexity": "204.389", "temp": "0.5", "loss_0": "2.707", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.543", "wps": "17870.8", "ups": "5.57", "wpb": "3207.9", "bsz": "43", "num_updates": "304400", "lr": "2.42025e-05", "gnorm": "0.912", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "57485"} [2023-11-02 09:33:35,047][train_inner][INFO] - {"epoch": 76, "update": 75.103, "loss": "2.801", "ntokens": "3166.88", "nsentences": "44.08", "prob_perplexity": "205.866", "code_perplexity": "203.275", "temp": "0.5", "loss_0": "2.697", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54723", "wps": "17622.2", "ups": "5.56", "wpb": "3166.9", "bsz": "44.1", "num_updates": "304600", "lr": "2.41519e-05", "gnorm": "0.915", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "57521"} [2023-11-02 09:34:11,017][train_inner][INFO] - {"epoch": 76, "update": 75.153, "loss": "2.796", "ntokens": "3205.44", "nsentences": "41.44", "prob_perplexity": "206.438", "code_perplexity": "203.883", "temp": "0.5", "loss_0": "2.691", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.544", "wps": "17823.9", "ups": "5.56", "wpb": "3205.4", "bsz": "41.4", "num_updates": "304800", "lr": "2.41013e-05", "gnorm": "0.919", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "57557"} [2023-11-02 09:34:47,802][train_inner][INFO] - {"epoch": 76, "update": 75.202, "loss": "2.772", "ntokens": "3228.6", "nsentences": "44.2", "prob_perplexity": "205.979", "code_perplexity": "203.444", "temp": "0.5", "loss_0": "2.668", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54916", "wps": "17555", "ups": "5.44", "wpb": "3228.6", "bsz": "44.2", "num_updates": "305000", "lr": "2.40506e-05", "gnorm": "0.92", "loss_scale": "2", "train_wall": "36", "gb_free": "12.2", "wall": "57594"} [2023-11-02 09:35:23,836][train_inner][INFO] - {"epoch": 76, "update": 75.251, "loss": "2.775", "ntokens": "3172.28", "nsentences": "43.04", "prob_perplexity": "206.634", "code_perplexity": "204.071", "temp": "0.5", "loss_0": "2.67", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55002", "wps": "17608.5", "ups": "5.55", "wpb": "3172.3", "bsz": "43", "num_updates": "305200", "lr": "2.4e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "57630"} [2023-11-02 09:36:00,069][train_inner][INFO] - {"epoch": 76, "update": 75.301, "loss": "2.792", "ntokens": "3213.92", "nsentences": "42.36", "prob_perplexity": "206.262", "code_perplexity": "203.716", "temp": "0.5", "loss_0": "2.688", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54462", "wps": "17741.3", "ups": "5.52", "wpb": "3213.9", "bsz": "42.4", "num_updates": "305400", "lr": "2.39494e-05", "gnorm": "0.927", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "57666"} [2023-11-02 09:36:36,172][train_inner][INFO] - {"epoch": 76, "update": 75.35, "loss": "2.803", "ntokens": "3195.4", "nsentences": "43.48", "prob_perplexity": "206.486", "code_perplexity": "203.89", "temp": "0.5", "loss_0": "2.699", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54535", "wps": "17702.5", "ups": "5.54", "wpb": "3195.4", "bsz": "43.5", "num_updates": "305600", "lr": "2.38987e-05", "gnorm": "0.923", "loss_scale": "2", "train_wall": "35", "gb_free": "12.6", "wall": "57702"} [2023-11-02 09:37:12,418][train_inner][INFO] - {"epoch": 76, "update": 75.399, "loss": "2.772", "ntokens": "3210.28", "nsentences": "43.6", "prob_perplexity": "206.858", "code_perplexity": "204.345", "temp": "0.5", "loss_0": "2.668", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55002", "wps": "17722.8", "ups": "5.52", "wpb": "3210.3", "bsz": "43.6", "num_updates": "305800", "lr": "2.38481e-05", "gnorm": "0.908", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "57739"} [2023-11-02 09:37:49,193][train_inner][INFO] - {"epoch": 76, "update": 75.448, "loss": "2.834", "ntokens": "3212.16", "nsentences": "42.36", "prob_perplexity": "207.43", "code_perplexity": "204.813", "temp": "0.5", "loss_0": "2.73", "loss_1": "0.097", "loss_2": "0.007", "accuracy": "0.53927", "wps": "17470.3", "ups": "5.44", "wpb": "3212.2", "bsz": "42.4", "num_updates": "306000", "lr": "2.37975e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "57775"} [2023-11-02 09:38:25,426][train_inner][INFO] - {"epoch": 76, "update": 75.498, "loss": "2.803", "ntokens": "3178.68", "nsentences": "42.4", "prob_perplexity": "207.238", "code_perplexity": "204.718", "temp": "0.5", "loss_0": "2.699", "loss_1": "0.097", "loss_2": "0.007", "accuracy": "0.54419", "wps": "17546.9", "ups": "5.52", "wpb": "3178.7", "bsz": "42.4", "num_updates": "306200", "lr": "2.37468e-05", "gnorm": "0.93", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "57812"} [2023-11-02 09:39:01,615][train_inner][INFO] - {"epoch": 76, "update": 75.547, "loss": "2.759", "ntokens": "3152.72", "nsentences": "43.32", "prob_perplexity": "206.078", "code_perplexity": "203.478", "temp": "0.5", "loss_0": "2.655", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55216", "wps": "17424.6", "ups": "5.53", "wpb": "3152.7", "bsz": "43.3", "num_updates": "306400", "lr": "2.36962e-05", "gnorm": "0.928", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "57848"} [2023-11-02 09:39:37,815][train_inner][INFO] - {"epoch": 76, "update": 75.596, "loss": "2.757", "ntokens": "3191.6", "nsentences": "44.92", "prob_perplexity": "207.076", "code_perplexity": "204.454", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55447", "wps": "17634.1", "ups": "5.53", "wpb": "3191.6", "bsz": "44.9", "num_updates": "306600", "lr": "2.36456e-05", "gnorm": "0.922", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "57884"} [2023-11-02 09:40:13,894][train_inner][INFO] - {"epoch": 76, "update": 75.646, "loss": "2.743", "ntokens": "3171.32", "nsentences": "45.36", "prob_perplexity": "206.668", "code_perplexity": "204.093", "temp": "0.5", "loss_0": "2.639", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.55758", "wps": "17581.2", "ups": "5.54", "wpb": "3171.3", "bsz": "45.4", "num_updates": "306800", "lr": "2.35949e-05", "gnorm": "0.92", "loss_scale": "2", "train_wall": "35", "gb_free": "15.8", "wall": "57920"} [2023-11-02 09:40:49,847][train_inner][INFO] - {"epoch": 76, "update": 75.695, "loss": "2.752", "ntokens": "3186.2", "nsentences": "46.56", "prob_perplexity": "207.454", "code_perplexity": "204.878", "temp": "0.5", "loss_0": "2.648", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55674", "wps": "17725.2", "ups": "5.56", "wpb": "3186.2", "bsz": "46.6", "num_updates": "307000", "lr": "2.35443e-05", "gnorm": "0.906", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "57956"} [2023-11-02 09:41:25,491][train_inner][INFO] - {"epoch": 76, "update": 75.744, "loss": "2.784", "ntokens": "3169.36", "nsentences": "44.84", "prob_perplexity": "207.086", "code_perplexity": "204.495", "temp": "0.5", "loss_0": "2.68", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.55028", "wps": "17784.2", "ups": "5.61", "wpb": "3169.4", "bsz": "44.8", "num_updates": "307200", "lr": "2.34937e-05", "gnorm": "0.92", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "57992"} [2023-11-02 09:42:01,316][train_inner][INFO] - {"epoch": 76, "update": 75.794, "loss": "2.767", "ntokens": "3206.32", "nsentences": "46.64", "prob_perplexity": "207.495", "code_perplexity": "204.928", "temp": "0.5", "loss_0": "2.664", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.554", "wps": "17901.4", "ups": "5.58", "wpb": "3206.3", "bsz": "46.6", "num_updates": "307400", "lr": "2.3443e-05", "gnorm": "0.919", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "58028"} [2023-11-02 09:42:37,365][train_inner][INFO] - {"epoch": 76, "update": 75.843, "loss": "2.744", "ntokens": "3193.76", "nsentences": "45.52", "prob_perplexity": "207.316", "code_perplexity": "204.765", "temp": "0.5", "loss_0": "2.64", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55686", "wps": "17720.1", "ups": "5.55", "wpb": "3193.8", "bsz": "45.5", "num_updates": "307600", "lr": "2.33924e-05", "gnorm": "0.931", "loss_scale": "2", "train_wall": "35", "gb_free": "15.5", "wall": "58064"} [2023-11-02 09:43:13,627][train_inner][INFO] - {"epoch": 76, "update": 75.892, "loss": "2.704", "ntokens": "3188.56", "nsentences": "48.2", "prob_perplexity": "207.375", "code_perplexity": "204.786", "temp": "0.5", "loss_0": "2.6", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56617", "wps": "17587.3", "ups": "5.52", "wpb": "3188.6", "bsz": "48.2", "num_updates": "307800", "lr": "2.33418e-05", "gnorm": "0.915", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "58100"} [2023-11-02 09:43:50,550][train_inner][INFO] - {"epoch": 76, "update": 75.942, "loss": "2.801", "ntokens": "3207.76", "nsentences": "44", "prob_perplexity": "206.405", "code_perplexity": "203.893", "temp": "0.5", "loss_0": "2.697", "loss_1": "0.098", "loss_2": "0.007", "accuracy": "0.54549", "wps": "17376.1", "ups": "5.42", "wpb": "3207.8", "bsz": "44", "num_updates": "308000", "lr": "2.32911e-05", "gnorm": "0.918", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "58137"} [2023-11-02 09:44:27,019][train_inner][INFO] - {"epoch": 76, "update": 75.991, "loss": "2.745", "ntokens": "3188.6", "nsentences": "45.48", "prob_perplexity": "205.784", "code_perplexity": "203.212", "temp": "0.5", "loss_0": "2.641", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.55629", "wps": "17487.7", "ups": "5.48", "wpb": "3188.6", "bsz": "45.5", "num_updates": "308200", "lr": "2.32405e-05", "gnorm": "0.933", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "58173"} [2023-11-02 09:44:33,696][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 09:44:33,698][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:44:33,715][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 89 [2023-11-02 09:44:59,572][valid][INFO] - {"epoch": 76, "valid_loss": "2.656", "valid_ntokens": "3166.86", "valid_nsentences": "44.1685", "valid_prob_perplexity": "206.509", "valid_code_perplexity": "204.021", "valid_temp": "0.5", "valid_loss_0": "2.552", "valid_loss_1": "0.098", "valid_loss_2": "0.007", "valid_accuracy": "0.57435", "valid_wps": "55569.2", "valid_wpb": "3166.9", "valid_bsz": "44.2", "valid_num_updates": "308237", "valid_best_loss": "2.641"} [2023-11-02 09:44:59,575][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 76 @ 308237 updates [2023-11-02 09:44:59,576][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:45:01,007][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:45:01,062][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 76 @ 308237 updates, score 2.656) (writing took 1.4876721380278468 seconds) [2023-11-02 09:45:01,063][fairseq_cli.train][INFO] - end of epoch 76 (average epoch stats below) [2023-11-02 09:45:01,065][train][INFO] - {"epoch": 76, "train_loss": "2.777", "train_ntokens": "3193.07", "train_nsentences": "44.2682", "train_prob_perplexity": "206.768", "train_code_perplexity": "204.196", "train_temp": "0.5", "train_loss_0": "2.672", "train_loss_1": "0.098", "train_loss_2": "0.007", "train_accuracy": "0.55026", "train_wps": "17001.1", "train_ups": "5.32", "train_wpb": "3193.1", "train_bsz": "44.3", "train_num_updates": "308237", "train_lr": "2.32311e-05", "train_gnorm": "0.92", "train_loss_scale": "2", "train_train_wall": "721", "train_gb_free": "13.2", "train_wall": "58207"} [2023-11-02 09:45:01,068][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:45:01,100][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 77 [2023-11-02 09:45:01,277][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 09:45:01,344][fairseq.trainer][INFO] - begin training epoch 77 [2023-11-02 09:45:01,345][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 09:45:30,472][train_inner][INFO] - {"epoch": 77, "update": 76.04, "loss": "2.822", "ntokens": "3189.32", "nsentences": "43.4", "prob_perplexity": "207.426", "code_perplexity": "204.813", "temp": "0.5", "loss_0": "2.718", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54286", "wps": "10053", "ups": "3.15", "wpb": "3189.3", "bsz": "43.4", "num_updates": "308400", "lr": "2.31899e-05", "gnorm": "0.933", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "58237"} [2023-11-02 09:46:06,195][train_inner][INFO] - {"epoch": 77, "update": 76.089, "loss": "2.825", "ntokens": "3204.16", "nsentences": "42.36", "prob_perplexity": "207.118", "code_perplexity": "204.537", "temp": "0.5", "loss_0": "2.721", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.53998", "wps": "17939.7", "ups": "5.6", "wpb": "3204.2", "bsz": "42.4", "num_updates": "308600", "lr": "2.31392e-05", "gnorm": "0.926", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "58272"} [2023-11-02 09:46:41,945][train_inner][INFO] - {"epoch": 77, "update": 76.139, "loss": "2.802", "ntokens": "3183.28", "nsentences": "42.32", "prob_perplexity": "207.21", "code_perplexity": "204.56", "temp": "0.5", "loss_0": "2.698", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.54458", "wps": "17809.7", "ups": "5.59", "wpb": "3183.3", "bsz": "42.3", "num_updates": "308800", "lr": "2.30886e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "58308"} [2023-11-02 09:47:18,102][train_inner][INFO] - {"epoch": 77, "update": 76.188, "loss": "2.824", "ntokens": "3164.56", "nsentences": "42.28", "prob_perplexity": "207.419", "code_perplexity": "204.753", "temp": "0.5", "loss_0": "2.72", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54201", "wps": "17505.8", "ups": "5.53", "wpb": "3164.6", "bsz": "42.3", "num_updates": "309000", "lr": "2.3038e-05", "gnorm": "0.919", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "58344"} [2023-11-02 09:47:53,785][train_inner][INFO] - {"epoch": 77, "update": 76.237, "loss": "2.791", "ntokens": "3203.96", "nsentences": "44.04", "prob_perplexity": "207.492", "code_perplexity": "204.844", "temp": "0.5", "loss_0": "2.687", "loss_1": "0.097", "loss_2": "0.007", "accuracy": "0.54647", "wps": "17958.6", "ups": "5.61", "wpb": "3204", "bsz": "44", "num_updates": "309200", "lr": "2.29873e-05", "gnorm": "0.922", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "58380"} [2023-11-02 09:48:30,265][train_inner][INFO] - {"epoch": 77, "update": 76.287, "loss": "2.806", "ntokens": "3212.08", "nsentences": "45.24", "prob_perplexity": "207.905", "code_perplexity": "205.271", "temp": "0.5", "loss_0": "2.702", "loss_1": "0.097", "loss_2": "0.007", "accuracy": "0.54681", "wps": "17611.2", "ups": "5.48", "wpb": "3212.1", "bsz": "45.2", "num_updates": "309400", "lr": "2.29367e-05", "gnorm": "0.923", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "58416"} [2023-11-02 09:49:06,753][train_inner][INFO] - {"epoch": 77, "update": 76.336, "loss": "2.747", "ntokens": "3200.48", "nsentences": "45.24", "prob_perplexity": "208.313", "code_perplexity": "205.71", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55646", "wps": "17544", "ups": "5.48", "wpb": "3200.5", "bsz": "45.2", "num_updates": "309600", "lr": "2.28861e-05", "gnorm": "0.915", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "58453"} [2023-11-02 09:49:43,252][train_inner][INFO] - {"epoch": 77, "update": 76.385, "loss": "2.734", "ntokens": "3196.28", "nsentences": "46.96", "prob_perplexity": "206.496", "code_perplexity": "203.944", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.55962", "wps": "17515", "ups": "5.48", "wpb": "3196.3", "bsz": "47", "num_updates": "309800", "lr": "2.28354e-05", "gnorm": "0.919", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "58489"} [2023-11-02 09:50:19,382][train_inner][INFO] - {"epoch": 77, "update": 76.435, "loss": "2.796", "ntokens": "3213.6", "nsentences": "44.12", "prob_perplexity": "207.667", "code_perplexity": "205.097", "temp": "0.5", "loss_0": "2.692", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54678", "wps": "17790.1", "ups": "5.54", "wpb": "3213.6", "bsz": "44.1", "num_updates": "310000", "lr": "2.27848e-05", "gnorm": "0.923", "loss_scale": "2", "train_wall": "35", "gb_free": "12.4", "wall": "58526"} [2023-11-02 09:50:55,860][train_inner][INFO] - {"epoch": 77, "update": 76.484, "loss": "2.775", "ntokens": "3239.96", "nsentences": "44.4", "prob_perplexity": "207.132", "code_perplexity": "204.577", "temp": "0.5", "loss_0": "2.671", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.54983", "wps": "17764.6", "ups": "5.48", "wpb": "3240", "bsz": "44.4", "num_updates": "310200", "lr": "2.27342e-05", "gnorm": "0.907", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "58562"} [2023-11-02 09:51:32,286][train_inner][INFO] - {"epoch": 77, "update": 76.533, "loss": "2.722", "ntokens": "3176.28", "nsentences": "44.64", "prob_perplexity": "206.182", "code_perplexity": "203.595", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.56059", "wps": "17441", "ups": "5.49", "wpb": "3176.3", "bsz": "44.6", "num_updates": "310400", "lr": "2.26835e-05", "gnorm": "0.925", "loss_scale": "2", "train_wall": "36", "gb_free": "14.8", "wall": "58599"} [2023-11-02 09:52:08,670][train_inner][INFO] - {"epoch": 77, "update": 76.583, "loss": "2.743", "ntokens": "3169.88", "nsentences": "45.48", "prob_perplexity": "207.28", "code_perplexity": "204.638", "temp": "0.5", "loss_0": "2.639", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.55748", "wps": "17425.5", "ups": "5.5", "wpb": "3169.9", "bsz": "45.5", "num_updates": "310600", "lr": "2.26329e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "58635"} [2023-11-02 09:52:45,111][train_inner][INFO] - {"epoch": 77, "update": 76.632, "loss": "2.733", "ntokens": "3160.68", "nsentences": "44.8", "prob_perplexity": "206.786", "code_perplexity": "204.199", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.55797", "wps": "17348.1", "ups": "5.49", "wpb": "3160.7", "bsz": "44.8", "num_updates": "310800", "lr": "2.25823e-05", "gnorm": "0.915", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "58671"} [2023-11-02 09:53:21,444][train_inner][INFO] - {"epoch": 77, "update": 76.681, "loss": "2.767", "ntokens": "3185.56", "nsentences": "44.6", "prob_perplexity": "208", "code_perplexity": "205.427", "temp": "0.5", "loss_0": "2.663", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55305", "wps": "17536.6", "ups": "5.51", "wpb": "3185.6", "bsz": "44.6", "num_updates": "311000", "lr": "2.25316e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "58708"} [2023-11-02 09:53:57,940][train_inner][INFO] - {"epoch": 77, "update": 76.731, "loss": "2.729", "ntokens": "3191.2", "nsentences": "45.04", "prob_perplexity": "207.914", "code_perplexity": "205.375", "temp": "0.5", "loss_0": "2.625", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55859", "wps": "17488.5", "ups": "5.48", "wpb": "3191.2", "bsz": "45", "num_updates": "311200", "lr": "2.2481e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "58744"} [2023-11-02 09:54:34,011][train_inner][INFO] - {"epoch": 77, "update": 76.78, "loss": "2.749", "ntokens": "3160.4", "nsentences": "44.32", "prob_perplexity": "207.692", "code_perplexity": "205.108", "temp": "0.5", "loss_0": "2.645", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55585", "wps": "17524.8", "ups": "5.55", "wpb": "3160.4", "bsz": "44.3", "num_updates": "311400", "lr": "2.24304e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "58780"} [2023-11-02 09:55:10,440][train_inner][INFO] - {"epoch": 77, "update": 76.829, "loss": "2.771", "ntokens": "3231.56", "nsentences": "44.44", "prob_perplexity": "208.249", "code_perplexity": "205.693", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55098", "wps": "17742.4", "ups": "5.49", "wpb": "3231.6", "bsz": "44.4", "num_updates": "311600", "lr": "2.23797e-05", "gnorm": "0.914", "loss_scale": "2", "train_wall": "36", "gb_free": "11.5", "wall": "58817"} [2023-11-02 09:55:47,205][train_inner][INFO] - {"epoch": 77, "update": 76.878, "loss": "2.74", "ntokens": "3176.4", "nsentences": "44.76", "prob_perplexity": "207.44", "code_perplexity": "204.813", "temp": "0.5", "loss_0": "2.636", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55684", "wps": "17281", "ups": "5.44", "wpb": "3176.4", "bsz": "44.8", "num_updates": "311800", "lr": "2.23291e-05", "gnorm": "0.926", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "58853"} [2023-11-02 09:56:23,209][train_inner][INFO] - {"epoch": 77, "update": 76.928, "loss": "2.793", "ntokens": "3203.64", "nsentences": "42.44", "prob_perplexity": "208.549", "code_perplexity": "205.95", "temp": "0.5", "loss_0": "2.69", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5455", "wps": "17797.3", "ups": "5.56", "wpb": "3203.6", "bsz": "42.4", "num_updates": "312000", "lr": "2.22785e-05", "gnorm": "0.924", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "58889"} [2023-11-02 09:56:59,792][train_inner][INFO] - {"epoch": 77, "update": 76.977, "loss": "2.774", "ntokens": "3169.64", "nsentences": "45.16", "prob_perplexity": "207.771", "code_perplexity": "205.138", "temp": "0.5", "loss_0": "2.67", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5524", "wps": "17329.2", "ups": "5.47", "wpb": "3169.6", "bsz": "45.2", "num_updates": "312200", "lr": "2.22278e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "58926"} [2023-11-02 09:57:16,680][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 09:57:16,682][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:57:16,700][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 90 [2023-11-02 09:57:42,193][valid][INFO] - {"epoch": 77, "valid_loss": "2.651", "valid_ntokens": "3165.52", "valid_nsentences": "44.1685", "valid_prob_perplexity": "205.5", "valid_code_perplexity": "203.024", "valid_temp": "0.5", "valid_loss_0": "2.547", "valid_loss_1": "0.098", "valid_loss_2": "0.006", "valid_accuracy": "0.5758", "valid_wps": "56400.2", "valid_wpb": "3165.5", "valid_bsz": "44.2", "valid_num_updates": "312293", "valid_best_loss": "2.641"} [2023-11-02 09:57:42,195][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 77 @ 312293 updates [2023-11-02 09:57:42,197][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:57:43,690][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 09:57:43,751][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 77 @ 312293 updates, score 2.651) (writing took 1.5559582388959825 seconds) [2023-11-02 09:57:43,752][fairseq_cli.train][INFO] - end of epoch 77 (average epoch stats below) [2023-11-02 09:57:43,754][train][INFO] - {"epoch": 77, "train_loss": "2.771", "train_ntokens": "3190.36", "train_nsentences": "44.2682", "train_prob_perplexity": "207.438", "train_code_perplexity": "204.839", "train_temp": "0.5", "train_loss_0": "2.667", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55139", "train_wps": "16966.5", "train_ups": "5.32", "train_wpb": "3190.4", "train_bsz": "44.3", "train_num_updates": "312293", "train_lr": "2.22043e-05", "train_gnorm": "0.922", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "14", "train_wall": "58970"} [2023-11-02 09:57:43,757][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 09:57:43,775][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 78 [2023-11-02 09:57:43,945][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 09:57:44,003][fairseq.trainer][INFO] - begin training epoch 78 [2023-11-02 09:57:44,004][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 09:58:03,285][train_inner][INFO] - {"epoch": 78, "update": 77.026, "loss": "2.747", "ntokens": "3199.48", "nsentences": "45.16", "prob_perplexity": "207.158", "code_perplexity": "204.569", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.55622", "wps": "10078.6", "ups": "3.15", "wpb": "3199.5", "bsz": "45.2", "num_updates": "312400", "lr": "2.21772e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "58990"} [2023-11-02 09:58:39,141][train_inner][INFO] - {"epoch": 78, "update": 77.076, "loss": "2.767", "ntokens": "3186.52", "nsentences": "46.12", "prob_perplexity": "207.405", "code_perplexity": "204.855", "temp": "0.5", "loss_0": "2.663", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55413", "wps": "17774.8", "ups": "5.58", "wpb": "3186.5", "bsz": "46.1", "num_updates": "312600", "lr": "2.21266e-05", "gnorm": "0.932", "loss_scale": "4", "train_wall": "35", "gb_free": "15.6", "wall": "59025"} [2023-11-02 09:59:15,182][train_inner][INFO] - {"epoch": 78, "update": 77.125, "loss": "2.771", "ntokens": "3201.48", "nsentences": "42.96", "prob_perplexity": "206.562", "code_perplexity": "203.962", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.54991", "wps": "17767.2", "ups": "5.55", "wpb": "3201.5", "bsz": "43", "num_updates": "312800", "lr": "2.20759e-05", "gnorm": "0.942", "loss_scale": "4", "train_wall": "35", "gb_free": "13.3", "wall": "59061"} [2023-11-02 09:59:51,606][train_inner][INFO] - {"epoch": 78, "update": 77.174, "loss": "2.745", "ntokens": "3154.2", "nsentences": "44.08", "prob_perplexity": "208.509", "code_perplexity": "205.968", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55606", "wps": "17320", "ups": "5.49", "wpb": "3154.2", "bsz": "44.1", "num_updates": "313000", "lr": "2.20253e-05", "gnorm": "0.923", "loss_scale": "4", "train_wall": "36", "gb_free": "14.6", "wall": "59098"} [2023-11-02 10:00:27,892][train_inner][INFO] - {"epoch": 78, "update": 77.224, "loss": "2.769", "ntokens": "3200.64", "nsentences": "44.16", "prob_perplexity": "208.788", "code_perplexity": "206.167", "temp": "0.5", "loss_0": "2.666", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55049", "wps": "17642.4", "ups": "5.51", "wpb": "3200.6", "bsz": "44.2", "num_updates": "313200", "lr": "2.19747e-05", "gnorm": "0.924", "loss_scale": "4", "train_wall": "36", "gb_free": "14.8", "wall": "59134"} [2023-11-02 10:01:04,208][train_inner][INFO] - {"epoch": 78, "update": 77.273, "loss": "2.778", "ntokens": "3246.36", "nsentences": "46.16", "prob_perplexity": "208.233", "code_perplexity": "205.609", "temp": "0.5", "loss_0": "2.674", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55152", "wps": "17879.2", "ups": "5.51", "wpb": "3246.4", "bsz": "46.2", "num_updates": "313400", "lr": "2.19241e-05", "gnorm": "0.919", "loss_scale": "4", "train_wall": "36", "gb_free": "14.9", "wall": "59170"} [2023-11-02 10:01:40,126][train_inner][INFO] - {"epoch": 78, "update": 77.322, "loss": "2.774", "ntokens": "3179.32", "nsentences": "43.28", "prob_perplexity": "207.729", "code_perplexity": "205.078", "temp": "0.5", "loss_0": "2.67", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54987", "wps": "17704.2", "ups": "5.57", "wpb": "3179.3", "bsz": "43.3", "num_updates": "313600", "lr": "2.18734e-05", "gnorm": "0.941", "loss_scale": "4", "train_wall": "35", "gb_free": "13.5", "wall": "59206"} [2023-11-02 10:02:16,520][train_inner][INFO] - {"epoch": 78, "update": 77.372, "loss": "2.778", "ntokens": "3176.2", "nsentences": "45.16", "prob_perplexity": "208.462", "code_perplexity": "205.824", "temp": "0.5", "loss_0": "2.675", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55217", "wps": "17455.6", "ups": "5.5", "wpb": "3176.2", "bsz": "45.2", "num_updates": "313800", "lr": "2.18228e-05", "gnorm": "0.953", "loss_scale": "4", "train_wall": "36", "gb_free": "13.2", "wall": "59243"} [2023-11-02 10:02:52,545][train_inner][INFO] - {"epoch": 78, "update": 77.421, "loss": "2.79", "ntokens": "3184.56", "nsentences": "44.84", "prob_perplexity": "207.434", "code_perplexity": "204.838", "temp": "0.5", "loss_0": "2.686", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54894", "wps": "17680.7", "ups": "5.55", "wpb": "3184.6", "bsz": "44.8", "num_updates": "314000", "lr": "2.17722e-05", "gnorm": "0.924", "loss_scale": "4", "train_wall": "35", "gb_free": "15.7", "wall": "59279"} [2023-11-02 10:03:29,033][train_inner][INFO] - {"epoch": 78, "update": 77.47, "loss": "2.801", "ntokens": "3189.68", "nsentences": "41.16", "prob_perplexity": "208.218", "code_perplexity": "205.66", "temp": "0.5", "loss_0": "2.697", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54414", "wps": "17484.5", "ups": "5.48", "wpb": "3189.7", "bsz": "41.2", "num_updates": "314200", "lr": "2.17215e-05", "gnorm": "0.934", "loss_scale": "4", "train_wall": "36", "gb_free": "12.7", "wall": "59315"} [2023-11-02 10:04:05,673][train_inner][INFO] - {"epoch": 78, "update": 77.519, "loss": "2.811", "ntokens": "3154.24", "nsentences": "41.08", "prob_perplexity": "207.141", "code_perplexity": "204.543", "temp": "0.5", "loss_0": "2.708", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54179", "wps": "17226.4", "ups": "5.46", "wpb": "3154.2", "bsz": "41.1", "num_updates": "314400", "lr": "2.16709e-05", "gnorm": "0.937", "loss_scale": "4", "train_wall": "36", "gb_free": "13.7", "wall": "59352"} [2023-11-02 10:04:12,052][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 10:04:42,359][train_inner][INFO] - {"epoch": 78, "update": 77.569, "loss": "2.719", "ntokens": "3205.08", "nsentences": "44.68", "prob_perplexity": "209.237", "code_perplexity": "206.643", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55915", "wps": "17474.1", "ups": "5.45", "wpb": "3205.1", "bsz": "44.7", "num_updates": "314600", "lr": "2.16203e-05", "gnorm": "0.916", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "59389"} [2023-11-02 10:05:18,862][train_inner][INFO] - {"epoch": 78, "update": 77.618, "loss": "2.795", "ntokens": "3202.72", "nsentences": "42.64", "prob_perplexity": "209.101", "code_perplexity": "206.496", "temp": "0.5", "loss_0": "2.692", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54582", "wps": "17548.7", "ups": "5.48", "wpb": "3202.7", "bsz": "42.6", "num_updates": "314800", "lr": "2.15696e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "59425"} [2023-11-02 10:05:55,601][train_inner][INFO] - {"epoch": 78, "update": 77.668, "loss": "2.727", "ntokens": "3169.96", "nsentences": "45.56", "prob_perplexity": "207.58", "code_perplexity": "204.953", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55968", "wps": "17257.8", "ups": "5.44", "wpb": "3170", "bsz": "45.6", "num_updates": "315000", "lr": "2.1519e-05", "gnorm": "0.927", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "59462"} [2023-11-02 10:06:32,084][train_inner][INFO] - {"epoch": 78, "update": 77.717, "loss": "2.766", "ntokens": "3206.76", "nsentences": "45.52", "prob_perplexity": "208.501", "code_perplexity": "205.926", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55385", "wps": "17580.1", "ups": "5.48", "wpb": "3206.8", "bsz": "45.5", "num_updates": "315200", "lr": "2.14684e-05", "gnorm": "0.974", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "59498"} [2023-11-02 10:07:08,444][train_inner][INFO] - {"epoch": 78, "update": 77.766, "loss": "2.697", "ntokens": "3143.32", "nsentences": "45.08", "prob_perplexity": "207.058", "code_perplexity": "204.501", "temp": "0.5", "loss_0": "2.593", "loss_1": "0.098", "loss_2": "0.006", "accuracy": "0.56411", "wps": "17291.3", "ups": "5.5", "wpb": "3143.3", "bsz": "45.1", "num_updates": "315400", "lr": "2.14177e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "59535"} [2023-11-02 10:07:44,968][train_inner][INFO] - {"epoch": 78, "update": 77.816, "loss": "2.744", "ntokens": "3159", "nsentences": "42.72", "prob_perplexity": "208.388", "code_perplexity": "205.716", "temp": "0.5", "loss_0": "2.641", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55417", "wps": "17299.1", "ups": "5.48", "wpb": "3159", "bsz": "42.7", "num_updates": "315600", "lr": "2.13671e-05", "gnorm": "0.945", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "59571"} [2023-11-02 10:08:20,996][train_inner][INFO] - {"epoch": 78, "update": 77.865, "loss": "2.751", "ntokens": "3171.2", "nsentences": "44.68", "prob_perplexity": "207.642", "code_perplexity": "205.028", "temp": "0.5", "loss_0": "2.647", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55549", "wps": "17605.2", "ups": "5.55", "wpb": "3171.2", "bsz": "44.7", "num_updates": "315800", "lr": "2.13165e-05", "gnorm": "0.958", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "59607"} [2023-11-02 10:08:57,664][train_inner][INFO] - {"epoch": 78, "update": 77.914, "loss": "2.714", "ntokens": "3187.2", "nsentences": "46.48", "prob_perplexity": "208.227", "code_perplexity": "205.597", "temp": "0.5", "loss_0": "2.61", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56243", "wps": "17385.1", "ups": "5.45", "wpb": "3187.2", "bsz": "46.5", "num_updates": "316000", "lr": "2.12658e-05", "gnorm": "0.93", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "59644"} [2023-11-02 10:09:34,437][train_inner][INFO] - {"epoch": 78, "update": 77.964, "loss": "2.772", "ntokens": "3169.36", "nsentences": "43.2", "prob_perplexity": "208.002", "code_perplexity": "205.408", "temp": "0.5", "loss_0": "2.668", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55026", "wps": "17238.4", "ups": "5.44", "wpb": "3169.4", "bsz": "43.2", "num_updates": "316200", "lr": "2.12152e-05", "gnorm": "0.942", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "59681"} [2023-11-02 10:10:01,603][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 10:10:01,605][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:10:01,626][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 91 [2023-11-02 10:10:27,447][valid][INFO] - {"epoch": 78, "valid_loss": "2.635", "valid_ntokens": "3156.2", "valid_nsentences": "44.1685", "valid_prob_perplexity": "208.354", "valid_code_perplexity": "205.922", "valid_temp": "0.5", "valid_loss_0": "2.532", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.57747", "valid_wps": "55421.9", "valid_wpb": "3156.2", "valid_bsz": "44.2", "valid_num_updates": "316348", "valid_best_loss": "2.635"} [2023-11-02 10:10:27,449][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 78 @ 316348 updates [2023-11-02 10:10:27,451][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 10:10:28,870][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 10:10:29,880][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 78 @ 316348 updates, score 2.635) (writing took 2.430875718127936 seconds) [2023-11-02 10:10:29,881][fairseq_cli.train][INFO] - end of epoch 78 (average epoch stats below) [2023-11-02 10:10:29,883][train][INFO] - {"epoch": 78, "train_loss": "2.762", "train_ntokens": "3187.33", "train_nsentences": "44.2713", "train_prob_perplexity": "208.01", "train_code_perplexity": "205.407", "train_temp": "0.5", "train_loss_0": "2.659", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55274", "train_wps": "16870.1", "train_ups": "5.29", "train_wpb": "3187.3", "train_bsz": "44.3", "train_num_updates": "316348", "train_lr": "2.11777e-05", "train_gnorm": "0.935", "train_loss_scale": "2", "train_train_wall": "725", "train_gb_free": "14", "train_wall": "59736"} [2023-11-02 10:10:29,885][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:10:29,905][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 79 [2023-11-02 10:10:30,080][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 10:10:30,139][fairseq.trainer][INFO] - begin training epoch 79 [2023-11-02 10:10:30,140][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 10:10:39,437][train_inner][INFO] - {"epoch": 79, "update": 78.013, "loss": "2.793", "ntokens": "3218.68", "nsentences": "43.72", "prob_perplexity": "207.481", "code_perplexity": "204.851", "temp": "0.5", "loss_0": "2.689", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54673", "wps": "9903.9", "ups": "3.08", "wpb": "3218.7", "bsz": "43.7", "num_updates": "316400", "lr": "2.11646e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "59746"} [2023-11-02 10:11:15,071][train_inner][INFO] - {"epoch": 79, "update": 78.062, "loss": "2.769", "ntokens": "3173.96", "nsentences": "43.88", "prob_perplexity": "209.007", "code_perplexity": "206.322", "temp": "0.5", "loss_0": "2.666", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55098", "wps": "17814.9", "ups": "5.61", "wpb": "3174", "bsz": "43.9", "num_updates": "316600", "lr": "2.11139e-05", "gnorm": "0.96", "loss_scale": "2", "train_wall": "35", "gb_free": "12.3", "wall": "59781"} [2023-11-02 10:11:51,379][train_inner][INFO] - {"epoch": 79, "update": 78.111, "loss": "2.762", "ntokens": "3224.76", "nsentences": "45.44", "prob_perplexity": "208.895", "code_perplexity": "206.291", "temp": "0.5", "loss_0": "2.659", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55347", "wps": "17764.4", "ups": "5.51", "wpb": "3224.8", "bsz": "45.4", "num_updates": "316800", "lr": "2.10633e-05", "gnorm": "0.925", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "59818"} [2023-11-02 10:12:27,183][train_inner][INFO] - {"epoch": 79, "update": 78.161, "loss": "2.726", "ntokens": "3154.96", "nsentences": "43.6", "prob_perplexity": "208.598", "code_perplexity": "205.971", "temp": "0.5", "loss_0": "2.622", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55805", "wps": "17624.6", "ups": "5.59", "wpb": "3155", "bsz": "43.6", "num_updates": "317000", "lr": "2.10127e-05", "gnorm": "0.94", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "59853"} [2023-11-02 10:13:02,818][train_inner][INFO] - {"epoch": 79, "update": 78.21, "loss": "2.795", "ntokens": "3201.52", "nsentences": "42.84", "prob_perplexity": "209.229", "code_perplexity": "206.63", "temp": "0.5", "loss_0": "2.691", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54566", "wps": "17969.5", "ups": "5.61", "wpb": "3201.5", "bsz": "42.8", "num_updates": "317200", "lr": "2.0962e-05", "gnorm": "0.94", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "59889"} [2023-11-02 10:13:38,776][train_inner][INFO] - {"epoch": 79, "update": 78.259, "loss": "2.765", "ntokens": "3185.92", "nsentences": "44.04", "prob_perplexity": "209.324", "code_perplexity": "206.711", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5523", "wps": "17721.1", "ups": "5.56", "wpb": "3185.9", "bsz": "44", "num_updates": "317400", "lr": "2.09114e-05", "gnorm": "0.943", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "59925"} [2023-11-02 10:14:14,722][train_inner][INFO] - {"epoch": 79, "update": 78.309, "loss": "2.81", "ntokens": "3188.76", "nsentences": "41.68", "prob_perplexity": "207.922", "code_perplexity": "205.284", "temp": "0.5", "loss_0": "2.706", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54309", "wps": "17743.2", "ups": "5.56", "wpb": "3188.8", "bsz": "41.7", "num_updates": "317600", "lr": "2.08608e-05", "gnorm": "0.979", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "59961"} [2023-11-02 10:14:51,034][train_inner][INFO] - {"epoch": 79, "update": 78.358, "loss": "2.711", "ntokens": "3193.28", "nsentences": "47.04", "prob_perplexity": "209.531", "code_perplexity": "206.925", "temp": "0.5", "loss_0": "2.608", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56356", "wps": "17588.8", "ups": "5.51", "wpb": "3193.3", "bsz": "47", "num_updates": "317800", "lr": "2.08101e-05", "gnorm": "0.937", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "59997"} [2023-11-02 10:15:27,321][train_inner][INFO] - {"epoch": 79, "update": 78.407, "loss": "2.733", "ntokens": "3206.28", "nsentences": "45.88", "prob_perplexity": "210.351", "code_perplexity": "207.745", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55871", "wps": "17673.2", "ups": "5.51", "wpb": "3206.3", "bsz": "45.9", "num_updates": "318000", "lr": "2.07595e-05", "gnorm": "0.919", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "60034"} [2023-11-02 10:16:03,922][train_inner][INFO] - {"epoch": 79, "update": 78.457, "loss": "2.793", "ntokens": "3173.52", "nsentences": "42.76", "prob_perplexity": "209.42", "code_perplexity": "206.749", "temp": "0.5", "loss_0": "2.689", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54631", "wps": "17342.3", "ups": "5.46", "wpb": "3173.5", "bsz": "42.8", "num_updates": "318200", "lr": "2.07089e-05", "gnorm": "0.93", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "60070"} [2023-11-02 10:16:40,417][train_inner][INFO] - {"epoch": 79, "update": 78.506, "loss": "2.756", "ntokens": "3186.28", "nsentences": "43.4", "prob_perplexity": "208.771", "code_perplexity": "206.146", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5532", "wps": "17462.5", "ups": "5.48", "wpb": "3186.3", "bsz": "43.4", "num_updates": "318400", "lr": "2.06582e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "60107"} [2023-11-02 10:17:17,066][train_inner][INFO] - {"epoch": 79, "update": 78.555, "loss": "2.708", "ntokens": "3175.68", "nsentences": "46.32", "prob_perplexity": "209.305", "code_perplexity": "206.713", "temp": "0.5", "loss_0": "2.605", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5633", "wps": "17331.6", "ups": "5.46", "wpb": "3175.7", "bsz": "46.3", "num_updates": "318600", "lr": "2.06076e-05", "gnorm": "0.929", "loss_scale": "2", "train_wall": "36", "gb_free": "15.2", "wall": "60143"} [2023-11-02 10:17:54,383][train_inner][INFO] - {"epoch": 79, "update": 78.605, "loss": "2.789", "ntokens": "3218.2", "nsentences": "44", "prob_perplexity": "209.213", "code_perplexity": "206.62", "temp": "0.5", "loss_0": "2.685", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54817", "wps": "17248.7", "ups": "5.36", "wpb": "3218.2", "bsz": "44", "num_updates": "318800", "lr": "2.0557e-05", "gnorm": "0.934", "loss_scale": "2", "train_wall": "37", "gb_free": "13.9", "wall": "60181"} [2023-11-02 10:18:30,864][train_inner][INFO] - {"epoch": 79, "update": 78.654, "loss": "2.798", "ntokens": "3199.44", "nsentences": "43.08", "prob_perplexity": "208.589", "code_perplexity": "205.953", "temp": "0.5", "loss_0": "2.694", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5458", "wps": "17541.5", "ups": "5.48", "wpb": "3199.4", "bsz": "43.1", "num_updates": "319000", "lr": "2.05063e-05", "gnorm": "0.931", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "60217"} [2023-11-02 10:19:07,302][train_inner][INFO] - {"epoch": 79, "update": 78.703, "loss": "2.703", "ntokens": "3173.92", "nsentences": "47.4", "prob_perplexity": "208.731", "code_perplexity": "206.16", "temp": "0.5", "loss_0": "2.599", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56607", "wps": "17421.7", "ups": "5.49", "wpb": "3173.9", "bsz": "47.4", "num_updates": "319200", "lr": "2.04557e-05", "gnorm": "0.93", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "60254"} [2023-11-02 10:19:43,966][train_inner][INFO] - {"epoch": 79, "update": 78.752, "loss": "2.776", "ntokens": "3191", "nsentences": "44.28", "prob_perplexity": "209.014", "code_perplexity": "206.412", "temp": "0.5", "loss_0": "2.673", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55068", "wps": "17407.9", "ups": "5.46", "wpb": "3191", "bsz": "44.3", "num_updates": "319400", "lr": "2.04051e-05", "gnorm": "0.933", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "60290"} [2023-11-02 10:20:19,911][train_inner][INFO] - {"epoch": 79, "update": 78.802, "loss": "2.755", "ntokens": "3200.44", "nsentences": "44.84", "prob_perplexity": "209.478", "code_perplexity": "206.908", "temp": "0.5", "loss_0": "2.652", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55467", "wps": "17808.4", "ups": "5.56", "wpb": "3200.4", "bsz": "44.8", "num_updates": "319600", "lr": "2.03544e-05", "gnorm": "0.959", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "60326"} [2023-11-02 10:20:56,241][train_inner][INFO] - {"epoch": 79, "update": 78.851, "loss": "2.717", "ntokens": "3155.6", "nsentences": "45.12", "prob_perplexity": "207.749", "code_perplexity": "205.202", "temp": "0.5", "loss_0": "2.614", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56105", "wps": "17372.7", "ups": "5.51", "wpb": "3155.6", "bsz": "45.1", "num_updates": "319800", "lr": "2.03038e-05", "gnorm": "0.939", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "60362"} [2023-11-02 10:21:32,457][train_inner][INFO] - {"epoch": 79, "update": 78.9, "loss": "2.821", "ntokens": "3211.24", "nsentences": "42.56", "prob_perplexity": "208.338", "code_perplexity": "205.724", "temp": "0.5", "loss_0": "2.717", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.541", "wps": "17735", "ups": "5.52", "wpb": "3211.2", "bsz": "42.6", "num_updates": "320000", "lr": "2.02532e-05", "gnorm": "0.94", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "60399"} [2023-11-02 10:22:08,979][train_inner][INFO] - {"epoch": 79, "update": 78.95, "loss": "2.752", "ntokens": "3215.88", "nsentences": "43.16", "prob_perplexity": "208.89", "code_perplexity": "206.364", "temp": "0.5", "loss_0": "2.649", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55202", "wps": "17611.3", "ups": "5.48", "wpb": "3215.9", "bsz": "43.2", "num_updates": "320200", "lr": "2.02025e-05", "gnorm": "0.929", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "60435"} [2023-11-02 10:22:44,571][train_inner][INFO] - {"epoch": 79, "update": 78.999, "loss": "2.731", "ntokens": "3159.28", "nsentences": "44.52", "prob_perplexity": "209.185", "code_perplexity": "206.622", "temp": "0.5", "loss_0": "2.627", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5585", "wps": "17754.2", "ups": "5.62", "wpb": "3159.3", "bsz": "44.5", "num_updates": "320400", "lr": "2.01519e-05", "gnorm": "0.931", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "60471"} [2023-11-02 10:22:45,333][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 10:22:45,334][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:22:45,354][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 92 [2023-11-02 10:23:10,667][valid][INFO] - {"epoch": 79, "valid_loss": "2.628", "valid_ntokens": "3156.19", "valid_nsentences": "44.1685", "valid_prob_perplexity": "206.442", "valid_code_perplexity": "203.965", "valid_temp": "0.5", "valid_loss_0": "2.525", "valid_loss_1": "0.098", "valid_loss_2": "0.006", "valid_accuracy": "0.57931", "valid_wps": "56634.7", "valid_wpb": "3156.2", "valid_bsz": "44.2", "valid_num_updates": "320404", "valid_best_loss": "2.628"} [2023-11-02 10:23:10,669][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 79 @ 320404 updates [2023-11-02 10:23:10,671][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 10:23:12,070][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 10:23:13,066][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 79 @ 320404 updates, score 2.628) (writing took 2.3969887662678957 seconds) [2023-11-02 10:23:13,067][fairseq_cli.train][INFO] - end of epoch 79 (average epoch stats below) [2023-11-02 10:23:13,070][train][INFO] - {"epoch": 79, "train_loss": "2.758", "train_ntokens": "3188.69", "train_nsentences": "44.2682", "train_prob_perplexity": "208.96", "train_code_perplexity": "206.356", "train_temp": "0.5", "train_loss_0": "2.655", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55339", "train_wps": "16946.5", "train_ups": "5.31", "train_wpb": "3188.7", "train_bsz": "44.3", "train_num_updates": "320404", "train_lr": "2.01509e-05", "train_gnorm": "0.938", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "12.8", "train_wall": "60499"} [2023-11-02 10:23:13,073][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:23:13,093][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 80 [2023-11-02 10:23:13,286][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 10:23:13,345][fairseq.trainer][INFO] - begin training epoch 80 [2023-11-02 10:23:13,346][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 10:23:49,206][train_inner][INFO] - {"epoch": 80, "update": 79.048, "loss": "2.75", "ntokens": "3234.44", "nsentences": "45.08", "prob_perplexity": "208.682", "code_perplexity": "206.135", "temp": "0.5", "loss_0": "2.647", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55523", "wps": "10008.5", "ups": "3.09", "wpb": "3234.4", "bsz": "45.1", "num_updates": "320600", "lr": "2.01013e-05", "gnorm": "0.921", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "60535"} [2023-11-02 10:24:24,930][train_inner][INFO] - {"epoch": 80, "update": 79.098, "loss": "2.797", "ntokens": "3200", "nsentences": "44.12", "prob_perplexity": "209.157", "code_perplexity": "206.61", "temp": "0.5", "loss_0": "2.693", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54709", "wps": "17916.3", "ups": "5.6", "wpb": "3200", "bsz": "44.1", "num_updates": "320800", "lr": "2.00506e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "60571"} [2023-11-02 10:25:00,602][train_inner][INFO] - {"epoch": 80, "update": 79.147, "loss": "2.735", "ntokens": "3184.68", "nsentences": "45.36", "prob_perplexity": "209.468", "code_perplexity": "206.849", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55886", "wps": "17856.2", "ups": "5.61", "wpb": "3184.7", "bsz": "45.4", "num_updates": "321000", "lr": "2e-05", "gnorm": "0.926", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "60607"} [2023-11-02 10:25:36,365][train_inner][INFO] - {"epoch": 80, "update": 79.196, "loss": "2.79", "ntokens": "3161.24", "nsentences": "42.24", "prob_perplexity": "208.256", "code_perplexity": "205.647", "temp": "0.5", "loss_0": "2.686", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54599", "wps": "17679.9", "ups": "5.59", "wpb": "3161.2", "bsz": "42.2", "num_updates": "321200", "lr": "1.99494e-05", "gnorm": "0.998", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "60643"} [2023-11-02 10:26:12,116][train_inner][INFO] - {"epoch": 80, "update": 79.246, "loss": "2.738", "ntokens": "3190.8", "nsentences": "44.52", "prob_perplexity": "209.351", "code_perplexity": "206.808", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55674", "wps": "17851", "ups": "5.59", "wpb": "3190.8", "bsz": "44.5", "num_updates": "321400", "lr": "1.98987e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "60678"} [2023-11-02 10:26:47,755][train_inner][INFO] - {"epoch": 80, "update": 79.295, "loss": "2.733", "ntokens": "3129.32", "nsentences": "44.2", "prob_perplexity": "209.614", "code_perplexity": "206.963", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55816", "wps": "17562.4", "ups": "5.61", "wpb": "3129.3", "bsz": "44.2", "num_updates": "321600", "lr": "1.98481e-05", "gnorm": "0.941", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "60714"} [2023-11-02 10:27:23,963][train_inner][INFO] - {"epoch": 80, "update": 79.344, "loss": "2.733", "ntokens": "3189.68", "nsentences": "44.88", "prob_perplexity": "209.053", "code_perplexity": "206.416", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55736", "wps": "17619.8", "ups": "5.52", "wpb": "3189.7", "bsz": "44.9", "num_updates": "321800", "lr": "1.97975e-05", "gnorm": "0.935", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "60750"} [2023-11-02 10:28:00,516][train_inner][INFO] - {"epoch": 80, "update": 79.393, "loss": "2.737", "ntokens": "3164.48", "nsentences": "42.88", "prob_perplexity": "210.087", "code_perplexity": "207.424", "temp": "0.5", "loss_0": "2.634", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55488", "wps": "17315.6", "ups": "5.47", "wpb": "3164.5", "bsz": "42.9", "num_updates": "322000", "lr": "1.97468e-05", "gnorm": "0.964", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "60787"} [2023-11-02 10:28:37,031][train_inner][INFO] - {"epoch": 80, "update": 79.443, "loss": "2.802", "ntokens": "3204.2", "nsentences": "43.28", "prob_perplexity": "209.308", "code_perplexity": "206.781", "temp": "0.5", "loss_0": "2.699", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54508", "wps": "17550.9", "ups": "5.48", "wpb": "3204.2", "bsz": "43.3", "num_updates": "322200", "lr": "1.96962e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "60823"} [2023-11-02 10:29:13,097][train_inner][INFO] - {"epoch": 80, "update": 79.492, "loss": "2.76", "ntokens": "3202.12", "nsentences": "45.52", "prob_perplexity": "210.389", "code_perplexity": "207.777", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55368", "wps": "17758.3", "ups": "5.55", "wpb": "3202.1", "bsz": "45.5", "num_updates": "322400", "lr": "1.96456e-05", "gnorm": "0.931", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "60859"} [2023-11-02 10:29:49,949][train_inner][INFO] - {"epoch": 80, "update": 79.541, "loss": "2.77", "ntokens": "3214", "nsentences": "44.52", "prob_perplexity": "209.633", "code_perplexity": "207.045", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55091", "wps": "17443.6", "ups": "5.43", "wpb": "3214", "bsz": "44.5", "num_updates": "322600", "lr": "1.95949e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "60896"} [2023-11-02 10:30:26,784][train_inner][INFO] - {"epoch": 80, "update": 79.591, "loss": "2.742", "ntokens": "3170.96", "nsentences": "43.44", "prob_perplexity": "208.996", "code_perplexity": "206.385", "temp": "0.5", "loss_0": "2.639", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55569", "wps": "17218.4", "ups": "5.43", "wpb": "3171", "bsz": "43.4", "num_updates": "322800", "lr": "1.95443e-05", "gnorm": "0.935", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "60933"} [2023-11-02 10:31:04,079][train_inner][INFO] - {"epoch": 80, "update": 79.64, "loss": "2.726", "ntokens": "3248.32", "nsentences": "47.2", "prob_perplexity": "210.135", "code_perplexity": "207.572", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55992", "wps": "17428.6", "ups": "5.37", "wpb": "3248.3", "bsz": "47.2", "num_updates": "323000", "lr": "1.94937e-05", "gnorm": "0.917", "loss_scale": "2", "train_wall": "37", "gb_free": "14.2", "wall": "60970"} [2023-11-02 10:31:40,347][train_inner][INFO] - {"epoch": 80, "update": 79.689, "loss": "2.806", "ntokens": "3229.48", "nsentences": "43.24", "prob_perplexity": "209.533", "code_perplexity": "206.959", "temp": "0.5", "loss_0": "2.703", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54461", "wps": "17810.1", "ups": "5.51", "wpb": "3229.5", "bsz": "43.2", "num_updates": "323200", "lr": "1.9443e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "61007"} [2023-11-02 10:32:16,814][train_inner][INFO] - {"epoch": 80, "update": 79.739, "loss": "2.819", "ntokens": "3181.4", "nsentences": "42.52", "prob_perplexity": "209.189", "code_perplexity": "206.552", "temp": "0.5", "loss_0": "2.716", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54188", "wps": "17449", "ups": "5.48", "wpb": "3181.4", "bsz": "42.5", "num_updates": "323400", "lr": "1.93924e-05", "gnorm": "0.965", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "61043"} [2023-11-02 10:32:53,067][train_inner][INFO] - {"epoch": 80, "update": 79.788, "loss": "2.734", "ntokens": "3161.76", "nsentences": "45", "prob_perplexity": "209.483", "code_perplexity": "206.863", "temp": "0.5", "loss_0": "2.631", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55845", "wps": "17444", "ups": "5.52", "wpb": "3161.8", "bsz": "45", "num_updates": "323600", "lr": "1.93418e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "61079"} [2023-11-02 10:33:29,115][train_inner][INFO] - {"epoch": 80, "update": 79.837, "loss": "2.727", "ntokens": "3172.52", "nsentences": "45.8", "prob_perplexity": "210.7", "code_perplexity": "208.108", "temp": "0.5", "loss_0": "2.624", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56076", "wps": "17602.6", "ups": "5.55", "wpb": "3172.5", "bsz": "45.8", "num_updates": "323800", "lr": "1.92911e-05", "gnorm": "0.93", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "61115"} [2023-11-02 10:34:05,751][train_inner][INFO] - {"epoch": 80, "update": 79.887, "loss": "2.753", "ntokens": "3174.4", "nsentences": "43.36", "prob_perplexity": "209.493", "code_perplexity": "206.882", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55324", "wps": "17330.4", "ups": "5.46", "wpb": "3174.4", "bsz": "43.4", "num_updates": "324000", "lr": "1.92405e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "36", "gb_free": "13", "wall": "61152"} [2023-11-02 10:34:42,240][train_inner][INFO] - {"epoch": 80, "update": 79.936, "loss": "2.785", "ntokens": "3217.84", "nsentences": "43.72", "prob_perplexity": "210.797", "code_perplexity": "208.113", "temp": "0.5", "loss_0": "2.682", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54847", "wps": "17638.2", "ups": "5.48", "wpb": "3217.8", "bsz": "43.7", "num_updates": "324200", "lr": "1.91899e-05", "gnorm": "0.925", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "61188"} [2023-11-02 10:35:18,460][train_inner][INFO] - {"epoch": 80, "update": 79.985, "loss": "2.767", "ntokens": "3189.64", "nsentences": "43.8", "prob_perplexity": "210.003", "code_perplexity": "207.377", "temp": "0.5", "loss_0": "2.664", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55163", "wps": "17613.4", "ups": "5.52", "wpb": "3189.6", "bsz": "43.8", "num_updates": "324400", "lr": "1.91392e-05", "gnorm": "0.931", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "61225"} [2023-11-02 10:35:29,325][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 10:35:29,327][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:35:29,347][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 93 [2023-11-02 10:35:55,064][valid][INFO] - {"epoch": 80, "valid_loss": "2.638", "valid_ntokens": "3162.39", "valid_nsentences": "44.1685", "valid_prob_perplexity": "209.162", "valid_code_perplexity": "206.665", "valid_temp": "0.5", "valid_loss_0": "2.535", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.57708", "valid_wps": "55772.9", "valid_wpb": "3162.4", "valid_bsz": "44.2", "valid_num_updates": "324460", "valid_best_loss": "2.628"} [2023-11-02 10:35:55,066][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 80 @ 324460 updates [2023-11-02 10:35:55,068][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 10:35:56,499][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 10:35:56,551][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 80 @ 324460 updates, score 2.638) (writing took 1.4855228518135846 seconds) [2023-11-02 10:35:56,552][fairseq_cli.train][INFO] - end of epoch 80 (average epoch stats below) [2023-11-02 10:35:56,554][train][INFO] - {"epoch": 80, "train_loss": "2.76", "train_ntokens": "3190.17", "train_nsentences": "44.2682", "train_prob_perplexity": "209.563", "train_code_perplexity": "206.959", "train_temp": "0.5", "train_loss_0": "2.657", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55305", "train_wps": "16947.8", "train_ups": "5.31", "train_wpb": "3190.2", "train_bsz": "44.3", "train_num_updates": "324460", "train_lr": "1.91241e-05", "train_gnorm": "0.939", "train_loss_scale": "2", "train_train_wall": "723", "train_gb_free": "13.2", "train_wall": "61263"} [2023-11-02 10:35:56,557][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:35:56,574][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 81 [2023-11-02 10:35:56,739][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 10:35:56,798][fairseq.trainer][INFO] - begin training epoch 81 [2023-11-02 10:35:56,799][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 10:36:22,103][train_inner][INFO] - {"epoch": 81, "update": 80.035, "loss": "2.722", "ntokens": "3159.96", "nsentences": "45.52", "prob_perplexity": "209.894", "code_perplexity": "207.248", "temp": "0.5", "loss_0": "2.62", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56012", "wps": "9930.7", "ups": "3.14", "wpb": "3160", "bsz": "45.5", "num_updates": "324600", "lr": "1.90886e-05", "gnorm": "0.934", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "61288"} [2023-11-02 10:36:57,847][train_inner][INFO] - {"epoch": 81, "update": 80.084, "loss": "2.768", "ntokens": "3205.2", "nsentences": "43.44", "prob_perplexity": "208.976", "code_perplexity": "206.462", "temp": "0.5", "loss_0": "2.665", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55005", "wps": "17935.4", "ups": "5.6", "wpb": "3205.2", "bsz": "43.4", "num_updates": "324800", "lr": "1.9038e-05", "gnorm": "0.943", "loss_scale": "2", "train_wall": "35", "gb_free": "14.8", "wall": "61324"} [2023-11-02 10:37:33,651][train_inner][INFO] - {"epoch": 81, "update": 80.133, "loss": "2.726", "ntokens": "3165.12", "nsentences": "44.96", "prob_perplexity": "209.095", "code_perplexity": "206.492", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56024", "wps": "17681.1", "ups": "5.59", "wpb": "3165.1", "bsz": "45", "num_updates": "325000", "lr": "1.89873e-05", "gnorm": "0.948", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "61360"} [2023-11-02 10:37:33,652][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 10:37:33,654][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:37:33,672][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 94 [2023-11-02 10:37:59,453][valid][INFO] - {"epoch": 81, "valid_loss": "2.635", "valid_ntokens": "3166.71", "valid_nsentences": "44.1685", "valid_prob_perplexity": "208.517", "valid_code_perplexity": "206.028", "valid_temp": "0.5", "valid_loss_0": "2.531", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.57774", "valid_wps": "55764.6", "valid_wpb": "3166.7", "valid_bsz": "44.2", "valid_num_updates": "325000", "valid_best_loss": "2.628"} [2023-11-02 10:37:59,454][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 81 @ 325000 updates [2023-11-02 10:37:59,456][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_81_325000.pt [2023-11-02 10:38:00,818][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_81_325000.pt [2023-11-02 10:38:01,816][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_81_325000.pt (epoch 81 @ 325000 updates, score 2.635) (writing took 2.3614658094011247 seconds) [2023-11-02 10:38:37,558][train_inner][INFO] - {"epoch": 81, "update": 80.182, "loss": "2.725", "ntokens": "3153.88", "nsentences": "45.28", "prob_perplexity": "209.694", "code_perplexity": "207.048", "temp": "0.5", "loss_0": "2.622", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56042", "wps": "9870.5", "ups": "3.13", "wpb": "3153.9", "bsz": "45.3", "num_updates": "325200", "lr": "1.89367e-05", "gnorm": "0.941", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "61424"} [2023-11-02 10:39:13,476][train_inner][INFO] - {"epoch": 81, "update": 80.232, "loss": "2.764", "ntokens": "3169.72", "nsentences": "42.68", "prob_perplexity": "209.88", "code_perplexity": "207.199", "temp": "0.5", "loss_0": "2.661", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5512", "wps": "17650.5", "ups": "5.57", "wpb": "3169.7", "bsz": "42.7", "num_updates": "325400", "lr": "1.88861e-05", "gnorm": "0.939", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "61460"} [2023-11-02 10:39:49,383][train_inner][INFO] - {"epoch": 81, "update": 80.281, "loss": "2.798", "ntokens": "3170.56", "nsentences": "42.2", "prob_perplexity": "209.536", "code_perplexity": "206.863", "temp": "0.5", "loss_0": "2.695", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54543", "wps": "17661.1", "ups": "5.57", "wpb": "3170.6", "bsz": "42.2", "num_updates": "325600", "lr": "1.88354e-05", "gnorm": "0.951", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "61496"} [2023-11-02 10:40:25,615][train_inner][INFO] - {"epoch": 81, "update": 80.33, "loss": "2.738", "ntokens": "3236.16", "nsentences": "44.84", "prob_perplexity": "210.282", "code_perplexity": "207.721", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55612", "wps": "17864.7", "ups": "5.52", "wpb": "3236.2", "bsz": "44.8", "num_updates": "325800", "lr": "1.87848e-05", "gnorm": "0.931", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "61532"} [2023-11-02 10:41:01,795][train_inner][INFO] - {"epoch": 81, "update": 80.38, "loss": "2.825", "ntokens": "3192.8", "nsentences": "41.08", "prob_perplexity": "209.642", "code_perplexity": "207.058", "temp": "0.5", "loss_0": "2.722", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.53887", "wps": "17650.7", "ups": "5.53", "wpb": "3192.8", "bsz": "41.1", "num_updates": "326000", "lr": "1.87342e-05", "gnorm": "0.947", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "61568"} [2023-11-02 10:41:38,933][train_inner][INFO] - {"epoch": 81, "update": 80.429, "loss": "2.789", "ntokens": "3229.48", "nsentences": "44.12", "prob_perplexity": "209.526", "code_perplexity": "206.884", "temp": "0.5", "loss_0": "2.686", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54771", "wps": "17392.7", "ups": "5.39", "wpb": "3229.5", "bsz": "44.1", "num_updates": "326200", "lr": "1.86835e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "37", "gb_free": "12.4", "wall": "61605"} [2023-11-02 10:42:15,298][train_inner][INFO] - {"epoch": 81, "update": 80.478, "loss": "2.744", "ntokens": "3212.4", "nsentences": "43.96", "prob_perplexity": "210.561", "code_perplexity": "207.962", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55486", "wps": "17668.6", "ups": "5.5", "wpb": "3212.4", "bsz": "44", "num_updates": "326400", "lr": "1.86329e-05", "gnorm": "0.928", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "61642"} [2023-11-02 10:42:51,776][train_inner][INFO] - {"epoch": 81, "update": 80.528, "loss": "2.76", "ntokens": "3221", "nsentences": "44.84", "prob_perplexity": "210.081", "code_perplexity": "207.501", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55382", "wps": "17660.9", "ups": "5.48", "wpb": "3221", "bsz": "44.8", "num_updates": "326600", "lr": "1.85823e-05", "gnorm": "0.927", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "61678"} [2023-11-02 10:43:27,340][train_inner][INFO] - {"epoch": 81, "update": 80.577, "loss": "2.681", "ntokens": "3160.84", "nsentences": "45.8", "prob_perplexity": "209.77", "code_perplexity": "207.081", "temp": "0.5", "loss_0": "2.578", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56848", "wps": "17776.6", "ups": "5.62", "wpb": "3160.8", "bsz": "45.8", "num_updates": "326800", "lr": "1.85316e-05", "gnorm": "0.924", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "61714"} [2023-11-02 10:44:03,437][train_inner][INFO] - {"epoch": 81, "update": 80.626, "loss": "2.731", "ntokens": "3151.88", "nsentences": "44.68", "prob_perplexity": "209.918", "code_perplexity": "207.252", "temp": "0.5", "loss_0": "2.628", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55881", "wps": "17464.7", "ups": "5.54", "wpb": "3151.9", "bsz": "44.7", "num_updates": "327000", "lr": "1.8481e-05", "gnorm": "0.942", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "61750"} [2023-11-02 10:44:40,070][train_inner][INFO] - {"epoch": 81, "update": 80.676, "loss": "2.772", "ntokens": "3192.2", "nsentences": "43.2", "prob_perplexity": "208.431", "code_perplexity": "205.843", "temp": "0.5", "loss_0": "2.669", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5497", "wps": "17436.3", "ups": "5.46", "wpb": "3192.2", "bsz": "43.2", "num_updates": "327200", "lr": "1.84304e-05", "gnorm": "0.948", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "61786"} [2023-11-02 10:45:16,815][train_inner][INFO] - {"epoch": 81, "update": 80.725, "loss": "2.755", "ntokens": "3203.16", "nsentences": "44.92", "prob_perplexity": "209.284", "code_perplexity": "206.635", "temp": "0.5", "loss_0": "2.652", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55466", "wps": "17435.6", "ups": "5.44", "wpb": "3203.2", "bsz": "44.9", "num_updates": "327400", "lr": "1.83797e-05", "gnorm": "0.935", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "61823"} [2023-11-02 10:45:52,843][train_inner][INFO] - {"epoch": 81, "update": 80.774, "loss": "2.69", "ntokens": "3160.8", "nsentences": "44.48", "prob_perplexity": "209.275", "code_perplexity": "206.688", "temp": "0.5", "loss_0": "2.587", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56389", "wps": "17547.3", "ups": "5.55", "wpb": "3160.8", "bsz": "44.5", "num_updates": "327600", "lr": "1.83291e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "35", "gb_free": "14.5", "wall": "61859"} [2023-11-02 10:46:29,404][train_inner][INFO] - {"epoch": 81, "update": 80.823, "loss": "2.738", "ntokens": "3175.76", "nsentences": "46.28", "prob_perplexity": "209.997", "code_perplexity": "207.308", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55907", "wps": "17373.3", "ups": "5.47", "wpb": "3175.8", "bsz": "46.3", "num_updates": "327800", "lr": "1.82785e-05", "gnorm": "0.933", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "61896"} [2023-11-02 10:47:05,736][train_inner][INFO] - {"epoch": 81, "update": 80.873, "loss": "2.697", "ntokens": "3157.6", "nsentences": "45.32", "prob_perplexity": "209.459", "code_perplexity": "206.795", "temp": "0.5", "loss_0": "2.594", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5641", "wps": "17382.9", "ups": "5.51", "wpb": "3157.6", "bsz": "45.3", "num_updates": "328000", "lr": "1.82278e-05", "gnorm": "0.958", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "61932"} [2023-11-02 10:47:42,647][train_inner][INFO] - {"epoch": 81, "update": 80.922, "loss": "2.788", "ntokens": "3189.68", "nsentences": "43.16", "prob_perplexity": "208.956", "code_perplexity": "206.332", "temp": "0.5", "loss_0": "2.685", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54699", "wps": "17284", "ups": "5.42", "wpb": "3189.7", "bsz": "43.2", "num_updates": "328200", "lr": "1.81772e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "36", "gb_free": "15.5", "wall": "61969"} [2023-11-02 10:48:19,496][train_inner][INFO] - {"epoch": 81, "update": 80.971, "loss": "2.787", "ntokens": "3231.92", "nsentences": "44", "prob_perplexity": "209.599", "code_perplexity": "207.031", "temp": "0.5", "loss_0": "2.684", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54779", "wps": "17542.5", "ups": "5.43", "wpb": "3231.9", "bsz": "44", "num_updates": "328400", "lr": "1.81266e-05", "gnorm": "0.942", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "62006"} [2023-11-02 10:48:40,270][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 10:48:40,272][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:48:40,289][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 95 [2023-11-02 10:49:06,049][valid][INFO] - {"epoch": 81, "valid_loss": "2.645", "valid_ntokens": "3160.85", "valid_nsentences": "44.1685", "valid_prob_perplexity": "209.324", "valid_code_perplexity": "206.813", "valid_temp": "0.5", "valid_loss_0": "2.542", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.57646", "valid_wps": "55738.5", "valid_wpb": "3160.9", "valid_bsz": "44.2", "valid_num_updates": "328516", "valid_best_loss": "2.628"} [2023-11-02 10:49:06,051][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 81 @ 328516 updates [2023-11-02 10:49:06,053][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 10:49:07,481][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 10:49:07,528][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 81 @ 328516 updates, score 2.645) (writing took 1.476599350105971 seconds) [2023-11-02 10:49:07,529][fairseq_cli.train][INFO] - end of epoch 81 (average epoch stats below) [2023-11-02 10:49:07,531][train][INFO] - {"epoch": 81, "train_loss": "2.748", "train_ntokens": "3186.4", "train_nsentences": "44.2682", "train_prob_perplexity": "209.601", "train_code_perplexity": "206.979", "train_temp": "0.5", "train_loss_0": "2.645", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55498", "train_wps": "16339.4", "train_ups": "5.13", "train_wpb": "3186.4", "train_bsz": "44.3", "train_num_updates": "328516", "train_lr": "1.80972e-05", "train_gnorm": "0.94", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "15.8", "train_wall": "62054"} [2023-11-02 10:49:07,533][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 10:49:07,551][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 82 [2023-11-02 10:49:07,722][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 10:49:07,783][fairseq.trainer][INFO] - begin training epoch 82 [2023-11-02 10:49:07,783][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 10:49:23,404][train_inner][INFO] - {"epoch": 82, "update": 81.021, "loss": "2.649", "ntokens": "3149.6", "nsentences": "47", "prob_perplexity": "209.766", "code_perplexity": "207.103", "temp": "0.5", "loss_0": "2.546", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.57402", "wps": "9857.1", "ups": "3.13", "wpb": "3149.6", "bsz": "47", "num_updates": "328600", "lr": "1.80759e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "62070"} [2023-11-02 10:49:59,583][train_inner][INFO] - {"epoch": 82, "update": 81.07, "loss": "2.718", "ntokens": "3184.76", "nsentences": "44.88", "prob_perplexity": "209.582", "code_perplexity": "206.907", "temp": "0.5", "loss_0": "2.615", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56003", "wps": "17606.4", "ups": "5.53", "wpb": "3184.8", "bsz": "44.9", "num_updates": "328800", "lr": "1.80253e-05", "gnorm": "0.933", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "62106"} [2023-11-02 10:50:35,777][train_inner][INFO] - {"epoch": 82, "update": 81.119, "loss": "2.755", "ntokens": "3189.16", "nsentences": "43.44", "prob_perplexity": "208.714", "code_perplexity": "206.048", "temp": "0.5", "loss_0": "2.652", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55313", "wps": "17623.7", "ups": "5.53", "wpb": "3189.2", "bsz": "43.4", "num_updates": "329000", "lr": "1.79747e-05", "gnorm": "0.946", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "62142"} [2023-11-02 10:51:11,974][train_inner][INFO] - {"epoch": 82, "update": 81.169, "loss": "2.753", "ntokens": "3210.56", "nsentences": "44.96", "prob_perplexity": "209.964", "code_perplexity": "207.325", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55507", "wps": "17740.4", "ups": "5.53", "wpb": "3210.6", "bsz": "45", "num_updates": "329200", "lr": "1.79241e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "62178"} [2023-11-02 10:51:48,394][train_inner][INFO] - {"epoch": 82, "update": 81.218, "loss": "2.813", "ntokens": "3227.48", "nsentences": "42", "prob_perplexity": "209.711", "code_perplexity": "207.102", "temp": "0.5", "loss_0": "2.71", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54043", "wps": "17724.8", "ups": "5.49", "wpb": "3227.5", "bsz": "42", "num_updates": "329400", "lr": "1.78734e-05", "gnorm": "0.94", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "62215"} [2023-11-02 10:52:24,512][train_inner][INFO] - {"epoch": 82, "update": 81.267, "loss": "2.744", "ntokens": "3176.6", "nsentences": "43.72", "prob_perplexity": "210.053", "code_perplexity": "207.469", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55495", "wps": "17591.1", "ups": "5.54", "wpb": "3176.6", "bsz": "43.7", "num_updates": "329600", "lr": "1.78228e-05", "gnorm": "0.939", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "62251"} [2023-11-02 10:53:01,076][train_inner][INFO] - {"epoch": 82, "update": 81.317, "loss": "2.697", "ntokens": "3205.68", "nsentences": "45.36", "prob_perplexity": "210.686", "code_perplexity": "208.067", "temp": "0.5", "loss_0": "2.594", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5636", "wps": "17535.4", "ups": "5.47", "wpb": "3205.7", "bsz": "45.4", "num_updates": "329800", "lr": "1.77722e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "62287"} [2023-11-02 10:53:36,843][train_inner][INFO] - {"epoch": 82, "update": 81.366, "loss": "2.715", "ntokens": "3166.76", "nsentences": "47.08", "prob_perplexity": "210.017", "code_perplexity": "207.388", "temp": "0.5", "loss_0": "2.613", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56378", "wps": "17708.8", "ups": "5.59", "wpb": "3166.8", "bsz": "47.1", "num_updates": "330000", "lr": "1.77215e-05", "gnorm": "0.944", "loss_scale": "2", "train_wall": "35", "gb_free": "13.5", "wall": "62323"} [2023-11-02 10:54:12,975][train_inner][INFO] - {"epoch": 82, "update": 81.415, "loss": "2.75", "ntokens": "3189.56", "nsentences": "44.4", "prob_perplexity": "210.222", "code_perplexity": "207.634", "temp": "0.5", "loss_0": "2.647", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55541", "wps": "17656.1", "ups": "5.54", "wpb": "3189.6", "bsz": "44.4", "num_updates": "330200", "lr": "1.76709e-05", "gnorm": "0.952", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "62359"} [2023-11-02 10:54:49,188][train_inner][INFO] - {"epoch": 82, "update": 81.464, "loss": "2.72", "ntokens": "3137.76", "nsentences": "44.16", "prob_perplexity": "209.599", "code_perplexity": "206.953", "temp": "0.5", "loss_0": "2.617", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56047", "wps": "17330.8", "ups": "5.52", "wpb": "3137.8", "bsz": "44.2", "num_updates": "330400", "lr": "1.76203e-05", "gnorm": "0.943", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "62395"} [2023-11-02 10:55:26,037][train_inner][INFO] - {"epoch": 82, "update": 81.514, "loss": "2.722", "ntokens": "3197.48", "nsentences": "45.48", "prob_perplexity": "210.243", "code_perplexity": "207.626", "temp": "0.5", "loss_0": "2.619", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55954", "wps": "17355.3", "ups": "5.43", "wpb": "3197.5", "bsz": "45.5", "num_updates": "330600", "lr": "1.75696e-05", "gnorm": "0.926", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "62432"} [2023-11-02 10:56:02,412][train_inner][INFO] - {"epoch": 82, "update": 81.563, "loss": "2.792", "ntokens": "3200.76", "nsentences": "43.16", "prob_perplexity": "210.073", "code_perplexity": "207.403", "temp": "0.5", "loss_0": "2.689", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54677", "wps": "17599.8", "ups": "5.5", "wpb": "3200.8", "bsz": "43.2", "num_updates": "330800", "lr": "1.7519e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "62469"} [2023-11-02 10:56:38,630][train_inner][INFO] - {"epoch": 82, "update": 81.612, "loss": "2.733", "ntokens": "3165.84", "nsentences": "44.72", "prob_perplexity": "210.642", "code_perplexity": "208.068", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55891", "wps": "17483.1", "ups": "5.52", "wpb": "3165.8", "bsz": "44.7", "num_updates": "331000", "lr": "1.74684e-05", "gnorm": "0.946", "loss_scale": "4", "train_wall": "36", "gb_free": "13.6", "wall": "62505"} [2023-11-02 10:57:14,956][train_inner][INFO] - {"epoch": 82, "update": 81.662, "loss": "2.757", "ntokens": "3214.04", "nsentences": "44.8", "prob_perplexity": "211.1", "code_perplexity": "208.511", "temp": "0.5", "loss_0": "2.655", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55356", "wps": "17696.6", "ups": "5.51", "wpb": "3214", "bsz": "44.8", "num_updates": "331200", "lr": "1.74177e-05", "gnorm": "0.943", "loss_scale": "4", "train_wall": "36", "gb_free": "14.4", "wall": "62541"} [2023-11-02 10:57:27,955][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 2.0 [2023-11-02 10:57:51,597][train_inner][INFO] - {"epoch": 82, "update": 81.711, "loss": "2.767", "ntokens": "3227.16", "nsentences": "44.68", "prob_perplexity": "210.568", "code_perplexity": "207.976", "temp": "0.5", "loss_0": "2.664", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5507", "wps": "17623.7", "ups": "5.46", "wpb": "3227.2", "bsz": "44.7", "num_updates": "331400", "lr": "1.73671e-05", "gnorm": "0.948", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "62578"} [2023-11-02 10:58:27,483][train_inner][INFO] - {"epoch": 82, "update": 81.761, "loss": "2.721", "ntokens": "3158.44", "nsentences": "45.12", "prob_perplexity": "209.722", "code_perplexity": "207.111", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56091", "wps": "17603.9", "ups": "5.57", "wpb": "3158.4", "bsz": "45.1", "num_updates": "331600", "lr": "1.73165e-05", "gnorm": "0.957", "loss_scale": "2", "train_wall": "35", "gb_free": "13.9", "wall": "62614"} [2023-11-02 10:59:04,240][train_inner][INFO] - {"epoch": 82, "update": 81.81, "loss": "2.781", "ntokens": "3150.08", "nsentences": "42.68", "prob_perplexity": "209.292", "code_perplexity": "206.703", "temp": "0.5", "loss_0": "2.678", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54793", "wps": "17140.8", "ups": "5.44", "wpb": "3150.1", "bsz": "42.7", "num_updates": "331800", "lr": "1.72658e-05", "gnorm": "0.957", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "62650"} [2023-11-02 10:59:40,906][train_inner][INFO] - {"epoch": 82, "update": 81.859, "loss": "2.737", "ntokens": "3206.12", "nsentences": "44.6", "prob_perplexity": "210.496", "code_perplexity": "207.887", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55637", "wps": "17489.6", "ups": "5.46", "wpb": "3206.1", "bsz": "44.6", "num_updates": "332000", "lr": "1.72152e-05", "gnorm": "0.939", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "62687"} [2023-11-02 11:00:17,244][train_inner][INFO] - {"epoch": 82, "update": 81.909, "loss": "2.77", "ntokens": "3179.4", "nsentences": "42.36", "prob_perplexity": "209.959", "code_perplexity": "207.417", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54859", "wps": "17499.7", "ups": "5.5", "wpb": "3179.4", "bsz": "42.4", "num_updates": "332200", "lr": "1.71646e-05", "gnorm": "0.945", "loss_scale": "2", "train_wall": "36", "gb_free": "15", "wall": "62723"} [2023-11-02 11:00:53,716][train_inner][INFO] - {"epoch": 82, "update": 81.958, "loss": "2.762", "ntokens": "3181.28", "nsentences": "41.64", "prob_perplexity": "209.591", "code_perplexity": "207.062", "temp": "0.5", "loss_0": "2.659", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54941", "wps": "17446.4", "ups": "5.48", "wpb": "3181.3", "bsz": "41.6", "num_updates": "332400", "lr": "1.71139e-05", "gnorm": "0.958", "loss_scale": "2", "train_wall": "36", "gb_free": "14", "wall": "62760"} [2023-11-02 11:01:24,757][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 11:01:24,759][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:01:24,779][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 96 [2023-11-02 11:01:50,210][valid][INFO] - {"epoch": 82, "valid_loss": "2.62", "valid_ntokens": "3155.3", "valid_nsentences": "44.1685", "valid_prob_perplexity": "209.307", "valid_code_perplexity": "206.805", "valid_temp": "0.5", "valid_loss_0": "2.517", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.5799", "valid_wps": "56295.2", "valid_wpb": "3155.3", "valid_bsz": "44.2", "valid_num_updates": "332571", "valid_best_loss": "2.62"} [2023-11-02 11:01:50,212][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 82 @ 332571 updates [2023-11-02 11:01:50,214][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 11:01:51,655][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 11:01:52,651][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 82 @ 332571 updates, score 2.62) (writing took 2.4384025167673826 seconds) [2023-11-02 11:01:52,651][fairseq_cli.train][INFO] - end of epoch 82 (average epoch stats below) [2023-11-02 11:01:52,654][train][INFO] - {"epoch": 82, "train_loss": "2.745", "train_ntokens": "3188.15", "train_nsentences": "44.2713", "train_prob_perplexity": "209.988", "train_code_perplexity": "207.376", "train_temp": "0.5", "train_loss_0": "2.643", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.5552", "train_wps": "16896.6", "train_ups": "5.3", "train_wpb": "3188.1", "train_bsz": "44.3", "train_num_updates": "332571", "train_lr": "1.70706e-05", "train_gnorm": "0.943", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "12.6", "train_wall": "62819"} [2023-11-02 11:01:52,656][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:01:52,676][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 83 [2023-11-02 11:01:52,850][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 11:01:52,909][fairseq.trainer][INFO] - begin training epoch 83 [2023-11-02 11:01:52,910][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 11:01:58,413][train_inner][INFO] - {"epoch": 83, "update": 82.007, "loss": "2.758", "ntokens": "3213.24", "nsentences": "43.8", "prob_perplexity": "209.644", "code_perplexity": "207.015", "temp": "0.5", "loss_0": "2.655", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55251", "wps": "9933.4", "ups": "3.09", "wpb": "3213.2", "bsz": "43.8", "num_updates": "332600", "lr": "1.70633e-05", "gnorm": "0.945", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "62825"} [2023-11-02 11:02:34,210][train_inner][INFO] - {"epoch": 83, "update": 82.056, "loss": "2.732", "ntokens": "3163", "nsentences": "44.8", "prob_perplexity": "210.51", "code_perplexity": "207.898", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55845", "wps": "17672.8", "ups": "5.59", "wpb": "3163", "bsz": "44.8", "num_updates": "332800", "lr": "1.70127e-05", "gnorm": "0.943", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "62860"} [2023-11-02 11:03:10,363][train_inner][INFO] - {"epoch": 83, "update": 82.106, "loss": "2.668", "ntokens": "3207.68", "nsentences": "47.68", "prob_perplexity": "211.246", "code_perplexity": "208.58", "temp": "0.5", "loss_0": "2.566", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.57061", "wps": "17746", "ups": "5.53", "wpb": "3207.7", "bsz": "47.7", "num_updates": "333000", "lr": "1.6962e-05", "gnorm": "0.93", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "62897"} [2023-11-02 11:03:46,748][train_inner][INFO] - {"epoch": 83, "update": 82.155, "loss": "2.764", "ntokens": "3221.68", "nsentences": "43", "prob_perplexity": "209.802", "code_perplexity": "207.183", "temp": "0.5", "loss_0": "2.661", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55133", "wps": "17710.2", "ups": "5.5", "wpb": "3221.7", "bsz": "43", "num_updates": "333200", "lr": "1.69114e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "62933"} [2023-11-02 11:04:22,632][train_inner][INFO] - {"epoch": 83, "update": 82.204, "loss": "2.771", "ntokens": "3189.2", "nsentences": "43.68", "prob_perplexity": "209.224", "code_perplexity": "206.606", "temp": "0.5", "loss_0": "2.668", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55075", "wps": "17775.7", "ups": "5.57", "wpb": "3189.2", "bsz": "43.7", "num_updates": "333400", "lr": "1.68608e-05", "gnorm": "0.947", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "62969"} [2023-11-02 11:04:58,444][train_inner][INFO] - {"epoch": 83, "update": 82.254, "loss": "2.725", "ntokens": "3166.44", "nsentences": "44.28", "prob_perplexity": "209.567", "code_perplexity": "206.912", "temp": "0.5", "loss_0": "2.622", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55866", "wps": "17684.9", "ups": "5.59", "wpb": "3166.4", "bsz": "44.3", "num_updates": "333600", "lr": "1.68101e-05", "gnorm": "0.961", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "63005"} [2023-11-02 11:05:34,508][train_inner][INFO] - {"epoch": 83, "update": 82.303, "loss": "2.74", "ntokens": "3208.88", "nsentences": "44.8", "prob_perplexity": "209.694", "code_perplexity": "207.117", "temp": "0.5", "loss_0": "2.637", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55564", "wps": "17796.3", "ups": "5.55", "wpb": "3208.9", "bsz": "44.8", "num_updates": "333800", "lr": "1.67595e-05", "gnorm": "0.947", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "63041"} [2023-11-02 11:06:10,759][train_inner][INFO] - {"epoch": 83, "update": 82.352, "loss": "2.732", "ntokens": "3204.76", "nsentences": "44.8", "prob_perplexity": "210.652", "code_perplexity": "208.053", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55845", "wps": "17682.1", "ups": "5.52", "wpb": "3204.8", "bsz": "44.8", "num_updates": "334000", "lr": "1.67089e-05", "gnorm": "0.941", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "63077"} [2023-11-02 11:06:47,050][train_inner][INFO] - {"epoch": 83, "update": 82.402, "loss": "2.763", "ntokens": "3196.08", "nsentences": "44", "prob_perplexity": "211.4", "code_perplexity": "208.76", "temp": "0.5", "loss_0": "2.661", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55166", "wps": "17614.9", "ups": "5.51", "wpb": "3196.1", "bsz": "44", "num_updates": "334200", "lr": "1.66582e-05", "gnorm": "0.938", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "63113"} [2023-11-02 11:07:23,560][train_inner][INFO] - {"epoch": 83, "update": 82.451, "loss": "2.759", "ntokens": "3203.6", "nsentences": "43.2", "prob_perplexity": "211.31", "code_perplexity": "208.752", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55248", "wps": "17550.3", "ups": "5.48", "wpb": "3203.6", "bsz": "43.2", "num_updates": "334400", "lr": "1.66076e-05", "gnorm": "0.936", "loss_scale": "2", "train_wall": "36", "gb_free": "15.2", "wall": "63150"} [2023-11-02 11:07:59,679][train_inner][INFO] - {"epoch": 83, "update": 82.5, "loss": "2.737", "ntokens": "3167.04", "nsentences": "43.48", "prob_perplexity": "209.637", "code_perplexity": "207.035", "temp": "0.5", "loss_0": "2.634", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55584", "wps": "17537.7", "ups": "5.54", "wpb": "3167", "bsz": "43.5", "num_updates": "334600", "lr": "1.6557e-05", "gnorm": "0.966", "loss_scale": "2", "train_wall": "35", "gb_free": "12.5", "wall": "63186"} [2023-11-02 11:08:36,377][train_inner][INFO] - {"epoch": 83, "update": 82.55, "loss": "2.766", "ntokens": "3188.44", "nsentences": "44.72", "prob_perplexity": "210.882", "code_perplexity": "208.262", "temp": "0.5", "loss_0": "2.664", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55186", "wps": "17377.6", "ups": "5.45", "wpb": "3188.4", "bsz": "44.7", "num_updates": "334800", "lr": "1.65063e-05", "gnorm": "0.941", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "63223"} [2023-11-02 11:09:12,957][train_inner][INFO] - {"epoch": 83, "update": 82.599, "loss": "2.729", "ntokens": "3228.96", "nsentences": "45.64", "prob_perplexity": "210.67", "code_perplexity": "208.106", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55872", "wps": "17655.3", "ups": "5.47", "wpb": "3229", "bsz": "45.6", "num_updates": "335000", "lr": "1.64557e-05", "gnorm": "0.932", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "63259"} [2023-11-02 11:09:48,992][train_inner][INFO] - {"epoch": 83, "update": 82.648, "loss": "2.829", "ntokens": "3221.88", "nsentences": "41.16", "prob_perplexity": "209.827", "code_perplexity": "207.166", "temp": "0.5", "loss_0": "2.726", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.53776", "wps": "17883", "ups": "5.55", "wpb": "3221.9", "bsz": "41.2", "num_updates": "335200", "lr": "1.64051e-05", "gnorm": "0.946", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "63295"} [2023-11-02 11:10:24,922][train_inner][INFO] - {"epoch": 83, "update": 82.697, "loss": "2.782", "ntokens": "3181.44", "nsentences": "44.2", "prob_perplexity": "210.139", "code_perplexity": "207.477", "temp": "0.5", "loss_0": "2.68", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5494", "wps": "17709.9", "ups": "5.57", "wpb": "3181.4", "bsz": "44.2", "num_updates": "335400", "lr": "1.63544e-05", "gnorm": "0.943", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "63331"} [2023-11-02 11:11:01,852][train_inner][INFO] - {"epoch": 83, "update": 82.747, "loss": "2.731", "ntokens": "3248.16", "nsentences": "45.44", "prob_perplexity": "210.355", "code_perplexity": "207.75", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55796", "wps": "17592", "ups": "5.42", "wpb": "3248.2", "bsz": "45.4", "num_updates": "335600", "lr": "1.63038e-05", "gnorm": "0.933", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "63368"} [2023-11-02 11:11:37,949][train_inner][INFO] - {"epoch": 83, "update": 82.796, "loss": "2.688", "ntokens": "3184.6", "nsentences": "46.44", "prob_perplexity": "209.905", "code_perplexity": "207.287", "temp": "0.5", "loss_0": "2.585", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56729", "wps": "17653.6", "ups": "5.54", "wpb": "3184.6", "bsz": "46.4", "num_updates": "335800", "lr": "1.62532e-05", "gnorm": "0.941", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "63404"} [2023-11-02 11:12:14,265][train_inner][INFO] - {"epoch": 83, "update": 82.845, "loss": "2.759", "ntokens": "3150.52", "nsentences": "43.24", "prob_perplexity": "211.123", "code_perplexity": "208.502", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55211", "wps": "17351.9", "ups": "5.51", "wpb": "3150.5", "bsz": "43.2", "num_updates": "336000", "lr": "1.62025e-05", "gnorm": "0.964", "loss_scale": "2", "train_wall": "36", "gb_free": "12.2", "wall": "63440"} [2023-11-02 11:12:50,421][train_inner][INFO] - {"epoch": 83, "update": 82.895, "loss": "2.746", "ntokens": "3167.44", "nsentences": "44.4", "prob_perplexity": "210.306", "code_perplexity": "207.668", "temp": "0.5", "loss_0": "2.644", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55479", "wps": "17522", "ups": "5.53", "wpb": "3167.4", "bsz": "44.4", "num_updates": "336200", "lr": "1.61519e-05", "gnorm": "0.953", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "63477"} [2023-11-02 11:13:26,543][train_inner][INFO] - {"epoch": 83, "update": 82.944, "loss": "2.793", "ntokens": "3167.24", "nsentences": "40.52", "prob_perplexity": "210.977", "code_perplexity": "208.358", "temp": "0.5", "loss_0": "2.691", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54468", "wps": "17537", "ups": "5.54", "wpb": "3167.2", "bsz": "40.5", "num_updates": "336400", "lr": "1.61013e-05", "gnorm": "0.958", "loss_scale": "2", "train_wall": "35", "gb_free": "12.8", "wall": "63513"} [2023-11-02 11:14:02,916][train_inner][INFO] - {"epoch": 83, "update": 82.993, "loss": "2.676", "ntokens": "3160.12", "nsentences": "47.24", "prob_perplexity": "211.604", "code_perplexity": "209.002", "temp": "0.5", "loss_0": "2.574", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5698", "wps": "17377.3", "ups": "5.5", "wpb": "3160.1", "bsz": "47.2", "num_updates": "336600", "lr": "1.60506e-05", "gnorm": "0.939", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "63549"} [2023-11-02 11:14:07,705][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 11:14:07,707][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:14:07,724][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 97 [2023-11-02 11:14:33,208][valid][INFO] - {"epoch": 83, "valid_loss": "2.636", "valid_ntokens": "3163.67", "valid_nsentences": "44.1685", "valid_prob_perplexity": "210.717", "valid_code_perplexity": "208.196", "valid_temp": "0.5", "valid_loss_0": "2.534", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.57708", "valid_wps": "56343.6", "valid_wpb": "3163.7", "valid_bsz": "44.2", "valid_num_updates": "336627", "valid_best_loss": "2.62"} [2023-11-02 11:14:33,210][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 83 @ 336627 updates [2023-11-02 11:14:33,212][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 11:14:34,623][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 11:14:34,675][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 83 @ 336627 updates, score 2.636) (writing took 1.465474006254226 seconds) [2023-11-02 11:14:34,676][fairseq_cli.train][INFO] - end of epoch 83 (average epoch stats below) [2023-11-02 11:14:34,678][train][INFO] - {"epoch": 83, "train_loss": "2.746", "train_ntokens": "3191.6", "train_nsentences": "44.2682", "train_prob_perplexity": "210.44", "train_code_perplexity": "207.822", "train_temp": "0.5", "train_loss_0": "2.644", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55507", "train_wps": "16987.9", "train_ups": "5.32", "train_wpb": "3191.6", "train_bsz": "44.3", "train_num_updates": "336627", "train_lr": "1.60438e-05", "train_gnorm": "0.945", "train_loss_scale": "2", "train_train_wall": "722", "train_gb_free": "13.7", "train_wall": "63581"} [2023-11-02 11:14:34,680][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:14:34,699][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 84 [2023-11-02 11:14:34,864][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 11:14:34,924][fairseq.trainer][INFO] - begin training epoch 84 [2023-11-02 11:14:34,925][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 11:15:05,619][train_inner][INFO] - {"epoch": 84, "update": 83.043, "loss": "2.803", "ntokens": "3191.4", "nsentences": "42.72", "prob_perplexity": "210.959", "code_perplexity": "208.28", "temp": "0.5", "loss_0": "2.7", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54602", "wps": "10179.8", "ups": "3.19", "wpb": "3191.4", "bsz": "42.7", "num_updates": "336800", "lr": "1.6e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "35", "gb_free": "14.8", "wall": "63612"} [2023-11-02 11:15:41,531][train_inner][INFO] - {"epoch": 84, "update": 83.092, "loss": "2.741", "ntokens": "3200.56", "nsentences": "43.28", "prob_perplexity": "210.598", "code_perplexity": "207.953", "temp": "0.5", "loss_0": "2.639", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55442", "wps": "17825.3", "ups": "5.57", "wpb": "3200.6", "bsz": "43.3", "num_updates": "337000", "lr": "1.59494e-05", "gnorm": "0.935", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "63648"} [2023-11-02 11:16:17,696][train_inner][INFO] - {"epoch": 84, "update": 83.141, "loss": "2.729", "ntokens": "3215.28", "nsentences": "45.48", "prob_perplexity": "211.141", "code_perplexity": "208.565", "temp": "0.5", "loss_0": "2.627", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5586", "wps": "17782.3", "ups": "5.53", "wpb": "3215.3", "bsz": "45.5", "num_updates": "337200", "lr": "1.58987e-05", "gnorm": "0.941", "loss_scale": "2", "train_wall": "36", "gb_free": "14.5", "wall": "63684"} [2023-11-02 11:16:53,849][train_inner][INFO] - {"epoch": 84, "update": 83.191, "loss": "2.738", "ntokens": "3163.4", "nsentences": "44.6", "prob_perplexity": "211.021", "code_perplexity": "208.33", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55722", "wps": "17501.3", "ups": "5.53", "wpb": "3163.4", "bsz": "44.6", "num_updates": "337400", "lr": "1.58481e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "63720"} [2023-11-02 11:17:30,211][train_inner][INFO] - {"epoch": 84, "update": 83.24, "loss": "2.765", "ntokens": "3221.04", "nsentences": "44.48", "prob_perplexity": "210.293", "code_perplexity": "207.675", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55176", "wps": "17717.7", "ups": "5.5", "wpb": "3221", "bsz": "44.5", "num_updates": "337600", "lr": "1.57975e-05", "gnorm": "0.94", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "63756"} [2023-11-02 11:18:06,959][train_inner][INFO] - {"epoch": 84, "update": 83.289, "loss": "2.795", "ntokens": "3189.88", "nsentences": "43.32", "prob_perplexity": "210.437", "code_perplexity": "207.788", "temp": "0.5", "loss_0": "2.692", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54584", "wps": "17362", "ups": "5.44", "wpb": "3189.9", "bsz": "43.3", "num_updates": "337800", "lr": "1.57468e-05", "gnorm": "0.945", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "63793"} [2023-11-02 11:18:43,428][train_inner][INFO] - {"epoch": 84, "update": 83.339, "loss": "2.779", "ntokens": "3202.08", "nsentences": "42.92", "prob_perplexity": "210.696", "code_perplexity": "208.071", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5486", "wps": "17561.6", "ups": "5.48", "wpb": "3202.1", "bsz": "42.9", "num_updates": "338000", "lr": "1.56962e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "36", "gb_free": "13.9", "wall": "63830"} [2023-11-02 11:19:19,748][train_inner][INFO] - {"epoch": 84, "update": 83.388, "loss": "2.744", "ntokens": "3194.52", "nsentences": "44.64", "prob_perplexity": "212.028", "code_perplexity": "209.389", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.55596", "wps": "17592.1", "ups": "5.51", "wpb": "3194.5", "bsz": "44.6", "num_updates": "338200", "lr": "1.56456e-05", "gnorm": "0.946", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "63866"} [2023-11-02 11:19:55,576][train_inner][INFO] - {"epoch": 84, "update": 83.437, "loss": "2.655", "ntokens": "3130.28", "nsentences": "47.28", "prob_perplexity": "211.132", "code_perplexity": "208.516", "temp": "0.5", "loss_0": "2.552", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.57399", "wps": "17474.8", "ups": "5.58", "wpb": "3130.3", "bsz": "47.3", "num_updates": "338400", "lr": "1.55949e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "35", "gb_free": "14.1", "wall": "63902"} [2023-11-02 11:20:31,697][train_inner][INFO] - {"epoch": 84, "update": 83.486, "loss": "2.718", "ntokens": "3177.68", "nsentences": "44.4", "prob_perplexity": "209.976", "code_perplexity": "207.423", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55952", "wps": "17595.7", "ups": "5.54", "wpb": "3177.7", "bsz": "44.4", "num_updates": "338600", "lr": "1.55443e-05", "gnorm": "0.952", "loss_scale": "2", "train_wall": "35", "gb_free": "12.6", "wall": "63938"} [2023-11-02 11:21:08,236][train_inner][INFO] - {"epoch": 84, "update": 83.536, "loss": "2.71", "ntokens": "3204.16", "nsentences": "45.36", "prob_perplexity": "211.243", "code_perplexity": "208.654", "temp": "0.5", "loss_0": "2.607", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56156", "wps": "17539.4", "ups": "5.47", "wpb": "3204.2", "bsz": "45.4", "num_updates": "338800", "lr": "1.54937e-05", "gnorm": "0.949", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "63974"} [2023-11-02 11:21:44,991][train_inner][INFO] - {"epoch": 84, "update": 83.585, "loss": "2.773", "ntokens": "3196.44", "nsentences": "43.24", "prob_perplexity": "211.431", "code_perplexity": "208.849", "temp": "0.5", "loss_0": "2.67", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55", "wps": "17394.2", "ups": "5.44", "wpb": "3196.4", "bsz": "43.2", "num_updates": "339000", "lr": "1.5443e-05", "gnorm": "0.954", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "64011"} [2023-11-02 11:22:21,214][train_inner][INFO] - {"epoch": 84, "update": 83.634, "loss": "2.75", "ntokens": "3205.68", "nsentences": "44.32", "prob_perplexity": "211.373", "code_perplexity": "208.777", "temp": "0.5", "loss_0": "2.647", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55486", "wps": "17701", "ups": "5.52", "wpb": "3205.7", "bsz": "44.3", "num_updates": "339200", "lr": "1.53924e-05", "gnorm": "0.943", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "64047"} [2023-11-02 11:22:57,392][train_inner][INFO] - {"epoch": 84, "update": 83.684, "loss": "2.735", "ntokens": "3198", "nsentences": "43.88", "prob_perplexity": "210.688", "code_perplexity": "208.12", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5556", "wps": "17679.8", "ups": "5.53", "wpb": "3198", "bsz": "43.9", "num_updates": "339400", "lr": "1.53418e-05", "gnorm": "0.962", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "64084"} [2023-11-02 11:23:33,706][train_inner][INFO] - {"epoch": 84, "update": 83.733, "loss": "2.712", "ntokens": "3198.24", "nsentences": "45.84", "prob_perplexity": "210.992", "code_perplexity": "208.385", "temp": "0.5", "loss_0": "2.61", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56246", "wps": "17615.5", "ups": "5.51", "wpb": "3198.2", "bsz": "45.8", "num_updates": "339600", "lr": "1.52911e-05", "gnorm": "0.953", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "64120"} [2023-11-02 11:24:11,097][train_inner][INFO] - {"epoch": 84, "update": 83.782, "loss": "2.727", "ntokens": "3197.56", "nsentences": "45.4", "prob_perplexity": "212.143", "code_perplexity": "209.542", "temp": "0.5", "loss_0": "2.625", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.55997", "wps": "17104.1", "ups": "5.35", "wpb": "3197.6", "bsz": "45.4", "num_updates": "339800", "lr": "1.52405e-05", "gnorm": "0.939", "loss_scale": "2", "train_wall": "37", "gb_free": "13.5", "wall": "64157"} [2023-11-02 11:24:47,532][train_inner][INFO] - {"epoch": 84, "update": 83.832, "loss": "2.769", "ntokens": "3218.24", "nsentences": "42.76", "prob_perplexity": "210.887", "code_perplexity": "208.208", "temp": "0.5", "loss_0": "2.666", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54998", "wps": "17666.7", "ups": "5.49", "wpb": "3218.2", "bsz": "42.8", "num_updates": "340000", "lr": "1.51899e-05", "gnorm": "0.948", "loss_scale": "2", "train_wall": "36", "gb_free": "13.2", "wall": "64194"} [2023-11-02 11:25:24,395][train_inner][INFO] - {"epoch": 84, "update": 83.881, "loss": "2.737", "ntokens": "3202.28", "nsentences": "44.08", "prob_perplexity": "212.132", "code_perplexity": "209.555", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.55572", "wps": "17374.9", "ups": "5.43", "wpb": "3202.3", "bsz": "44.1", "num_updates": "340200", "lr": "1.51392e-05", "gnorm": "0.951", "loss_scale": "2", "train_wall": "36", "gb_free": "14.9", "wall": "64231"} [2023-11-02 11:26:00,641][train_inner][INFO] - {"epoch": 84, "update": 83.93, "loss": "2.691", "ntokens": "3178.04", "nsentences": "45.24", "prob_perplexity": "211.4", "code_perplexity": "208.746", "temp": "0.5", "loss_0": "2.589", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56472", "wps": "17537.1", "ups": "5.52", "wpb": "3178", "bsz": "45.2", "num_updates": "340400", "lr": "1.50886e-05", "gnorm": "0.958", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "64267"} [2023-11-02 11:26:36,703][train_inner][INFO] - {"epoch": 84, "update": 83.98, "loss": "2.736", "ntokens": "3175.12", "nsentences": "42.96", "prob_perplexity": "210.68", "code_perplexity": "208.051", "temp": "0.5", "loss_0": "2.633", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.5553", "wps": "17610.5", "ups": "5.55", "wpb": "3175.1", "bsz": "43", "num_updates": "340600", "lr": "1.5038e-05", "gnorm": "0.96", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "64303"} [2023-11-02 11:26:51,690][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 11:26:51,692][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:26:51,710][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 98 [2023-11-02 11:27:17,383][valid][INFO] - {"epoch": 84, "valid_loss": "2.628", "valid_ntokens": "3168.5", "valid_nsentences": "44.1685", "valid_prob_perplexity": "210.264", "valid_code_perplexity": "207.736", "valid_temp": "0.5", "valid_loss_0": "2.526", "valid_loss_1": "0.097", "valid_loss_2": "0.006", "valid_accuracy": "0.5783", "valid_wps": "56037.4", "valid_wpb": "3168.5", "valid_bsz": "44.2", "valid_num_updates": "340683", "valid_best_loss": "2.62"} [2023-11-02 11:27:17,385][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 84 @ 340683 updates [2023-11-02 11:27:17,387][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 11:27:18,800][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 11:27:18,868][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 84 @ 340683 updates, score 2.628) (writing took 1.4826455740258098 seconds) [2023-11-02 11:27:18,869][fairseq_cli.train][INFO] - end of epoch 84 (average epoch stats below) [2023-11-02 11:27:18,871][train][INFO] - {"epoch": 84, "train_loss": "2.741", "train_ntokens": "3193.61", "train_nsentences": "44.2682", "train_prob_perplexity": "211.081", "train_code_perplexity": "208.463", "train_temp": "0.5", "train_loss_0": "2.639", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.5559", "train_wps": "16950.3", "train_ups": "5.31", "train_wpb": "3193.6", "train_bsz": "44.3", "train_num_updates": "340683", "train_lr": "1.5017e-05", "train_gnorm": "0.949", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "13.8", "train_wall": "64345"} [2023-11-02 11:27:18,873][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:27:18,892][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 85 [2023-11-02 11:27:19,063][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 11:27:19,127][fairseq.trainer][INFO] - begin training epoch 85 [2023-11-02 11:27:19,128][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 11:27:40,214][train_inner][INFO] - {"epoch": 85, "update": 84.029, "loss": "2.803", "ntokens": "3195.48", "nsentences": "41.44", "prob_perplexity": "211.306", "code_perplexity": "208.683", "temp": "0.5", "loss_0": "2.701", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54257", "wps": "10063.1", "ups": "3.15", "wpb": "3195.5", "bsz": "41.4", "num_updates": "340800", "lr": "1.49873e-05", "gnorm": "0.967", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "64366"} [2023-11-02 11:28:16,075][train_inner][INFO] - {"epoch": 85, "update": 84.078, "loss": "2.721", "ntokens": "3179.28", "nsentences": "44.56", "prob_perplexity": "211.299", "code_perplexity": "208.656", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55985", "wps": "17732.2", "ups": "5.58", "wpb": "3179.3", "bsz": "44.6", "num_updates": "341000", "lr": "1.49367e-05", "gnorm": "0.952", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "64402"} [2023-11-02 11:28:52,131][train_inner][INFO] - {"epoch": 85, "update": 84.127, "loss": "2.779", "ntokens": "3226.88", "nsentences": "42.68", "prob_perplexity": "211.451", "code_perplexity": "208.799", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54762", "wps": "17900.1", "ups": "5.55", "wpb": "3226.9", "bsz": "42.7", "num_updates": "341200", "lr": "1.48861e-05", "gnorm": "0.946", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "64438"} [2023-11-02 11:29:27,661][train_inner][INFO] - {"epoch": 85, "update": 84.177, "loss": "2.69", "ntokens": "3158.2", "nsentences": "46.6", "prob_perplexity": "212.201", "code_perplexity": "209.613", "temp": "0.5", "loss_0": "2.588", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.56777", "wps": "17779.1", "ups": "5.63", "wpb": "3158.2", "bsz": "46.6", "num_updates": "341400", "lr": "1.48354e-05", "gnorm": "0.944", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "64474"} [2023-11-02 11:30:03,606][train_inner][INFO] - {"epoch": 85, "update": 84.226, "loss": "2.753", "ntokens": "3187.28", "nsentences": "43.4", "prob_perplexity": "211.069", "code_perplexity": "208.414", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55291", "wps": "17735.4", "ups": "5.56", "wpb": "3187.3", "bsz": "43.4", "num_updates": "341600", "lr": "1.47848e-05", "gnorm": "0.96", "loss_scale": "2", "train_wall": "35", "gb_free": "14.7", "wall": "64510"} [2023-11-02 11:30:39,273][train_inner][INFO] - {"epoch": 85, "update": 84.275, "loss": "2.72", "ntokens": "3138.76", "nsentences": "43.72", "prob_perplexity": "210.67", "code_perplexity": "208.004", "temp": "0.5", "loss_0": "2.617", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56017", "wps": "17601.2", "ups": "5.61", "wpb": "3138.8", "bsz": "43.7", "num_updates": "341800", "lr": "1.47342e-05", "gnorm": "0.958", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "64546"} [2023-11-02 11:31:15,655][train_inner][INFO] - {"epoch": 85, "update": 84.325, "loss": "2.788", "ntokens": "3160.8", "nsentences": "41", "prob_perplexity": "211.147", "code_perplexity": "208.514", "temp": "0.5", "loss_0": "2.686", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54495", "wps": "17376.7", "ups": "5.5", "wpb": "3160.8", "bsz": "41", "num_updates": "342000", "lr": "1.46835e-05", "gnorm": "0.968", "loss_scale": "2", "train_wall": "36", "gb_free": "15.8", "wall": "64582"} [2023-11-02 11:31:51,649][train_inner][INFO] - {"epoch": 85, "update": 84.374, "loss": "2.762", "ntokens": "3228.96", "nsentences": "44.72", "prob_perplexity": "210.777", "code_perplexity": "208.213", "temp": "0.5", "loss_0": "2.66", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55264", "wps": "17942.6", "ups": "5.56", "wpb": "3229", "bsz": "44.7", "num_updates": "342200", "lr": "1.46329e-05", "gnorm": "0.954", "loss_scale": "2", "train_wall": "35", "gb_free": "15.7", "wall": "64618"} [2023-11-02 11:32:28,444][train_inner][INFO] - {"epoch": 85, "update": 84.423, "loss": "2.691", "ntokens": "3185.64", "nsentences": "45.48", "prob_perplexity": "211.429", "code_perplexity": "208.838", "temp": "0.5", "loss_0": "2.589", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56469", "wps": "17316.7", "ups": "5.44", "wpb": "3185.6", "bsz": "45.5", "num_updates": "342400", "lr": "1.45823e-05", "gnorm": "0.951", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "64655"} [2023-11-02 11:32:46,743][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2023-11-02 11:33:05,114][train_inner][INFO] - {"epoch": 85, "update": 84.473, "loss": "2.662", "ntokens": "3178", "nsentences": "46.12", "prob_perplexity": "211.844", "code_perplexity": "209.202", "temp": "0.5", "loss_0": "2.56", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.57109", "wps": "17334", "ups": "5.45", "wpb": "3178", "bsz": "46.1", "num_updates": "342600", "lr": "1.45316e-05", "gnorm": "0.936", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "64691"} [2023-11-02 11:33:41,867][train_inner][INFO] - {"epoch": 85, "update": 84.522, "loss": "2.805", "ntokens": "3267.68", "nsentences": "41.76", "prob_perplexity": "212.768", "code_perplexity": "210.119", "temp": "0.5", "loss_0": "2.703", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.54184", "wps": "17783", "ups": "5.44", "wpb": "3267.7", "bsz": "41.8", "num_updates": "342800", "lr": "1.4481e-05", "gnorm": "0.948", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "64728"} [2023-11-02 11:34:17,824][train_inner][INFO] - {"epoch": 85, "update": 84.571, "loss": "2.704", "ntokens": "3130.6", "nsentences": "45.12", "prob_perplexity": "210.676", "code_perplexity": "208.075", "temp": "0.5", "loss_0": "2.602", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56389", "wps": "17414", "ups": "5.56", "wpb": "3130.6", "bsz": "45.1", "num_updates": "343000", "lr": "1.44304e-05", "gnorm": "0.967", "loss_scale": "1", "train_wall": "35", "gb_free": "14.9", "wall": "64764"} [2023-11-02 11:34:53,913][train_inner][INFO] - {"epoch": 85, "update": 84.621, "loss": "2.669", "ntokens": "3119.56", "nsentences": "45.56", "prob_perplexity": "211.414", "code_perplexity": "208.816", "temp": "0.5", "loss_0": "2.567", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56938", "wps": "17289.1", "ups": "5.54", "wpb": "3119.6", "bsz": "45.6", "num_updates": "343200", "lr": "1.43797e-05", "gnorm": "0.954", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "64800"} [2023-11-02 11:35:29,850][train_inner][INFO] - {"epoch": 85, "update": 84.67, "loss": "2.712", "ntokens": "3191.24", "nsentences": "45", "prob_perplexity": "211.164", "code_perplexity": "208.511", "temp": "0.5", "loss_0": "2.61", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56107", "wps": "17761.3", "ups": "5.57", "wpb": "3191.2", "bsz": "45", "num_updates": "343400", "lr": "1.43291e-05", "gnorm": "0.952", "loss_scale": "1", "train_wall": "35", "gb_free": "15", "wall": "64836"} [2023-11-02 11:36:06,236][train_inner][INFO] - {"epoch": 85, "update": 84.719, "loss": "2.769", "ntokens": "3208.16", "nsentences": "43.48", "prob_perplexity": "211.68", "code_perplexity": "209.072", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55089", "wps": "17635.2", "ups": "5.5", "wpb": "3208.2", "bsz": "43.5", "num_updates": "343600", "lr": "1.42785e-05", "gnorm": "0.948", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "64872"} [2023-11-02 11:36:42,300][train_inner][INFO] - {"epoch": 85, "update": 84.769, "loss": "2.752", "ntokens": "3190.6", "nsentences": "44.28", "prob_perplexity": "212.139", "code_perplexity": "209.528", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.55443", "wps": "17695.2", "ups": "5.55", "wpb": "3190.6", "bsz": "44.3", "num_updates": "343800", "lr": "1.42278e-05", "gnorm": "0.956", "loss_scale": "1", "train_wall": "35", "gb_free": "12.9", "wall": "64909"} [2023-11-02 11:37:18,244][train_inner][INFO] - {"epoch": 85, "update": 84.818, "loss": "2.721", "ntokens": "3139.96", "nsentences": "43.76", "prob_perplexity": "210.958", "code_perplexity": "208.278", "temp": "0.5", "loss_0": "2.619", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55956", "wps": "17472.3", "ups": "5.56", "wpb": "3140", "bsz": "43.8", "num_updates": "344000", "lr": "1.41772e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "35", "gb_free": "14.9", "wall": "64944"} [2023-11-02 11:37:54,527][train_inner][INFO] - {"epoch": 85, "update": 84.867, "loss": "2.718", "ntokens": "3184.96", "nsentences": "45.32", "prob_perplexity": "211.435", "code_perplexity": "208.834", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56139", "wps": "17557.3", "ups": "5.51", "wpb": "3185", "bsz": "45.3", "num_updates": "344200", "lr": "1.41266e-05", "gnorm": "0.949", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "64981"} [2023-11-02 11:38:30,739][train_inner][INFO] - {"epoch": 85, "update": 84.917, "loss": "2.718", "ntokens": "3218.68", "nsentences": "45.72", "prob_perplexity": "211.523", "code_perplexity": "208.944", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56117", "wps": "17786.1", "ups": "5.53", "wpb": "3218.7", "bsz": "45.7", "num_updates": "344400", "lr": "1.40759e-05", "gnorm": "0.945", "loss_scale": "1", "train_wall": "36", "gb_free": "12.8", "wall": "65017"} [2023-11-02 11:39:07,524][train_inner][INFO] - {"epoch": 85, "update": 84.966, "loss": "2.725", "ntokens": "3213.76", "nsentences": "44.8", "prob_perplexity": "211.02", "code_perplexity": "208.383", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55915", "wps": "17474", "ups": "5.44", "wpb": "3213.8", "bsz": "44.8", "num_updates": "344600", "lr": "1.40253e-05", "gnorm": "0.955", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "65054"} [2023-11-02 11:39:32,761][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 11:39:32,763][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:39:32,782][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 99 [2023-11-02 11:39:58,274][valid][INFO] - {"epoch": 85, "valid_loss": "2.611", "valid_ntokens": "3158.35", "valid_nsentences": "44.1685", "valid_prob_perplexity": "211.807", "valid_code_perplexity": "209.339", "valid_temp": "0.5", "valid_loss_0": "2.509", "valid_loss_1": "0.096", "valid_loss_2": "0.006", "valid_accuracy": "0.5815", "valid_wps": "56240.7", "valid_wpb": "3158.4", "valid_bsz": "44.2", "valid_num_updates": "344738", "valid_best_loss": "2.611"} [2023-11-02 11:39:58,276][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 85 @ 344738 updates [2023-11-02 11:39:58,278][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 11:39:59,695][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 11:40:00,711][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 85 @ 344738 updates, score 2.611) (writing took 2.4350225208327174 seconds) [2023-11-02 11:40:00,712][fairseq_cli.train][INFO] - end of epoch 85 (average epoch stats below) [2023-11-02 11:40:00,714][train][INFO] - {"epoch": 85, "train_loss": "2.733", "train_ntokens": "3185.74", "train_nsentences": "44.2713", "train_prob_perplexity": "211.378", "train_code_perplexity": "208.757", "train_temp": "0.5", "train_loss_0": "2.631", "train_loss_1": "0.097", "train_loss_2": "0.006", "train_accuracy": "0.55739", "train_wps": "16956.5", "train_ups": "5.32", "train_wpb": "3185.7", "train_bsz": "44.3", "train_num_updates": "344738", "train_lr": "1.39904e-05", "train_gnorm": "0.954", "train_loss_scale": "1", "train_train_wall": "721", "train_gb_free": "13.7", "train_wall": "65107"} [2023-11-02 11:40:00,718][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:40:00,739][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 86 [2023-11-02 11:40:00,924][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 11:40:00,986][fairseq.trainer][INFO] - begin training epoch 86 [2023-11-02 11:40:00,986][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 11:40:12,219][train_inner][INFO] - {"epoch": 86, "update": 85.015, "loss": "2.81", "ntokens": "3234.04", "nsentences": "41.96", "prob_perplexity": "211.25", "code_perplexity": "208.707", "temp": "0.5", "loss_0": "2.708", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.54146", "wps": "9998.2", "ups": "3.09", "wpb": "3234", "bsz": "42", "num_updates": "344800", "lr": "1.39747e-05", "gnorm": "0.959", "loss_scale": "1", "train_wall": "36", "gb_free": "14.8", "wall": "65118"} [2023-11-02 11:40:47,905][train_inner][INFO] - {"epoch": 86, "update": 85.065, "loss": "2.701", "ntokens": "3177.32", "nsentences": "46.44", "prob_perplexity": "212.661", "code_perplexity": "210.073", "temp": "0.5", "loss_0": "2.599", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.56462", "wps": "17808.2", "ups": "5.6", "wpb": "3177.3", "bsz": "46.4", "num_updates": "345000", "lr": "1.39241e-05", "gnorm": "0.943", "loss_scale": "1", "train_wall": "35", "gb_free": "14.1", "wall": "65154"} [2023-11-02 11:41:23,966][train_inner][INFO] - {"epoch": 86, "update": 85.114, "loss": "2.704", "ntokens": "3180.6", "nsentences": "45.2", "prob_perplexity": "210.976", "code_perplexity": "208.395", "temp": "0.5", "loss_0": "2.602", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.56188", "wps": "17641", "ups": "5.55", "wpb": "3180.6", "bsz": "45.2", "num_updates": "345200", "lr": "1.38734e-05", "gnorm": "0.962", "loss_scale": "1", "train_wall": "35", "gb_free": "15.4", "wall": "65190"} [2023-11-02 11:41:59,705][train_inner][INFO] - {"epoch": 86, "update": 85.163, "loss": "2.81", "ntokens": "3187.68", "nsentences": "42.44", "prob_perplexity": "212.669", "code_perplexity": "210.003", "temp": "0.5", "loss_0": "2.708", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.54277", "wps": "17839.8", "ups": "5.6", "wpb": "3187.7", "bsz": "42.4", "num_updates": "345400", "lr": "1.38228e-05", "gnorm": "0.969", "loss_scale": "1", "train_wall": "35", "gb_free": "14.8", "wall": "65226"} [2023-11-02 11:42:35,610][train_inner][INFO] - {"epoch": 86, "update": 85.213, "loss": "2.734", "ntokens": "3163.76", "nsentences": "44.04", "prob_perplexity": "211.557", "code_perplexity": "208.893", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.097", "loss_2": "0.006", "accuracy": "0.55797", "wps": "17623.8", "ups": "5.57", "wpb": "3163.8", "bsz": "44", "num_updates": "345600", "lr": "1.37722e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "65262"} [2023-11-02 11:43:11,649][train_inner][INFO] - {"epoch": 86, "update": 85.262, "loss": "2.694", "ntokens": "3191.84", "nsentences": "45.84", "prob_perplexity": "212.309", "code_perplexity": "209.706", "temp": "0.5", "loss_0": "2.592", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.56474", "wps": "17714.3", "ups": "5.55", "wpb": "3191.8", "bsz": "45.8", "num_updates": "345800", "lr": "1.37215e-05", "gnorm": "0.95", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "65298"} [2023-11-02 11:43:47,920][train_inner][INFO] - {"epoch": 86, "update": 85.311, "loss": "2.786", "ntokens": "3197.6", "nsentences": "42.44", "prob_perplexity": "212.549", "code_perplexity": "209.938", "temp": "0.5", "loss_0": "2.684", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.54664", "wps": "17632.7", "ups": "5.51", "wpb": "3197.6", "bsz": "42.4", "num_updates": "346000", "lr": "1.36709e-05", "gnorm": "0.958", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "65334"} [2023-11-02 11:44:24,203][train_inner][INFO] - {"epoch": 86, "update": 85.36, "loss": "2.731", "ntokens": "3197", "nsentences": "44.2", "prob_perplexity": "212.412", "code_perplexity": "209.757", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.55885", "wps": "17623.9", "ups": "5.51", "wpb": "3197", "bsz": "44.2", "num_updates": "346200", "lr": "1.36203e-05", "gnorm": "0.941", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "65370"} [2023-11-02 11:45:00,813][train_inner][INFO] - {"epoch": 86, "update": 85.41, "loss": "2.686", "ntokens": "3169.68", "nsentences": "45.52", "prob_perplexity": "212.125", "code_perplexity": "209.494", "temp": "0.5", "loss_0": "2.584", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56657", "wps": "17316.8", "ups": "5.46", "wpb": "3169.7", "bsz": "45.5", "num_updates": "346400", "lr": "1.35696e-05", "gnorm": "0.952", "loss_scale": "1", "train_wall": "36", "gb_free": "13.9", "wall": "65407"} [2023-11-02 11:45:37,305][train_inner][INFO] - {"epoch": 86, "update": 85.459, "loss": "2.796", "ntokens": "3169.84", "nsentences": "40.96", "prob_perplexity": "211.341", "code_perplexity": "208.726", "temp": "0.5", "loss_0": "2.694", "loss_1": "0.097", "loss_2": "0.005", "accuracy": "0.54336", "wps": "17373.6", "ups": "5.48", "wpb": "3169.8", "bsz": "41", "num_updates": "346600", "lr": "1.3519e-05", "gnorm": "0.963", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "65444"} [2023-11-02 11:46:14,078][train_inner][INFO] - {"epoch": 86, "update": 85.508, "loss": "2.694", "ntokens": "3198.68", "nsentences": "46.52", "prob_perplexity": "212.529", "code_perplexity": "209.937", "temp": "0.5", "loss_0": "2.592", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56556", "wps": "17397.8", "ups": "5.44", "wpb": "3198.7", "bsz": "46.5", "num_updates": "346800", "lr": "1.34684e-05", "gnorm": "0.948", "loss_scale": "1", "train_wall": "36", "gb_free": "12.7", "wall": "65480"} [2023-11-02 11:46:50,316][train_inner][INFO] - {"epoch": 86, "update": 85.558, "loss": "2.722", "ntokens": "3185.16", "nsentences": "43.76", "prob_perplexity": "212.404", "code_perplexity": "209.709", "temp": "0.5", "loss_0": "2.62", "loss_1": "0.096", "loss_2": "0.006", "accuracy": "0.5575", "wps": "17580.6", "ups": "5.52", "wpb": "3185.2", "bsz": "43.8", "num_updates": "347000", "lr": "1.34177e-05", "gnorm": "0.955", "loss_scale": "1", "train_wall": "36", "gb_free": "13.2", "wall": "65517"} [2023-11-02 11:47:26,539][train_inner][INFO] - {"epoch": 86, "update": 85.607, "loss": "2.718", "ntokens": "3248.2", "nsentences": "45.04", "prob_perplexity": "212.324", "code_perplexity": "209.722", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5595", "wps": "17935.2", "ups": "5.52", "wpb": "3248.2", "bsz": "45", "num_updates": "347200", "lr": "1.33671e-05", "gnorm": "0.945", "loss_scale": "1", "train_wall": "36", "gb_free": "14.5", "wall": "65553"} [2023-11-02 11:48:02,870][train_inner][INFO] - {"epoch": 86, "update": 85.656, "loss": "2.704", "ntokens": "3218.76", "nsentences": "46.28", "prob_perplexity": "211.799", "code_perplexity": "209.253", "temp": "0.5", "loss_0": "2.602", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56258", "wps": "17720.3", "ups": "5.51", "wpb": "3218.8", "bsz": "46.3", "num_updates": "347400", "lr": "1.33165e-05", "gnorm": "0.952", "loss_scale": "1", "train_wall": "36", "gb_free": "14.9", "wall": "65589"} [2023-11-02 11:48:39,580][train_inner][INFO] - {"epoch": 86, "update": 85.706, "loss": "2.778", "ntokens": "3208", "nsentences": "43.28", "prob_perplexity": "212.227", "code_perplexity": "209.619", "temp": "0.5", "loss_0": "2.676", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54848", "wps": "17478.2", "ups": "5.45", "wpb": "3208", "bsz": "43.3", "num_updates": "347600", "lr": "1.32658e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "36", "gb_free": "14.5", "wall": "65626"} [2023-11-02 11:49:15,434][train_inner][INFO] - {"epoch": 86, "update": 85.755, "loss": "2.678", "ntokens": "3135.88", "nsentences": "43.72", "prob_perplexity": "211.514", "code_perplexity": "208.937", "temp": "0.5", "loss_0": "2.576", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56586", "wps": "17493.6", "ups": "5.58", "wpb": "3135.9", "bsz": "43.7", "num_updates": "347800", "lr": "1.32152e-05", "gnorm": "0.959", "loss_scale": "1", "train_wall": "35", "gb_free": "16.6", "wall": "65662"} [2023-11-02 11:49:51,839][train_inner][INFO] - {"epoch": 86, "update": 85.804, "loss": "2.721", "ntokens": "3166.96", "nsentences": "44.44", "prob_perplexity": "212.626", "code_perplexity": "209.981", "temp": "0.5", "loss_0": "2.619", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55946", "wps": "17399.7", "ups": "5.49", "wpb": "3167", "bsz": "44.4", "num_updates": "348000", "lr": "1.31646e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "65698"} [2023-11-02 11:50:28,046][train_inner][INFO] - {"epoch": 86, "update": 85.854, "loss": "2.71", "ntokens": "3202.32", "nsentences": "44.72", "prob_perplexity": "212.093", "code_perplexity": "209.505", "temp": "0.5", "loss_0": "2.608", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56167", "wps": "17689.9", "ups": "5.52", "wpb": "3202.3", "bsz": "44.7", "num_updates": "348200", "lr": "1.31139e-05", "gnorm": "0.955", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "65734"} [2023-11-02 11:51:04,020][train_inner][INFO] - {"epoch": 86, "update": 85.903, "loss": "2.752", "ntokens": "3168.2", "nsentences": "43.96", "prob_perplexity": "212.299", "code_perplexity": "209.665", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55335", "wps": "17614.9", "ups": "5.56", "wpb": "3168.2", "bsz": "44", "num_updates": "348400", "lr": "1.30633e-05", "gnorm": "0.969", "loss_scale": "1", "train_wall": "35", "gb_free": "13.5", "wall": "65770"} [2023-11-02 11:51:40,241][train_inner][INFO] - {"epoch": 86, "update": 85.952, "loss": "2.748", "ntokens": "3178.52", "nsentences": "44.72", "prob_perplexity": "211.333", "code_perplexity": "208.727", "temp": "0.5", "loss_0": "2.646", "loss_1": "0.097", "loss_2": "0.005", "accuracy": "0.55549", "wps": "17552", "ups": "5.52", "wpb": "3178.5", "bsz": "44.7", "num_updates": "348600", "lr": "1.30127e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "36", "gb_free": "12.3", "wall": "65806"} [2023-11-02 11:52:15,906][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 11:52:15,908][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:52:15,926][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 100 [2023-11-02 11:52:41,344][valid][INFO] - {"epoch": 86, "valid_loss": "2.599", "valid_ntokens": "3152.8", "valid_nsentences": "44.1685", "valid_prob_perplexity": "211.509", "valid_code_perplexity": "208.995", "valid_temp": "0.5", "valid_loss_0": "2.497", "valid_loss_1": "0.097", "valid_loss_2": "0.005", "valid_accuracy": "0.58356", "valid_wps": "56300.2", "valid_wpb": "3152.8", "valid_bsz": "44.2", "valid_num_updates": "348794", "valid_best_loss": "2.599"} [2023-11-02 11:52:41,346][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 86 @ 348794 updates [2023-11-02 11:52:41,348][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 11:52:42,751][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 11:52:43,757][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 86 @ 348794 updates, score 2.599) (writing took 2.4106071209535003 seconds) [2023-11-02 11:52:43,757][fairseq_cli.train][INFO] - end of epoch 86 (average epoch stats below) [2023-11-02 11:52:43,760][train][INFO] - {"epoch": 86, "train_loss": "2.732", "train_ntokens": "3188.1", "train_nsentences": "44.2682", "train_prob_perplexity": "212.076", "train_code_perplexity": "209.462", "train_temp": "0.5", "train_loss_0": "2.63", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.55723", "train_wps": "16946.5", "train_ups": "5.32", "train_wpb": "3188.1", "train_bsz": "44.3", "train_num_updates": "348794", "train_lr": "1.29635e-05", "train_gnorm": "0.957", "train_loss_scale": "1", "train_train_wall": "722", "train_gb_free": "13.7", "train_wall": "65870"} [2023-11-02 11:52:43,762][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:52:43,780][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 87 [2023-11-02 11:52:43,976][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 11:52:44,044][fairseq.trainer][INFO] - begin training epoch 87 [2023-11-02 11:52:44,045][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 11:52:45,255][train_inner][INFO] - {"epoch": 87, "update": 86.001, "loss": "2.72", "ntokens": "3186.72", "nsentences": "44.12", "prob_perplexity": "211.915", "code_perplexity": "209.329", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56015", "wps": "9803.5", "ups": "3.08", "wpb": "3186.7", "bsz": "44.1", "num_updates": "348800", "lr": "1.2962e-05", "gnorm": "0.959", "loss_scale": "1", "train_wall": "36", "gb_free": "15.6", "wall": "65871"} [2023-11-02 11:53:21,176][train_inner][INFO] - {"epoch": 87, "update": 86.051, "loss": "2.736", "ntokens": "3227.88", "nsentences": "44.72", "prob_perplexity": "212.209", "code_perplexity": "209.603", "temp": "0.5", "loss_0": "2.634", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55569", "wps": "17973", "ups": "5.57", "wpb": "3227.9", "bsz": "44.7", "num_updates": "349000", "lr": "1.29114e-05", "gnorm": "0.95", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "65907"} [2023-11-02 11:53:57,010][train_inner][INFO] - {"epoch": 87, "update": 86.1, "loss": "2.758", "ntokens": "3194", "nsentences": "42.88", "prob_perplexity": "212.147", "code_perplexity": "209.589", "temp": "0.5", "loss_0": "2.656", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55266", "wps": "17827.8", "ups": "5.58", "wpb": "3194", "bsz": "42.9", "num_updates": "349200", "lr": "1.28608e-05", "gnorm": "0.965", "loss_scale": "1", "train_wall": "35", "gb_free": "15.2", "wall": "65943"} [2023-11-02 11:54:33,466][train_inner][INFO] - {"epoch": 87, "update": 86.149, "loss": "2.729", "ntokens": "3199.72", "nsentences": "43.6", "prob_perplexity": "211.794", "code_perplexity": "209.18", "temp": "0.5", "loss_0": "2.628", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55718", "wps": "17555.1", "ups": "5.49", "wpb": "3199.7", "bsz": "43.6", "num_updates": "349400", "lr": "1.28101e-05", "gnorm": "0.97", "loss_scale": "1", "train_wall": "36", "gb_free": "15.5", "wall": "65980"} [2023-11-02 11:55:09,655][train_inner][INFO] - {"epoch": 87, "update": 86.199, "loss": "2.686", "ntokens": "3199", "nsentences": "46.28", "prob_perplexity": "212.565", "code_perplexity": "209.929", "temp": "0.5", "loss_0": "2.584", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56702", "wps": "17680.4", "ups": "5.53", "wpb": "3199", "bsz": "46.3", "num_updates": "349600", "lr": "1.27595e-05", "gnorm": "0.945", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "66016"} [2023-11-02 11:55:45,849][train_inner][INFO] - {"epoch": 87, "update": 86.248, "loss": "2.751", "ntokens": "3183.32", "nsentences": "41.36", "prob_perplexity": "212.556", "code_perplexity": "209.925", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55036", "wps": "17591.3", "ups": "5.53", "wpb": "3183.3", "bsz": "41.4", "num_updates": "349800", "lr": "1.27089e-05", "gnorm": "0.968", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "66052"} [2023-11-02 11:56:22,161][train_inner][INFO] - {"epoch": 87, "update": 86.297, "loss": "2.755", "ntokens": "3215.6", "nsentences": "45.92", "prob_perplexity": "213.274", "code_perplexity": "210.668", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55442", "wps": "17712.3", "ups": "5.51", "wpb": "3215.6", "bsz": "45.9", "num_updates": "350000", "lr": "1.26582e-05", "gnorm": "0.956", "loss_scale": "1", "train_wall": "36", "gb_free": "15.2", "wall": "66088"} [2023-11-02 11:56:22,162][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 11:56:22,163][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 11:56:22,182][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 101 [2023-11-02 11:56:47,786][valid][INFO] - {"epoch": 87, "valid_loss": "2.61", "valid_ntokens": "3159.08", "valid_nsentences": "44.1685", "valid_prob_perplexity": "211.53", "valid_code_perplexity": "209.029", "valid_temp": "0.5", "valid_loss_0": "2.508", "valid_loss_1": "0.097", "valid_loss_2": "0.005", "valid_accuracy": "0.58178", "valid_wps": "55999.3", "valid_wpb": "3159.1", "valid_bsz": "44.2", "valid_num_updates": "350000", "valid_best_loss": "2.599"} [2023-11-02 11:56:47,788][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 87 @ 350000 updates [2023-11-02 11:56:47,790][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_87_350000.pt [2023-11-02 11:56:49,142][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_87_350000.pt [2023-11-02 11:56:50,148][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_87_350000.pt (epoch 87 @ 350000 updates, score 2.61) (writing took 2.3598993602208793 seconds) [2023-11-02 11:57:25,850][train_inner][INFO] - {"epoch": 87, "update": 86.347, "loss": "2.676", "ntokens": "3158.72", "nsentences": "45.92", "prob_perplexity": "212.735", "code_perplexity": "210.087", "temp": "0.5", "loss_0": "2.574", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56897", "wps": "9919.4", "ups": "3.14", "wpb": "3158.7", "bsz": "45.9", "num_updates": "350200", "lr": "1.26076e-05", "gnorm": "0.946", "loss_scale": "1", "train_wall": "35", "gb_free": "14.8", "wall": "66152"} [2023-11-02 11:58:02,651][train_inner][INFO] - {"epoch": 87, "update": 86.396, "loss": "2.756", "ntokens": "3192.36", "nsentences": "42.96", "prob_perplexity": "212.398", "code_perplexity": "209.841", "temp": "0.5", "loss_0": "2.654", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55256", "wps": "17350.4", "ups": "5.43", "wpb": "3192.4", "bsz": "43", "num_updates": "350400", "lr": "1.2557e-05", "gnorm": "0.965", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "66189"} [2023-11-02 11:58:39,418][train_inner][INFO] - {"epoch": 87, "update": 86.445, "loss": "2.67", "ntokens": "3172.12", "nsentences": "45.92", "prob_perplexity": "212.69", "code_perplexity": "210.02", "temp": "0.5", "loss_0": "2.568", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56961", "wps": "17256.1", "ups": "5.44", "wpb": "3172.1", "bsz": "45.9", "num_updates": "350600", "lr": "1.25063e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "66226"} [2023-11-02 11:59:15,930][train_inner][INFO] - {"epoch": 87, "update": 86.495, "loss": "2.76", "ntokens": "3186.56", "nsentences": "42.48", "prob_perplexity": "212.595", "code_perplexity": "209.983", "temp": "0.5", "loss_0": "2.658", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55055", "wps": "17456", "ups": "5.48", "wpb": "3186.6", "bsz": "42.5", "num_updates": "350800", "lr": "1.24557e-05", "gnorm": "0.97", "loss_scale": "1", "train_wall": "36", "gb_free": "14.2", "wall": "66262"} [2023-11-02 11:59:52,040][train_inner][INFO] - {"epoch": 87, "update": 86.544, "loss": "2.719", "ntokens": "3238.64", "nsentences": "45.92", "prob_perplexity": "212.646", "code_perplexity": "210.065", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56017", "wps": "17938.9", "ups": "5.54", "wpb": "3238.6", "bsz": "45.9", "num_updates": "351000", "lr": "1.24051e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "66298"} [2023-11-02 12:00:27,927][train_inner][INFO] - {"epoch": 87, "update": 86.593, "loss": "2.708", "ntokens": "3145.4", "nsentences": "44.48", "prob_perplexity": "211.823", "code_perplexity": "209.255", "temp": "0.5", "loss_0": "2.606", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56228", "wps": "17530.1", "ups": "5.57", "wpb": "3145.4", "bsz": "44.5", "num_updates": "351200", "lr": "1.23544e-05", "gnorm": "0.959", "loss_scale": "1", "train_wall": "35", "gb_free": "15.1", "wall": "66334"} [2023-11-02 12:01:04,282][train_inner][INFO] - {"epoch": 87, "update": 86.643, "loss": "2.706", "ntokens": "3203.08", "nsentences": "45.36", "prob_perplexity": "211.722", "code_perplexity": "209.189", "temp": "0.5", "loss_0": "2.604", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56199", "wps": "17622.7", "ups": "5.5", "wpb": "3203.1", "bsz": "45.4", "num_updates": "351400", "lr": "1.23038e-05", "gnorm": "0.965", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "66371"} [2023-11-02 12:01:40,558][train_inner][INFO] - {"epoch": 87, "update": 86.692, "loss": "2.698", "ntokens": "3171.8", "nsentences": "44.92", "prob_perplexity": "213.2", "code_perplexity": "210.631", "temp": "0.5", "loss_0": "2.597", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5638", "wps": "17488.1", "ups": "5.51", "wpb": "3171.8", "bsz": "44.9", "num_updates": "351600", "lr": "1.22532e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "66407"} [2023-11-02 12:02:17,083][train_inner][INFO] - {"epoch": 87, "update": 86.741, "loss": "2.735", "ntokens": "3161.92", "nsentences": "43.28", "prob_perplexity": "210.992", "code_perplexity": "208.389", "temp": "0.5", "loss_0": "2.633", "loss_1": "0.097", "loss_2": "0.005", "accuracy": "0.55574", "wps": "17314.7", "ups": "5.48", "wpb": "3161.9", "bsz": "43.3", "num_updates": "351800", "lr": "1.22025e-05", "gnorm": "0.974", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "66443"} [2023-11-02 12:02:53,778][train_inner][INFO] - {"epoch": 87, "update": 86.79, "loss": "2.763", "ntokens": "3232.36", "nsentences": "45.76", "prob_perplexity": "211.745", "code_perplexity": "209.151", "temp": "0.5", "loss_0": "2.662", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55377", "wps": "17618.6", "ups": "5.45", "wpb": "3232.4", "bsz": "45.8", "num_updates": "352000", "lr": "1.21519e-05", "gnorm": "0.961", "loss_scale": "1", "train_wall": "36", "gb_free": "13.1", "wall": "66480"} [2023-11-02 12:03:29,988][train_inner][INFO] - {"epoch": 87, "update": 86.84, "loss": "2.691", "ntokens": "3189.2", "nsentences": "45.64", "prob_perplexity": "212.883", "code_perplexity": "210.283", "temp": "0.5", "loss_0": "2.59", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56557", "wps": "17615.7", "ups": "5.52", "wpb": "3189.2", "bsz": "45.6", "num_updates": "352200", "lr": "1.21013e-05", "gnorm": "0.956", "loss_scale": "1", "train_wall": "36", "gb_free": "14.1", "wall": "66516"} [2023-11-02 12:04:05,766][train_inner][INFO] - {"epoch": 87, "update": 86.889, "loss": "2.768", "ntokens": "3229.36", "nsentences": "43.28", "prob_perplexity": "212.748", "code_perplexity": "210.17", "temp": "0.5", "loss_0": "2.666", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55073", "wps": "18053.6", "ups": "5.59", "wpb": "3229.4", "bsz": "43.3", "num_updates": "352400", "lr": "1.20506e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "66552"} [2023-11-02 12:04:42,441][train_inner][INFO] - {"epoch": 87, "update": 86.938, "loss": "2.768", "ntokens": "3224.32", "nsentences": "41.92", "prob_perplexity": "213.336", "code_perplexity": "210.781", "temp": "0.5", "loss_0": "2.667", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54856", "wps": "17584.2", "ups": "5.45", "wpb": "3224.3", "bsz": "41.9", "num_updates": "352600", "lr": "1.2e-05", "gnorm": "0.968", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "66589"} [2023-11-02 12:05:18,649][train_inner][INFO] - {"epoch": 87, "update": 86.988, "loss": "2.722", "ntokens": "3147.32", "nsentences": "43.36", "prob_perplexity": "211.766", "code_perplexity": "209.092", "temp": "0.5", "loss_0": "2.621", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5582", "wps": "17393.7", "ups": "5.53", "wpb": "3147.3", "bsz": "43.4", "num_updates": "352800", "lr": "1.19494e-05", "gnorm": "0.968", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "66625"} [2023-11-02 12:05:28,184][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 12:05:28,185][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:05:28,206][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 102 [2023-11-02 12:05:53,673][valid][INFO] - {"epoch": 87, "valid_loss": "2.595", "valid_ntokens": "3145.47", "valid_nsentences": "44.1685", "valid_prob_perplexity": "211.886", "valid_code_perplexity": "209.383", "valid_temp": "0.5", "valid_loss_0": "2.493", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58432", "valid_wps": "56091.3", "valid_wpb": "3145.5", "valid_bsz": "44.2", "valid_num_updates": "352850", "valid_best_loss": "2.595"} [2023-11-02 12:05:53,675][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 87 @ 352850 updates [2023-11-02 12:05:53,677][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 12:05:55,083][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_best.pt [2023-11-02 12:05:56,123][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_best.pt (epoch 87 @ 352850 updates, score 2.595) (writing took 2.4480370269156992 seconds) [2023-11-02 12:05:56,123][fairseq_cli.train][INFO] - end of epoch 87 (average epoch stats below) [2023-11-02 12:05:56,126][train][INFO] - {"epoch": 87, "train_loss": "2.728", "train_ntokens": "3192.45", "train_nsentences": "44.2682", "train_prob_perplexity": "212.387", "train_code_perplexity": "209.788", "train_temp": "0.5", "train_loss_0": "2.626", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.55793", "train_wps": "16341.7", "train_ups": "5.12", "train_wpb": "3192.5", "train_bsz": "44.3", "train_num_updates": "352850", "train_lr": "1.19367e-05", "train_gnorm": "0.961", "train_loss_scale": "1", "train_train_wall": "723", "train_gb_free": "13.7", "train_wall": "66662"} [2023-11-02 12:05:56,128][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:05:56,149][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 88 [2023-11-02 12:05:56,326][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 12:05:56,392][fairseq.trainer][INFO] - begin training epoch 88 [2023-11-02 12:05:56,393][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 12:06:23,480][train_inner][INFO] - {"epoch": 88, "update": 87.037, "loss": "2.744", "ntokens": "3191.44", "nsentences": "43.76", "prob_perplexity": "212.952", "code_perplexity": "210.371", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55418", "wps": "9845.8", "ups": "3.09", "wpb": "3191.4", "bsz": "43.8", "num_updates": "353000", "lr": "1.18987e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "36", "gb_free": "13.9", "wall": "66690"} [2023-11-02 12:06:59,635][train_inner][INFO] - {"epoch": 88, "update": 87.086, "loss": "2.745", "ntokens": "3197.2", "nsentences": "45.2", "prob_perplexity": "212.387", "code_perplexity": "209.767", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55542", "wps": "17686.8", "ups": "5.53", "wpb": "3197.2", "bsz": "45.2", "num_updates": "353200", "lr": "1.18481e-05", "gnorm": "0.985", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "66726"} [2023-11-02 12:07:35,537][train_inner][INFO] - {"epoch": 88, "update": 87.136, "loss": "2.762", "ntokens": "3203.88", "nsentences": "44.56", "prob_perplexity": "213.615", "code_perplexity": "210.96", "temp": "0.5", "loss_0": "2.661", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55338", "wps": "17849", "ups": "5.57", "wpb": "3203.9", "bsz": "44.6", "num_updates": "353400", "lr": "1.17975e-05", "gnorm": "0.963", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "66762"} [2023-11-02 12:08:12,084][train_inner][INFO] - {"epoch": 88, "update": 87.185, "loss": "2.754", "ntokens": "3188.84", "nsentences": "42", "prob_perplexity": "212.683", "code_perplexity": "210.063", "temp": "0.5", "loss_0": "2.652", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5511", "wps": "17451.8", "ups": "5.47", "wpb": "3188.8", "bsz": "42", "num_updates": "353600", "lr": "1.17468e-05", "gnorm": "0.978", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "66798"} [2023-11-02 12:08:48,017][train_inner][INFO] - {"epoch": 88, "update": 87.234, "loss": "2.718", "ntokens": "3184.08", "nsentences": "43.6", "prob_perplexity": "212.802", "code_perplexity": "210.2", "temp": "0.5", "loss_0": "2.617", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55898", "wps": "17723.2", "ups": "5.57", "wpb": "3184.1", "bsz": "43.6", "num_updates": "353800", "lr": "1.16962e-05", "gnorm": "0.974", "loss_scale": "1", "train_wall": "35", "gb_free": "15.4", "wall": "66834"} [2023-11-02 12:09:24,881][train_inner][INFO] - {"epoch": 88, "update": 87.284, "loss": "2.727", "ntokens": "3193.64", "nsentences": "44.36", "prob_perplexity": "214.178", "code_perplexity": "211.567", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55784", "wps": "17328", "ups": "5.43", "wpb": "3193.6", "bsz": "44.4", "num_updates": "354000", "lr": "1.16456e-05", "gnorm": "0.955", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "66871"} [2023-11-02 12:10:01,373][train_inner][INFO] - {"epoch": 88, "update": 87.333, "loss": "2.717", "ntokens": "3192.08", "nsentences": "44.36", "prob_perplexity": "213.593", "code_perplexity": "210.953", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56001", "wps": "17495.7", "ups": "5.48", "wpb": "3192.1", "bsz": "44.4", "num_updates": "354200", "lr": "1.15949e-05", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "66908"} [2023-11-02 12:10:37,906][train_inner][INFO] - {"epoch": 88, "update": 87.382, "loss": "2.705", "ntokens": "3166.48", "nsentences": "45.8", "prob_perplexity": "212.472", "code_perplexity": "209.83", "temp": "0.5", "loss_0": "2.604", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56327", "wps": "17335.9", "ups": "5.47", "wpb": "3166.5", "bsz": "45.8", "num_updates": "354400", "lr": "1.15443e-05", "gnorm": "0.973", "loss_scale": "1", "train_wall": "36", "gb_free": "12.9", "wall": "66944"} [2023-11-02 12:11:14,569][train_inner][INFO] - {"epoch": 88, "update": 87.431, "loss": "2.744", "ntokens": "3172.64", "nsentences": "43", "prob_perplexity": "212.043", "code_perplexity": "209.417", "temp": "0.5", "loss_0": "2.642", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5542", "wps": "17308", "ups": "5.46", "wpb": "3172.6", "bsz": "43", "num_updates": "354600", "lr": "1.14937e-05", "gnorm": "0.969", "loss_scale": "1", "train_wall": "36", "gb_free": "14.6", "wall": "66981"} [2023-11-02 12:11:51,311][train_inner][INFO] - {"epoch": 88, "update": 87.481, "loss": "2.781", "ntokens": "3227.08", "nsentences": "43.28", "prob_perplexity": "212.459", "code_perplexity": "209.833", "temp": "0.5", "loss_0": "2.679", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54835", "wps": "17567.1", "ups": "5.44", "wpb": "3227.1", "bsz": "43.3", "num_updates": "354800", "lr": "1.1443e-05", "gnorm": "0.972", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "67018"} [2023-11-02 12:12:27,996][train_inner][INFO] - {"epoch": 88, "update": 87.53, "loss": "2.676", "ntokens": "3188.6", "nsentences": "45.36", "prob_perplexity": "213.553", "code_perplexity": "210.961", "temp": "0.5", "loss_0": "2.575", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56696", "wps": "17384.8", "ups": "5.45", "wpb": "3188.6", "bsz": "45.4", "num_updates": "355000", "lr": "1.13924e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "67054"} [2023-11-02 12:13:04,316][train_inner][INFO] - {"epoch": 88, "update": 87.579, "loss": "2.695", "ntokens": "3226.64", "nsentences": "46.32", "prob_perplexity": "213.092", "code_perplexity": "210.424", "temp": "0.5", "loss_0": "2.594", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5646", "wps": "17768.7", "ups": "5.51", "wpb": "3226.6", "bsz": "46.3", "num_updates": "355200", "lr": "1.13418e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "67091"} [2023-11-02 12:13:40,522][train_inner][INFO] - {"epoch": 88, "update": 87.629, "loss": "2.705", "ntokens": "3169.48", "nsentences": "46", "prob_perplexity": "213.293", "code_perplexity": "210.607", "temp": "0.5", "loss_0": "2.604", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56338", "wps": "17509.2", "ups": "5.52", "wpb": "3169.5", "bsz": "46", "num_updates": "355400", "lr": "1.12911e-05", "gnorm": "0.992", "loss_scale": "1", "train_wall": "36", "gb_free": "14.6", "wall": "67127"} [2023-11-02 12:14:16,774][train_inner][INFO] - {"epoch": 88, "update": 87.678, "loss": "2.724", "ntokens": "3189.08", "nsentences": "43.12", "prob_perplexity": "213.236", "code_perplexity": "210.542", "temp": "0.5", "loss_0": "2.622", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55747", "wps": "17594.8", "ups": "5.52", "wpb": "3189.1", "bsz": "43.1", "num_updates": "355600", "lr": "1.12405e-05", "gnorm": "0.962", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "67163"} [2023-11-02 12:14:52,993][train_inner][INFO] - {"epoch": 88, "update": 87.727, "loss": "2.684", "ntokens": "3121.56", "nsentences": "45.64", "prob_perplexity": "212.852", "code_perplexity": "210.189", "temp": "0.5", "loss_0": "2.582", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56766", "wps": "17238.3", "ups": "5.52", "wpb": "3121.6", "bsz": "45.6", "num_updates": "355800", "lr": "1.11899e-05", "gnorm": "0.976", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "67199"} [2023-11-02 12:15:29,683][train_inner][INFO] - {"epoch": 88, "update": 87.777, "loss": "2.761", "ntokens": "3213.6", "nsentences": "42.96", "prob_perplexity": "213.004", "code_perplexity": "210.42", "temp": "0.5", "loss_0": "2.659", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55083", "wps": "17518.4", "ups": "5.45", "wpb": "3213.6", "bsz": "43", "num_updates": "356000", "lr": "1.11392e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "36", "gb_free": "15.4", "wall": "67236"} [2023-11-02 12:16:06,512][train_inner][INFO] - {"epoch": 88, "update": 87.826, "loss": "2.7", "ntokens": "3210.92", "nsentences": "45.4", "prob_perplexity": "213.538", "code_perplexity": "210.931", "temp": "0.5", "loss_0": "2.599", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56364", "wps": "17438.2", "ups": "5.43", "wpb": "3210.9", "bsz": "45.4", "num_updates": "356200", "lr": "1.10886e-05", "gnorm": "0.956", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "67273"} [2023-11-02 12:16:43,500][train_inner][INFO] - {"epoch": 88, "update": 87.875, "loss": "2.724", "ntokens": "3213.32", "nsentences": "45.72", "prob_perplexity": "213.928", "code_perplexity": "211.282", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55957", "wps": "17375.6", "ups": "5.41", "wpb": "3213.3", "bsz": "45.7", "num_updates": "356400", "lr": "1.1038e-05", "gnorm": "0.963", "loss_scale": "1", "train_wall": "36", "gb_free": "14.7", "wall": "67310"} [2023-11-02 12:17:20,336][train_inner][INFO] - {"epoch": 88, "update": 87.925, "loss": "2.668", "ntokens": "3182.96", "nsentences": "45.6", "prob_perplexity": "213.239", "code_perplexity": "210.579", "temp": "0.5", "loss_0": "2.566", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56917", "wps": "17282.8", "ups": "5.43", "wpb": "3183", "bsz": "45.6", "num_updates": "356600", "lr": "1.09873e-05", "gnorm": "0.964", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "67347"} [2023-11-02 12:17:57,384][train_inner][INFO] - {"epoch": 88, "update": 87.974, "loss": "2.747", "ntokens": "3210.2", "nsentences": "42.8", "prob_perplexity": "213.682", "code_perplexity": "211.008", "temp": "0.5", "loss_0": "2.646", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55352", "wps": "17331.2", "ups": "5.4", "wpb": "3210.2", "bsz": "42.8", "num_updates": "356800", "lr": "1.09367e-05", "gnorm": "0.955", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "67384"} [2023-11-02 12:18:16,807][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 12:18:16,809][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:18:16,826][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 103 [2023-11-02 12:18:42,511][valid][INFO] - {"epoch": 88, "valid_loss": "2.613", "valid_ntokens": "3165.57", "valid_nsentences": "44.1685", "valid_prob_perplexity": "213.011", "valid_code_perplexity": "210.552", "valid_temp": "0.5", "valid_loss_0": "2.511", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58094", "valid_wps": "55995.5", "valid_wpb": "3165.6", "valid_bsz": "44.2", "valid_num_updates": "356906", "valid_best_loss": "2.595"} [2023-11-02 12:18:42,513][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 88 @ 356906 updates [2023-11-02 12:18:42,515][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:18:43,926][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:18:43,979][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 88 @ 356906 updates, score 2.613) (writing took 1.4662359161302447 seconds) [2023-11-02 12:18:43,980][fairseq_cli.train][INFO] - end of epoch 88 (average epoch stats below) [2023-11-02 12:18:43,998][train][INFO] - {"epoch": 88, "train_loss": "2.728", "train_ntokens": "3192.39", "train_nsentences": "44.2682", "train_prob_perplexity": "213.086", "train_code_perplexity": "210.453", "train_temp": "0.5", "train_loss_0": "2.626", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.5579", "train_wps": "16863", "train_ups": "5.28", "train_wpb": "3192.4", "train_bsz": "44.3", "train_num_updates": "356906", "train_lr": "1.09099e-05", "train_gnorm": "0.969", "train_loss_scale": "1", "train_train_wall": "727", "train_gb_free": "13.9", "train_wall": "67430"} [2023-11-02 12:18:44,001][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:18:44,027][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 89 [2023-11-02 12:18:44,198][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 12:18:44,262][fairseq.trainer][INFO] - begin training epoch 89 [2023-11-02 12:18:44,262][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 12:19:01,276][train_inner][INFO] - {"epoch": 89, "update": 88.023, "loss": "2.808", "ntokens": "3186.68", "nsentences": "40.76", "prob_perplexity": "212.03", "code_perplexity": "209.417", "temp": "0.5", "loss_0": "2.706", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5418", "wps": "9975.6", "ups": "3.13", "wpb": "3186.7", "bsz": "40.8", "num_updates": "357000", "lr": "1.08861e-05", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "67448"} [2023-11-02 12:19:37,495][train_inner][INFO] - {"epoch": 89, "update": 88.072, "loss": "2.737", "ntokens": "3205.08", "nsentences": "45.2", "prob_perplexity": "213.06", "code_perplexity": "210.399", "temp": "0.5", "loss_0": "2.636", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55751", "wps": "17699.6", "ups": "5.52", "wpb": "3205.1", "bsz": "45.2", "num_updates": "357200", "lr": "1.08354e-05", "gnorm": "0.958", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "67484"} [2023-11-02 12:20:13,847][train_inner][INFO] - {"epoch": 89, "update": 88.122, "loss": "2.739", "ntokens": "3220.08", "nsentences": "43.6", "prob_perplexity": "213.289", "code_perplexity": "210.683", "temp": "0.5", "loss_0": "2.637", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55488", "wps": "17717.2", "ups": "5.5", "wpb": "3220.1", "bsz": "43.6", "num_updates": "357400", "lr": "1.07848e-05", "gnorm": "0.963", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "67520"} [2023-11-02 12:20:49,811][train_inner][INFO] - {"epoch": 89, "update": 88.171, "loss": "2.737", "ntokens": "3192.84", "nsentences": "43.24", "prob_perplexity": "213.766", "code_perplexity": "211.127", "temp": "0.5", "loss_0": "2.636", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55482", "wps": "17756.5", "ups": "5.56", "wpb": "3192.8", "bsz": "43.2", "num_updates": "357600", "lr": "1.07342e-05", "gnorm": "0.976", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "67556"} [2023-11-02 12:21:25,740][train_inner][INFO] - {"epoch": 89, "update": 88.22, "loss": "2.776", "ntokens": "3210", "nsentences": "41.36", "prob_perplexity": "212.801", "code_perplexity": "210.188", "temp": "0.5", "loss_0": "2.675", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54639", "wps": "17869.5", "ups": "5.57", "wpb": "3210", "bsz": "41.4", "num_updates": "357800", "lr": "1.06835e-05", "gnorm": "0.978", "loss_scale": "1", "train_wall": "35", "gb_free": "14.1", "wall": "67592"} [2023-11-02 12:22:01,758][train_inner][INFO] - {"epoch": 89, "update": 88.27, "loss": "2.716", "ntokens": "3191.16", "nsentences": "44.96", "prob_perplexity": "212.854", "code_perplexity": "210.217", "temp": "0.5", "loss_0": "2.615", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56013", "wps": "17721.1", "ups": "5.55", "wpb": "3191.2", "bsz": "45", "num_updates": "358000", "lr": "1.06329e-05", "gnorm": "0.966", "loss_scale": "1", "train_wall": "35", "gb_free": "12.8", "wall": "67628"} [2023-11-02 12:22:38,138][train_inner][INFO] - {"epoch": 89, "update": 88.319, "loss": "2.676", "ntokens": "3158.88", "nsentences": "44.36", "prob_perplexity": "213.336", "code_perplexity": "210.694", "temp": "0.5", "loss_0": "2.574", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56729", "wps": "17367", "ups": "5.5", "wpb": "3158.9", "bsz": "44.4", "num_updates": "358200", "lr": "1.05823e-05", "gnorm": "0.958", "loss_scale": "1", "train_wall": "36", "gb_free": "12.8", "wall": "67664"} [2023-11-02 12:23:14,227][train_inner][INFO] - {"epoch": 89, "update": 88.368, "loss": "2.75", "ntokens": "3218.68", "nsentences": "44.48", "prob_perplexity": "213.635", "code_perplexity": "211.055", "temp": "0.5", "loss_0": "2.649", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55423", "wps": "17838.6", "ups": "5.54", "wpb": "3218.7", "bsz": "44.5", "num_updates": "358400", "lr": "1.05316e-05", "gnorm": "0.957", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "67700"} [2023-11-02 12:23:50,488][train_inner][INFO] - {"epoch": 89, "update": 88.418, "loss": "2.733", "ntokens": "3173.08", "nsentences": "42.28", "prob_perplexity": "212.716", "code_perplexity": "210.126", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55389", "wps": "17502.7", "ups": "5.52", "wpb": "3173.1", "bsz": "42.3", "num_updates": "358600", "lr": "1.0481e-05", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "13.1", "wall": "67737"} [2023-11-02 12:24:27,456][train_inner][INFO] - {"epoch": 89, "update": 88.467, "loss": "2.76", "ntokens": "3201.84", "nsentences": "43.68", "prob_perplexity": "212.519", "code_perplexity": "209.826", "temp": "0.5", "loss_0": "2.658", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55294", "wps": "17322.8", "ups": "5.41", "wpb": "3201.8", "bsz": "43.7", "num_updates": "358800", "lr": "1.04304e-05", "gnorm": "0.973", "loss_scale": "1", "train_wall": "36", "gb_free": "12.9", "wall": "67774"} [2023-11-02 12:25:03,561][train_inner][INFO] - {"epoch": 89, "update": 88.516, "loss": "2.661", "ntokens": "3186.28", "nsentences": "45.48", "prob_perplexity": "213.658", "code_perplexity": "211.053", "temp": "0.5", "loss_0": "2.56", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56998", "wps": "17651.1", "ups": "5.54", "wpb": "3186.3", "bsz": "45.5", "num_updates": "359000", "lr": "1.03797e-05", "gnorm": "0.956", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "67810"} [2023-11-02 12:25:40,732][train_inner][INFO] - {"epoch": 89, "update": 88.566, "loss": "2.709", "ntokens": "3180.68", "nsentences": "44.12", "prob_perplexity": "213.444", "code_perplexity": "210.879", "temp": "0.5", "loss_0": "2.608", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56158", "wps": "17114.9", "ups": "5.38", "wpb": "3180.7", "bsz": "44.1", "num_updates": "359200", "lr": "1.03291e-05", "gnorm": "0.962", "loss_scale": "2", "train_wall": "37", "gb_free": "13.6", "wall": "67847"} [2023-11-02 12:26:17,142][train_inner][INFO] - {"epoch": 89, "update": 88.615, "loss": "2.696", "ntokens": "3198.36", "nsentences": "46.24", "prob_perplexity": "214.391", "code_perplexity": "211.717", "temp": "0.5", "loss_0": "2.595", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56463", "wps": "17569.4", "ups": "5.49", "wpb": "3198.4", "bsz": "46.2", "num_updates": "359400", "lr": "1.02785e-05", "gnorm": "0.966", "loss_scale": "2", "train_wall": "36", "gb_free": "15.4", "wall": "67883"} [2023-11-02 12:26:53,545][train_inner][INFO] - {"epoch": 89, "update": 88.664, "loss": "2.733", "ntokens": "3199.68", "nsentences": "44.64", "prob_perplexity": "213.426", "code_perplexity": "210.786", "temp": "0.5", "loss_0": "2.631", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55799", "wps": "17580.6", "ups": "5.49", "wpb": "3199.7", "bsz": "44.6", "num_updates": "359600", "lr": "1.02278e-05", "gnorm": "0.964", "loss_scale": "2", "train_wall": "36", "gb_free": "12.6", "wall": "67920"} [2023-11-02 12:27:30,650][train_inner][INFO] - {"epoch": 89, "update": 88.714, "loss": "2.715", "ntokens": "3195.28", "nsentences": "43.64", "prob_perplexity": "213.966", "code_perplexity": "211.377", "temp": "0.5", "loss_0": "2.613", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55911", "wps": "17223.7", "ups": "5.39", "wpb": "3195.3", "bsz": "43.6", "num_updates": "359800", "lr": "1.01772e-05", "gnorm": "0.964", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "67957"} [2023-11-02 12:28:06,972][train_inner][INFO] - {"epoch": 89, "update": 88.763, "loss": "2.725", "ntokens": "3189.48", "nsentences": "43.92", "prob_perplexity": "214.308", "code_perplexity": "211.701", "temp": "0.5", "loss_0": "2.624", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55842", "wps": "17563.5", "ups": "5.51", "wpb": "3189.5", "bsz": "43.9", "num_updates": "360000", "lr": "1.01266e-05", "gnorm": "0.965", "loss_scale": "2", "train_wall": "36", "gb_free": "13.6", "wall": "67993"} [2023-11-02 12:28:43,582][train_inner][INFO] - {"epoch": 89, "update": 88.812, "loss": "2.703", "ntokens": "3166.64", "nsentences": "45", "prob_perplexity": "214.218", "code_perplexity": "211.558", "temp": "0.5", "loss_0": "2.602", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56301", "wps": "17299.9", "ups": "5.46", "wpb": "3166.6", "bsz": "45", "num_updates": "360200", "lr": "1.00759e-05", "gnorm": "0.963", "loss_scale": "2", "train_wall": "36", "gb_free": "12.9", "wall": "68030"} [2023-11-02 12:29:19,737][train_inner][INFO] - {"epoch": 89, "update": 88.861, "loss": "2.695", "ntokens": "3158.12", "nsentences": "44.32", "prob_perplexity": "212.464", "code_perplexity": "209.876", "temp": "0.5", "loss_0": "2.594", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56306", "wps": "17471.1", "ups": "5.53", "wpb": "3158.1", "bsz": "44.3", "num_updates": "360400", "lr": "1.00253e-05", "gnorm": "0.981", "loss_scale": "2", "train_wall": "36", "gb_free": "13.3", "wall": "68066"} [2023-11-02 12:29:55,778][train_inner][INFO] - {"epoch": 89, "update": 88.911, "loss": "2.717", "ntokens": "3205.04", "nsentences": "46.48", "prob_perplexity": "213.506", "code_perplexity": "210.93", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56224", "wps": "17786.7", "ups": "5.55", "wpb": "3205", "bsz": "46.5", "num_updates": "360600", "lr": "9.97468e-06", "gnorm": "0.962", "loss_scale": "2", "train_wall": "35", "gb_free": "15.6", "wall": "68102"} [2023-11-02 12:30:32,276][train_inner][INFO] - {"epoch": 89, "update": 88.96, "loss": "2.739", "ntokens": "3195.56", "nsentences": "44.64", "prob_perplexity": "213.066", "code_perplexity": "210.478", "temp": "0.5", "loss_0": "2.638", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5558", "wps": "17511.8", "ups": "5.48", "wpb": "3195.6", "bsz": "44.6", "num_updates": "360800", "lr": "9.92405e-06", "gnorm": "0.985", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "68139"} [2023-11-02 12:31:01,583][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 12:31:01,584][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:31:01,602][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 104 [2023-11-02 12:31:27,610][valid][INFO] - {"epoch": 89, "valid_loss": "2.598", "valid_ntokens": "3164.82", "valid_nsentences": "44.1685", "valid_prob_perplexity": "213.449", "valid_code_perplexity": "210.946", "valid_temp": "0.5", "valid_loss_0": "2.496", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58349", "valid_wps": "55244.9", "valid_wpb": "3164.8", "valid_bsz": "44.2", "valid_num_updates": "360962", "valid_best_loss": "2.595"} [2023-11-02 12:31:27,612][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 89 @ 360962 updates [2023-11-02 12:31:27,614][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:31:29,053][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:31:29,124][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 89 @ 360962 updates, score 2.598) (writing took 1.511930295266211 seconds) [2023-11-02 12:31:29,125][fairseq_cli.train][INFO] - end of epoch 89 (average epoch stats below) [2023-11-02 12:31:29,127][train][INFO] - {"epoch": 89, "train_loss": "2.722", "train_ntokens": "3190.94", "train_nsentences": "44.2682", "train_prob_perplexity": "213.354", "train_code_perplexity": "210.734", "train_temp": "0.5", "train_loss_0": "2.621", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.55876", "train_wps": "16915.4", "train_ups": "5.3", "train_wpb": "3190.9", "train_bsz": "44.3", "train_num_updates": "360962", "train_lr": "9.88304e-06", "train_gnorm": "0.968", "train_loss_scale": "2", "train_train_wall": "724", "train_gb_free": "13.5", "train_wall": "68195"} [2023-11-02 12:31:29,130][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:31:29,148][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 90 [2023-11-02 12:31:29,318][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 12:31:29,386][fairseq.trainer][INFO] - begin training epoch 90 [2023-11-02 12:31:29,387][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 12:31:36,198][train_inner][INFO] - {"epoch": 90, "update": 89.009, "loss": "2.732", "ntokens": "3149.24", "nsentences": "42.52", "prob_perplexity": "212.671", "code_perplexity": "210.027", "temp": "0.5", "loss_0": "2.631", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55546", "wps": "9856.3", "ups": "3.13", "wpb": "3149.2", "bsz": "42.5", "num_updates": "361000", "lr": "9.87342e-06", "gnorm": "0.999", "loss_scale": "2", "train_wall": "35", "gb_free": "13.8", "wall": "68202"} [2023-11-02 12:32:11,990][train_inner][INFO] - {"epoch": 90, "update": 89.059, "loss": "2.72", "ntokens": "3203.88", "nsentences": "44.4", "prob_perplexity": "213.436", "code_perplexity": "210.9", "temp": "0.5", "loss_0": "2.619", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5587", "wps": "17903.8", "ups": "5.59", "wpb": "3203.9", "bsz": "44.4", "num_updates": "361200", "lr": "9.82278e-06", "gnorm": "1.011", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "68238"} [2023-11-02 12:32:47,483][train_inner][INFO] - {"epoch": 90, "update": 89.108, "loss": "2.677", "ntokens": "3186.52", "nsentences": "45.4", "prob_perplexity": "213.716", "code_perplexity": "211.115", "temp": "0.5", "loss_0": "2.576", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56716", "wps": "17956.6", "ups": "5.64", "wpb": "3186.5", "bsz": "45.4", "num_updates": "361400", "lr": "9.77215e-06", "gnorm": "0.964", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "68274"} [2023-11-02 12:33:23,384][train_inner][INFO] - {"epoch": 90, "update": 89.157, "loss": "2.75", "ntokens": "3208.76", "nsentences": "42.12", "prob_perplexity": "212.575", "code_perplexity": "209.92", "temp": "0.5", "loss_0": "2.649", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55177", "wps": "17876.6", "ups": "5.57", "wpb": "3208.8", "bsz": "42.1", "num_updates": "361600", "lr": "9.72152e-06", "gnorm": "0.981", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "68310"} [2023-11-02 12:33:59,205][train_inner][INFO] - {"epoch": 90, "update": 89.207, "loss": "2.689", "ntokens": "3199.88", "nsentences": "47.12", "prob_perplexity": "214.271", "code_perplexity": "211.635", "temp": "0.5", "loss_0": "2.588", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56697", "wps": "17867", "ups": "5.58", "wpb": "3199.9", "bsz": "47.1", "num_updates": "361800", "lr": "9.67089e-06", "gnorm": "0.966", "loss_scale": "2", "train_wall": "35", "gb_free": "14.4", "wall": "68345"} [2023-11-02 12:34:35,204][train_inner][INFO] - {"epoch": 90, "update": 89.256, "loss": "2.673", "ntokens": "3157.16", "nsentences": "45.84", "prob_perplexity": "213.687", "code_perplexity": "211.06", "temp": "0.5", "loss_0": "2.572", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56815", "wps": "17541.5", "ups": "5.56", "wpb": "3157.2", "bsz": "45.8", "num_updates": "362000", "lr": "9.62025e-06", "gnorm": "0.984", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "68381"} [2023-11-02 12:35:11,120][train_inner][INFO] - {"epoch": 90, "update": 89.305, "loss": "2.733", "ntokens": "3197.2", "nsentences": "42.68", "prob_perplexity": "213.469", "code_perplexity": "210.835", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55512", "wps": "17805.1", "ups": "5.57", "wpb": "3197.2", "bsz": "42.7", "num_updates": "362200", "lr": "9.56962e-06", "gnorm": "0.984", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "68417"} [2023-11-02 12:35:47,373][train_inner][INFO] - {"epoch": 90, "update": 89.355, "loss": "2.752", "ntokens": "3223.28", "nsentences": "44.68", "prob_perplexity": "212.912", "code_perplexity": "210.31", "temp": "0.5", "loss_0": "2.65", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55432", "wps": "17783.1", "ups": "5.52", "wpb": "3223.3", "bsz": "44.7", "num_updates": "362400", "lr": "9.51899e-06", "gnorm": "0.971", "loss_scale": "2", "train_wall": "36", "gb_free": "12.8", "wall": "68454"} [2023-11-02 12:36:23,450][train_inner][INFO] - {"epoch": 90, "update": 89.404, "loss": "2.671", "ntokens": "3179.44", "nsentences": "47.52", "prob_perplexity": "214.013", "code_perplexity": "211.336", "temp": "0.5", "loss_0": "2.57", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57118", "wps": "17626.7", "ups": "5.54", "wpb": "3179.4", "bsz": "47.5", "num_updates": "362600", "lr": "9.46835e-06", "gnorm": "0.961", "loss_scale": "2", "train_wall": "35", "gb_free": "14.3", "wall": "68490"} [2023-11-02 12:36:59,920][train_inner][INFO] - {"epoch": 90, "update": 89.453, "loss": "2.735", "ntokens": "3238.76", "nsentences": "44.12", "prob_perplexity": "213.867", "code_perplexity": "211.306", "temp": "0.5", "loss_0": "2.634", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55565", "wps": "17762.4", "ups": "5.48", "wpb": "3238.8", "bsz": "44.1", "num_updates": "362800", "lr": "9.41772e-06", "gnorm": "0.97", "loss_scale": "2", "train_wall": "36", "gb_free": "14.2", "wall": "68526"} [2023-11-02 12:37:35,767][train_inner][INFO] - {"epoch": 90, "update": 89.502, "loss": "2.668", "ntokens": "3160.08", "nsentences": "46.12", "prob_perplexity": "213.543", "code_perplexity": "210.89", "temp": "0.5", "loss_0": "2.567", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57009", "wps": "17631.8", "ups": "5.58", "wpb": "3160.1", "bsz": "46.1", "num_updates": "363000", "lr": "9.36709e-06", "gnorm": "0.967", "loss_scale": "2", "train_wall": "35", "gb_free": "13.2", "wall": "68562"} [2023-11-02 12:38:12,514][train_inner][INFO] - {"epoch": 90, "update": 89.552, "loss": "2.674", "ntokens": "3215.92", "nsentences": "46.48", "prob_perplexity": "214.4", "code_perplexity": "211.741", "temp": "0.5", "loss_0": "2.573", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56839", "wps": "17504.2", "ups": "5.44", "wpb": "3215.9", "bsz": "46.5", "num_updates": "363200", "lr": "9.31646e-06", "gnorm": "0.965", "loss_scale": "2", "train_wall": "36", "gb_free": "13.5", "wall": "68599"} [2023-11-02 12:38:48,481][train_inner][INFO] - {"epoch": 90, "update": 89.601, "loss": "2.726", "ntokens": "3198.88", "nsentences": "44.6", "prob_perplexity": "214.129", "code_perplexity": "211.523", "temp": "0.5", "loss_0": "2.625", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55935", "wps": "17789.1", "ups": "5.56", "wpb": "3198.9", "bsz": "44.6", "num_updates": "363400", "lr": "9.26582e-06", "gnorm": "0.96", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "68635"} [2023-11-02 12:39:25,090][train_inner][INFO] - {"epoch": 90, "update": 89.65, "loss": "2.719", "ntokens": "3189.16", "nsentences": "43.84", "prob_perplexity": "214.199", "code_perplexity": "211.534", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55799", "wps": "17423.5", "ups": "5.46", "wpb": "3189.2", "bsz": "43.8", "num_updates": "363600", "lr": "9.21519e-06", "gnorm": "0.978", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "68671"} [2023-11-02 12:40:00,974][train_inner][INFO] - {"epoch": 90, "update": 89.7, "loss": "2.698", "ntokens": "3186.76", "nsentences": "44.12", "prob_perplexity": "214.013", "code_perplexity": "211.396", "temp": "0.5", "loss_0": "2.597", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56299", "wps": "17762.6", "ups": "5.57", "wpb": "3186.8", "bsz": "44.1", "num_updates": "363800", "lr": "9.16456e-06", "gnorm": "0.971", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "68707"} [2023-11-02 12:40:37,021][train_inner][INFO] - {"epoch": 90, "update": 89.749, "loss": "2.761", "ntokens": "3180.6", "nsentences": "42.28", "prob_perplexity": "213.094", "code_perplexity": "210.453", "temp": "0.5", "loss_0": "2.659", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55005", "wps": "17648.1", "ups": "5.55", "wpb": "3180.6", "bsz": "42.3", "num_updates": "364000", "lr": "9.11392e-06", "gnorm": "0.982", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "68743"} [2023-11-02 12:41:13,220][train_inner][INFO] - {"epoch": 90, "update": 89.798, "loss": "2.73", "ntokens": "3195.6", "nsentences": "43.68", "prob_perplexity": "213.163", "code_perplexity": "210.474", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55605", "wps": "17656.4", "ups": "5.53", "wpb": "3195.6", "bsz": "43.7", "num_updates": "364200", "lr": "9.06329e-06", "gnorm": "0.974", "loss_scale": "2", "train_wall": "36", "gb_free": "14.1", "wall": "68779"} [2023-11-02 12:41:49,184][train_inner][INFO] - {"epoch": 90, "update": 89.848, "loss": "2.749", "ntokens": "3178.72", "nsentences": "43.2", "prob_perplexity": "213.694", "code_perplexity": "211.021", "temp": "0.5", "loss_0": "2.648", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55282", "wps": "17678.4", "ups": "5.56", "wpb": "3178.7", "bsz": "43.2", "num_updates": "364400", "lr": "9.01266e-06", "gnorm": "0.982", "loss_scale": "2", "train_wall": "35", "gb_free": "14", "wall": "68815"} [2023-11-02 12:42:26,024][train_inner][INFO] - {"epoch": 90, "update": 89.897, "loss": "2.725", "ntokens": "3187.4", "nsentences": "43.92", "prob_perplexity": "213.592", "code_perplexity": "210.951", "temp": "0.5", "loss_0": "2.624", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55793", "wps": "17304.9", "ups": "5.43", "wpb": "3187.4", "bsz": "43.9", "num_updates": "364600", "lr": "8.96203e-06", "gnorm": "0.978", "loss_scale": "2", "train_wall": "36", "gb_free": "14.4", "wall": "68852"} [2023-11-02 12:43:02,833][train_inner][INFO] - {"epoch": 90, "update": 89.946, "loss": "2.771", "ntokens": "3221.08", "nsentences": "41.84", "prob_perplexity": "213.692", "code_perplexity": "211.009", "temp": "0.5", "loss_0": "2.67", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54775", "wps": "17502.7", "ups": "5.43", "wpb": "3221.1", "bsz": "41.8", "num_updates": "364800", "lr": "8.91139e-06", "gnorm": "0.973", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "68889"} [2023-11-02 12:43:38,518][train_inner][INFO] - {"epoch": 90, "update": 89.996, "loss": "2.73", "ntokens": "3147.8", "nsentences": "42.36", "prob_perplexity": "213.315", "code_perplexity": "210.674", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55645", "wps": "17643.1", "ups": "5.6", "wpb": "3147.8", "bsz": "42.4", "num_updates": "365000", "lr": "8.86076e-06", "gnorm": "0.977", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "68925"} [2023-11-02 12:43:41,787][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 12:43:41,789][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:43:41,808][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 105 [2023-11-02 12:44:07,243][valid][INFO] - {"epoch": 90, "valid_loss": "2.602", "valid_ntokens": "3152.43", "valid_nsentences": "44.1685", "valid_prob_perplexity": "212.942", "valid_code_perplexity": "210.45", "valid_temp": "0.5", "valid_loss_0": "2.501", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58315", "valid_wps": "56237.5", "valid_wpb": "3152.4", "valid_bsz": "44.2", "valid_num_updates": "365018", "valid_best_loss": "2.595"} [2023-11-02 12:44:07,245][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 90 @ 365018 updates [2023-11-02 12:44:07,247][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:44:08,665][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:44:08,730][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 90 @ 365018 updates, score 2.602) (writing took 1.4847956849262118 seconds) [2023-11-02 12:44:08,730][fairseq_cli.train][INFO] - end of epoch 90 (average epoch stats below) [2023-11-02 12:44:08,732][train][INFO] - {"epoch": 90, "train_loss": "2.719", "train_ntokens": "3192.72", "train_nsentences": "44.2682", "train_prob_perplexity": "213.638", "train_code_perplexity": "211.003", "train_temp": "0.5", "train_loss_0": "2.618", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.5592", "train_wps": "17048", "train_ups": "5.34", "train_wpb": "3192.7", "train_bsz": "44.3", "train_num_updates": "365018", "train_lr": "8.8562e-06", "train_gnorm": "0.975", "train_loss_scale": "2", "train_train_wall": "719", "train_gb_free": "12.7", "train_wall": "68955"} [2023-11-02 12:44:08,735][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:44:08,758][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 91 [2023-11-02 12:44:08,928][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 12:44:08,992][fairseq.trainer][INFO] - begin training epoch 91 [2023-11-02 12:44:08,992][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 12:44:41,454][train_inner][INFO] - {"epoch": 91, "update": 90.045, "loss": "2.756", "ntokens": "3180.96", "nsentences": "43.4", "prob_perplexity": "214.055", "code_perplexity": "211.431", "temp": "0.5", "loss_0": "2.655", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55303", "wps": "10108.9", "ups": "3.18", "wpb": "3181", "bsz": "43.4", "num_updates": "365200", "lr": "8.81013e-06", "gnorm": "0.988", "loss_scale": "2", "train_wall": "35", "gb_free": "13.7", "wall": "68988"} [2023-11-02 12:45:17,060][train_inner][INFO] - {"epoch": 91, "update": 90.094, "loss": "2.672", "ntokens": "3161.4", "nsentences": "45.28", "prob_perplexity": "214.158", "code_perplexity": "211.489", "temp": "0.5", "loss_0": "2.571", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56845", "wps": "17767.7", "ups": "5.62", "wpb": "3161.4", "bsz": "45.3", "num_updates": "365400", "lr": "8.75949e-06", "gnorm": "0.97", "loss_scale": "2", "train_wall": "35", "gb_free": "13", "wall": "69023"} [2023-11-02 12:45:52,797][train_inner][INFO] - {"epoch": 91, "update": 90.143, "loss": "2.745", "ntokens": "3217.92", "nsentences": "44.28", "prob_perplexity": "214.959", "code_perplexity": "212.322", "temp": "0.5", "loss_0": "2.644", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55481", "wps": "18009.8", "ups": "5.6", "wpb": "3217.9", "bsz": "44.3", "num_updates": "365600", "lr": "8.70886e-06", "gnorm": "0.964", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "69059"} [2023-11-02 12:46:28,758][train_inner][INFO] - {"epoch": 91, "update": 90.193, "loss": "2.681", "ntokens": "3205.08", "nsentences": "46.84", "prob_perplexity": "214.233", "code_perplexity": "211.598", "temp": "0.5", "loss_0": "2.58", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56737", "wps": "17826.3", "ups": "5.56", "wpb": "3205.1", "bsz": "46.8", "num_updates": "365800", "lr": "8.65823e-06", "gnorm": "0.981", "loss_scale": "2", "train_wall": "35", "gb_free": "13.1", "wall": "69095"} [2023-11-02 12:47:04,439][train_inner][INFO] - {"epoch": 91, "update": 90.242, "loss": "2.721", "ntokens": "3210.92", "nsentences": "46.32", "prob_perplexity": "213.577", "code_perplexity": "210.947", "temp": "0.5", "loss_0": "2.62", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56089", "wps": "17999.3", "ups": "5.61", "wpb": "3210.9", "bsz": "46.3", "num_updates": "366000", "lr": "8.60759e-06", "gnorm": "0.971", "loss_scale": "2", "train_wall": "35", "gb_free": "14.2", "wall": "69131"} [2023-11-02 12:47:40,607][train_inner][INFO] - {"epoch": 91, "update": 90.291, "loss": "2.767", "ntokens": "3196.8", "nsentences": "41.44", "prob_perplexity": "213.133", "code_perplexity": "210.535", "temp": "0.5", "loss_0": "2.665", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54818", "wps": "17678.5", "ups": "5.53", "wpb": "3196.8", "bsz": "41.4", "num_updates": "366200", "lr": "8.55696e-06", "gnorm": "0.978", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "69167"} [2023-11-02 12:48:17,188][train_inner][INFO] - {"epoch": 91, "update": 90.341, "loss": "2.784", "ntokens": "3205.36", "nsentences": "41.64", "prob_perplexity": "214.559", "code_perplexity": "211.94", "temp": "0.5", "loss_0": "2.683", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54648", "wps": "17525.6", "ups": "5.47", "wpb": "3205.4", "bsz": "41.6", "num_updates": "366400", "lr": "8.50633e-06", "gnorm": "0.976", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "69203"} [2023-11-02 12:48:53,226][train_inner][INFO] - {"epoch": 91, "update": 90.39, "loss": "2.704", "ntokens": "3192.36", "nsentences": "44.32", "prob_perplexity": "213.344", "code_perplexity": "210.754", "temp": "0.5", "loss_0": "2.603", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56099", "wps": "17717.6", "ups": "5.55", "wpb": "3192.4", "bsz": "44.3", "num_updates": "366600", "lr": "8.4557e-06", "gnorm": "0.978", "loss_scale": "2", "train_wall": "35", "gb_free": "13.4", "wall": "69239"} [2023-11-02 12:49:29,121][train_inner][INFO] - {"epoch": 91, "update": 90.439, "loss": "2.731", "ntokens": "3232", "nsentences": "44", "prob_perplexity": "214.642", "code_perplexity": "212.037", "temp": "0.5", "loss_0": "2.63", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55681", "wps": "18009.4", "ups": "5.57", "wpb": "3232", "bsz": "44", "num_updates": "366800", "lr": "8.40506e-06", "gnorm": "0.961", "loss_scale": "2", "train_wall": "35", "gb_free": "13.6", "wall": "69275"} [2023-11-02 12:50:05,335][train_inner][INFO] - {"epoch": 91, "update": 90.489, "loss": "2.71", "ntokens": "3167.52", "nsentences": "43.24", "prob_perplexity": "213.566", "code_perplexity": "210.913", "temp": "0.5", "loss_0": "2.609", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55984", "wps": "17494.2", "ups": "5.52", "wpb": "3167.5", "bsz": "43.2", "num_updates": "367000", "lr": "8.35443e-06", "gnorm": "0.988", "loss_scale": "2", "train_wall": "36", "gb_free": "14.6", "wall": "69312"} [2023-11-02 12:50:41,956][train_inner][INFO] - {"epoch": 91, "update": 90.538, "loss": "2.745", "ntokens": "3171.24", "nsentences": "42.4", "prob_perplexity": "212.473", "code_perplexity": "209.853", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55429", "wps": "17320.3", "ups": "5.46", "wpb": "3171.2", "bsz": "42.4", "num_updates": "367200", "lr": "8.3038e-06", "gnorm": "0.985", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "69348"} [2023-11-02 12:51:18,515][train_inner][INFO] - {"epoch": 91, "update": 90.587, "loss": "2.773", "ntokens": "3226.36", "nsentences": "42.04", "prob_perplexity": "214.063", "code_perplexity": "211.45", "temp": "0.5", "loss_0": "2.672", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54789", "wps": "17651.4", "ups": "5.47", "wpb": "3226.4", "bsz": "42", "num_updates": "367400", "lr": "8.25316e-06", "gnorm": "0.978", "loss_scale": "2", "train_wall": "36", "gb_free": "13.1", "wall": "69385"} [2023-11-02 12:51:20,348][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2023-11-02 12:51:55,354][train_inner][INFO] - {"epoch": 91, "update": 90.637, "loss": "2.66", "ntokens": "3188.8", "nsentences": "47.08", "prob_perplexity": "214.615", "code_perplexity": "212.027", "temp": "0.5", "loss_0": "2.559", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57175", "wps": "17312.7", "ups": "5.43", "wpb": "3188.8", "bsz": "47.1", "num_updates": "367600", "lr": "8.20253e-06", "gnorm": "0.962", "loss_scale": "1", "train_wall": "36", "gb_free": "15.1", "wall": "69422"} [2023-11-02 12:52:31,546][train_inner][INFO] - {"epoch": 91, "update": 90.686, "loss": "2.745", "ntokens": "3230.76", "nsentences": "43.4", "prob_perplexity": "214.309", "code_perplexity": "211.65", "temp": "0.5", "loss_0": "2.644", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55343", "wps": "17854.5", "ups": "5.53", "wpb": "3230.8", "bsz": "43.4", "num_updates": "367800", "lr": "8.1519e-06", "gnorm": "0.976", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "69458"} [2023-11-02 12:53:07,028][train_inner][INFO] - {"epoch": 91, "update": 90.735, "loss": "2.741", "ntokens": "3178.04", "nsentences": "43.08", "prob_perplexity": "214.168", "code_perplexity": "211.563", "temp": "0.5", "loss_0": "2.64", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55426", "wps": "17914.8", "ups": "5.64", "wpb": "3178", "bsz": "43.1", "num_updates": "368000", "lr": "8.10127e-06", "gnorm": "0.983", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "69493"} [2023-11-02 12:53:43,017][train_inner][INFO] - {"epoch": 91, "update": 90.785, "loss": "2.686", "ntokens": "3203.04", "nsentences": "46.04", "prob_perplexity": "213.405", "code_perplexity": "210.773", "temp": "0.5", "loss_0": "2.585", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56664", "wps": "17801.2", "ups": "5.56", "wpb": "3203", "bsz": "46", "num_updates": "368200", "lr": "8.05063e-06", "gnorm": "0.966", "loss_scale": "1", "train_wall": "35", "gb_free": "13.1", "wall": "69529"} [2023-11-02 12:54:19,037][train_inner][INFO] - {"epoch": 91, "update": 90.834, "loss": "2.671", "ntokens": "3167.84", "nsentences": "46.36", "prob_perplexity": "213.836", "code_perplexity": "211.176", "temp": "0.5", "loss_0": "2.57", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56971", "wps": "17590.4", "ups": "5.55", "wpb": "3167.8", "bsz": "46.4", "num_updates": "368400", "lr": "8e-06", "gnorm": "0.971", "loss_scale": "1", "train_wall": "35", "gb_free": "14.7", "wall": "69565"} [2023-11-02 12:54:55,401][train_inner][INFO] - {"epoch": 91, "update": 90.883, "loss": "2.683", "ntokens": "3168.52", "nsentences": "46.12", "prob_perplexity": "214.706", "code_perplexity": "212.099", "temp": "0.5", "loss_0": "2.583", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56749", "wps": "17427.6", "ups": "5.5", "wpb": "3168.5", "bsz": "46.1", "num_updates": "368600", "lr": "7.94937e-06", "gnorm": "0.965", "loss_scale": "1", "train_wall": "36", "gb_free": "15.3", "wall": "69602"} [2023-11-02 12:55:31,663][train_inner][INFO] - {"epoch": 91, "update": 90.933, "loss": "2.714", "ntokens": "3142.4", "nsentences": "43.6", "prob_perplexity": "213.374", "code_perplexity": "210.762", "temp": "0.5", "loss_0": "2.613", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56004", "wps": "17332.8", "ups": "5.52", "wpb": "3142.4", "bsz": "43.6", "num_updates": "368800", "lr": "7.89873e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "36", "gb_free": "15.7", "wall": "69638"} [2023-11-02 12:56:08,155][train_inner][INFO] - {"epoch": 91, "update": 90.982, "loss": "2.685", "ntokens": "3178.6", "nsentences": "45.16", "prob_perplexity": "213.764", "code_perplexity": "211.155", "temp": "0.5", "loss_0": "2.584", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56566", "wps": "17421.6", "ups": "5.48", "wpb": "3178.6", "bsz": "45.2", "num_updates": "369000", "lr": "7.8481e-06", "gnorm": "0.971", "loss_scale": "1", "train_wall": "36", "gb_free": "14.2", "wall": "69674"} [2023-11-02 12:56:21,299][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 12:56:21,301][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:56:21,319][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 106 [2023-11-02 12:56:47,033][valid][INFO] - {"epoch": 91, "valid_loss": "2.601", "valid_ntokens": "3164.7", "valid_nsentences": "44.1685", "valid_prob_perplexity": "213.929", "valid_code_perplexity": "211.438", "valid_temp": "0.5", "valid_loss_0": "2.5", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58314", "valid_wps": "55823.3", "valid_wpb": "3164.7", "valid_bsz": "44.2", "valid_num_updates": "369073", "valid_best_loss": "2.595"} [2023-11-02 12:56:47,036][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 91 @ 369073 updates [2023-11-02 12:56:47,037][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:56:48,475][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 12:56:48,543][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 91 @ 369073 updates, score 2.601) (writing took 1.5074184788390994 seconds) [2023-11-02 12:56:48,544][fairseq_cli.train][INFO] - end of epoch 91 (average epoch stats below) [2023-11-02 12:56:48,546][train][INFO] - {"epoch": 91, "train_loss": "2.719", "train_ntokens": "3190.67", "train_nsentences": "44.2732", "train_prob_perplexity": "213.917", "train_code_perplexity": "211.294", "train_temp": "0.5", "train_loss_0": "2.618", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.55938", "train_wps": "17028.1", "train_ups": "5.34", "train_wpb": "3190.7", "train_bsz": "44.3", "train_num_updates": "369073", "train_lr": "7.82962e-06", "train_gnorm": "0.975", "train_loss_scale": "1", "train_train_wall": "719", "train_gb_free": "16.6", "train_wall": "69715"} [2023-11-02 12:56:48,548][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 12:56:48,566][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 92 [2023-11-02 12:56:48,735][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 12:56:48,802][fairseq.trainer][INFO] - begin training epoch 92 [2023-11-02 12:56:48,803][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 12:57:11,833][train_inner][INFO] - {"epoch": 92, "update": 91.031, "loss": "2.754", "ntokens": "3170.64", "nsentences": "40.4", "prob_perplexity": "213.289", "code_perplexity": "210.645", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54997", "wps": "9958.6", "ups": "3.14", "wpb": "3170.6", "bsz": "40.4", "num_updates": "369200", "lr": "7.79747e-06", "gnorm": "0.99", "loss_scale": "1", "train_wall": "35", "gb_free": "16.8", "wall": "69738"} [2023-11-02 12:57:47,402][train_inner][INFO] - {"epoch": 92, "update": 91.081, "loss": "2.682", "ntokens": "3172.04", "nsentences": "45.6", "prob_perplexity": "213.97", "code_perplexity": "211.336", "temp": "0.5", "loss_0": "2.581", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56646", "wps": "17837.1", "ups": "5.62", "wpb": "3172", "bsz": "45.6", "num_updates": "369400", "lr": "7.74684e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "13.7", "wall": "69774"} [2023-11-02 12:58:23,288][train_inner][INFO] - {"epoch": 92, "update": 91.13, "loss": "2.744", "ntokens": "3230", "nsentences": "44.68", "prob_perplexity": "214.913", "code_perplexity": "212.35", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55472", "wps": "18002.6", "ups": "5.57", "wpb": "3230", "bsz": "44.7", "num_updates": "369600", "lr": "7.6962e-06", "gnorm": "0.965", "loss_scale": "1", "train_wall": "35", "gb_free": "15", "wall": "69810"} [2023-11-02 12:58:59,384][train_inner][INFO] - {"epoch": 92, "update": 91.179, "loss": "2.664", "ntokens": "3213.32", "nsentences": "46.04", "prob_perplexity": "214.165", "code_perplexity": "211.543", "temp": "0.5", "loss_0": "2.563", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56958", "wps": "17805.4", "ups": "5.54", "wpb": "3213.3", "bsz": "46", "num_updates": "369800", "lr": "7.64557e-06", "gnorm": "0.97", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "69846"} [2023-11-02 12:59:35,642][train_inner][INFO] - {"epoch": 92, "update": 91.229, "loss": "2.794", "ntokens": "3247.08", "nsentences": "42.44", "prob_perplexity": "214.311", "code_perplexity": "211.689", "temp": "0.5", "loss_0": "2.693", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54519", "wps": "17912.2", "ups": "5.52", "wpb": "3247.1", "bsz": "42.4", "num_updates": "370000", "lr": "7.59494e-06", "gnorm": "0.985", "loss_scale": "1", "train_wall": "36", "gb_free": "15.5", "wall": "69882"} [2023-11-02 13:00:12,467][train_inner][INFO] - {"epoch": 92, "update": 91.278, "loss": "2.705", "ntokens": "3213.6", "nsentences": "45.6", "prob_perplexity": "214.102", "code_perplexity": "211.556", "temp": "0.5", "loss_0": "2.605", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56223", "wps": "17454.3", "ups": "5.43", "wpb": "3213.6", "bsz": "45.6", "num_updates": "370200", "lr": "7.5443e-06", "gnorm": "0.978", "loss_scale": "1", "train_wall": "36", "gb_free": "14.1", "wall": "69919"} [2023-11-02 13:00:49,004][train_inner][INFO] - {"epoch": 92, "update": 91.327, "loss": "2.723", "ntokens": "3209.04", "nsentences": "44.24", "prob_perplexity": "214.425", "code_perplexity": "211.788", "temp": "0.5", "loss_0": "2.622", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55833", "wps": "17566.9", "ups": "5.47", "wpb": "3209", "bsz": "44.2", "num_updates": "370400", "lr": "7.49367e-06", "gnorm": "0.979", "loss_scale": "1", "train_wall": "36", "gb_free": "14.5", "wall": "69955"} [2023-11-02 13:01:25,117][train_inner][INFO] - {"epoch": 92, "update": 91.376, "loss": "2.756", "ntokens": "3208.24", "nsentences": "43.24", "prob_perplexity": "214.009", "code_perplexity": "211.419", "temp": "0.5", "loss_0": "2.655", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55183", "wps": "17768.6", "ups": "5.54", "wpb": "3208.2", "bsz": "43.2", "num_updates": "370600", "lr": "7.44304e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "35", "gb_free": "13.7", "wall": "69991"} [2023-11-02 13:02:01,510][train_inner][INFO] - {"epoch": 92, "update": 91.426, "loss": "2.714", "ntokens": "3214.28", "nsentences": "44.92", "prob_perplexity": "213.986", "code_perplexity": "211.391", "temp": "0.5", "loss_0": "2.613", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55933", "wps": "17665.3", "ups": "5.5", "wpb": "3214.3", "bsz": "44.9", "num_updates": "370800", "lr": "7.39241e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "36", "gb_free": "12.9", "wall": "70028"} [2023-11-02 13:02:38,199][train_inner][INFO] - {"epoch": 92, "update": 91.475, "loss": "2.656", "ntokens": "3156.2", "nsentences": "44.48", "prob_perplexity": "214.701", "code_perplexity": "212.009", "temp": "0.5", "loss_0": "2.555", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56865", "wps": "17206.5", "ups": "5.45", "wpb": "3156.2", "bsz": "44.5", "num_updates": "371000", "lr": "7.34177e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "36", "gb_free": "12.6", "wall": "70064"} [2023-11-02 13:03:14,646][train_inner][INFO] - {"epoch": 92, "update": 91.524, "loss": "2.728", "ntokens": "3193.44", "nsentences": "44.24", "prob_perplexity": "214.701", "code_perplexity": "212.049", "temp": "0.5", "loss_0": "2.627", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55764", "wps": "17524.9", "ups": "5.49", "wpb": "3193.4", "bsz": "44.2", "num_updates": "371200", "lr": "7.29114e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "70101"} [2023-11-02 13:03:50,946][train_inner][INFO] - {"epoch": 92, "update": 91.574, "loss": "2.744", "ntokens": "3207.04", "nsentences": "43.4", "prob_perplexity": "214.817", "code_perplexity": "212.201", "temp": "0.5", "loss_0": "2.643", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5542", "wps": "17670.8", "ups": "5.51", "wpb": "3207", "bsz": "43.4", "num_updates": "371400", "lr": "7.24051e-06", "gnorm": "0.969", "loss_scale": "1", "train_wall": "36", "gb_free": "12.4", "wall": "70137"} [2023-11-02 13:04:26,877][train_inner][INFO] - {"epoch": 92, "update": 91.623, "loss": "2.758", "ntokens": "3150.92", "nsentences": "41.44", "prob_perplexity": "212.511", "code_perplexity": "209.857", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55097", "wps": "17539.6", "ups": "5.57", "wpb": "3150.9", "bsz": "41.4", "num_updates": "371600", "lr": "7.18987e-06", "gnorm": "1", "loss_scale": "1", "train_wall": "35", "gb_free": "12.6", "wall": "70173"} [2023-11-02 13:05:03,823][train_inner][INFO] - {"epoch": 92, "update": 91.672, "loss": "2.778", "ntokens": "3206.6", "nsentences": "42.44", "prob_perplexity": "213.175", "code_perplexity": "210.523", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54745", "wps": "17359.1", "ups": "5.41", "wpb": "3206.6", "bsz": "42.4", "num_updates": "371800", "lr": "7.13924e-06", "gnorm": "0.98", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "70210"} [2023-11-02 13:05:40,473][train_inner][INFO] - {"epoch": 92, "update": 91.722, "loss": "2.761", "ntokens": "3243.44", "nsentences": "42.56", "prob_perplexity": "214.618", "code_perplexity": "212.01", "temp": "0.5", "loss_0": "2.661", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55029", "wps": "17700.6", "ups": "5.46", "wpb": "3243.4", "bsz": "42.6", "num_updates": "372000", "lr": "7.08861e-06", "gnorm": "0.972", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "70247"} [2023-11-02 13:06:16,981][train_inner][INFO] - {"epoch": 92, "update": 91.771, "loss": "2.682", "ntokens": "3198.8", "nsentences": "45.84", "prob_perplexity": "214.685", "code_perplexity": "212.025", "temp": "0.5", "loss_0": "2.581", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56706", "wps": "17524.7", "ups": "5.48", "wpb": "3198.8", "bsz": "45.8", "num_updates": "372200", "lr": "7.03797e-06", "gnorm": "0.98", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "70283"} [2023-11-02 13:06:53,494][train_inner][INFO] - {"epoch": 92, "update": 91.82, "loss": "2.7", "ntokens": "3208.56", "nsentences": "46.56", "prob_perplexity": "214.532", "code_perplexity": "211.935", "temp": "0.5", "loss_0": "2.599", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56371", "wps": "17575.9", "ups": "5.48", "wpb": "3208.6", "bsz": "46.6", "num_updates": "372400", "lr": "6.98734e-06", "gnorm": "0.974", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "70320"} [2023-11-02 13:07:29,931][train_inner][INFO] - {"epoch": 92, "update": 91.87, "loss": "2.695", "ntokens": "3188.64", "nsentences": "45.8", "prob_perplexity": "214.864", "code_perplexity": "212.32", "temp": "0.5", "loss_0": "2.594", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56434", "wps": "17503.4", "ups": "5.49", "wpb": "3188.6", "bsz": "45.8", "num_updates": "372600", "lr": "6.93671e-06", "gnorm": "0.975", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "70356"} [2023-11-02 13:08:05,962][train_inner][INFO] - {"epoch": 92, "update": 91.919, "loss": "2.745", "ntokens": "3208.84", "nsentences": "43.64", "prob_perplexity": "214.456", "code_perplexity": "211.792", "temp": "0.5", "loss_0": "2.644", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55369", "wps": "17812.6", "ups": "5.55", "wpb": "3208.8", "bsz": "43.6", "num_updates": "372800", "lr": "6.88608e-06", "gnorm": "0.974", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "70392"} [2023-11-02 13:08:41,904][train_inner][INFO] - {"epoch": 92, "update": 91.968, "loss": "2.659", "ntokens": "3175.16", "nsentences": "46.2", "prob_perplexity": "215.085", "code_perplexity": "212.416", "temp": "0.5", "loss_0": "2.558", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57115", "wps": "17668.9", "ups": "5.56", "wpb": "3175.2", "bsz": "46.2", "num_updates": "373000", "lr": "6.83544e-06", "gnorm": "0.975", "loss_scale": "1", "train_wall": "35", "gb_free": "14.3", "wall": "70428"} [2023-11-02 13:09:05,026][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 13:09:05,028][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:09:05,048][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 107 [2023-11-02 13:09:30,476][valid][INFO] - {"epoch": 92, "valid_loss": "2.604", "valid_ntokens": "3167.1", "valid_nsentences": "44.1685", "valid_prob_perplexity": "214.212", "valid_code_perplexity": "211.667", "valid_temp": "0.5", "valid_loss_0": "2.503", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58214", "valid_wps": "56506.5", "valid_wpb": "3167.1", "valid_bsz": "44.2", "valid_num_updates": "373129", "valid_best_loss": "2.595"} [2023-11-02 13:09:30,478][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 92 @ 373129 updates [2023-11-02 13:09:30,480][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:09:31,921][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:09:31,991][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 92 @ 373129 updates, score 2.604) (writing took 1.513679055031389 seconds) [2023-11-02 13:09:31,992][fairseq_cli.train][INFO] - end of epoch 92 (average epoch stats below) [2023-11-02 13:09:31,994][train][INFO] - {"epoch": 92, "train_loss": "2.722", "train_ntokens": "3199.65", "train_nsentences": "44.2682", "train_prob_perplexity": "214.29", "train_code_perplexity": "211.663", "train_temp": "0.5", "train_loss_0": "2.621", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.55852", "train_wps": "16998.9", "train_ups": "5.31", "train_wpb": "3199.6", "train_bsz": "44.3", "train_num_updates": "373129", "train_lr": "6.80278e-06", "train_gnorm": "0.978", "train_loss_scale": "1", "train_train_wall": "723", "train_gb_free": "13.8", "train_wall": "70478"} [2023-11-02 13:09:31,997][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:09:32,018][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 93 [2023-11-02 13:09:32,200][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 13:09:32,270][fairseq.trainer][INFO] - begin training epoch 93 [2023-11-02 13:09:32,271][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 13:09:45,105][train_inner][INFO] - {"epoch": 93, "update": 92.018, "loss": "2.692", "ntokens": "3132.88", "nsentences": "45.12", "prob_perplexity": "213.768", "code_perplexity": "211.035", "temp": "0.5", "loss_0": "2.591", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56568", "wps": "9914.5", "ups": "3.16", "wpb": "3132.9", "bsz": "45.1", "num_updates": "373200", "lr": "6.78481e-06", "gnorm": "0.988", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "70491"} [2023-11-02 13:10:21,110][train_inner][INFO] - {"epoch": 93, "update": 92.067, "loss": "2.687", "ntokens": "3154.24", "nsentences": "44.12", "prob_perplexity": "214.725", "code_perplexity": "212.017", "temp": "0.5", "loss_0": "2.587", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56453", "wps": "17522.4", "ups": "5.56", "wpb": "3154.2", "bsz": "44.1", "num_updates": "373400", "lr": "6.73418e-06", "gnorm": "0.974", "loss_scale": "1", "train_wall": "35", "gb_free": "12.5", "wall": "70527"} [2023-11-02 13:10:56,873][train_inner][INFO] - {"epoch": 93, "update": 92.116, "loss": "2.7", "ntokens": "3181.68", "nsentences": "45.8", "prob_perplexity": "214.243", "code_perplexity": "211.639", "temp": "0.5", "loss_0": "2.599", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56473", "wps": "17793.6", "ups": "5.59", "wpb": "3181.7", "bsz": "45.8", "num_updates": "373600", "lr": "6.68354e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "35", "gb_free": "13.7", "wall": "70563"} [2023-11-02 13:11:32,898][train_inner][INFO] - {"epoch": 93, "update": 92.165, "loss": "2.706", "ntokens": "3205.4", "nsentences": "44.96", "prob_perplexity": "213.889", "code_perplexity": "211.193", "temp": "0.5", "loss_0": "2.605", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56239", "wps": "17796.9", "ups": "5.55", "wpb": "3205.4", "bsz": "45", "num_updates": "373800", "lr": "6.63291e-06", "gnorm": "0.999", "loss_scale": "1", "train_wall": "35", "gb_free": "15.3", "wall": "70599"} [2023-11-02 13:12:09,136][train_inner][INFO] - {"epoch": 93, "update": 92.215, "loss": "2.759", "ntokens": "3233.72", "nsentences": "43.04", "prob_perplexity": "215.101", "code_perplexity": "212.456", "temp": "0.5", "loss_0": "2.659", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55202", "wps": "17856.9", "ups": "5.52", "wpb": "3233.7", "bsz": "43", "num_updates": "374000", "lr": "6.58228e-06", "gnorm": "0.973", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "70635"} [2023-11-02 13:12:44,479][train_inner][INFO] - {"epoch": 93, "update": 92.264, "loss": "2.705", "ntokens": "3163.8", "nsentences": "45.04", "prob_perplexity": "213.209", "code_perplexity": "210.507", "temp": "0.5", "loss_0": "2.604", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56301", "wps": "17904.3", "ups": "5.66", "wpb": "3163.8", "bsz": "45", "num_updates": "374200", "lr": "6.53165e-06", "gnorm": "0.983", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "70671"} [2023-11-02 13:13:20,988][train_inner][INFO] - {"epoch": 93, "update": 92.313, "loss": "2.778", "ntokens": "3255", "nsentences": "42.64", "prob_perplexity": "215.062", "code_perplexity": "212.51", "temp": "0.5", "loss_0": "2.677", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54725", "wps": "17832.2", "ups": "5.48", "wpb": "3255", "bsz": "42.6", "num_updates": "374400", "lr": "6.48101e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "36", "gb_free": "14.2", "wall": "70707"} [2023-11-02 13:13:56,917][train_inner][INFO] - {"epoch": 93, "update": 92.363, "loss": "2.788", "ntokens": "3225.04", "nsentences": "43.08", "prob_perplexity": "213.931", "code_perplexity": "211.305", "temp": "0.5", "loss_0": "2.687", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5464", "wps": "17953.3", "ups": "5.57", "wpb": "3225", "bsz": "43.1", "num_updates": "374600", "lr": "6.43038e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "70743"} [2023-11-02 13:14:33,218][train_inner][INFO] - {"epoch": 93, "update": 92.412, "loss": "2.766", "ntokens": "3206.24", "nsentences": "41.84", "prob_perplexity": "213.366", "code_perplexity": "210.731", "temp": "0.5", "loss_0": "2.665", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54854", "wps": "17666.4", "ups": "5.51", "wpb": "3206.2", "bsz": "41.8", "num_updates": "374800", "lr": "6.37975e-06", "gnorm": "0.993", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "70779"} [2023-11-02 13:15:09,539][train_inner][INFO] - {"epoch": 93, "update": 92.461, "loss": "2.75", "ntokens": "3185.48", "nsentences": "41.84", "prob_perplexity": "213.838", "code_perplexity": "211.176", "temp": "0.5", "loss_0": "2.649", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55237", "wps": "17541.8", "ups": "5.51", "wpb": "3185.5", "bsz": "41.8", "num_updates": "375000", "lr": "6.32911e-06", "gnorm": "0.978", "loss_scale": "1", "train_wall": "36", "gb_free": "14.1", "wall": "70816"} [2023-11-02 13:15:09,540][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 13:15:09,542][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:15:09,560][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 108 [2023-11-02 13:15:35,084][valid][INFO] - {"epoch": 93, "valid_loss": "2.572", "valid_ntokens": "3149.43", "valid_nsentences": "44.1685", "valid_prob_perplexity": "213.123", "valid_code_perplexity": "210.567", "valid_temp": "0.5", "valid_loss_0": "2.471", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58803", "valid_wps": "55990.6", "valid_wpb": "3149.4", "valid_bsz": "44.2", "valid_num_updates": "375000", "valid_best_loss": "2.572"} [2023-11-02 13:15:35,086][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 93 @ 375000 updates [2023-11-02 13:15:35,088][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_93_375000.pt [2023-11-02 13:15:36,429][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_93_375000.pt [2023-11-02 13:15:38,438][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_93_375000.pt (epoch 93 @ 375000 updates, score 2.572) (writing took 3.3511077710427344 seconds) [2023-11-02 13:16:14,490][train_inner][INFO] - {"epoch": 93, "update": 92.511, "loss": "2.72", "ntokens": "3178.08", "nsentences": "43.68", "prob_perplexity": "213.436", "code_perplexity": "210.816", "temp": "0.5", "loss_0": "2.619", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55853", "wps": "9786.4", "ups": "3.08", "wpb": "3178.1", "bsz": "43.7", "num_updates": "375200", "lr": "6.27848e-06", "gnorm": "0.985", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "70881"} [2023-11-02 13:16:50,556][train_inner][INFO] - {"epoch": 93, "update": 92.56, "loss": "2.705", "ntokens": "3203.6", "nsentences": "44.16", "prob_perplexity": "213.987", "code_perplexity": "211.345", "temp": "0.5", "loss_0": "2.604", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5618", "wps": "17765.9", "ups": "5.55", "wpb": "3203.6", "bsz": "44.2", "num_updates": "375400", "lr": "6.22785e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "15", "wall": "70917"} [2023-11-02 13:17:26,959][train_inner][INFO] - {"epoch": 93, "update": 92.609, "loss": "2.686", "ntokens": "3174.96", "nsentences": "45.72", "prob_perplexity": "214.868", "code_perplexity": "212.269", "temp": "0.5", "loss_0": "2.585", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56649", "wps": "17444.5", "ups": "5.49", "wpb": "3175", "bsz": "45.7", "num_updates": "375600", "lr": "6.17722e-06", "gnorm": "0.975", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "70953"} [2023-11-02 13:18:02,806][train_inner][INFO] - {"epoch": 93, "update": 92.659, "loss": "2.669", "ntokens": "3174.88", "nsentences": "46.04", "prob_perplexity": "214.345", "code_perplexity": "211.71", "temp": "0.5", "loss_0": "2.568", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56951", "wps": "17714.4", "ups": "5.58", "wpb": "3174.9", "bsz": "46", "num_updates": "375800", "lr": "6.12658e-06", "gnorm": "0.976", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "70989"} [2023-11-02 13:18:39,164][train_inner][INFO] - {"epoch": 93, "update": 92.708, "loss": "2.728", "ntokens": "3222.32", "nsentences": "44.04", "prob_perplexity": "214.025", "code_perplexity": "211.419", "temp": "0.5", "loss_0": "2.628", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55716", "wps": "17726.5", "ups": "5.5", "wpb": "3222.3", "bsz": "44", "num_updates": "376000", "lr": "6.07595e-06", "gnorm": "0.978", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "71025"} [2023-11-02 13:19:15,177][train_inner][INFO] - {"epoch": 93, "update": 92.757, "loss": "2.654", "ntokens": "3149.96", "nsentences": "46.2", "prob_perplexity": "213.291", "code_perplexity": "210.608", "temp": "0.5", "loss_0": "2.553", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57243", "wps": "17494.6", "ups": "5.55", "wpb": "3150", "bsz": "46.2", "num_updates": "376200", "lr": "6.02532e-06", "gnorm": "0.989", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "71061"} [2023-11-02 13:19:51,089][train_inner][INFO] - {"epoch": 93, "update": 92.806, "loss": "2.759", "ntokens": "3256.56", "nsentences": "42.08", "prob_perplexity": "214.782", "code_perplexity": "212.179", "temp": "0.5", "loss_0": "2.658", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54967", "wps": "18137.4", "ups": "5.57", "wpb": "3256.6", "bsz": "42.1", "num_updates": "376400", "lr": "5.97468e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "35", "gb_free": "13.7", "wall": "71097"} [2023-11-02 13:20:27,054][train_inner][INFO] - {"epoch": 93, "update": 92.856, "loss": "2.717", "ntokens": "3219.48", "nsentences": "45.24", "prob_perplexity": "214.975", "code_perplexity": "212.292", "temp": "0.5", "loss_0": "2.617", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55947", "wps": "17904.3", "ups": "5.56", "wpb": "3219.5", "bsz": "45.2", "num_updates": "376600", "lr": "5.92405e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "35", "gb_free": "14", "wall": "71133"} [2023-11-02 13:21:03,307][train_inner][INFO] - {"epoch": 93, "update": 92.905, "loss": "2.717", "ntokens": "3179.6", "nsentences": "43.52", "prob_perplexity": "214.998", "code_perplexity": "212.319", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5585", "wps": "17542.5", "ups": "5.52", "wpb": "3179.6", "bsz": "43.5", "num_updates": "376800", "lr": "5.87342e-06", "gnorm": "0.986", "loss_scale": "1", "train_wall": "36", "gb_free": "13.9", "wall": "71170"} [2023-11-02 13:21:39,250][train_inner][INFO] - {"epoch": 93, "update": 92.954, "loss": "2.686", "ntokens": "3153.84", "nsentences": "45.56", "prob_perplexity": "213.384", "code_perplexity": "210.763", "temp": "0.5", "loss_0": "2.585", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56626", "wps": "17550.1", "ups": "5.56", "wpb": "3153.8", "bsz": "45.6", "num_updates": "377000", "lr": "5.82278e-06", "gnorm": "0.989", "loss_scale": "1", "train_wall": "35", "gb_free": "12.2", "wall": "71205"} [2023-11-02 13:22:12,396][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 13:22:12,398][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:22:12,415][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 109 [2023-11-02 13:22:38,175][valid][INFO] - {"epoch": 93, "valid_loss": "2.578", "valid_ntokens": "3156.27", "valid_nsentences": "44.1685", "valid_prob_perplexity": "213.966", "valid_code_perplexity": "211.479", "valid_temp": "0.5", "valid_loss_0": "2.478", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.5863", "valid_wps": "55630", "valid_wpb": "3156.3", "valid_bsz": "44.2", "valid_num_updates": "377185", "valid_best_loss": "2.572"} [2023-11-02 13:22:38,177][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 93 @ 377185 updates [2023-11-02 13:22:38,179][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:22:39,577][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:22:39,622][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 93 @ 377185 updates, score 2.578) (writing took 1.444242476951331 seconds) [2023-11-02 13:22:39,622][fairseq_cli.train][INFO] - end of epoch 93 (average epoch stats below) [2023-11-02 13:22:39,624][train][INFO] - {"epoch": 93, "train_loss": "2.715", "train_ntokens": "3194.11", "train_nsentences": "44.2682", "train_prob_perplexity": "214.218", "train_code_perplexity": "211.575", "train_temp": "0.5", "train_loss_0": "2.614", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.55996", "train_wps": "16448.5", "train_ups": "5.15", "train_wpb": "3194.1", "train_bsz": "44.3", "train_num_updates": "377185", "train_lr": "5.77595e-06", "train_gnorm": "0.982", "train_loss_scale": "1", "train_train_wall": "718", "train_gb_free": "13.7", "train_wall": "71266"} [2023-11-02 13:22:39,627][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:22:39,647][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 94 [2023-11-02 13:22:39,815][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 13:22:39,886][fairseq.trainer][INFO] - begin training epoch 94 [2023-11-02 13:22:39,887][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 13:22:42,718][train_inner][INFO] - {"epoch": 94, "update": 93.004, "loss": "2.636", "ntokens": "3189.8", "nsentences": "46.6", "prob_perplexity": "214.959", "code_perplexity": "212.337", "temp": "0.5", "loss_0": "2.536", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57512", "wps": "10052", "ups": "3.15", "wpb": "3189.8", "bsz": "46.6", "num_updates": "377200", "lr": "5.77215e-06", "gnorm": "0.962", "loss_scale": "1", "train_wall": "35", "gb_free": "15.4", "wall": "71269"} [2023-11-02 13:23:18,237][train_inner][INFO] - {"epoch": 94, "update": 93.053, "loss": "2.739", "ntokens": "3216.6", "nsentences": "43.16", "prob_perplexity": "215.04", "code_perplexity": "212.38", "temp": "0.5", "loss_0": "2.639", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55301", "wps": "18113.3", "ups": "5.63", "wpb": "3216.6", "bsz": "43.2", "num_updates": "377400", "lr": "5.72152e-06", "gnorm": "1.011", "loss_scale": "1", "train_wall": "35", "gb_free": "14.2", "wall": "71304"} [2023-11-02 13:23:54,035][train_inner][INFO] - {"epoch": 94, "update": 93.102, "loss": "2.703", "ntokens": "3198.96", "nsentences": "45.84", "prob_perplexity": "214.842", "code_perplexity": "212.223", "temp": "0.5", "loss_0": "2.603", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56333", "wps": "17873.4", "ups": "5.59", "wpb": "3199", "bsz": "45.8", "num_updates": "377600", "lr": "5.67089e-06", "gnorm": "0.97", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "71340"} [2023-11-02 13:24:29,929][train_inner][INFO] - {"epoch": 94, "update": 93.152, "loss": "2.69", "ntokens": "3166.04", "nsentences": "43.52", "prob_perplexity": "215.114", "code_perplexity": "212.533", "temp": "0.5", "loss_0": "2.589", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56345", "wps": "17642.1", "ups": "5.57", "wpb": "3166", "bsz": "43.5", "num_updates": "377800", "lr": "5.62025e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "71376"} [2023-11-02 13:25:05,735][train_inner][INFO] - {"epoch": 94, "update": 93.201, "loss": "2.769", "ntokens": "3220.24", "nsentences": "42.92", "prob_perplexity": "214.877", "code_perplexity": "212.249", "temp": "0.5", "loss_0": "2.669", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54957", "wps": "17988.1", "ups": "5.59", "wpb": "3220.2", "bsz": "42.9", "num_updates": "378000", "lr": "5.56962e-06", "gnorm": "0.979", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "71412"} [2023-11-02 13:25:41,621][train_inner][INFO] - {"epoch": 94, "update": 93.25, "loss": "2.727", "ntokens": "3166.92", "nsentences": "43.4", "prob_perplexity": "213.812", "code_perplexity": "211.207", "temp": "0.5", "loss_0": "2.627", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55724", "wps": "17658.5", "ups": "5.58", "wpb": "3166.9", "bsz": "43.4", "num_updates": "378200", "lr": "5.51899e-06", "gnorm": "0.992", "loss_scale": "1", "train_wall": "35", "gb_free": "14.3", "wall": "71448"} [2023-11-02 13:26:17,731][train_inner][INFO] - {"epoch": 94, "update": 93.3, "loss": "2.729", "ntokens": "3190.24", "nsentences": "43.68", "prob_perplexity": "215.021", "code_perplexity": "212.364", "temp": "0.5", "loss_0": "2.629", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55689", "wps": "17670.7", "ups": "5.54", "wpb": "3190.2", "bsz": "43.7", "num_updates": "378400", "lr": "5.46835e-06", "gnorm": "0.983", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "71484"} [2023-11-02 13:26:54,027][train_inner][INFO] - {"epoch": 94, "update": 93.349, "loss": "2.723", "ntokens": "3231", "nsentences": "43.64", "prob_perplexity": "215.683", "code_perplexity": "213.06", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55665", "wps": "17804.8", "ups": "5.51", "wpb": "3231", "bsz": "43.6", "num_updates": "378600", "lr": "5.41772e-06", "gnorm": "0.986", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "71520"} [2023-11-02 13:27:30,165][train_inner][INFO] - {"epoch": 94, "update": 93.398, "loss": "2.67", "ntokens": "3207.4", "nsentences": "45.08", "prob_perplexity": "215.342", "code_perplexity": "212.756", "temp": "0.5", "loss_0": "2.57", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56754", "wps": "17751.9", "ups": "5.53", "wpb": "3207.4", "bsz": "45.1", "num_updates": "378800", "lr": "5.36709e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "71556"} [2023-11-02 13:28:06,631][train_inner][INFO] - {"epoch": 94, "update": 93.447, "loss": "2.758", "ntokens": "3222.2", "nsentences": "42.56", "prob_perplexity": "214.493", "code_perplexity": "211.862", "temp": "0.5", "loss_0": "2.657", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5511", "wps": "17673.3", "ups": "5.48", "wpb": "3222.2", "bsz": "42.6", "num_updates": "379000", "lr": "5.31646e-06", "gnorm": "0.985", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "71593"} [2023-11-02 13:28:42,652][train_inner][INFO] - {"epoch": 94, "update": 93.497, "loss": "2.681", "ntokens": "3172.92", "nsentences": "44.08", "prob_perplexity": "214.74", "code_perplexity": "212.061", "temp": "0.5", "loss_0": "2.58", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56516", "wps": "17618.3", "ups": "5.55", "wpb": "3172.9", "bsz": "44.1", "num_updates": "379200", "lr": "5.26582e-06", "gnorm": "0.976", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "71629"} [2023-11-02 13:29:18,594][train_inner][INFO] - {"epoch": 94, "update": 93.546, "loss": "2.703", "ntokens": "3189.84", "nsentences": "44.4", "prob_perplexity": "214.149", "code_perplexity": "211.543", "temp": "0.5", "loss_0": "2.603", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56103", "wps": "17751.1", "ups": "5.56", "wpb": "3189.8", "bsz": "44.4", "num_updates": "379400", "lr": "5.21519e-06", "gnorm": "0.99", "loss_scale": "1", "train_wall": "35", "gb_free": "14.4", "wall": "71665"} [2023-11-02 13:29:54,945][train_inner][INFO] - {"epoch": 94, "update": 93.595, "loss": "2.644", "ntokens": "3140.32", "nsentences": "45.24", "prob_perplexity": "213.935", "code_perplexity": "211.29", "temp": "0.5", "loss_0": "2.543", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57249", "wps": "17278.6", "ups": "5.5", "wpb": "3140.3", "bsz": "45.2", "num_updates": "379600", "lr": "5.16456e-06", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "71701"} [2023-11-02 13:30:31,248][train_inner][INFO] - {"epoch": 94, "update": 93.645, "loss": "2.711", "ntokens": "3213.56", "nsentences": "44.04", "prob_perplexity": "214.862", "code_perplexity": "212.207", "temp": "0.5", "loss_0": "2.61", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56036", "wps": "17705.4", "ups": "5.51", "wpb": "3213.6", "bsz": "44", "num_updates": "379800", "lr": "5.11392e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "71737"} [2023-11-02 13:31:07,894][train_inner][INFO] - {"epoch": 94, "update": 93.694, "loss": "2.701", "ntokens": "3194.56", "nsentences": "43.64", "prob_perplexity": "215.199", "code_perplexity": "212.607", "temp": "0.5", "loss_0": "2.601", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56118", "wps": "17435.6", "ups": "5.46", "wpb": "3194.6", "bsz": "43.6", "num_updates": "380000", "lr": "5.06329e-06", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "12.5", "wall": "71774"} [2023-11-02 13:31:44,275][train_inner][INFO] - {"epoch": 94, "update": 93.743, "loss": "2.675", "ntokens": "3189.44", "nsentences": "44.32", "prob_perplexity": "214.081", "code_perplexity": "211.468", "temp": "0.5", "loss_0": "2.574", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56675", "wps": "17534.4", "ups": "5.5", "wpb": "3189.4", "bsz": "44.3", "num_updates": "380200", "lr": "5.01266e-06", "gnorm": "0.979", "loss_scale": "1", "train_wall": "36", "gb_free": "16.3", "wall": "71811"} [2023-11-02 13:32:20,846][train_inner][INFO] - {"epoch": 94, "update": 93.793, "loss": "2.673", "ntokens": "3172.24", "nsentences": "46.04", "prob_perplexity": "214.407", "code_perplexity": "211.792", "temp": "0.5", "loss_0": "2.573", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56852", "wps": "17349.2", "ups": "5.47", "wpb": "3172.2", "bsz": "46", "num_updates": "380400", "lr": "4.96203e-06", "gnorm": "0.983", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "71847"} [2023-11-02 13:32:57,081][train_inner][INFO] - {"epoch": 94, "update": 93.842, "loss": "2.726", "ntokens": "3207.96", "nsentences": "43.92", "prob_perplexity": "214.252", "code_perplexity": "211.66", "temp": "0.5", "loss_0": "2.625", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55776", "wps": "17708", "ups": "5.52", "wpb": "3208", "bsz": "43.9", "num_updates": "380600", "lr": "4.91139e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "36", "gb_free": "14.4", "wall": "71883"} [2023-11-02 13:33:33,344][train_inner][INFO] - {"epoch": 94, "update": 93.891, "loss": "2.709", "ntokens": "3192", "nsentences": "43.4", "prob_perplexity": "213.424", "code_perplexity": "210.823", "temp": "0.5", "loss_0": "2.608", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55951", "wps": "17605.7", "ups": "5.52", "wpb": "3192", "bsz": "43.4", "num_updates": "380800", "lr": "4.86076e-06", "gnorm": "0.996", "loss_scale": "1", "train_wall": "36", "gb_free": "13.2", "wall": "71920"} [2023-11-02 13:34:09,542][train_inner][INFO] - {"epoch": 94, "update": 93.941, "loss": "2.696", "ntokens": "3195.24", "nsentences": "45.44", "prob_perplexity": "214.664", "code_perplexity": "212.035", "temp": "0.5", "loss_0": "2.596", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56454", "wps": "17655.3", "ups": "5.53", "wpb": "3195.2", "bsz": "45.4", "num_updates": "381000", "lr": "4.81013e-06", "gnorm": "0.989", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "71956"} [2023-11-02 13:34:45,833][train_inner][INFO] - {"epoch": 94, "update": 93.99, "loss": "2.692", "ntokens": "3163.44", "nsentences": "45.24", "prob_perplexity": "214.408", "code_perplexity": "211.77", "temp": "0.5", "loss_0": "2.592", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56427", "wps": "17434.5", "ups": "5.51", "wpb": "3163.4", "bsz": "45.2", "num_updates": "381200", "lr": "4.75949e-06", "gnorm": "0.99", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "71992"} [2023-11-02 13:34:53,117][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 13:34:53,119][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:34:53,135][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 110 [2023-11-02 13:35:19,024][valid][INFO] - {"epoch": 94, "valid_loss": "2.614", "valid_ntokens": "3176.04", "valid_nsentences": "44.1685", "valid_prob_perplexity": "213.43", "valid_code_perplexity": "210.909", "valid_temp": "0.5", "valid_loss_0": "2.513", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58068", "valid_wps": "55722.1", "valid_wpb": "3176", "valid_bsz": "44.2", "valid_num_updates": "381241", "valid_best_loss": "2.572"} [2023-11-02 13:35:19,026][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 94 @ 381241 updates [2023-11-02 13:35:19,028][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:35:20,463][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:35:20,512][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 94 @ 381241 updates, score 2.614) (writing took 1.4865721073001623 seconds) [2023-11-02 13:35:20,513][fairseq_cli.train][INFO] - end of epoch 94 (average epoch stats below) [2023-11-02 13:35:20,515][train][INFO] - {"epoch": 94, "train_loss": "2.705", "train_ntokens": "3191.95", "train_nsentences": "44.2682", "train_prob_perplexity": "214.61", "train_code_perplexity": "211.988", "train_temp": "0.5", "train_loss_0": "2.604", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.56127", "train_wps": "17015", "train_ups": "5.33", "train_wpb": "3191.9", "train_bsz": "44.3", "train_num_updates": "381241", "train_lr": "4.74911e-06", "train_gnorm": "0.985", "train_loss_scale": "1", "train_train_wall": "720", "train_gb_free": "13.1", "train_wall": "72027"} [2023-11-02 13:35:20,518][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:35:20,539][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 95 [2023-11-02 13:35:20,713][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 13:35:20,781][fairseq.trainer][INFO] - begin training epoch 95 [2023-11-02 13:35:20,782][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 13:35:49,794][train_inner][INFO] - {"epoch": 95, "update": 94.039, "loss": "2.756", "ntokens": "3211.16", "nsentences": "44.16", "prob_perplexity": "214.357", "code_perplexity": "211.682", "temp": "0.5", "loss_0": "2.656", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55251", "wps": "10041.4", "ups": "3.13", "wpb": "3211.2", "bsz": "44.2", "num_updates": "381400", "lr": "4.70886e-06", "gnorm": "0.996", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "72056"} [2023-11-02 13:36:25,356][train_inner][INFO] - {"epoch": 95, "update": 94.089, "loss": "2.704", "ntokens": "3193.72", "nsentences": "43.8", "prob_perplexity": "214.467", "code_perplexity": "211.805", "temp": "0.5", "loss_0": "2.603", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56031", "wps": "17962.4", "ups": "5.62", "wpb": "3193.7", "bsz": "43.8", "num_updates": "381600", "lr": "4.65823e-06", "gnorm": "0.993", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "72092"} [2023-11-02 13:37:01,608][train_inner][INFO] - {"epoch": 95, "update": 94.138, "loss": "2.646", "ntokens": "3158.36", "nsentences": "45.72", "prob_perplexity": "214.893", "code_perplexity": "212.28", "temp": "0.5", "loss_0": "2.545", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.573", "wps": "17425.3", "ups": "5.52", "wpb": "3158.4", "bsz": "45.7", "num_updates": "381800", "lr": "4.60759e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "72128"} [2023-11-02 13:37:37,761][train_inner][INFO] - {"epoch": 95, "update": 94.187, "loss": "2.733", "ntokens": "3225.48", "nsentences": "44.16", "prob_perplexity": "215.054", "code_perplexity": "212.477", "temp": "0.5", "loss_0": "2.633", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55607", "wps": "17844.9", "ups": "5.53", "wpb": "3225.5", "bsz": "44.2", "num_updates": "382000", "lr": "4.55696e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "72164"} [2023-11-02 13:38:13,829][train_inner][INFO] - {"epoch": 95, "update": 94.236, "loss": "2.727", "ntokens": "3227.56", "nsentences": "44", "prob_perplexity": "214.762", "code_perplexity": "212.128", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55662", "wps": "17898", "ups": "5.55", "wpb": "3227.6", "bsz": "44", "num_updates": "382200", "lr": "4.50633e-06", "gnorm": "0.987", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "72200"} [2023-11-02 13:38:49,545][train_inner][INFO] - {"epoch": 95, "update": 94.286, "loss": "2.687", "ntokens": "3199.72", "nsentences": "44.52", "prob_perplexity": "215.401", "code_perplexity": "212.775", "temp": "0.5", "loss_0": "2.587", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56448", "wps": "17927.1", "ups": "5.6", "wpb": "3199.7", "bsz": "44.5", "num_updates": "382400", "lr": "4.4557e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "14.1", "wall": "72236"} [2023-11-02 13:39:25,862][train_inner][INFO] - {"epoch": 95, "update": 94.335, "loss": "2.63", "ntokens": "3174.88", "nsentences": "47.72", "prob_perplexity": "214.912", "code_perplexity": "212.248", "temp": "0.5", "loss_0": "2.53", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57772", "wps": "17485.3", "ups": "5.51", "wpb": "3174.9", "bsz": "47.7", "num_updates": "382600", "lr": "4.40506e-06", "gnorm": "0.979", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "72272"} [2023-11-02 13:40:01,936][train_inner][INFO] - {"epoch": 95, "update": 94.384, "loss": "2.718", "ntokens": "3200.12", "nsentences": "43.68", "prob_perplexity": "215.299", "code_perplexity": "212.697", "temp": "0.5", "loss_0": "2.618", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55806", "wps": "17743.4", "ups": "5.54", "wpb": "3200.1", "bsz": "43.7", "num_updates": "382800", "lr": "4.35443e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "35", "gb_free": "15.2", "wall": "72308"} [2023-11-02 13:40:38,265][train_inner][INFO] - {"epoch": 95, "update": 94.434, "loss": "2.672", "ntokens": "3189.56", "nsentences": "46", "prob_perplexity": "214.206", "code_perplexity": "211.565", "temp": "0.5", "loss_0": "2.572", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56946", "wps": "17560", "ups": "5.51", "wpb": "3189.6", "bsz": "46", "num_updates": "383000", "lr": "4.3038e-06", "gnorm": "0.976", "loss_scale": "1", "train_wall": "36", "gb_free": "14.5", "wall": "72344"} [2023-11-02 13:41:14,953][train_inner][INFO] - {"epoch": 95, "update": 94.483, "loss": "2.699", "ntokens": "3188.56", "nsentences": "45.64", "prob_perplexity": "214.448", "code_perplexity": "211.813", "temp": "0.5", "loss_0": "2.598", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56413", "wps": "17383.4", "ups": "5.45", "wpb": "3188.6", "bsz": "45.6", "num_updates": "383200", "lr": "4.25316e-06", "gnorm": "0.983", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "72381"} [2023-11-02 13:41:51,110][train_inner][INFO] - {"epoch": 95, "update": 94.532, "loss": "2.708", "ntokens": "3184.12", "nsentences": "44.6", "prob_perplexity": "214.294", "code_perplexity": "211.658", "temp": "0.5", "loss_0": "2.608", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56104", "wps": "17613.7", "ups": "5.53", "wpb": "3184.1", "bsz": "44.6", "num_updates": "383400", "lr": "4.20253e-06", "gnorm": "0.989", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "72417"} [2023-11-02 13:42:27,746][train_inner][INFO] - {"epoch": 95, "update": 94.582, "loss": "2.701", "ntokens": "3164.96", "nsentences": "43.52", "prob_perplexity": "214.629", "code_perplexity": "211.938", "temp": "0.5", "loss_0": "2.6", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56173", "wps": "17278.6", "ups": "5.46", "wpb": "3165", "bsz": "43.5", "num_updates": "383600", "lr": "4.1519e-06", "gnorm": "0.986", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "72454"} [2023-11-02 13:43:04,376][train_inner][INFO] - {"epoch": 95, "update": 94.631, "loss": "2.671", "ntokens": "3187.56", "nsentences": "43.96", "prob_perplexity": "214.448", "code_perplexity": "211.814", "temp": "0.5", "loss_0": "2.571", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56631", "wps": "17405.2", "ups": "5.46", "wpb": "3187.6", "bsz": "44", "num_updates": "383800", "lr": "4.10127e-06", "gnorm": "0.987", "loss_scale": "2", "train_wall": "36", "gb_free": "14.3", "wall": "72491"} [2023-11-02 13:43:41,057][train_inner][INFO] - {"epoch": 95, "update": 94.68, "loss": "2.737", "ntokens": "3205.52", "nsentences": "42.72", "prob_perplexity": "214.796", "code_perplexity": "212.179", "temp": "0.5", "loss_0": "2.637", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55406", "wps": "17478.9", "ups": "5.45", "wpb": "3205.5", "bsz": "42.7", "num_updates": "384000", "lr": "4.05063e-06", "gnorm": "0.999", "loss_scale": "2", "train_wall": "36", "gb_free": "13.8", "wall": "72527"} [2023-11-02 13:44:18,019][train_inner][INFO] - {"epoch": 95, "update": 94.73, "loss": "2.755", "ntokens": "3235.16", "nsentences": "44", "prob_perplexity": "214.878", "code_perplexity": "212.288", "temp": "0.5", "loss_0": "2.655", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55257", "wps": "17506.5", "ups": "5.41", "wpb": "3235.2", "bsz": "44", "num_updates": "384200", "lr": "4e-06", "gnorm": "0.982", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "72564"} [2023-11-02 13:44:54,055][train_inner][INFO] - {"epoch": 95, "update": 94.779, "loss": "2.65", "ntokens": "3160.88", "nsentences": "47.36", "prob_perplexity": "215.03", "code_perplexity": "212.367", "temp": "0.5", "loss_0": "2.549", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57362", "wps": "17543.7", "ups": "5.55", "wpb": "3160.9", "bsz": "47.4", "num_updates": "384400", "lr": "3.94937e-06", "gnorm": "0.984", "loss_scale": "2", "train_wall": "35", "gb_free": "12.9", "wall": "72600"} [2023-11-02 13:45:31,288][train_inner][INFO] - {"epoch": 95, "update": 94.828, "loss": "2.725", "ntokens": "3206.92", "nsentences": "43.84", "prob_perplexity": "213.799", "code_perplexity": "211.144", "temp": "0.5", "loss_0": "2.624", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55583", "wps": "17227.4", "ups": "5.37", "wpb": "3206.9", "bsz": "43.8", "num_updates": "384600", "lr": "3.89873e-06", "gnorm": "0.992", "loss_scale": "2", "train_wall": "37", "gb_free": "12.6", "wall": "72638"} [2023-11-02 13:46:07,749][train_inner][INFO] - {"epoch": 95, "update": 94.877, "loss": "2.716", "ntokens": "3186.92", "nsentences": "42.84", "prob_perplexity": "214.852", "code_perplexity": "212.203", "temp": "0.5", "loss_0": "2.615", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55786", "wps": "17482", "ups": "5.49", "wpb": "3186.9", "bsz": "42.8", "num_updates": "384800", "lr": "3.8481e-06", "gnorm": "0.98", "loss_scale": "2", "train_wall": "36", "gb_free": "13.7", "wall": "72674"} [2023-11-02 13:46:44,407][train_inner][INFO] - {"epoch": 95, "update": 94.927, "loss": "2.727", "ntokens": "3210.64", "nsentences": "42.28", "prob_perplexity": "214.992", "code_perplexity": "212.38", "temp": "0.5", "loss_0": "2.627", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55563", "wps": "17517.8", "ups": "5.46", "wpb": "3210.6", "bsz": "42.3", "num_updates": "385000", "lr": "3.79747e-06", "gnorm": "0.989", "loss_scale": "2", "train_wall": "36", "gb_free": "14.7", "wall": "72711"} [2023-11-02 13:47:20,910][train_inner][INFO] - {"epoch": 95, "update": 94.976, "loss": "2.724", "ntokens": "3200.16", "nsentences": "41.04", "prob_perplexity": "214.454", "code_perplexity": "211.84", "temp": "0.5", "loss_0": "2.623", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55464", "wps": "17534.8", "ups": "5.48", "wpb": "3200.2", "bsz": "41", "num_updates": "385200", "lr": "3.74684e-06", "gnorm": "0.995", "loss_scale": "2", "train_wall": "36", "gb_free": "13.4", "wall": "72747"} [2023-11-02 13:47:38,521][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 13:47:38,522][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:47:38,539][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 111 [2023-11-02 13:48:04,250][valid][INFO] - {"epoch": 95, "valid_loss": "2.585", "valid_ntokens": "3168.39", "valid_nsentences": "44.1685", "valid_prob_perplexity": "214.198", "valid_code_perplexity": "211.645", "valid_temp": "0.5", "valid_loss_0": "2.485", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58515", "valid_wps": "56008.6", "valid_wpb": "3168.4", "valid_bsz": "44.2", "valid_num_updates": "385297", "valid_best_loss": "2.572"} [2023-11-02 13:48:04,252][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 95 @ 385297 updates [2023-11-02 13:48:04,254][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:48:05,708][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 13:48:05,755][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 95 @ 385297 updates, score 2.585) (writing took 1.502478055190295 seconds) [2023-11-02 13:48:05,756][fairseq_cli.train][INFO] - end of epoch 95 (average epoch stats below) [2023-11-02 13:48:05,758][train][INFO] - {"epoch": 95, "train_loss": "2.704", "train_ntokens": "3194.1", "train_nsentences": "44.2682", "train_prob_perplexity": "214.716", "train_code_perplexity": "212.079", "train_temp": "0.5", "train_loss_0": "2.604", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.56134", "train_wps": "16929.7", "train_ups": "5.3", "train_wpb": "3194.1", "train_bsz": "44.3", "train_num_updates": "385297", "train_lr": "3.72228e-06", "train_gnorm": "0.986", "train_loss_scale": "2", "train_train_wall": "725", "train_gb_free": "12.5", "train_wall": "72792"} [2023-11-02 13:48:05,761][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 13:48:05,778][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 96 [2023-11-02 13:48:05,945][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 13:48:06,018][fairseq.trainer][INFO] - begin training epoch 96 [2023-11-02 13:48:06,019][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 13:48:25,004][train_inner][INFO] - {"epoch": 96, "update": 95.025, "loss": "2.699", "ntokens": "3167", "nsentences": "46.04", "prob_perplexity": "214.947", "code_perplexity": "212.286", "temp": "0.5", "loss_0": "2.598", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56454", "wps": "9882.8", "ups": "3.12", "wpb": "3167", "bsz": "46", "num_updates": "385400", "lr": "3.6962e-06", "gnorm": "0.983", "loss_scale": "2", "train_wall": "36", "gb_free": "12.4", "wall": "72811"} [2023-11-02 13:49:01,018][train_inner][INFO] - {"epoch": 96, "update": 95.075, "loss": "2.668", "ntokens": "3183.84", "nsentences": "43.4", "prob_perplexity": "215.027", "code_perplexity": "212.307", "temp": "0.5", "loss_0": "2.568", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56619", "wps": "17682.1", "ups": "5.55", "wpb": "3183.8", "bsz": "43.4", "num_updates": "385600", "lr": "3.64557e-06", "gnorm": "0.982", "loss_scale": "2", "train_wall": "35", "gb_free": "13.3", "wall": "72847"} [2023-11-02 13:49:22,784][fairseq.trainer][INFO] - NOTE: gradient overflow detected, ignoring gradient, setting loss scale to: 1.0 [2023-11-02 13:49:37,462][train_inner][INFO] - {"epoch": 96, "update": 95.124, "loss": "2.669", "ntokens": "3198.68", "nsentences": "45.52", "prob_perplexity": "214.703", "code_perplexity": "212.11", "temp": "0.5", "loss_0": "2.568", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56794", "wps": "17555.2", "ups": "5.49", "wpb": "3198.7", "bsz": "45.5", "num_updates": "385800", "lr": "3.59494e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "72884"} [2023-11-02 13:50:13,214][train_inner][INFO] - {"epoch": 96, "update": 95.174, "loss": "2.759", "ntokens": "3179.32", "nsentences": "42.52", "prob_perplexity": "214.469", "code_perplexity": "211.818", "temp": "0.5", "loss_0": "2.658", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55025", "wps": "17786.1", "ups": "5.59", "wpb": "3179.3", "bsz": "42.5", "num_updates": "386000", "lr": "3.5443e-06", "gnorm": "0.995", "loss_scale": "1", "train_wall": "35", "gb_free": "14.6", "wall": "72919"} [2023-11-02 13:50:49,447][train_inner][INFO] - {"epoch": 96, "update": 95.223, "loss": "2.656", "ntokens": "3222.76", "nsentences": "46.68", "prob_perplexity": "215.192", "code_perplexity": "212.536", "temp": "0.5", "loss_0": "2.555", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57159", "wps": "17790.4", "ups": "5.52", "wpb": "3222.8", "bsz": "46.7", "num_updates": "386200", "lr": "3.49367e-06", "gnorm": "0.976", "loss_scale": "1", "train_wall": "36", "gb_free": "13", "wall": "72956"} [2023-11-02 13:51:25,374][train_inner][INFO] - {"epoch": 96, "update": 95.272, "loss": "2.702", "ntokens": "3148.44", "nsentences": "43.44", "prob_perplexity": "213.736", "code_perplexity": "211.117", "temp": "0.5", "loss_0": "2.601", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5619", "wps": "17527.6", "ups": "5.57", "wpb": "3148.4", "bsz": "43.4", "num_updates": "386400", "lr": "3.44304e-06", "gnorm": "1.006", "loss_scale": "1", "train_wall": "35", "gb_free": "14.8", "wall": "72992"} [2023-11-02 13:52:00,763][train_inner][INFO] - {"epoch": 96, "update": 95.321, "loss": "2.699", "ntokens": "3152.28", "nsentences": "44.08", "prob_perplexity": "215.108", "code_perplexity": "212.485", "temp": "0.5", "loss_0": "2.598", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56305", "wps": "17816", "ups": "5.65", "wpb": "3152.3", "bsz": "44.1", "num_updates": "386600", "lr": "3.39241e-06", "gnorm": "0.995", "loss_scale": "1", "train_wall": "35", "gb_free": "14.3", "wall": "73027"} [2023-11-02 13:52:36,812][train_inner][INFO] - {"epoch": 96, "update": 95.371, "loss": "2.716", "ntokens": "3184.68", "nsentences": "44.4", "prob_perplexity": "215.377", "code_perplexity": "212.709", "temp": "0.5", "loss_0": "2.616", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56045", "wps": "17678.9", "ups": "5.55", "wpb": "3184.7", "bsz": "44.4", "num_updates": "386800", "lr": "3.34177e-06", "gnorm": "0.985", "loss_scale": "1", "train_wall": "35", "gb_free": "14.2", "wall": "73063"} [2023-11-02 13:53:13,429][train_inner][INFO] - {"epoch": 96, "update": 95.42, "loss": "2.751", "ntokens": "3255.76", "nsentences": "42.48", "prob_perplexity": "214.71", "code_perplexity": "212.16", "temp": "0.5", "loss_0": "2.651", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55048", "wps": "17783.8", "ups": "5.46", "wpb": "3255.8", "bsz": "42.5", "num_updates": "387000", "lr": "3.29114e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "14.9", "wall": "73100"} [2023-11-02 13:53:50,095][train_inner][INFO] - {"epoch": 96, "update": 95.469, "loss": "2.658", "ntokens": "3177.16", "nsentences": "44.68", "prob_perplexity": "214.666", "code_perplexity": "212.07", "temp": "0.5", "loss_0": "2.557", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56994", "wps": "17331.5", "ups": "5.46", "wpb": "3177.2", "bsz": "44.7", "num_updates": "387200", "lr": "3.24051e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "73136"} [2023-11-02 13:54:26,157][train_inner][INFO] - {"epoch": 96, "update": 95.519, "loss": "2.785", "ntokens": "3204.08", "nsentences": "40.68", "prob_perplexity": "214.518", "code_perplexity": "211.887", "temp": "0.5", "loss_0": "2.684", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.54424", "wps": "17770.9", "ups": "5.55", "wpb": "3204.1", "bsz": "40.7", "num_updates": "387400", "lr": "3.18987e-06", "gnorm": "1.014", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "73172"} [2023-11-02 13:55:02,145][train_inner][INFO] - {"epoch": 96, "update": 95.568, "loss": "2.703", "ntokens": "3154", "nsentences": "44.72", "prob_perplexity": "214.118", "code_perplexity": "211.489", "temp": "0.5", "loss_0": "2.602", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56354", "wps": "17529.4", "ups": "5.56", "wpb": "3154", "bsz": "44.7", "num_updates": "387600", "lr": "3.13924e-06", "gnorm": "1.001", "loss_scale": "1", "train_wall": "35", "gb_free": "14.4", "wall": "73208"} [2023-11-02 13:55:38,529][train_inner][INFO] - {"epoch": 96, "update": 95.617, "loss": "2.696", "ntokens": "3207.8", "nsentences": "45.4", "prob_perplexity": "215.265", "code_perplexity": "212.611", "temp": "0.5", "loss_0": "2.595", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56414", "wps": "17633.9", "ups": "5.5", "wpb": "3207.8", "bsz": "45.4", "num_updates": "387800", "lr": "3.08861e-06", "gnorm": "0.987", "loss_scale": "1", "train_wall": "36", "gb_free": "14.4", "wall": "73245"} [2023-11-02 13:56:15,276][train_inner][INFO] - {"epoch": 96, "update": 95.667, "loss": "2.694", "ntokens": "3175.6", "nsentences": "43.28", "prob_perplexity": "215.157", "code_perplexity": "212.491", "temp": "0.5", "loss_0": "2.594", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56277", "wps": "17284.6", "ups": "5.44", "wpb": "3175.6", "bsz": "43.3", "num_updates": "388000", "lr": "3.03797e-06", "gnorm": "0.996", "loss_scale": "1", "train_wall": "36", "gb_free": "14.4", "wall": "73282"} [2023-11-02 13:56:52,011][train_inner][INFO] - {"epoch": 96, "update": 95.716, "loss": "2.689", "ntokens": "3196.2", "nsentences": "45.36", "prob_perplexity": "214.81", "code_perplexity": "212.196", "temp": "0.5", "loss_0": "2.589", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5647", "wps": "17402.7", "ups": "5.44", "wpb": "3196.2", "bsz": "45.4", "num_updates": "388200", "lr": "2.98734e-06", "gnorm": "0.991", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "73318"} [2023-11-02 13:57:28,370][train_inner][INFO] - {"epoch": 96, "update": 95.765, "loss": "2.712", "ntokens": "3194.64", "nsentences": "43.24", "prob_perplexity": "214.903", "code_perplexity": "212.254", "temp": "0.5", "loss_0": "2.611", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56086", "wps": "17573.5", "ups": "5.5", "wpb": "3194.6", "bsz": "43.2", "num_updates": "388400", "lr": "2.93671e-06", "gnorm": "0.983", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "73355"} [2023-11-02 13:58:04,730][train_inner][INFO] - {"epoch": 96, "update": 95.815, "loss": "2.702", "ntokens": "3198.32", "nsentences": "44.56", "prob_perplexity": "215.381", "code_perplexity": "212.757", "temp": "0.5", "loss_0": "2.601", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56184", "wps": "17593.5", "ups": "5.5", "wpb": "3198.3", "bsz": "44.6", "num_updates": "388600", "lr": "2.88608e-06", "gnorm": "0.989", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "73391"} [2023-11-02 13:58:40,897][train_inner][INFO] - {"epoch": 96, "update": 95.864, "loss": "2.734", "ntokens": "3192.36", "nsentences": "42.2", "prob_perplexity": "214.14", "code_perplexity": "211.532", "temp": "0.5", "loss_0": "2.634", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5544", "wps": "17654.9", "ups": "5.53", "wpb": "3192.4", "bsz": "42.2", "num_updates": "388800", "lr": "2.83544e-06", "gnorm": "1.01", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "73427"} [2023-11-02 13:59:16,733][train_inner][INFO] - {"epoch": 96, "update": 95.913, "loss": "2.658", "ntokens": "3177.32", "nsentences": "46.32", "prob_perplexity": "215.608", "code_perplexity": "212.96", "temp": "0.5", "loss_0": "2.558", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57167", "wps": "17733.5", "ups": "5.58", "wpb": "3177.3", "bsz": "46.3", "num_updates": "389000", "lr": "2.78481e-06", "gnorm": "0.971", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "73463"} [2023-11-02 13:59:53,134][train_inner][INFO] - {"epoch": 96, "update": 95.963, "loss": "2.699", "ntokens": "3207.56", "nsentences": "45.96", "prob_perplexity": "215.179", "code_perplexity": "212.571", "temp": "0.5", "loss_0": "2.599", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56414", "wps": "17624.5", "ups": "5.49", "wpb": "3207.6", "bsz": "46", "num_updates": "389200", "lr": "2.73418e-06", "gnorm": "0.977", "loss_scale": "1", "train_wall": "36", "gb_free": "15.8", "wall": "73499"} [2023-11-02 14:00:21,019][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 14:00:21,021][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:00:21,040][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 112 [2023-11-02 14:00:46,595][valid][INFO] - {"epoch": 96, "valid_loss": "2.591", "valid_ntokens": "3161.95", "valid_nsentences": "44.1685", "valid_prob_perplexity": "214.672", "valid_code_perplexity": "212.167", "valid_temp": "0.5", "valid_loss_0": "2.491", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.58414", "valid_wps": "56214", "valid_wpb": "3162", "valid_bsz": "44.2", "valid_num_updates": "389352", "valid_best_loss": "2.572"} [2023-11-02 14:00:46,597][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 96 @ 389352 updates [2023-11-02 14:00:46,599][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 14:00:47,998][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 14:00:48,053][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 96 @ 389352 updates, score 2.591) (writing took 1.455307254102081 seconds) [2023-11-02 14:00:48,053][fairseq_cli.train][INFO] - end of epoch 96 (average epoch stats below) [2023-11-02 14:00:48,056][train][INFO] - {"epoch": 96, "train_loss": "2.701", "train_ntokens": "3190.22", "train_nsentences": "44.2732", "train_prob_perplexity": "214.874", "train_code_perplexity": "212.242", "train_temp": "0.5", "train_loss_0": "2.6", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.56219", "train_wps": "16970.3", "train_ups": "5.32", "train_wpb": "3190.2", "train_bsz": "44.3", "train_num_updates": "389352", "train_lr": "2.6957e-06", "train_gnorm": "0.989", "train_loss_scale": "1", "train_train_wall": "722", "train_gb_free": "12.6", "train_wall": "73554"} [2023-11-02 14:00:48,058][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:00:48,078][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 97 [2023-11-02 14:00:48,245][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 14:00:48,314][fairseq.trainer][INFO] - begin training epoch 97 [2023-11-02 14:00:48,315][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 14:00:57,069][train_inner][INFO] - {"epoch": 97, "update": 96.012, "loss": "2.648", "ntokens": "3192.36", "nsentences": "46.28", "prob_perplexity": "215.957", "code_perplexity": "213.376", "temp": "0.5", "loss_0": "2.548", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.5727", "wps": "9986.6", "ups": "3.13", "wpb": "3192.4", "bsz": "46.3", "num_updates": "389400", "lr": "2.68354e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "36", "gb_free": "13.1", "wall": "73563"} [2023-11-02 14:01:32,947][train_inner][INFO] - {"epoch": 97, "update": 96.061, "loss": "2.699", "ntokens": "3185.28", "nsentences": "44.92", "prob_perplexity": "214.616", "code_perplexity": "211.995", "temp": "0.5", "loss_0": "2.598", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56281", "wps": "17757.3", "ups": "5.57", "wpb": "3185.3", "bsz": "44.9", "num_updates": "389600", "lr": "2.63291e-06", "gnorm": "0.992", "loss_scale": "1", "train_wall": "35", "gb_free": "14.6", "wall": "73599"} [2023-11-02 14:02:09,210][train_inner][INFO] - {"epoch": 97, "update": 96.11, "loss": "2.754", "ntokens": "3191.76", "nsentences": "42.88", "prob_perplexity": "214.166", "code_perplexity": "211.556", "temp": "0.5", "loss_0": "2.653", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55137", "wps": "17604.2", "ups": "5.52", "wpb": "3191.8", "bsz": "42.9", "num_updates": "389800", "lr": "2.58228e-06", "gnorm": "1.009", "loss_scale": "1", "train_wall": "36", "gb_free": "14.8", "wall": "73635"} [2023-11-02 14:02:45,061][train_inner][INFO] - {"epoch": 97, "update": 96.16, "loss": "2.655", "ntokens": "3164.76", "nsentences": "45.24", "prob_perplexity": "215.166", "code_perplexity": "212.592", "temp": "0.5", "loss_0": "2.555", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56993", "wps": "17656", "ups": "5.58", "wpb": "3164.8", "bsz": "45.2", "num_updates": "390000", "lr": "2.53165e-06", "gnorm": "0.986", "loss_scale": "1", "train_wall": "35", "gb_free": "13.2", "wall": "73671"} [2023-11-02 14:03:21,220][train_inner][INFO] - {"epoch": 97, "update": 96.209, "loss": "2.655", "ntokens": "3212.08", "nsentences": "46.64", "prob_perplexity": "215.344", "code_perplexity": "212.723", "temp": "0.5", "loss_0": "2.554", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57103", "wps": "17767.6", "ups": "5.53", "wpb": "3212.1", "bsz": "46.6", "num_updates": "390200", "lr": "2.48101e-06", "gnorm": "0.978", "loss_scale": "1", "train_wall": "36", "gb_free": "12.7", "wall": "73707"} [2023-11-02 14:03:57,177][train_inner][INFO] - {"epoch": 97, "update": 96.258, "loss": "2.732", "ntokens": "3229.52", "nsentences": "45.32", "prob_perplexity": "215.915", "code_perplexity": "213.326", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55766", "wps": "17964.2", "ups": "5.56", "wpb": "3229.5", "bsz": "45.3", "num_updates": "390400", "lr": "2.43038e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "35", "gb_free": "14.4", "wall": "73743"} [2023-11-02 14:04:33,635][train_inner][INFO] - {"epoch": 97, "update": 96.308, "loss": "2.705", "ntokens": "3207.08", "nsentences": "42.88", "prob_perplexity": "215.488", "code_perplexity": "212.866", "temp": "0.5", "loss_0": "2.605", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56014", "wps": "17594.2", "ups": "5.49", "wpb": "3207.1", "bsz": "42.9", "num_updates": "390600", "lr": "2.37975e-06", "gnorm": "0.98", "loss_scale": "1", "train_wall": "36", "gb_free": "12.1", "wall": "73780"} [2023-11-02 14:05:09,731][train_inner][INFO] - {"epoch": 97, "update": 96.357, "loss": "2.681", "ntokens": "3172.4", "nsentences": "44.76", "prob_perplexity": "214.658", "code_perplexity": "212.092", "temp": "0.5", "loss_0": "2.58", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56607", "wps": "17578.7", "ups": "5.54", "wpb": "3172.4", "bsz": "44.8", "num_updates": "390800", "lr": "2.32911e-06", "gnorm": "0.98", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "73816"} [2023-11-02 14:05:46,275][train_inner][INFO] - {"epoch": 97, "update": 96.406, "loss": "2.707", "ntokens": "3177.48", "nsentences": "43.8", "prob_perplexity": "214.299", "code_perplexity": "211.689", "temp": "0.5", "loss_0": "2.607", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56071", "wps": "17398.8", "ups": "5.48", "wpb": "3177.5", "bsz": "43.8", "num_updates": "391000", "lr": "2.27848e-06", "gnorm": "0.996", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "73852"} [2023-11-02 14:06:22,652][train_inner][INFO] - {"epoch": 97, "update": 96.456, "loss": "2.738", "ntokens": "3195", "nsentences": "41.8", "prob_perplexity": "215.479", "code_perplexity": "212.829", "temp": "0.5", "loss_0": "2.638", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.554", "wps": "17567.2", "ups": "5.5", "wpb": "3195", "bsz": "41.8", "num_updates": "391200", "lr": "2.22785e-06", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "15.1", "wall": "73889"} [2023-11-02 14:06:59,376][train_inner][INFO] - {"epoch": 97, "update": 96.505, "loss": "2.697", "ntokens": "3178.16", "nsentences": "42.76", "prob_perplexity": "214.369", "code_perplexity": "211.709", "temp": "0.5", "loss_0": "2.597", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56138", "wps": "17309.4", "ups": "5.45", "wpb": "3178.2", "bsz": "42.8", "num_updates": "391400", "lr": "2.17722e-06", "gnorm": "0.995", "loss_scale": "1", "train_wall": "36", "gb_free": "12.5", "wall": "73926"} [2023-11-02 14:07:35,812][train_inner][INFO] - {"epoch": 97, "update": 96.554, "loss": "2.712", "ntokens": "3203.44", "nsentences": "43.76", "prob_perplexity": "215.119", "code_perplexity": "212.466", "temp": "0.5", "loss_0": "2.612", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56035", "wps": "17584.5", "ups": "5.49", "wpb": "3203.4", "bsz": "43.8", "num_updates": "391600", "lr": "2.12658e-06", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "73962"} [2023-11-02 14:08:11,671][train_inner][INFO] - {"epoch": 97, "update": 96.604, "loss": "2.725", "ntokens": "3172.68", "nsentences": "42.12", "prob_perplexity": "214.096", "code_perplexity": "211.467", "temp": "0.5", "loss_0": "2.625", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55534", "wps": "17696.5", "ups": "5.58", "wpb": "3172.7", "bsz": "42.1", "num_updates": "391800", "lr": "2.07595e-06", "gnorm": "0.998", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "73998"} [2023-11-02 14:08:47,623][train_inner][INFO] - {"epoch": 97, "update": 96.653, "loss": "2.688", "ntokens": "3225.24", "nsentences": "45", "prob_perplexity": "215.28", "code_perplexity": "212.651", "temp": "0.5", "loss_0": "2.588", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56382", "wps": "17942.8", "ups": "5.56", "wpb": "3225.2", "bsz": "45", "num_updates": "392000", "lr": "2.02532e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "13.3", "wall": "74034"} [2023-11-02 14:09:24,070][train_inner][INFO] - {"epoch": 97, "update": 96.702, "loss": "2.636", "ntokens": "3187.32", "nsentences": "46.92", "prob_perplexity": "216.048", "code_perplexity": "213.427", "temp": "0.5", "loss_0": "2.536", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57539", "wps": "17491.5", "ups": "5.49", "wpb": "3187.3", "bsz": "46.9", "num_updates": "392200", "lr": "1.97468e-06", "gnorm": "0.974", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "74070"} [2023-11-02 14:10:00,113][train_inner][INFO] - {"epoch": 97, "update": 96.751, "loss": "2.69", "ntokens": "3170.64", "nsentences": "42.88", "prob_perplexity": "215.125", "code_perplexity": "212.442", "temp": "0.5", "loss_0": "2.59", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56275", "wps": "17594.5", "ups": "5.55", "wpb": "3170.6", "bsz": "42.9", "num_updates": "392400", "lr": "1.92405e-06", "gnorm": "0.994", "loss_scale": "1", "train_wall": "35", "gb_free": "14.6", "wall": "74106"} [2023-11-02 14:10:36,244][train_inner][INFO] - {"epoch": 97, "update": 96.801, "loss": "2.659", "ntokens": "3192.8", "nsentences": "44.72", "prob_perplexity": "215.182", "code_perplexity": "212.557", "temp": "0.5", "loss_0": "2.558", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56854", "wps": "17674.7", "ups": "5.54", "wpb": "3192.8", "bsz": "44.7", "num_updates": "392600", "lr": "1.87342e-06", "gnorm": "0.992", "loss_scale": "1", "train_wall": "35", "gb_free": "13.7", "wall": "74142"} [2023-11-02 14:11:12,457][train_inner][INFO] - {"epoch": 97, "update": 96.85, "loss": "2.712", "ntokens": "3207.36", "nsentences": "44.24", "prob_perplexity": "215.819", "code_perplexity": "213.266", "temp": "0.5", "loss_0": "2.612", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55959", "wps": "17714.6", "ups": "5.52", "wpb": "3207.4", "bsz": "44.2", "num_updates": "392800", "lr": "1.82278e-06", "gnorm": "0.986", "loss_scale": "1", "train_wall": "36", "gb_free": "12.6", "wall": "74179"} [2023-11-02 14:11:48,852][train_inner][INFO] - {"epoch": 97, "update": 96.899, "loss": "2.676", "ntokens": "3205.32", "nsentences": "46.52", "prob_perplexity": "215.26", "code_perplexity": "212.595", "temp": "0.5", "loss_0": "2.576", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56812", "wps": "17615.2", "ups": "5.5", "wpb": "3205.3", "bsz": "46.5", "num_updates": "393000", "lr": "1.77215e-06", "gnorm": "0.98", "loss_scale": "1", "train_wall": "36", "gb_free": "15.1", "wall": "74215"} [2023-11-02 14:12:24,963][train_inner][INFO] - {"epoch": 97, "update": 96.949, "loss": "2.719", "ntokens": "3164.24", "nsentences": "42.8", "prob_perplexity": "214.727", "code_perplexity": "212.116", "temp": "0.5", "loss_0": "2.619", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55826", "wps": "17526.2", "ups": "5.54", "wpb": "3164.2", "bsz": "42.8", "num_updates": "393200", "lr": "1.72152e-06", "gnorm": "0.994", "loss_scale": "1", "train_wall": "35", "gb_free": "13.5", "wall": "74251"} [2023-11-02 14:13:01,535][train_inner][INFO] - {"epoch": 97, "update": 96.998, "loss": "2.703", "ntokens": "3214.76", "nsentences": "45.12", "prob_perplexity": "215.151", "code_perplexity": "212.542", "temp": "0.5", "loss_0": "2.603", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56296", "wps": "17581.3", "ups": "5.47", "wpb": "3214.8", "bsz": "45.1", "num_updates": "393400", "lr": "1.67089e-06", "gnorm": "0.982", "loss_scale": "1", "train_wall": "36", "gb_free": "14.9", "wall": "74288"} [2023-11-02 14:13:02,887][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 14:13:02,888][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:13:02,907][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 113 [2023-11-02 14:13:28,566][valid][INFO] - {"epoch": 97, "valid_loss": "2.584", "valid_ntokens": "3165.14", "valid_nsentences": "44.1685", "valid_prob_perplexity": "214.821", "valid_code_perplexity": "212.303", "valid_temp": "0.5", "valid_loss_0": "2.484", "valid_loss_1": "0.096", "valid_loss_2": "0.005", "valid_accuracy": "0.5855", "valid_wps": "56019.9", "valid_wpb": "3165.1", "valid_bsz": "44.2", "valid_num_updates": "393408", "valid_best_loss": "2.572"} [2023-11-02 14:13:28,568][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 97 @ 393408 updates [2023-11-02 14:13:28,569][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 14:13:29,984][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 14:13:30,036][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 97 @ 393408 updates, score 2.584) (writing took 1.4683921672403812 seconds) [2023-11-02 14:13:30,037][fairseq_cli.train][INFO] - end of epoch 97 (average epoch stats below) [2023-11-02 14:13:30,039][train][INFO] - {"epoch": 97, "train_loss": "2.697", "train_ntokens": "3192.92", "train_nsentences": "44.2682", "train_prob_perplexity": "215.08", "train_code_perplexity": "212.463", "train_temp": "0.5", "train_loss_0": "2.597", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.5625", "train_wps": "16995.8", "train_ups": "5.32", "train_wpb": "3192.9", "train_bsz": "44.3", "train_num_updates": "393408", "train_lr": "1.66886e-06", "train_gnorm": "0.988", "train_loss_scale": "1", "train_train_wall": "722", "train_gb_free": "14.3", "train_wall": "74316"} [2023-11-02 14:13:30,042][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:13:30,062][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 98 [2023-11-02 14:13:30,230][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 14:13:30,301][fairseq.trainer][INFO] - begin training epoch 98 [2023-11-02 14:13:30,302][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 14:14:04,724][train_inner][INFO] - {"epoch": 98, "update": 97.047, "loss": "2.633", "ntokens": "3177.8", "nsentences": "45.56", "prob_perplexity": "215.502", "code_perplexity": "212.909", "temp": "0.5", "loss_0": "2.533", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.57528", "wps": "10058.5", "ups": "3.17", "wpb": "3177.8", "bsz": "45.6", "num_updates": "393600", "lr": "1.62025e-06", "gnorm": "0.979", "loss_scale": "1", "train_wall": "35", "gb_free": "14.2", "wall": "74351"} [2023-11-02 14:14:40,912][train_inner][INFO] - {"epoch": 98, "update": 97.097, "loss": "2.707", "ntokens": "3169.96", "nsentences": "43.44", "prob_perplexity": "214.558", "code_perplexity": "211.878", "temp": "0.5", "loss_0": "2.606", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56108", "wps": "17520.3", "ups": "5.53", "wpb": "3170", "bsz": "43.4", "num_updates": "393800", "lr": "1.56962e-06", "gnorm": "1.003", "loss_scale": "1", "train_wall": "36", "gb_free": "14.7", "wall": "74387"} [2023-11-02 14:15:16,813][train_inner][INFO] - {"epoch": 98, "update": 97.146, "loss": "2.666", "ntokens": "3161.24", "nsentences": "46.16", "prob_perplexity": "215.194", "code_perplexity": "212.621", "temp": "0.5", "loss_0": "2.566", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.57053", "wps": "17612", "ups": "5.57", "wpb": "3161.2", "bsz": "46.2", "num_updates": "394000", "lr": "1.51899e-06", "gnorm": "0.988", "loss_scale": "1", "train_wall": "35", "gb_free": "13", "wall": "74423"} [2023-11-02 14:15:52,835][train_inner][INFO] - {"epoch": 98, "update": 97.195, "loss": "2.714", "ntokens": "3211.88", "nsentences": "44.12", "prob_perplexity": "214.781", "code_perplexity": "212.106", "temp": "0.5", "loss_0": "2.613", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56004", "wps": "17833.8", "ups": "5.55", "wpb": "3211.9", "bsz": "44.1", "num_updates": "394200", "lr": "1.46835e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "12.7", "wall": "74459"} [2023-11-02 14:16:28,632][train_inner][INFO] - {"epoch": 98, "update": 97.245, "loss": "2.735", "ntokens": "3188.28", "nsentences": "42.4", "prob_perplexity": "215.339", "code_perplexity": "212.678", "temp": "0.5", "loss_0": "2.635", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55542", "wps": "17814.1", "ups": "5.59", "wpb": "3188.3", "bsz": "42.4", "num_updates": "394400", "lr": "1.41772e-06", "gnorm": "0.997", "loss_scale": "1", "train_wall": "35", "gb_free": "13.9", "wall": "74495"} [2023-11-02 14:17:04,738][train_inner][INFO] - {"epoch": 98, "update": 97.294, "loss": "2.738", "ntokens": "3228.32", "nsentences": "43.28", "prob_perplexity": "215.059", "code_perplexity": "212.432", "temp": "0.5", "loss_0": "2.637", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55377", "wps": "17883.4", "ups": "5.54", "wpb": "3228.3", "bsz": "43.3", "num_updates": "394600", "lr": "1.36709e-06", "gnorm": "0.993", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "74531"} [2023-11-02 14:17:41,111][train_inner][INFO] - {"epoch": 98, "update": 97.343, "loss": "2.728", "ntokens": "3205.6", "nsentences": "43.08", "prob_perplexity": "214.809", "code_perplexity": "212.206", "temp": "0.5", "loss_0": "2.627", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55674", "wps": "17627.4", "ups": "5.5", "wpb": "3205.6", "bsz": "43.1", "num_updates": "394800", "lr": "1.31646e-06", "gnorm": "0.989", "loss_scale": "1", "train_wall": "36", "gb_free": "14.3", "wall": "74567"} [2023-11-02 14:18:16,997][train_inner][INFO] - {"epoch": 98, "update": 97.393, "loss": "2.727", "ntokens": "3175.96", "nsentences": "42.4", "prob_perplexity": "215.111", "code_perplexity": "212.506", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55572", "wps": "17701.1", "ups": "5.57", "wpb": "3176", "bsz": "42.4", "num_updates": "395000", "lr": "1.26582e-06", "gnorm": "0.985", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "74603"} [2023-11-02 14:18:52,797][train_inner][INFO] - {"epoch": 98, "update": 97.442, "loss": "2.663", "ntokens": "3211.84", "nsentences": "45.12", "prob_perplexity": "214.87", "code_perplexity": "212.238", "temp": "0.5", "loss_0": "2.563", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56867", "wps": "17944.3", "ups": "5.59", "wpb": "3211.8", "bsz": "45.1", "num_updates": "395200", "lr": "1.21519e-06", "gnorm": "0.99", "loss_scale": "1", "train_wall": "35", "gb_free": "13.4", "wall": "74639"} [2023-11-02 14:19:28,971][train_inner][INFO] - {"epoch": 98, "update": 97.491, "loss": "2.665", "ntokens": "3185.4", "nsentences": "45.08", "prob_perplexity": "216.281", "code_perplexity": "213.67", "temp": "0.5", "loss_0": "2.565", "loss_1": "0.095", "loss_2": "0.005", "accuracy": "0.56889", "wps": "17620.8", "ups": "5.53", "wpb": "3185.4", "bsz": "45.1", "num_updates": "395400", "lr": "1.16456e-06", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "13.6", "wall": "74675"} [2023-11-02 14:20:05,260][train_inner][INFO] - {"epoch": 98, "update": 97.54, "loss": "2.66", "ntokens": "3183.8", "nsentences": "44.64", "prob_perplexity": "214.761", "code_perplexity": "212.139", "temp": "0.5", "loss_0": "2.56", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56947", "wps": "17547.8", "ups": "5.51", "wpb": "3183.8", "bsz": "44.6", "num_updates": "395600", "lr": "1.11392e-06", "gnorm": "0.998", "loss_scale": "1", "train_wall": "36", "gb_free": "14.2", "wall": "74711"} [2023-11-02 14:20:41,189][train_inner][INFO] - {"epoch": 98, "update": 97.59, "loss": "2.74", "ntokens": "3195.68", "nsentences": "42.28", "prob_perplexity": "214.881", "code_perplexity": "212.268", "temp": "0.5", "loss_0": "2.639", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55275", "wps": "17789.6", "ups": "5.57", "wpb": "3195.7", "bsz": "42.3", "num_updates": "395800", "lr": "1.06329e-06", "gnorm": "0.986", "loss_scale": "1", "train_wall": "35", "gb_free": "16.5", "wall": "74747"} [2023-11-02 14:21:17,188][train_inner][INFO] - {"epoch": 98, "update": 97.639, "loss": "2.661", "ntokens": "3215.36", "nsentences": "45.6", "prob_perplexity": "215.47", "code_perplexity": "212.865", "temp": "0.5", "loss_0": "2.561", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56942", "wps": "17864.7", "ups": "5.56", "wpb": "3215.4", "bsz": "45.6", "num_updates": "396000", "lr": "1.01266e-06", "gnorm": "0.984", "loss_scale": "1", "train_wall": "35", "gb_free": "13.1", "wall": "74783"} [2023-11-02 14:21:53,701][train_inner][INFO] - {"epoch": 98, "update": 97.688, "loss": "2.731", "ntokens": "3175.88", "nsentences": "42.76", "prob_perplexity": "215.58", "code_perplexity": "212.984", "temp": "0.5", "loss_0": "2.631", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.55583", "wps": "17397.2", "ups": "5.48", "wpb": "3175.9", "bsz": "42.8", "num_updates": "396200", "lr": "9.62025e-07", "gnorm": "0.984", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "74820"} [2023-11-02 14:22:30,265][train_inner][INFO] - {"epoch": 98, "update": 97.738, "loss": "2.698", "ntokens": "3207.92", "nsentences": "43.16", "prob_perplexity": "215.062", "code_perplexity": "212.442", "temp": "0.5", "loss_0": "2.597", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56092", "wps": "17548", "ups": "5.47", "wpb": "3207.9", "bsz": "43.2", "num_updates": "396400", "lr": "9.11392e-07", "gnorm": "0.994", "loss_scale": "1", "train_wall": "36", "gb_free": "15.7", "wall": "74856"} [2023-11-02 14:23:06,421][train_inner][INFO] - {"epoch": 98, "update": 97.787, "loss": "2.679", "ntokens": "3181.08", "nsentences": "46.36", "prob_perplexity": "215.429", "code_perplexity": "212.773", "temp": "0.5", "loss_0": "2.579", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.56794", "wps": "17597.1", "ups": "5.53", "wpb": "3181.1", "bsz": "46.4", "num_updates": "396600", "lr": "8.60759e-07", "gnorm": "0.989", "loss_scale": "1", "train_wall": "36", "gb_free": "13.2", "wall": "74893"} [2023-11-02 14:23:42,827][train_inner][INFO] - {"epoch": 98, "update": 97.836, "loss": "2.72", "ntokens": "3198.2", "nsentences": "43.16", "prob_perplexity": "214.768", "code_perplexity": "212.105", "temp": "0.5", "loss_0": "2.62", "loss_1": "0.096", "loss_2": "0.005", "accuracy": "0.55752", "wps": "17571", "ups": "5.49", "wpb": "3198.2", "bsz": "43.2", "num_updates": "396800", "lr": "8.10127e-07", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "13.2", "wall": "74929"} [2023-11-02 14:24:19,339][train_inner][INFO] - {"epoch": 98, "update": 97.886, "loss": "2.652", "ntokens": "3165.2", "nsentences": "46.4", "prob_perplexity": "216.103", "code_perplexity": "213.44", "temp": "0.5", "loss_0": "2.552", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.57279", "wps": "17338.9", "ups": "5.48", "wpb": "3165.2", "bsz": "46.4", "num_updates": "397000", "lr": "7.59494e-07", "gnorm": "0.979", "loss_scale": "1", "train_wall": "36", "gb_free": "13.7", "wall": "74966"} [2023-11-02 14:24:55,796][train_inner][INFO] - {"epoch": 98, "update": 97.935, "loss": "2.67", "ntokens": "3175.04", "nsentences": "45.24", "prob_perplexity": "215.183", "code_perplexity": "212.541", "temp": "0.5", "loss_0": "2.57", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56919", "wps": "17419", "ups": "5.49", "wpb": "3175", "bsz": "45.2", "num_updates": "397200", "lr": "7.08861e-07", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "14.6", "wall": "75002"} [2023-11-02 14:25:32,174][train_inner][INFO] - {"epoch": 98, "update": 97.984, "loss": "2.659", "ntokens": "3188.92", "nsentences": "46.4", "prob_perplexity": "215.272", "code_perplexity": "212.65", "temp": "0.5", "loss_0": "2.559", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.57191", "wps": "17533", "ups": "5.5", "wpb": "3188.9", "bsz": "46.4", "num_updates": "397400", "lr": "6.58228e-07", "gnorm": "0.981", "loss_scale": "1", "train_wall": "36", "gb_free": "14.6", "wall": "75038"} [2023-11-02 14:25:43,847][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 14:25:43,848][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:25:43,865][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 114 [2023-11-02 14:26:09,673][valid][INFO] - {"epoch": 98, "valid_loss": "2.595", "valid_ntokens": "3175.04", "valid_nsentences": "44.1685", "valid_prob_perplexity": "214.663", "valid_code_perplexity": "212.178", "valid_temp": "0.5", "valid_loss_0": "2.495", "valid_loss_1": "0.096", "valid_loss_2": "0.004", "valid_accuracy": "0.58359", "valid_wps": "55875.2", "valid_wpb": "3175", "valid_bsz": "44.2", "valid_num_updates": "397464", "valid_best_loss": "2.572"} [2023-11-02 14:26:09,675][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 98 @ 397464 updates [2023-11-02 14:26:09,677][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 14:26:11,092][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_last.pt [2023-11-02 14:26:11,131][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_last.pt (epoch 98 @ 397464 updates, score 2.595) (writing took 1.455411871895194 seconds) [2023-11-02 14:26:11,131][fairseq_cli.train][INFO] - end of epoch 98 (average epoch stats below) [2023-11-02 14:26:11,133][train][INFO] - {"epoch": 98, "train_loss": "2.693", "train_ntokens": "3191.22", "train_nsentences": "44.2682", "train_prob_perplexity": "215.185", "train_code_perplexity": "212.557", "train_temp": "0.5", "train_loss_0": "2.593", "train_loss_1": "0.096", "train_loss_2": "0.005", "train_accuracy": "0.56349", "train_wps": "17006.6", "train_ups": "5.33", "train_wpb": "3191.2", "train_bsz": "44.3", "train_num_updates": "397464", "train_lr": "6.42025e-07", "train_gnorm": "0.988", "train_loss_scale": "1", "train_train_wall": "721", "train_gb_free": "15.1", "train_wall": "75077"} [2023-11-02 14:26:11,136][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:26:11,155][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 99 [2023-11-02 14:26:11,322][fairseq.data.iterators][INFO] - grouped total_num_itrs = 4056 [2023-11-02 14:26:11,395][fairseq.trainer][INFO] - begin training epoch 99 [2023-11-02 14:26:11,396][fairseq_cli.train][INFO] - Start iterating over samples [2023-11-02 14:26:36,047][train_inner][INFO] - {"epoch": 99, "update": 98.034, "loss": "2.69", "ntokens": "3223.2", "nsentences": "44.76", "prob_perplexity": "215.525", "code_perplexity": "212.895", "temp": "0.5", "loss_0": "2.59", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56362", "wps": "10092.8", "ups": "3.13", "wpb": "3223.2", "bsz": "44.8", "num_updates": "397600", "lr": "6.07595e-07", "gnorm": "0.98", "loss_scale": "1", "train_wall": "36", "gb_free": "12.2", "wall": "75102"} [2023-11-02 14:27:11,976][train_inner][INFO] - {"epoch": 99, "update": 98.083, "loss": "2.703", "ntokens": "3163.72", "nsentences": "43.92", "prob_perplexity": "214.627", "code_perplexity": "211.998", "temp": "0.5", "loss_0": "2.602", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56217", "wps": "17611.9", "ups": "5.57", "wpb": "3163.7", "bsz": "43.9", "num_updates": "397800", "lr": "5.56962e-07", "gnorm": "0.999", "loss_scale": "1", "train_wall": "35", "gb_free": "13.8", "wall": "75138"} [2023-11-02 14:27:48,179][train_inner][INFO] - {"epoch": 99, "update": 98.132, "loss": "2.752", "ntokens": "3210.12", "nsentences": "42.36", "prob_perplexity": "215.419", "code_perplexity": "212.825", "temp": "0.5", "loss_0": "2.652", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.5521", "wps": "17735.4", "ups": "5.52", "wpb": "3210.1", "bsz": "42.4", "num_updates": "398000", "lr": "5.06329e-07", "gnorm": "0.989", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "75174"} [2023-11-02 14:28:24,052][train_inner][INFO] - {"epoch": 99, "update": 98.181, "loss": "2.688", "ntokens": "3165.92", "nsentences": "43.96", "prob_perplexity": "214.609", "code_perplexity": "212.007", "temp": "0.5", "loss_0": "2.587", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56438", "wps": "17651.6", "ups": "5.58", "wpb": "3165.9", "bsz": "44", "num_updates": "398200", "lr": "4.55696e-07", "gnorm": "0.997", "loss_scale": "1", "train_wall": "35", "gb_free": "14.6", "wall": "75210"} [2023-11-02 14:29:00,528][train_inner][INFO] - {"epoch": 99, "update": 98.231, "loss": "2.764", "ntokens": "3224.92", "nsentences": "42", "prob_perplexity": "215.634", "code_perplexity": "213.036", "temp": "0.5", "loss_0": "2.664", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.54897", "wps": "17683.7", "ups": "5.48", "wpb": "3224.9", "bsz": "42", "num_updates": "398400", "lr": "4.05063e-07", "gnorm": "0.99", "loss_scale": "1", "train_wall": "36", "gb_free": "14.5", "wall": "75247"} [2023-11-02 14:29:36,951][train_inner][INFO] - {"epoch": 99, "update": 98.28, "loss": "2.726", "ntokens": "3158.6", "nsentences": "42.52", "prob_perplexity": "215.423", "code_perplexity": "212.771", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.55644", "wps": "17344.9", "ups": "5.49", "wpb": "3158.6", "bsz": "42.5", "num_updates": "398600", "lr": "3.5443e-07", "gnorm": "0.99", "loss_scale": "1", "train_wall": "36", "gb_free": "13.3", "wall": "75283"} [2023-11-02 14:30:13,618][train_inner][INFO] - {"epoch": 99, "update": 98.329, "loss": "2.732", "ntokens": "3214.4", "nsentences": "43.4", "prob_perplexity": "215.399", "code_perplexity": "212.836", "temp": "0.5", "loss_0": "2.632", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.55583", "wps": "17534", "ups": "5.45", "wpb": "3214.4", "bsz": "43.4", "num_updates": "398800", "lr": "3.03797e-07", "gnorm": "0.983", "loss_scale": "1", "train_wall": "36", "gb_free": "13.8", "wall": "75320"} [2023-11-02 14:30:49,965][train_inner][INFO] - {"epoch": 99, "update": 98.379, "loss": "2.748", "ntokens": "3202.44", "nsentences": "43.32", "prob_perplexity": "214.572", "code_perplexity": "211.944", "temp": "0.5", "loss_0": "2.648", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.55281", "wps": "17622.2", "ups": "5.5", "wpb": "3202.4", "bsz": "43.3", "num_updates": "399000", "lr": "2.53165e-07", "gnorm": "0.996", "loss_scale": "1", "train_wall": "36", "gb_free": "13.5", "wall": "75356"} [2023-11-02 14:31:26,174][train_inner][INFO] - {"epoch": 99, "update": 98.428, "loss": "2.674", "ntokens": "3259.84", "nsentences": "45.56", "prob_perplexity": "215.585", "code_perplexity": "212.998", "temp": "0.5", "loss_0": "2.574", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.5669", "wps": "18007", "ups": "5.52", "wpb": "3259.8", "bsz": "45.6", "num_updates": "399200", "lr": "2.02532e-07", "gnorm": "0.973", "loss_scale": "1", "train_wall": "36", "gb_free": "13.4", "wall": "75392"} [2023-11-02 14:32:02,431][train_inner][INFO] - {"epoch": 99, "update": 98.477, "loss": "2.661", "ntokens": "3185.32", "nsentences": "46.64", "prob_perplexity": "215.386", "code_perplexity": "212.749", "temp": "0.5", "loss_0": "2.56", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.5715", "wps": "17571.6", "ups": "5.52", "wpb": "3185.3", "bsz": "46.6", "num_updates": "399400", "lr": "1.51899e-07", "gnorm": "0.988", "loss_scale": "1", "train_wall": "36", "gb_free": "14", "wall": "75429"} [2023-11-02 14:32:38,277][train_inner][INFO] - {"epoch": 99, "update": 98.527, "loss": "2.694", "ntokens": "3189.88", "nsentences": "44.16", "prob_perplexity": "215.344", "code_perplexity": "212.729", "temp": "0.5", "loss_0": "2.593", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.56255", "wps": "17799.1", "ups": "5.58", "wpb": "3189.9", "bsz": "44.2", "num_updates": "399600", "lr": "1.01266e-07", "gnorm": "0.986", "loss_scale": "1", "train_wall": "35", "gb_free": "13.6", "wall": "75465"} [2023-11-02 14:33:14,570][train_inner][INFO] - {"epoch": 99, "update": 98.576, "loss": "2.727", "ntokens": "3180.36", "nsentences": "42.88", "prob_perplexity": "214.405", "code_perplexity": "211.727", "temp": "0.5", "loss_0": "2.626", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.55653", "wps": "17526.8", "ups": "5.51", "wpb": "3180.4", "bsz": "42.9", "num_updates": "399800", "lr": "5.06329e-08", "gnorm": "0.991", "loss_scale": "1", "train_wall": "36", "gb_free": "15", "wall": "75501"} [2023-11-02 14:33:50,492][train_inner][INFO] - {"epoch": 99, "update": 98.625, "loss": "2.643", "ntokens": "3149.84", "nsentences": "43.64", "prob_perplexity": "215.274", "code_perplexity": "212.633", "temp": "0.5", "loss_0": "2.542", "loss_1": "0.096", "loss_2": "0.004", "accuracy": "0.572", "wps": "17537.9", "ups": "5.57", "wpb": "3149.8", "bsz": "43.6", "num_updates": "400000", "lr": "0", "gnorm": "0.98", "loss_scale": "1", "train_wall": "35", "gb_free": "14", "wall": "75537"} [2023-11-02 14:33:50,493][fairseq_cli.train][INFO] - Stopping training due to num_updates: 400000 >= max_update: 400000 [2023-11-02 14:33:50,494][fairseq_cli.train][INFO] - begin validation on "valid" subset [2023-11-02 14:33:50,495][fairseq.tasks.fairseq_task][INFO] - can_reuse_epoch_itr = True [2023-11-02 14:33:50,515][fairseq.tasks.fairseq_task][INFO] - creating new batches for epoch 115 [2023-11-02 14:34:16,086][valid][INFO] - {"epoch": 99, "valid_loss": "2.577", "valid_ntokens": "3159.33", "valid_nsentences": "44.1685", "valid_prob_perplexity": "214.699", "valid_code_perplexity": "212.185", "valid_temp": "0.5", "valid_loss_0": "2.477", "valid_loss_1": "0.096", "valid_loss_2": "0.004", "valid_accuracy": "0.58658", "valid_wps": "56117.7", "valid_wpb": "3159.3", "valid_bsz": "44.2", "valid_num_updates": "400000", "valid_best_loss": "2.572"} [2023-11-02 14:34:16,088][fairseq.checkpoint_utils][INFO] - Preparing to save checkpoint for epoch 99 @ 400000 updates [2023-11-02 14:34:16,090][fairseq.trainer][INFO] - Saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_99_400000.pt [2023-11-02 14:34:17,429][fairseq.trainer][INFO] - Finished saving checkpoint to /root/vlsp-2023-asr-ser/outputs/2023-11-01/17-34-51/checkpoints/checkpoint_99_400000.pt [2023-11-02 14:34:18,384][fairseq.checkpoint_utils][INFO] - Saved checkpoint checkpoints/checkpoint_99_400000.pt (epoch 99 @ 400000 updates, score 2.577) (writing took 2.295775666832924 seconds) [2023-11-02 14:34:18,502][fairseq_cli.train][INFO] - end of epoch 99 (average epoch stats below) [2023-11-02 14:34:18,505][train][INFO] - {"epoch": 99, "train_loss": "2.708", "train_ntokens": "3193.57", "train_nsentences": "43.858", "train_prob_perplexity": "215.187", "train_code_perplexity": "212.567", "train_temp": "0.5", "train_loss_0": "2.607", "train_loss_1": "0.096", "train_loss_2": "0.004", "train_accuracy": "0.56057", "train_wps": "16617.6", "train_ups": "5.2", "train_wpb": "3193.6", "train_bsz": "43.9", "train_num_updates": "400000", "train_lr": "0", "train_gnorm": "0.988", "train_loss_scale": "1", "train_train_wall": "451", "train_gb_free": "14", "train_wall": "75565"} [2023-11-02 14:34:18,505][fairseq_cli.train][INFO] - done training in 75560.2 seconds