diff --git "a/out.log" "b/out.log" new file mode 100644--- /dev/null +++ "b/out.log" @@ -0,0 +1,1369 @@ +2024-06-29 19:18:03,095 - INFO - allennlp.common.params - random_seed = 13370 +2024-06-29 19:18:03,095 - INFO - allennlp.common.params - numpy_seed = 1337 +2024-06-29 19:18:03,095 - INFO - allennlp.common.params - pytorch_seed = 133 +2024-06-29 19:18:03,096 - INFO - allennlp.common.checks - Pytorch version: 2.3.1+cu121 +2024-06-29 19:18:03,096 - INFO - allennlp.common.params - type = default +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None +2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - train_data_path = data/train.conllu +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - datasets_for_vocab_creation = None +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_dataset_reader = None +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_data_path = data/validation.conllu +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - test_data_path = None +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - evaluate_on_test = False +2024-06-29 19:18:05,153 - INFO - allennlp.common.params - batch_weight_key = +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.type = multiprocess +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_size = 24 +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.drop_last = False +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.shuffle = True +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_sampler = None +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.num_workers = 0 +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.start_method = fork +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.cuda_device = None +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.quiet = False +2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.collate_fn = +2024-06-29 19:18:05,154 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] +2024-06-29 19:18:15,215 - INFO - tqdm - loading instances: 25625it [00:10, 2590.96it/s] +2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess +2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24 +2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.drop_last = False +2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.shuffle = False +2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None +2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None +2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 +2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None +2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.start_method = fork +2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None +2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.quiet = False +2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.collate_fn = +2024-06-29 19:18:15,765 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] +2024-06-29 19:18:18,810 - INFO - allennlp.common.params - vocabulary.type = from_instances +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels') +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.pretrained_files = None +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ +2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ +2024-06-29 19:18:18,811 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset. +2024-06-29 19:18:18,811 - INFO - tqdm - building vocab: 0it [00:00, ?it/s] +2024-06-29 19:18:19,437 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser +2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched +2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 +2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base +2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.namespace = tags +2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.max_length = None +2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.max_length = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_module = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.train_parameters = True +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.last_layer_only = True +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_file = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.load_weights = True +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None +2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg +2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 +2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu +2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 +2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] +2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None +2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 +2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu +2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 +2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 +2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu +2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 +2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 +2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.activation = relu +2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 +2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.hid_dim = 1024 +2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu +2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 +2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 +2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu +2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 +2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 +2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.activation = relu +2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 +2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 +2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.type = gradient_descent +2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.cuda_device = 0 +2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.distributed = False +2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.world_size = 1 +2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.patience = None +2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.validation_metric = +Avg +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_epochs = 10 +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_norm = False +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_clipping = 5 +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.use_amp = False +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.no_grad = None +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.momentum_scheduler = None +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.moving_average = None +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.checkpointer = +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.run_confidence_checks = True +2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_scaling = True +2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.type = adam +2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 +2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) +2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 +2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 +2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False +2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Done constructing parameter groups. +2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight'], {} +2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 1: ['lemma_rule_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'semclass_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'dependency_classifier.arc_head_mlp.1.weight', 'dependency_classifier.rel_dep_mlp.1.bias', 'pos_feats_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.weight', 'dependency_classifier.arc_attention_eud._bias', 'null_classifier.classifier.1.bias', 'null_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'dependency_classifier.arc_attention_ud._weight_matrix', 'null_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.weight', 'dependency_classifier.rel_head_mlp.1.weight', 'semclass_classifier.classifier.4.weight', 'semclass_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.bias', 'lemma_rule_classifier.classifier.4.bias', 'pos_feats_classifier.classifier.1.bias', 'misc_classifier.classifier.1.weight', 'semslot_classifier.classifier.1.bias', 'semclass_classifier.classifier.1.bias', 'misc_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_eud._weight_matrix', 'pos_feats_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'misc_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'dependency_classifier.rel_attention_eud._bias'], {} +2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 2: [], {} +2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Number of trainable parameters: 287815418 +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight +2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias +2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight +2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias +2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias +2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias +2024-06-29 19:18:34,399 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.weight +2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.bias +2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.weight +2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.bias +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = slanted_triangular +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001 +2024-06-29 19:18:34,401 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - type = default +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_completed_epochs = True +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_seconds = None +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_batches = None +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_age = None +2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard +2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 +2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None +2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None +2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False +2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True +2024-06-29 19:18:34,403 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled +2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. +2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 +2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.5G +2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G +2024-06-29 19:18:34,406 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-06-29 19:18:34,406 - INFO - tqdm - 0%| | 0/1147 [00:00") +2024-06-29 19:18:34,811 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 48) +tensor([[ 0, 8, 0, ..., 0, 0, 0], + [ 0, 13, 0, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], + ..., + [ 0, 9, 0, ..., 0, 0, 0], + [ 0, 0, 33, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:18:34,813 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 48) +tensor([[143, 5, 16, ..., 0, 0, 0], + [ 48, 24, 2, ..., 0, 0, 0], + [ 7, 0, 1, ..., 0, 0, 0], + ..., + [ 1, 31, 1, ..., 0, 0, 0], + [ 43, 1, 167, ..., 0, 0, 0], + [ 24, 14, 0, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:18:34,814 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 48 x 48) +tensor([[[-1, 3, -1, ..., -1, -1, -1], + [-1, 5, -1, ..., -1, -1, -1], + [-1, 8, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + ..., + + [[-1, 2, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') +2024-06-29 19:18:34,820 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 48 x 48) +tensor([[[-1, 3, -1, ..., -1, -1, -1], + [-1, 2, -1, ..., -1, -1, -1], + [-1, 7, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + ..., + + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[ 2, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') +2024-06-29 19:18:34,826 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 48) +tensor([[0, 0, 0, ..., 0, 0, 0], + [0, 2, 0, ..., 0, 0, 0], + [1, 0, 0, ..., 0, 0, 0], + ..., + [0, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0], + [2, 1, 0, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:18:34,827 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 48) +tensor([[12, 1, 3, ..., 0, 0, 0], + [21, 1, 35, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], + ..., + [ 0, 7, 0, ..., 0, 0, 0], + [65, 0, 13, ..., 0, 0, 0], + [ 1, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:18:34,828 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 48) +tensor([[ 2, 143, 35, ..., 0, 0, 0], + [ 2, 8, 11, ..., 0, 0, 0], + [ 7, 0, 1, ..., 0, 0, 0], + ..., + [ 1, 10, 1, ..., 0, 0, 0], + [ 11, 1, 3, ..., 0, 0, 0], + [115, 9, 0, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:18:34,829 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") +2024-06-29 19:18:44,411 - INFO - tqdm - NullAccuracy: 0.9800, NullF1: 0.0752, Lemma: 0.7294, PosFeats: 0.6473, UD-UAS: 0.4483, UD-LAS: 0.4317, EUD-UAS: 0.1246, EUD-LAS: 0.1064, Misc: 0.8976, SS: 0.6661, SC: 0.6283, Avg: 0.5200, batch_loss: 8.0180, loss: 11.2210 ||: 16%|#5 | 181/1147 [00:10<00:51, 18.59it/s] +2024-06-29 19:18:54,439 - INFO - tqdm - NullAccuracy: 0.9836, NullF1: 0.1334, Lemma: 0.7855, PosFeats: 0.7376, UD-UAS: 0.5092, UD-LAS: 0.4962, EUD-UAS: 0.1905, EUD-LAS: 0.1709, Misc: 0.9121, SS: 0.7062, SC: 0.6972, Avg: 0.5783, batch_loss: 5.6264, loss: 8.8108 ||: 32%|###1 | 365/1147 [00:20<00:40, 19.33it/s] +2024-06-29 19:19:04,496 - INFO - tqdm - NullAccuracy: 0.9852, NullF1: 0.1735, Lemma: 0.8120, PosFeats: 0.7771, UD-UAS: 0.5424, UD-LAS: 0.5309, EUD-UAS: 0.2229, EUD-LAS: 0.2035, Misc: 0.9195, SS: 0.7247, SC: 0.7284, Avg: 0.6068, batch_loss: 5.9012, loss: 7.7643 ||: 48%|####7 | 550/1147 [00:30<00:33, 18.03it/s] +2024-06-29 19:19:14,508 - INFO - tqdm - NullAccuracy: 0.9860, NullF1: 0.2011, Lemma: 0.8282, PosFeats: 0.8002, UD-UAS: 0.5663, UD-LAS: 0.5556, EUD-UAS: 0.2468, EUD-LAS: 0.2276, Misc: 0.9244, SS: 0.7367, SC: 0.7480, Avg: 0.6260, batch_loss: 5.3222, loss: 7.1057 ||: 64%|######4 | 737/1147 [00:40<00:22, 18.18it/s] +2024-06-29 19:19:24,619 - INFO - tqdm - NullAccuracy: 0.9867, NullF1: 0.2289, Lemma: 0.8405, PosFeats: 0.8165, UD-UAS: 0.5840, UD-LAS: 0.5741, EUD-UAS: 0.2648, EUD-LAS: 0.2455, Misc: 0.9275, SS: 0.7457, SC: 0.7636, Avg: 0.6403, batch_loss: 4.6858, loss: 6.6559 ||: 81%|######## | 925/1147 [00:50<00:12, 18.26it/s] +2024-06-29 19:19:34,697 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2439, Lemma: 0.8494, PosFeats: 0.8288, UD-UAS: 0.5970, UD-LAS: 0.5879, EUD-UAS: 0.2785, EUD-LAS: 0.2598, Misc: 0.9299, SS: 0.7529, SC: 0.7748, Avg: 0.6510, batch_loss: 4.3227, loss: 6.3229 ||: 97%|#########6| 1112/1147 [01:00<00:01, 18.52it/s] +2024-06-29 19:19:36,422 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2466, Lemma: 0.8507, PosFeats: 0.8306, UD-UAS: 0.5986, UD-LAS: 0.5898, EUD-UAS: 0.2808, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7540, SC: 0.7765, Avg: 0.6526, batch_loss: 4.1319, loss: 6.2726 ||: 100%|#########9| 1143/1147 [01:02<00:00, 18.05it/s] +2024-06-29 19:19:36,533 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8508, PosFeats: 0.8307, UD-UAS: 0.5987, UD-LAS: 0.5899, EUD-UAS: 0.2809, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7541, SC: 0.7766, Avg: 0.6527, batch_loss: 4.6516, loss: 6.2695 ||: 100%|#########9| 1145/1147 [01:02<00:00, 18.05it/s] +2024-06-29 19:19:36,624 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8509, PosFeats: 0.8308, UD-UAS: 0.5989, UD-LAS: 0.5900, EUD-UAS: 0.2810, EUD-LAS: 0.2624, Misc: 0.9302, SS: 0.7541, SC: 0.7767, Avg: 0.6528, batch_loss: 5.1362, loss: 6.2673 ||: 100%|##########| 1147/1147 [01:02<00:00, 18.44it/s] +2024-06-29 19:19:36,624 - INFO - allennlp.training.gradient_descent_trainer - Validating +2024-06-29 19:19:36,625 - INFO - tqdm - 0%| | 0/287 [00:00") +2024-06-29 19:19:36,726 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 41) +tensor([[ 0, 0, 64, ..., 0, 0, 0], + [ 0, 0, 0, ..., 0, 0, 0], + [ 3, 16, 4, ..., 0, 0, 0], + ..., + [ 0, 13, 0, ..., 0, 0, 0], + [32, 0, 2, ..., 0, 0, 0], + [ 0, 17, 4, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:19:36,727 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 41) +tensor([[198, 12, 80, ..., 0, 0, 0], + [ 8, 0, 51, ..., 0, 0, 0], + [ 52, 154, 18, ..., 0, 0, 0], + ..., + [ 2, 24, 30, ..., 0, 0, 0], + [152, 38, 41, ..., 0, 0, 0], + [ 1, 323, 53, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:19:36,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 41 x 41) +tensor([[[-1, 6, -1, ..., -1, -1, -1], + [-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[ 5, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 17, ..., -1, -1, -1], + [ 1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + ..., + + [[ 5, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [ 4, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, 2, ..., -1, -1, -1], + [-1, -1, 6, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') +2024-06-29 19:19:36,735 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 41 x 41) +tensor([[[-1, 5, -1, ..., -1, -1, -1], + [-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[ 2, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 17, ..., -1, -1, -1], + [ 4, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + ..., + + [[ 2, -1, -1, ..., -1, -1, -1], + [21, -1, -1, ..., -1, -1, -1], + [25, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, 9, -1, ..., -1, -1, -1], + [-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]], + + [[-1, -1, 1, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + ..., + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') +2024-06-29 19:19:36,742 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 41) +tensor([[0, 0, 0, ..., 0, 0, 0], + [1, 0, 0, ..., 0, 0, 0], + [0, 0, 3, ..., 0, 0, 0], + ..., + [0, 2, 0, ..., 0, 0, 0], + [0, 2, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:19:36,743 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 41) +tensor([[14, 2, 1, ..., 0, 0, 0], + [18, 0, 3, ..., 0, 0, 0], + [ 3, 11, 17, ..., 0, 0, 0], + ..., + [15, 1, 32, ..., 0, 0, 0], + [13, 2, 1, ..., 0, 0, 0], + [ 0, 7, 32, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:19:36,744 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 41) +tensor([[ 3, 23, 30, ..., 0, 0, 0], + [ 36, 0, 5, ..., 0, 0, 0], + [ 91, 28, 2, ..., 0, 0, 0], + ..., + [ 12, 8, 33, ..., 0, 0, 0], + [ 3, 16, 30, ..., 0, 0, 0], + [ 1, 10, 579, ..., 0, 0, 0]], device='cuda:0') +2024-06-29 19:19:36,746 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") +2024-06-29 19:19:46,640 - INFO - tqdm - NullAccuracy: 0.9895, NullF1: 0.3769, Lemma: 0.9290, PosFeats: 0.9248, UD-UAS: 0.7698, UD-LAS: 0.7705, EUD-UAS: 0.3969, EUD-LAS: 0.3773, Misc: 0.9597, SS: 0.8138, SC: 0.8693, Avg: 0.7568, batch_loss: 3.2383, loss: 3.4567 ||: 47%|####7 | 135/287 [00:10<00:10, 14.32it/s] +2024-06-29 19:19:56,700 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3635, Lemma: 0.9323, PosFeats: 0.9290, UD-UAS: 0.7774, UD-LAS: 0.7774, EUD-UAS: 0.4038, EUD-LAS: 0.3829, Misc: 0.9609, SS: 0.8188, SC: 0.8762, Avg: 0.7621, batch_loss: 2.9325, loss: 3.3026 ||: 98%|#########7| 281/287 [00:20<00:00, 13.32it/s] +2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.49it/s] +2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.01it/s] +2024-06-29 19:19:57,107 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. +2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Training | Validation +2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.653 | 0.762 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.262 | 0.383 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.281 | 0.404 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.851 | 0.932 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.930 | 0.961 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.987 | 0.990 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.247 | 0.363 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.831 | 0.929 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SC | 0.777 | 0.876 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.819 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.590 | 0.778 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.599 | 0.778 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1099.266 | N/A +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - loss | 6.267 | 3.303 +2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4585.449 | N/A +2024-06-29 19:19:58,764 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:01:24.359503 +2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:24 +2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 +2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G +2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 3.2G +2024-06-29 19:19:58,766 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-06-29 19:19:58,766 - INFO - tqdm - 0%| | 0/1147 [00:00