diff --git "a/out.log" "b/out.log" --- "a/out.log" +++ "b/out.log" @@ -1,411 +1,411 @@ -2024-06-29 19:18:03,095 - INFO - allennlp.common.params - random_seed = 13370 -2024-06-29 19:18:03,095 - INFO - allennlp.common.params - numpy_seed = 1337 -2024-06-29 19:18:03,095 - INFO - allennlp.common.params - pytorch_seed = 133 -2024-06-29 19:18:03,096 - INFO - allennlp.common.checks - Pytorch version: 2.3.1+cu121 -2024-06-29 19:18:03,096 - INFO - allennlp.common.params - type = default -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - train_data_path = data/train.conllu -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - datasets_for_vocab_creation = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_dataset_reader = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_data_path = data/validation.conllu -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - test_data_path = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - evaluate_on_test = False -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - batch_weight_key = -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.type = multiprocess -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_size = 24 -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.drop_last = False -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.shuffle = True -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_sampler = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.num_workers = 0 -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.start_method = fork -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.cuda_device = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.quiet = False -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.collate_fn = -2024-06-29 19:18:05,154 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 19:18:15,215 - INFO - tqdm - loading instances: 25625it [00:10, 2590.96it/s] -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24 -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.drop_last = False -2024-06-29 19:18:15,764 - INFO - 
allennlp.common.params - validation_data_loader.shuffle = False -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.start_method = fork -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.quiet = False -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.collate_fn = -2024-06-29 19:18:15,765 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 19:18:18,810 - INFO - allennlp.common.params - vocabulary.type = from_instances -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels') -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.pretrained_files = None -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ -2024-06-29 19:18:18,811 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset. 
-2024-06-29 19:18:18,811 - INFO - tqdm - building vocab: 0it [00:00, ?it/s] -2024-06-29 19:18:19,437 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.namespace = tags -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.max_length = None -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.max_length = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_module = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.train_parameters = True -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.last_layer_only = True -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_file = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.load_weights = True -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg -2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 -2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu -2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 -2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] -2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None -2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 -2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu -2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 -2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 -2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu -2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 -2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 -2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.activation = relu -2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 -2024-06-29 19:18:20,217 - INFO - allennlp.common.params - 
model.semslot_classifier.hid_dim = 1024 -2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu -2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 -2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 -2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu -2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.activation = relu -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.type = gradient_descent -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.cuda_device = 0 -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.distributed = False -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.world_size = 1 -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.patience = None -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.validation_metric = +Avg -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_epochs = 10 -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_norm = False -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_clipping = 5 -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.use_amp = False -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.no_grad = None -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.momentum_scheduler = None -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.moving_average = None -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.checkpointer = -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.run_confidence_checks = True -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_scaling = True -2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.type = adam -2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Done constructing parameter groups. 
-2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 
'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight'], {} -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 1: ['lemma_rule_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'semclass_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'dependency_classifier.arc_head_mlp.1.weight', 'dependency_classifier.rel_dep_mlp.1.bias', 'pos_feats_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.weight', 'dependency_classifier.arc_attention_eud._bias', 'null_classifier.classifier.1.bias', 'null_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'dependency_classifier.arc_attention_ud._weight_matrix', 'null_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.weight', 'dependency_classifier.rel_head_mlp.1.weight', 'semclass_classifier.classifier.4.weight', 'semclass_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.bias', 'lemma_rule_classifier.classifier.4.bias', 'pos_feats_classifier.classifier.1.bias', 'misc_classifier.classifier.1.weight', 'semslot_classifier.classifier.1.bias', 'semclass_classifier.classifier.1.bias', 'misc_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_eud._weight_matrix', 'pos_feats_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'misc_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'dependency_classifier.rel_attention_eud._bias'], {} -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 2: [], {} -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Number of trainable parameters: 287815418 -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight -2024-06-29 19:18:34,394 - 
INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias -2024-06-29 19:18:34,396 
- INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight -2024-06-29 19:18:34,397 - INFO 
- allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias -2024-06-29 19:18:34,398 - INFO - 
allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight 
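The head parameter names in this listing, e.g. lemma_rule_classifier.classifier.1.weight and lemma_rule_classifier.classifier.4.weight, imply that each classification head is an nn.Sequential whose two Linear layers sit at indices 1 and 4, with parameter-free modules (dropout, activation) in between. Below is a minimal sketch of one layout consistent with those names and with the hid_dim / relu / dropout = 0.1 values logged in the model config further down; the class and argument names are illustrative, not the parser's actual code.

import torch.nn as nn

class TaggingHead(nn.Module):
    # Hypothetical head layout; only the Linear positions (1 and 4) are implied by the log.
    def __init__(self, input_dim: int, hid_dim: int, num_classes: int, dropout: float = 0.1):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),             # index 0, no parameters
            nn.Linear(input_dim, hid_dim),   # index 1 -> classifier.1.weight / classifier.1.bias
            nn.ReLU(),                       # index 2, no parameters
            nn.Dropout(dropout),             # index 3, no parameters
            nn.Linear(hid_dim, num_classes), # index 4 -> classifier.4.weight / classifier.4.bias
        )

    def forward(self, x):
        return self.classifier(x)

# list(TaggingHead(768, 512, 100).named_parameters()) yields exactly the four names
# logged per head: classifier.1.weight, classifier.1.bias, classifier.4.weight, classifier.4.bias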
-2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.bias -2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.weight -2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.bias -2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = 
slanted_triangular
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001
-2024-06-29 19:18:34,401 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers.
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - type = default
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_completed_epochs = True
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_seconds = None
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_batches = None
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_count = 2
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_age = None
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True
-2024-06-29 19:18:34,403 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Beginning training.
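The slanted_triangular settings above follow the ULMFiT schedule: the learning rate ramps up linearly over a cut_frac fraction of all training steps, then decays linearly, with the lowest rate equal to the base rate divided by ratio. gradual_unfreezing = True matches the "Training only the top 1 layers" line (further groups are unfrozen as epochs progress), and discriminative_fine_tuning = True gives lower groups learning rates scaled down by decay_factor. A standalone sketch of the schedule formula, assuming the standard ULMFiT definition rather than AllenNLP's exact implementation:

def slanted_triangular_lr(step: int, total_steps: int, base_lr: float,
                          cut_frac: float = 0.0, ratio: int = 32) -> float:
    # Slanted triangular learning rate (ULMFiT); a sketch, not AllenNLP's code.
    cut = int(total_steps * cut_frac)                       # warm-up length; 0 when cut_frac = 0
    if cut > 0 and step < cut:
        p = step / cut                                      # linear warm-up
    else:
        p = 1.0 - (step - cut) / max(1, total_steps - cut)  # linear decay
    return base_lr * (1 + p * (ratio - 1)) / ratio

# With cut_frac = 0 the warm-up branch never runs: the rate starts at base_lr and decays
# linearly to base_lr / ratio (e.g. base_lr = 0.01 ends at 0.01 / 32 = 0.0003125).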
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.5G
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
-2024-06-29 19:18:34,406 - INFO - allennlp.training.gradient_descent_trainer - Training
-2024-06-29 19:18:34,406 - INFO - tqdm - 0%| | 0/1147 [00:00
+2024-07-01 01:31:47,057 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
+2024-07-01 01:31:57,129 - INFO - tqdm - loading instances: 25551it [00:10, 2339.78it/s]
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
+2024-07-01 01:31:57,697 - INFO - allennlp.common.params - validation_data_loader.quiet = False
+2024-07-01 01:31:57,697 - INFO - allennlp.common.params - validation_data_loader.collate_fn = 
+2024-07-01 01:31:57,697 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.type = from_instances
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels')
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.pretrained_files = None
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@
+2024-07-01 01:32:00,794 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.
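The validation_data_loader.* and vocabulary.* keys above map one-to-one onto the experiment configuration that produced this log. Below is a fragment reconstructed from the logged keys alone, written as a Python dict; only a representative subset of keys is shown and values logged as None are omitted. This is not the project's actual config file.

config_fragment = {
    "validation_data_loader": {
        "type": "multiprocess",
        "batch_size": 24,
        "drop_last": False,
        "shuffle": False,
        "num_workers": 0,
        "start_method": "fork",
    },
    "vocabulary": {
        "type": "from_instances",
        "non_padded_namespaces": ["*tags", "*labels"],
        "padding_token": "@@PADDING@@",
        "oov_token": "@@UNKNOWN@@",
    },
}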
+2024-07-01 01:32:00,794 - INFO - tqdm - building vocab: 0it [00:00, ?it/s] +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.namespace = tags +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.max_length = None +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.max_length = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.sub_module = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.train_parameters = True +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.last_layer_only = True +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.override_weights_file = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.load_weights = True +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg +2024-07-01 01:32:01,947 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None +2024-07-01 01:32:01,950 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 +2024-07-01 01:32:01,950 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu +2024-07-01 01:32:01,950 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 +2024-07-01 01:32:01,952 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 +2024-07-01 01:32:01,952 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu +2024-07-01 01:32:01,952 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 +2024-07-01 01:32:01,974 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 +2024-07-01 01:32:01,974 - INFO - allennlp.common.params - model.misc_classifier.activation = relu +2024-07-01 01:32:01,974 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 +2024-07-01 01:32:01,975 - INFO - allennlp.common.params - 
model.semslot_classifier.hid_dim = 1024 +2024-07-01 01:32:01,975 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu +2024-07-01 01:32:01,975 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 +2024-07-01 01:32:01,979 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 +2024-07-01 01:32:01,979 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu +2024-07-01 01:32:01,979 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.activation = relu +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.type = gradient_descent +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.cuda_device = 0 +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.distributed = False +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.world_size = 1 +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.patience = None +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.validation_metric = +Avg +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.num_epochs = 10 +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.grad_norm = False +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.grad_clipping = 5 +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.use_amp = False +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.no_grad = None +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.momentum_scheduler = None +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.moving_average = None +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.checkpointer = +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.run_confidence_checks = True +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.grad_scaling = True +2024-07-01 01:32:16,285 - INFO - allennlp.common.params - trainer.optimizer.type = adam +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Done constructing parameter groups. 
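The Adam optimizer above is constructed over three parameter groups, listed next in the log: Group 0 holds the XLM-RoBERTa embedder weights, Group 1 the task heads (lemma, POS/feats, dependency, misc, semslot, semclass, null), and Group 2 is empty. With discriminative fine-tuning enabled, the groups receive different effective learning rates. A plain-PyTorch sketch of the same idea follows; the grouping rule and the use of decay_factor to scale the encoder's learning rate are illustrative assumptions, since the log does not show how the scaling is applied.

import torch

def build_optimizer(model: torch.nn.Module, base_lr: float = 0.01,
                    decay_factor: float = 0.001) -> torch.optim.Adam:
    # Split parameters the way the "Group 0" / "Group 1" log lines suggest:
    # pretrained transformer embedder vs. task-specific heads.
    embedder_params, head_params = [], []
    for name, param in model.named_parameters():
        (embedder_params if name.startswith("embedder.") else head_params).append(param)
    return torch.optim.Adam(
        [
            {"params": embedder_params, "lr": base_lr * decay_factor},  # reduced LR for the encoder
            {"params": head_params, "lr": base_lr},                     # full LR for the heads
        ],
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0.0,
    )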
+2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 
'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight'], {} +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Group 1: ['dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'lemma_rule_classifier.classifier.1.weight', 'semslot_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.rel_attention_eud._weight_matrix', 'lemma_rule_classifier.classifier.1.bias', 'pos_feats_classifier.classifier.4.bias', 'dependency_classifier.arc_attention_eud._weight_matrix', 'null_classifier.classifier.4.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.rel_dep_mlp.1.bias', 'semclass_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'semslot_classifier.classifier.1.bias', 'dependency_classifier.arc_attention_ud._bias', 'semslot_classifier.classifier.4.weight', 'semclass_classifier.classifier.4.bias', 'dependency_classifier.arc_attention_ud._weight_matrix', 'dependency_classifier.arc_attention_eud._bias', 'misc_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.bias', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.arc_dep_mlp.1.bias', 'null_classifier.classifier.4.weight', 'lemma_rule_classifier.classifier.4.weight', 'null_classifier.classifier.1.bias', 'pos_feats_classifier.classifier.1.weight', 'lemma_rule_classifier.classifier.4.bias', 'misc_classifier.classifier.1.weight', 'misc_classifier.classifier.4.bias', 'dependency_classifier.rel_head_mlp.1.weight', 'semclass_classifier.classifier.1.weight', 'semclass_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_attention_eud._bias', 'dependency_classifier.arc_head_mlp.1.weight'], {} +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Group 2: [], {} +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Number of trainable parameters: 287203493 +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias +2024-07-01 01:32:16,290 
- INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight +2024-07-01 01:32:16,291 - INFO 
- allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias +2024-07-01 01:32:16,292 - INFO - 
allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight 
+2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - null_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - null_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - null_classifier.classifier.4.weight +2024-07-01 01:32:16,295 - INFO - allennlp.common.util - null_classifier.classifier.4.bias +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = 
slanted_triangular +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001 +2024-07-01 01:32:16,295 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - type = default +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - save_completed_epochs = True +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - save_every_num_seconds = None +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - save_every_num_batches = None +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - keep_most_recent_by_age = None +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True +2024-07-01 01:32:16,297 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled +2024-07-01 01:32:16,298 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. 
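Note on the scheduler block logged above: every learning_rate_scheduler value (type = slanted_triangular, cut_frac = 0, ratio = 32, gradual_unfreezing = True, discriminative_fine_tuning = True, decay_factor = 0.001) is dumped by allennlp.common.params, and the "Gradual unfreezing. Training only the top 1 layers." message comes from allennlp.training.learning_rate_schedulers.slanted_triangular. A minimal sketch of the corresponding trainer fragment, written as a plain Python dict, follows; the learning_rate_scheduler and tensorboard callback values are copied from the log, num_epochs is read off the "Epoch 0/9" counter, and everything else (optimizer choice, checkpointer, etc.) is deliberately left out rather than guessed. This is an illustration, not the original experiment config.

# Illustrative sketch only: a trainer config fragment consistent with the
# parameter dump above; values are taken from the log lines, nothing else.
trainer_fragment = {
    "num_epochs": 10,  # the trainer counts epochs 0 through 9
    "learning_rate_scheduler": {
        "type": "slanted_triangular",
        "cut_frac": 0,
        "ratio": 32,
        "gradual_unfreezing": True,          # epoch 0 trains only the top layer group
        "discriminative_fine_tuning": True,  # per-layer-group LRs scaled by decay_factor
        "decay_factor": 0.001,
    },
    "callbacks": [
        {
            "type": "tensorboard",
            "summary_interval": 100,
            "should_log_parameter_statistics": False,
            "should_log_learning_rate": True,
        }
    ],
}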
+2024-07-01 01:32:16,298 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 +2024-07-01 01:32:16,299 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.5G +2024-07-01 01:32:16,299 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G +2024-07-01 01:32:16,300 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 01:32:16,300 - INFO - tqdm - 0%| | 0/1147 [00:00") -2024-06-29 19:18:34,811 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 48) -tensor([[ 0, 8, 0, ..., 0, 0, 0], - [ 0, 13, 0, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], +2024-07-01 01:32:16,713 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "") +2024-07-01 01:32:16,713 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 37) +tensor([[ 0, 0, 0, ..., 0, 0, 0], + [ 0, 27, 11, ..., 0, 0, 0], + [ 0, 0, 5, ..., 0, 0, 0], ..., - [ 0, 9, 0, ..., 0, 0, 0], - [ 0, 0, 33, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,813 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 48) -tensor([[143, 5, 16, ..., 0, 0, 0], - [ 48, 24, 2, ..., 0, 0, 0], - [ 7, 0, 1, ..., 0, 0, 0], + [ 5, 0, 0, ..., 0, 0, 0], + [ 0, 0, 4, ..., 0, 0, 0], + [ 0, 4, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,714 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 37) +tensor([[ 8, 74, 91, ..., 0, 0, 0], + [ 1, 222, 144, ..., 0, 0, 0], + [ 2, 0, 152, ..., 0, 0, 0], ..., - [ 1, 31, 1, ..., 0, 0, 0], - [ 43, 1, 167, ..., 0, 0, 0], - [ 24, 14, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,814 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 48 x 48) -tensor([[[-1, 3, -1, ..., -1, -1, -1], - [-1, 5, -1, ..., -1, -1, -1], - [-1, 8, -1, ..., -1, -1, -1], + [ 95, 38, 1, ..., 0, 0, 0], + [ 2, 22, 18, ..., 0, 0, 0], + [ 48, 131, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,715 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 37 x 37) +tensor([[[-1, -1, -1, ..., -1, -1, -1], + [ 1, -1, -1, ..., -1, -1, -1], + [-1, 29, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [ 0, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], @@ -523,7 +523,7 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], ..., - [[-1, 2, -1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -531,40 +531,40 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 11, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 
-1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:18:34,820 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 48 x 48) -tensor([[[-1, 3, -1, ..., -1, -1, -1], - [-1, 2, -1, ..., -1, -1, -1], - [-1, 7, -1, ..., -1, -1, -1], +2024-07-01 01:32:16,722 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 37 x 37) +tensor([[[-1, -1, -1, ..., -1, -1, -1], + [ 4, -1, -1, ..., -1, -1, -1], + [-1, 44, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, 22, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [ 0, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], @@ -573,7 +573,7 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], ..., - [[-1, 1, -1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -581,76 +581,76 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 11, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 4, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[ 2, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:18:34,826 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 48) +2024-07-01 01:32:16,728 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 37) tensor([[0, 0, 0, ..., 0, 0, 0], - [0, 2, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0], [1, 0, 0, ..., 0, 0, 0], ..., + [0, 2, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], - [2, 1, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,827 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 48) -tensor([[12, 1, 3, ..., 0, 0, 0], - [21, 1, 35, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 37) +tensor([[ 2, 3, 0, ..., 0, 0, 0], + [ 0, 25, 1, ..., 0, 0, 0], + [19, 0, 7, ..., 0, 0, 0], ..., - [ 0, 7, 0, ..., 0, 0, 0], - [65, 0, 13, ..., 0, 0, 0], - [ 1, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,828 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 48) -tensor([[ 2, 143, 35, ..., 0, 0, 0], - [ 2, 8, 11, ..., 0, 0, 0], - [ 7, 0, 1, ..., 0, 0, 0], + [13, 21, 0, ..., 0, 0, 0], + [23, 2, 12, ..., 0, 0, 0], + [ 0, 23, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,731 - INFO - 
allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 37) +tensor([[189, 20, 20, ..., 0, 0, 0], + [ 1, 15, 19, ..., 0, 0, 0], + [ 11, 0, 9, ..., 0, 0, 0], ..., - [ 1, 10, 1, ..., 0, 0, 0], - [ 11, 1, 3, ..., 0, 0, 0], - [115, 9, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,829 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") -2024-06-29 19:18:44,411 - INFO - tqdm - NullAccuracy: 0.9800, NullF1: 0.0752, Lemma: 0.7294, PosFeats: 0.6473, UD-UAS: 0.4483, UD-LAS: 0.4317, EUD-UAS: 0.1246, EUD-LAS: 0.1064, Misc: 0.8976, SS: 0.6661, SC: 0.6283, Avg: 0.5200, batch_loss: 8.0180, loss: 11.2210 ||: 16%|#5 | 181/1147 [00:10<00:51, 18.59it/s] -2024-06-29 19:18:54,439 - INFO - tqdm - NullAccuracy: 0.9836, NullF1: 0.1334, Lemma: 0.7855, PosFeats: 0.7376, UD-UAS: 0.5092, UD-LAS: 0.4962, EUD-UAS: 0.1905, EUD-LAS: 0.1709, Misc: 0.9121, SS: 0.7062, SC: 0.6972, Avg: 0.5783, batch_loss: 5.6264, loss: 8.8108 ||: 32%|###1 | 365/1147 [00:20<00:40, 19.33it/s] -2024-06-29 19:19:04,496 - INFO - tqdm - NullAccuracy: 0.9852, NullF1: 0.1735, Lemma: 0.8120, PosFeats: 0.7771, UD-UAS: 0.5424, UD-LAS: 0.5309, EUD-UAS: 0.2229, EUD-LAS: 0.2035, Misc: 0.9195, SS: 0.7247, SC: 0.7284, Avg: 0.6068, batch_loss: 5.9012, loss: 7.7643 ||: 48%|####7 | 550/1147 [00:30<00:33, 18.03it/s] -2024-06-29 19:19:14,508 - INFO - tqdm - NullAccuracy: 0.9860, NullF1: 0.2011, Lemma: 0.8282, PosFeats: 0.8002, UD-UAS: 0.5663, UD-LAS: 0.5556, EUD-UAS: 0.2468, EUD-LAS: 0.2276, Misc: 0.9244, SS: 0.7367, SC: 0.7480, Avg: 0.6260, batch_loss: 5.3222, loss: 7.1057 ||: 64%|######4 | 737/1147 [00:40<00:22, 18.18it/s] -2024-06-29 19:19:24,619 - INFO - tqdm - NullAccuracy: 0.9867, NullF1: 0.2289, Lemma: 0.8405, PosFeats: 0.8165, UD-UAS: 0.5840, UD-LAS: 0.5741, EUD-UAS: 0.2648, EUD-LAS: 0.2455, Misc: 0.9275, SS: 0.7457, SC: 0.7636, Avg: 0.6403, batch_loss: 4.6858, loss: 6.6559 ||: 81%|######## | 925/1147 [00:50<00:12, 18.26it/s] -2024-06-29 19:19:34,697 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2439, Lemma: 0.8494, PosFeats: 0.8288, UD-UAS: 0.5970, UD-LAS: 0.5879, EUD-UAS: 0.2785, EUD-LAS: 0.2598, Misc: 0.9299, SS: 0.7529, SC: 0.7748, Avg: 0.6510, batch_loss: 4.3227, loss: 6.3229 ||: 97%|#########6| 1112/1147 [01:00<00:01, 18.52it/s] -2024-06-29 19:19:36,422 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2466, Lemma: 0.8507, PosFeats: 0.8306, UD-UAS: 0.5986, UD-LAS: 0.5898, EUD-UAS: 0.2808, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7540, SC: 0.7765, Avg: 0.6526, batch_loss: 4.1319, loss: 6.2726 ||: 100%|#########9| 1143/1147 [01:02<00:00, 18.05it/s] -2024-06-29 19:19:36,533 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8508, PosFeats: 0.8307, UD-UAS: 0.5987, UD-LAS: 0.5899, EUD-UAS: 0.2809, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7541, SC: 0.7766, Avg: 0.6527, batch_loss: 4.6516, loss: 6.2695 ||: 100%|#########9| 1145/1147 [01:02<00:00, 18.05it/s] -2024-06-29 19:19:36,624 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8509, PosFeats: 0.8308, UD-UAS: 0.5989, UD-LAS: 0.5900, EUD-UAS: 0.2810, EUD-LAS: 0.2624, Misc: 0.9302, SS: 0.7541, SC: 0.7767, Avg: 0.6528, batch_loss: 5.1362, loss: 6.2673 ||: 100%|##########| 1147/1147 [01:02<00:00, 18.44it/s] -2024-06-29 19:19:36,624 - INFO - allennlp.training.gradient_descent_trainer - Validating -2024-06-29 19:19:36,625 - INFO - tqdm - 0%| | 0/287 [00:00") +2024-07-01 01:32:26,396 - INFO - tqdm - NullAccuracy: 0.9818, NullF1: 0.0769, Lemma: 0.7297, PosFeats: 0.6471, UD-UAS: 0.4263, UD-LAS: 
0.4118, EUD-UAS: 0.1424, EUD-LAS: 0.1220, Misc: 0.8961, SS: 0.6627, SC: 0.6433, Avg: 0.5201, batch_loss: 7.6293, loss: 11.3000 ||: 16%|#5 | 179/1147 [00:10<00:53, 17.95it/s] +2024-07-01 01:32:36,457 - INFO - tqdm - NullAccuracy: 0.9848, NullF1: 0.1274, Lemma: 0.7833, PosFeats: 0.7376, UD-UAS: 0.4983, UD-LAS: 0.4867, EUD-UAS: 0.2040, EUD-LAS: 0.1826, Misc: 0.9107, SS: 0.7025, SC: 0.7055, Avg: 0.5790, batch_loss: 5.4980, loss: 8.8952 ||: 32%|###1 | 362/1147 [00:20<00:45, 17.12it/s] +2024-07-01 01:32:46,538 - INFO - tqdm - NullAccuracy: 0.9857, NullF1: 0.1675, Lemma: 0.8084, PosFeats: 0.7764, UD-UAS: 0.5376, UD-LAS: 0.5274, EUD-UAS: 0.2393, EUD-LAS: 0.2183, Misc: 0.9185, SS: 0.7222, SC: 0.7351, Avg: 0.6093, batch_loss: 4.8169, loss: 7.8232 ||: 48%|####7 | 547/1147 [00:30<00:30, 19.42it/s] +2024-07-01 01:32:56,590 - INFO - tqdm - NullAccuracy: 0.9864, NullF1: 0.1917, Lemma: 0.8242, PosFeats: 0.7990, UD-UAS: 0.5641, UD-LAS: 0.5552, EUD-UAS: 0.2649, EUD-LAS: 0.2447, Misc: 0.9236, SS: 0.7351, SC: 0.7546, Avg: 0.6295, batch_loss: 5.2682, loss: 7.1602 ||: 64%|######3 | 730/1147 [00:40<00:23, 17.81it/s] +2024-07-01 01:33:06,690 - INFO - tqdm - NullAccuracy: 0.9869, NullF1: 0.2129, Lemma: 0.8366, PosFeats: 0.8144, UD-UAS: 0.5816, UD-LAS: 0.5735, EUD-UAS: 0.2811, EUD-LAS: 0.2618, Misc: 0.9269, SS: 0.7444, SC: 0.7686, Avg: 0.6432, batch_loss: 5.2146, loss: 6.7194 ||: 79%|#######9 | 911/1147 [00:50<00:13, 18.11it/s] +2024-07-01 01:33:16,753 - INFO - tqdm - NullAccuracy: 0.9873, NullF1: 0.2327, Lemma: 0.8466, PosFeats: 0.8269, UD-UAS: 0.5948, UD-LAS: 0.5873, EUD-UAS: 0.2932, EUD-LAS: 0.2747, Misc: 0.9296, SS: 0.7514, SC: 0.7794, Avg: 0.6538, batch_loss: 4.8879, loss: 6.3822 ||: 95%|#########4| 1089/1147 [01:00<00:03, 17.51it/s] +2024-07-01 01:33:19,729 - INFO - tqdm - NullAccuracy: 0.9874, NullF1: 0.2370, Lemma: 0.8490, PosFeats: 0.8300, UD-UAS: 0.5989, UD-LAS: 0.5916, EUD-UAS: 0.2972, EUD-LAS: 0.2789, Misc: 0.9304, SS: 0.7534, SC: 0.7824, Avg: 0.6569, batch_loss: 4.7072, loss: 6.2913 ||: 100%|#########9| 1143/1147 [01:03<00:00, 17.37it/s] +2024-07-01 01:33:19,839 - INFO - tqdm - NullAccuracy: 0.9874, NullF1: 0.2372, Lemma: 0.8490, PosFeats: 0.8301, UD-UAS: 0.5991, UD-LAS: 0.5917, EUD-UAS: 0.2973, EUD-LAS: 0.2790, Misc: 0.9304, SS: 0.7535, SC: 0.7825, Avg: 0.6570, batch_loss: 4.8284, loss: 6.2886 ||: 100%|#########9| 1145/1147 [01:03<00:00, 17.62it/s] +2024-07-01 01:33:19,925 - INFO - tqdm - NullAccuracy: 0.9874, NullF1: 0.2375, Lemma: 0.8491, PosFeats: 0.8302, UD-UAS: 0.5993, UD-LAS: 0.5919, EUD-UAS: 0.2975, EUD-LAS: 0.2792, Misc: 0.9305, SS: 0.7536, SC: 0.7826, Avg: 0.6571, batch_loss: 3.4145, loss: 6.2846 ||: 100%|##########| 1147/1147 [01:03<00:00, 18.03it/s] +2024-07-01 01:33:19,925 - INFO - allennlp.training.gradient_descent_trainer - Validating +2024-07-01 01:33:19,926 - INFO - tqdm - 0%| | 0/287 [00:00") -2024-06-29 19:19:36,726 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 41) -tensor([[ 0, 0, 64, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], - [ 3, 16, 4, ..., 0, 0, 0], + [ 0, 0], + [ 0, 0], + [ 0, 0]]], device='cuda:0') +2024-07-01 01:33:20,023 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "") +2024-07-01 01:33:20,023 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 35) +tensor([[ 8, 0, 0, ..., 0, 0, 0], + [ 0, 8, 0, ..., 0, 0, 0], + [ 0, 13, 62, ..., 0, 0, 0], ..., - [ 0, 13, 0, ..., 0, 0, 0], - [32, 0, 2, ..., 0, 0, 0], - [ 0, 17, 4, 
..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,727 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 41) -tensor([[198, 12, 80, ..., 0, 0, 0], - [ 8, 0, 51, ..., 0, 0, 0], - [ 52, 154, 18, ..., 0, 0, 0], + [ 0, 11, 0, ..., 0, 0, 0], + [ 0, 0, 13, ..., 0, 0, 0], + [124, 41, 36, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:33:20,024 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 35) +tensor([[158, 0, 34, ..., 0, 0, 0], + [ 14, 41, 12, ..., 0, 0, 0], + [197, 24, 116, ..., 0, 0, 0], ..., - [ 2, 24, 30, ..., 0, 0, 0], - [152, 38, 41, ..., 0, 0, 0], - [ 1, 323, 53, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 41 x 41) -tensor([[[-1, 6, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 5, ..., -1, -1, -1], + [ 8, 33, 2, ..., 22, 0, 0], + [ 0, 12, 24, ..., 0, 0, 0], + [721, 94, 57, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:33:20,025 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 35 x 35) +tensor([[[ 5, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[ 5, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 13, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 3, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 17, ..., -1, -1, -1], - [ 1, -1, -1, ..., -1, -1, -1], + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -760,49 +760,49 @@ tensor([[[-1, 6, -1, ..., -1, -1, -1], ..., - [[ 5, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [ 4, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, 5, -1, ..., -1, -1, -1], + [-1, 7, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 3, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 2, ..., -1, -1, -1], - [-1, -1, 6, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 10, -1, ..., -1, -1, -1], + [-1, -1, 21, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:19:36,735 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 41 x 41) -tensor([[[-1, 5, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 2, ..., -1, -1, -1], +2024-07-01 01:33:20,032 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 35 x 35) +tensor([[[ 2, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[ 2, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 14, -1, ..., -1, -1, -1], [-1, -1, -1, 
..., -1, -1, -1], + [-1, 3, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 17, ..., -1, -1, -1], - [ 4, -1, -1, ..., -1, -1, -1], + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 21, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -810,560 +810,555 @@ tensor([[[-1, 5, -1, ..., -1, -1, -1], ..., - [[ 2, -1, -1, ..., -1, -1, -1], - [21, -1, -1, ..., -1, -1, -1], - [25, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, 2, -1, ..., -1, -1, -1], + [-1, 6, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, 9, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 2, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 1, ..., -1, -1, -1], - [-1, -1, 5, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 9, -1, ..., -1, -1, -1], + [-1, -1, 24, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:19:36,742 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 41) -tensor([[0, 0, 0, ..., 0, 0, 0], - [1, 0, 0, ..., 0, 0, 0], - [0, 0, 3, ..., 0, 0, 0], - ..., - [0, 2, 0, ..., 0, 0, 0], +2024-07-01 01:33:20,038 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 35) +tensor([[1, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0], [0, 2, 0, ..., 0, 0, 0], + ..., + [0, 0, 0, ..., 1, 0, 0], + [1, 0, 2, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,743 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 41) -tensor([[14, 2, 1, ..., 0, 0, 0], - [18, 0, 3, ..., 0, 0, 0], - [ 3, 11, 17, ..., 0, 0, 0], +2024-07-01 01:33:20,039 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 35) +tensor([[ 1, 0, 10, ..., 0, 0, 0], + [ 0, 19, 20, ..., 0, 0, 0], + [ 3, 1, 15, ..., 0, 0, 0], ..., - [15, 1, 32, ..., 0, 0, 0], - [13, 2, 1, ..., 0, 0, 0], - [ 0, 7, 32, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,744 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 41) -tensor([[ 3, 23, 30, ..., 0, 0, 0], - [ 36, 0, 5, ..., 0, 0, 0], - [ 91, 28, 2, ..., 0, 0, 0], + [ 2, 1, 23, ..., 82, 0, 0], + [ 0, 3, 1, ..., 0, 0, 0], + [13, 31, 1, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:33:20,041 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 35) +tensor([[ 28, 0, 373, ..., 0, 0, 0], + [ 10, 6, 60, ..., 0, 0, 0], + [ 24, 8, 12, ..., 0, 0, 0], ..., - [ 12, 8, 33, ..., 0, 0, 0], - [ 3, 16, 30, ..., 0, 0, 0], - [ 1, 10, 579, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,746 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") -2024-06-29 19:19:46,640 - INFO - tqdm - NullAccuracy: 0.9895, NullF1: 0.3769, Lemma: 0.9290, PosFeats: 0.9248, UD-UAS: 0.7698, UD-LAS: 0.7705, EUD-UAS: 0.3969, EUD-LAS: 0.3773, Misc: 0.9597, SS: 0.8138, SC: 0.8693, Avg: 0.7568, batch_loss: 
3.2383, loss: 3.4567 ||: 47%|####7 | 135/287 [00:10<00:10, 14.32it/s] -2024-06-29 19:19:56,700 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3635, Lemma: 0.9323, PosFeats: 0.9290, UD-UAS: 0.7774, UD-LAS: 0.7774, EUD-UAS: 0.4038, EUD-LAS: 0.3829, Misc: 0.9609, SS: 0.8188, SC: 0.8762, Avg: 0.7621, batch_loss: 2.9325, loss: 3.3026 ||: 98%|#########7| 281/287 [00:20<00:00, 13.32it/s] -2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.49it/s] -2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.01it/s] -2024-06-29 19:19:57,107 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. -2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Training | Validation -2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.653 | 0.762 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.262 | 0.383 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.281 | 0.404 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.851 | 0.932 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.930 | 0.961 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.987 | 0.990 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.247 | 0.363 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.831 | 0.929 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SC | 0.777 | 0.876 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.819 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.590 | 0.778 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.599 | 0.778 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1099.266 | N/A -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - loss | 6.267 | 3.303 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4585.449 | N/A -2024-06-29 19:19:58,764 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:01:24.359503 -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:24 -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 3.2G -2024-06-29 19:19:58,766 - INFO - allennlp.training.gradient_descent_trainer - Training -2024-06-29 19:19:58,766 - INFO - tqdm - 0%| | 0/1147 [00:00") +2024-07-01 01:33:29,991 - INFO - tqdm - NullAccuracy: 
0.9893, NullF1: 0.3847, Lemma: 0.9299, PosFeats: 0.9255, UD-UAS: 0.7652, UD-LAS: 0.7662, EUD-UAS: 0.4128, EUD-LAS: 0.3992, Misc: 0.9596, SS: 0.8146, SC: 0.8776, Avg: 0.7612, batch_loss: 3.0156, loss: 3.4289 ||: 45%|####5 | 130/287 [00:10<00:10, 14.94it/s] +2024-07-01 01:33:40,019 - INFO - tqdm - NullAccuracy: 0.9896, NullF1: 0.3925, Lemma: 0.9335, PosFeats: 0.9291, UD-UAS: 0.7714, UD-LAS: 0.7721, EUD-UAS: 0.4227, EUD-LAS: 0.4091, Misc: 0.9610, SS: 0.8174, SC: 0.8809, Avg: 0.7663, batch_loss: 3.4157, loss: 3.2924 ||: 95%|#########4| 272/287 [00:20<00:01, 13.98it/s] +2024-07-01 01:33:40,964 - INFO - tqdm - NullAccuracy: 0.9896, NullF1: 0.3936, Lemma: 0.9336, PosFeats: 0.9295, UD-UAS: 0.7720, UD-LAS: 0.7727, EUD-UAS: 0.4236, EUD-LAS: 0.4098, Misc: 0.9609, SS: 0.8173, SC: 0.8810, Avg: 0.7667, batch_loss: 2.6802, loss: 3.2845 ||: 100%|#########9| 286/287 [00:21<00:00, 14.80it/s] +2024-07-01 01:33:41,017 - INFO - tqdm - NullAccuracy: 0.9896, NullF1: 0.3941, Lemma: 0.9336, PosFeats: 0.9296, UD-UAS: 0.7722, UD-LAS: 0.7729, EUD-UAS: 0.4236, EUD-LAS: 0.4098, Misc: 0.9609, SS: 0.8173, SC: 0.8810, Avg: 0.7668, batch_loss: 2.8979, loss: 3.2831 ||: 100%|##########| 287/287 [00:21<00:00, 13.61it/s] +2024-07-01 01:33:41,017 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. +2024-07-01 01:33:41,020 - INFO - allennlp.training.callbacks.console_logger - Training | Validation +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.657 | 0.767 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.279 | 0.410 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.298 | 0.424 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.849 | 0.934 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.930 | 0.961 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.987 | 0.990 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.237 | 0.394 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.830 | 0.930 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - SC | 0.783 | 0.881 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.817 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.592 | 0.773 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.599 | 0.772 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1096.931 | N/A +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - loss | 6.285 | 3.283 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4585.289 | N/A +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:01:26.265534 +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:42 +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G +2024-07-01 01:33:42,565 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 3.0G +2024-07-01 
01:33:42,566 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 01:33:42,566 - INFO - tqdm - 0%| | 0/1147 [00:00
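Both runs in this diff report progress through the same format of tqdm lines: a comma-separated list of "Name: value" metric pairs (NullAccuracy, NullF1, Lemma, PosFeats, UD-UAS, UD-LAS, EUD-UAS, EUD-LAS, Misc, SS, SC, Avg, batch_loss, loss) followed by the progress bar. Below is a small self-contained sketch for extracting those series from a diff like this one, for example to compare the old ("-") and new ("+") run side by side; the file name and the choice to print Avg are assumptions made only for illustration.

import re
from collections import defaultdict

# Matches "Name: 0.1234" pairs inside a tqdm progress line, e.g.
# "Lemma: 0.8490, PosFeats: 0.8302, ..., loss: 6.2913".
# Timestamps such as "01:33:19,925" are skipped because they have no
# space after the colon.
METRIC_RE = re.compile(r"([A-Za-z0-9_-]+): ([0-9.]+)")

def parse_metric_series(path="out.log.diff"):  # hypothetical file name
    """Collect metric series separately for the old ('-') and new ('+') run."""
    runs = {"-": defaultdict(list), "+": defaultdict(list)}
    with open(path, encoding="utf-8") as fh:
        for line in fh:
            sign = line[:1]
            # keep only diff lines that carry the per-batch metric summary
            if sign not in runs or "Avg:" not in line:
                continue
            for name, value in METRIC_RE.findall(line):
                runs[sign][name].append(float(value))
    return runs

if __name__ == "__main__":
    runs = parse_metric_series()
    for sign, label in (("-", "old run"), ("+", "new run")):
        series = runs[sign].get("Avg", [])
        if series:
            print(f"{label}: first Avg = {series[0]:.4f}, last Avg = {series[-1]:.4f}")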