diff --git "a/out.log" "b/out.log" --- "a/out.log" +++ "b/out.log" @@ -1,411 +1,411 @@ -2024-06-29 19:18:03,095 - INFO - allennlp.common.params - random_seed = 13370 -2024-06-29 19:18:03,095 - INFO - allennlp.common.params - numpy_seed = 1337 -2024-06-29 19:18:03,095 - INFO - allennlp.common.params - pytorch_seed = 133 -2024-06-29 19:18:03,096 - INFO - allennlp.common.checks - Pytorch version: 2.3.1+cu121 -2024-06-29 19:18:03,096 - INFO - allennlp.common.params - type = default -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.type = compreno_ud_dataset_reader -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.type = pretrained_transformer_mismatched -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.token_min_padding_length = 0 -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.model_name = xlm-roberta-base -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.namespace = tags -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.max_length = None -2024-06-29 19:18:03,097 - INFO - allennlp.common.params - dataset_reader.token_indexers.tokens.tokenizer_kwargs = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - train_data_path = data/train.conllu -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - datasets_for_vocab_creation = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_dataset_reader = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - validation_data_path = data/validation.conllu -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - test_data_path = None -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - evaluate_on_test = False -2024-06-29 19:18:05,153 - INFO - allennlp.common.params - batch_weight_key = -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.type = multiprocess -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_size = 24 -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.drop_last = False -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.shuffle = True -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batch_sampler = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.batches_per_epoch = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.num_workers = 0 -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.max_instances_in_memory = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.start_method = fork -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.cuda_device = None -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.quiet = False -2024-06-29 19:18:05,154 - INFO - allennlp.common.params - data_loader.collate_fn = -2024-06-29 19:18:05,154 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 19:18:15,215 - INFO - tqdm - loading instances: 25625it [00:10, 2590.96it/s] -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24 -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.drop_last = False -2024-06-29 19:18:15,764 - INFO - 
allennlp.common.params - validation_data_loader.shuffle = False -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None -2024-06-29 19:18:15,764 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0 -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.start_method = fork -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.quiet = False -2024-06-29 19:18:15,765 - INFO - allennlp.common.params - validation_data_loader.collate_fn = -2024-06-29 19:18:15,765 - INFO - tqdm - loading instances: 0it [00:00, ?it/s] -2024-06-29 19:18:18,810 - INFO - allennlp.common.params - vocabulary.type = from_instances -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels') -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.pretrained_files = None -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@ -2024-06-29 19:18:18,811 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@ -2024-06-29 19:18:18,811 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset. 
-2024-06-29 19:18:18,811 - INFO - tqdm - building vocab: 0it [00:00, ?it/s] -2024-06-29 19:18:19,437 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.namespace = tags -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.max_length = None -2024-06-29 19:18:19,438 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.max_length = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_module = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.train_parameters = True -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.last_layer_only = True -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_file = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.load_weights = True -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None -2024-06-29 19:18:19,439 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg -2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 -2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu -2024-06-29 19:18:20,189 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 -2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] -2024-06-29 19:18:20,190 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None -2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 -2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu -2024-06-29 19:18:20,192 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 -2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 -2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu -2024-06-29 19:18:20,194 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 -2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 -2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.activation = relu -2024-06-29 19:18:20,216 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 -2024-06-29 19:18:20,217 - INFO - allennlp.common.params - 
model.semslot_classifier.hid_dim = 1024 -2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu -2024-06-29 19:18:20,217 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 -2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 -2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu -2024-06-29 19:18:20,220 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.activation = relu -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 -2024-06-29 19:18:20,227 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.type = gradient_descent -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.cuda_device = 0 -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.distributed = False -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.world_size = 1 -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.patience = None -2024-06-29 19:18:34,182 - INFO - allennlp.common.params - trainer.validation_metric = +Avg -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_epochs = 10 -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_norm = False -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_clipping = 5 -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.use_amp = False -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.no_grad = None -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.momentum_scheduler = None -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.moving_average = None -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.checkpointer = -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.run_confidence_checks = True -2024-06-29 19:18:34,183 - INFO - allennlp.common.params - trainer.grad_scaling = True -2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.type = adam -2024-06-29 19:18:34,391 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 -2024-06-29 19:18:34,392 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Done constructing parameter groups. 
-2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 
'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 
'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight'], {} -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 1: ['lemma_rule_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'semclass_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_ud._bias', 'dependency_classifier.arc_head_mlp.1.weight', 'dependency_classifier.rel_dep_mlp.1.bias', 'pos_feats_classifier.classifier.4.bias', 'lemma_rule_classifier.classifier.4.weight', 'dependency_classifier.arc_attention_eud._bias', 'null_classifier.classifier.1.bias', 'null_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'dependency_classifier.arc_attention_ud._weight_matrix', 'null_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.weight', 'dependency_classifier.rel_head_mlp.1.weight', 'semclass_classifier.classifier.4.weight', 'semclass_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.bias', 'lemma_rule_classifier.classifier.4.bias', 'pos_feats_classifier.classifier.1.bias', 'misc_classifier.classifier.1.weight', 'semslot_classifier.classifier.1.bias', 'semclass_classifier.classifier.1.bias', 'misc_classifier.classifier.4.bias', 'dependency_classifier.rel_attention_eud._weight_matrix', 'pos_feats_classifier.classifier.4.weight', 'semslot_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.arc_attention_eud._weight_matrix', 'misc_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'dependency_classifier.rel_attention_eud._bias'], {} -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Group 2: [], {} -2024-06-29 19:18:34,393 - INFO - allennlp.training.optimizers - Number of trainable parameters: 287815418 -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight -2024-06-29 19:18:34,394 - 
INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight -2024-06-29 19:18:34,394 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias -2024-06-29 19:18:34,395 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias -2024-06-29 19:18:34,396 
- INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight -2024-06-29 19:18:34,396 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight -2024-06-29 19:18:34,397 - INFO 
- allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias -2024-06-29 19:18:34,397 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias -2024-06-29 19:18:34,398 - INFO - 
allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias -2024-06-29 19:18:34,398 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias -2024-06-29 19:18:34,399 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight 
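The head parameter names in this listing, e.g. lemma_rule_classifier.classifier.1.weight and lemma_rule_classifier.classifier.4.weight, imply that each classification head is an nn.Sequential whose two Linear layers sit at indices 1 and 4, with parameter-free modules (dropout, activation) in between. Below is a minimal sketch of one layout consistent with those names and with the hid_dim / relu / dropout = 0.1 values logged in the model config further down; the class and argument names are illustrative, not the parser's actual code.

import torch.nn as nn

class TaggingHead(nn.Module):
    # Hypothetical head layout; only the Linear positions (1 and 4) are implied by the log.
    def __init__(self, input_dim: int, hid_dim: int, num_classes: int, dropout: float = 0.1):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),             # index 0, no parameters
            nn.Linear(input_dim, hid_dim),   # index 1 -> classifier.1.weight / classifier.1.bias
            nn.ReLU(),                       # index 2, no parameters
            nn.Dropout(dropout),             # index 3, no parameters
            nn.Linear(hid_dim, num_classes), # index 4 -> classifier.4.weight / classifier.4.bias
        )

    def forward(self, x):
        return self.classifier(x)

# list(TaggingHead(768, 512, 100).named_parameters()) yields exactly the four names
# logged per head: classifier.1.weight, classifier.1.bias, classifier.4.weight, classifier.4.bias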
-2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.weight -2024-06-29 19:18:34,400 - INFO - allennlp.common.util - null_classifier.classifier.1.bias -2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.weight -2024-06-29 19:18:34,401 - INFO - allennlp.common.util - null_classifier.classifier.4.bias -2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = 
slanted_triangular
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001
-2024-06-29 19:18:34,401 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers.
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - type = default
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_completed_epochs = True
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_seconds = None
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - save_every_num_batches = None
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_count = 2
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - keep_most_recent_by_age = None
-2024-06-29 19:18:34,401 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False
-2024-06-29 19:18:34,402 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True
-2024-06-29 19:18:34,403 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Beginning training.
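The slanted_triangular settings above follow the ULMFiT schedule: the learning rate ramps up linearly over a cut_frac fraction of all training steps, then decays linearly, with the lowest rate equal to the base rate divided by ratio. gradual_unfreezing = True matches the "Training only the top 1 layers" line (further groups are unfrozen as epochs progress), and discriminative_fine_tuning = True gives lower groups learning rates scaled down by decay_factor. A standalone sketch of the schedule formula, assuming the standard ULMFiT definition rather than AllenNLP's exact implementation:

def slanted_triangular_lr(step: int, total_steps: int, base_lr: float,
                          cut_frac: float = 0.0, ratio: int = 32) -> float:
    # Slanted triangular learning rate (ULMFiT); a sketch, not AllenNLP's code.
    cut = int(total_steps * cut_frac)                       # warm-up length; 0 when cut_frac = 0
    if cut > 0 and step < cut:
        p = step / cut                                      # linear warm-up
    else:
        p = 1.0 - (step - cut) / max(1, total_steps - cut)  # linear decay
    return base_lr * (1 + p * (ratio - 1)) / ratio

# With cut_frac = 0 the warm-up branch never runs: the rate starts at base_lr and decays
# linearly to base_lr / ratio (e.g. base_lr = 0.01 ends at 0.01 / 32 = 0.0003125).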
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.5G
-2024-06-29 19:18:34,405 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G
-2024-06-29 19:18:34,406 - INFO - allennlp.training.gradient_descent_trainer - Training
-2024-06-29 19:18:34,406 - INFO - tqdm - 0%| | 0/1147 [00:00
+2024-07-01 01:31:47,057 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
+2024-07-01 01:31:57,129 - INFO - tqdm - loading instances: 25551it [00:10, 2339.78it/s]
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.type = multiprocess
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.batch_size = 24
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.drop_last = False
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.shuffle = False
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.batch_sampler = None
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.batches_per_epoch = None
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.num_workers = 0
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.max_instances_in_memory = None
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.start_method = fork
+2024-07-01 01:31:57,696 - INFO - allennlp.common.params - validation_data_loader.cuda_device = None
+2024-07-01 01:31:57,697 - INFO - allennlp.common.params - validation_data_loader.quiet = False
+2024-07-01 01:31:57,697 - INFO - allennlp.common.params - validation_data_loader.collate_fn = 
+2024-07-01 01:31:57,697 - INFO - tqdm - loading instances: 0it [00:00, ?it/s]
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.type = from_instances
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.max_vocab_size = None
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.non_padded_namespaces = ('*tags', '*labels')
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.pretrained_files = None
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.only_include_pretrained_words = False
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.min_pretrained_embeddings = None
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.padding_token = @@PADDING@@
+2024-07-01 01:32:00,794 - INFO - allennlp.common.params - vocabulary.oov_token = @@UNKNOWN@@
+2024-07-01 01:32:00,794 - INFO - allennlp.data.vocabulary - Fitting token dictionary from dataset.
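The validation_data_loader.* and vocabulary.* keys above map one-to-one onto the experiment configuration that produced this log. Below is a fragment reconstructed from the logged keys alone, written as a Python dict; only a representative subset of keys is shown and values logged as None are omitted. This is not the project's actual config file.

config_fragment = {
    "validation_data_loader": {
        "type": "multiprocess",
        "batch_size": 24,
        "drop_last": False,
        "shuffle": False,
        "num_workers": 0,
        "start_method": "fork",
    },
    "vocabulary": {
        "type": "from_instances",
        "non_padded_namespaces": ["*tags", "*labels"],
        "padding_token": "@@PADDING@@",
        "oov_token": "@@UNKNOWN@@",
    },
}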
+2024-07-01 01:32:00,794 - INFO - tqdm - building vocab: 0it [00:00, ?it/s] +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.type = morpho_syntax_semantic_parser +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.type = pretrained_transformer_mismatched +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.token_min_padding_length = 0 +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.model_name = xlm-roberta-base +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.namespace = tags +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.max_length = None +2024-07-01 01:32:01,418 - INFO - allennlp.common.params - model.indexer.tokenizer_kwargs = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.type = pretrained_transformer_mismatched +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.model_name = xlm-roberta-base +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.max_length = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.sub_module = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.train_parameters = True +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.last_layer_only = True +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.override_weights_file = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.override_weights_strip_prefix = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.load_weights = True +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.gradient_checkpointing = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.tokenizer_kwargs = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.transformer_kwargs = None +2024-07-01 01:32:01,419 - INFO - allennlp.common.params - model.embedder.sub_token_mode = avg +2024-07-01 01:32:01,947 - INFO - allennlp.common.params - model.lemma_rule_classifier.hid_dim = 512 +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.activation = relu +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.dropout = 0.1 +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.dictionaries = [] +2024-07-01 01:32:01,948 - INFO - allennlp.common.params - model.lemma_rule_classifier.topk = None +2024-07-01 01:32:01,950 - INFO - allennlp.common.params - model.pos_feats_classifier.hid_dim = 256 +2024-07-01 01:32:01,950 - INFO - allennlp.common.params - model.pos_feats_classifier.activation = relu +2024-07-01 01:32:01,950 - INFO - allennlp.common.params - model.pos_feats_classifier.dropout = 0.1 +2024-07-01 01:32:01,952 - INFO - allennlp.common.params - model.depencency_classifier.hid_dim = 128 +2024-07-01 01:32:01,952 - INFO - allennlp.common.params - model.depencency_classifier.activation = relu +2024-07-01 01:32:01,952 - INFO - allennlp.common.params - model.depencency_classifier.dropout = 0.1 +2024-07-01 01:32:01,974 - INFO - allennlp.common.params - model.misc_classifier.hid_dim = 128 +2024-07-01 01:32:01,974 - INFO - allennlp.common.params - model.misc_classifier.activation = relu +2024-07-01 01:32:01,974 - INFO - allennlp.common.params - model.misc_classifier.dropout = 0.1 +2024-07-01 01:32:01,975 - INFO - allennlp.common.params - 
model.semslot_classifier.hid_dim = 1024 +2024-07-01 01:32:01,975 - INFO - allennlp.common.params - model.semslot_classifier.activation = relu +2024-07-01 01:32:01,975 - INFO - allennlp.common.params - model.semslot_classifier.dropout = 0.1 +2024-07-01 01:32:01,979 - INFO - allennlp.common.params - model.semclass_classifier.hid_dim = 1024 +2024-07-01 01:32:01,979 - INFO - allennlp.common.params - model.semclass_classifier.activation = relu +2024-07-01 01:32:01,979 - INFO - allennlp.common.params - model.semclass_classifier.dropout = 0.1 +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.hid_dim = 512 +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.activation = relu +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.dropout = 0.1 +2024-07-01 01:32:01,983 - INFO - allennlp.common.params - model.null_classifier.positive_class_weight = 1.0 +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.type = gradient_descent +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.cuda_device = 0 +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.distributed = False +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.world_size = 1 +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.patience = None +2024-07-01 01:32:16,071 - INFO - allennlp.common.params - trainer.validation_metric = +Avg +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.num_epochs = 10 +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.grad_norm = False +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.grad_clipping = 5 +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.num_gradient_accumulation_steps = 1 +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.use_amp = False +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.no_grad = None +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.momentum_scheduler = None +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.moving_average = None +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.checkpointer = +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.enable_default_callbacks = True +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.run_confidence_checks = True +2024-07-01 01:32:16,072 - INFO - allennlp.common.params - trainer.grad_scaling = True +2024-07-01 01:32:16,285 - INFO - allennlp.common.params - trainer.optimizer.type = adam +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.lr = 0.01 +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.betas = (0.9, 0.999) +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.eps = 1e-08 +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.weight_decay = 0.0 +2024-07-01 01:32:16,286 - INFO - allennlp.common.params - trainer.optimizer.amsgrad = False +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Done constructing parameter groups. 
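The Adam optimizer above is constructed over three parameter groups, listed next in the log: Group 0 holds the XLM-RoBERTa embedder weights, Group 1 the task heads (lemma, POS/feats, dependency, misc, semslot, semclass, null), and Group 2 is empty. With discriminative fine-tuning enabled, the groups receive different effective learning rates. A plain-PyTorch sketch of the same idea follows; the grouping rule and the use of decay_factor to scale the encoder's learning rate are illustrative assumptions, since the log does not show how the scaling is applied.

import torch

def build_optimizer(model: torch.nn.Module, base_lr: float = 0.01,
                    decay_factor: float = 0.001) -> torch.optim.Adam:
    # Split parameters the way the "Group 0" / "Group 1" log lines suggest:
    # pretrained transformer embedder vs. task-specific heads.
    embedder_params, head_params = [], []
    for name, param in model.named_parameters():
        (embedder_params if name.startswith("embedder.") else head_params).append(param)
    return torch.optim.Adam(
        [
            {"params": embedder_params, "lr": base_lr * decay_factor},  # reduced LR for the encoder
            {"params": head_params, "lr": base_lr},                     # full LR for the heads
        ],
        betas=(0.9, 0.999), eps=1e-08, weight_decay=0.0,
    )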
+2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Group 0: ['embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias', 
'embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight', 'embedder._matched_embedder.transformer_model.pooler.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight', 
'embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias', 'embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight', 'embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight'], {} +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Group 1: ['dependency_classifier.arc_head_mlp.1.bias', 'dependency_classifier.rel_attention_ud._bias', 'lemma_rule_classifier.classifier.1.weight', 'semslot_classifier.classifier.4.bias', 'semslot_classifier.classifier.1.weight', 'dependency_classifier.rel_attention_eud._weight_matrix', 'lemma_rule_classifier.classifier.1.bias', 'pos_feats_classifier.classifier.4.bias', 'dependency_classifier.arc_attention_eud._weight_matrix', 'null_classifier.classifier.4.bias', 'dependency_classifier.rel_dep_mlp.1.weight', 'dependency_classifier.rel_attention_ud._weight_matrix', 'dependency_classifier.rel_dep_mlp.1.bias', 'semclass_classifier.classifier.4.weight', 'misc_classifier.classifier.1.bias', 'semslot_classifier.classifier.1.bias', 'dependency_classifier.arc_attention_ud._bias', 'semslot_classifier.classifier.4.weight', 'semclass_classifier.classifier.4.bias', 'dependency_classifier.arc_attention_ud._weight_matrix', 'dependency_classifier.arc_attention_eud._bias', 'misc_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.4.weight', 'pos_feats_classifier.classifier.1.bias', 'dependency_classifier.rel_head_mlp.1.bias', 'dependency_classifier.arc_dep_mlp.1.bias', 'null_classifier.classifier.4.weight', 'lemma_rule_classifier.classifier.4.weight', 'null_classifier.classifier.1.bias', 'pos_feats_classifier.classifier.1.weight', 'lemma_rule_classifier.classifier.4.bias', 'misc_classifier.classifier.1.weight', 'misc_classifier.classifier.4.bias', 'dependency_classifier.rel_head_mlp.1.weight', 'semclass_classifier.classifier.1.weight', 'semclass_classifier.classifier.1.bias', 'null_classifier.classifier.1.weight', 'dependency_classifier.arc_dep_mlp.1.weight', 'dependency_classifier.rel_attention_eud._bias', 'dependency_classifier.arc_head_mlp.1.weight'], {} +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Group 2: [], {} +2024-07-01 01:32:16,287 - INFO - allennlp.training.optimizers - Number of trainable parameters: 287203493 +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - The following parameters are Frozen (without gradient): +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - The following parameters are Tunable (with gradient): +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.word_embeddings.weight +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.position_embeddings.weight +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.token_type_embeddings.weight +2024-07-01 01:32:16,288 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.embeddings.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.query.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.key.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.self.value.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.attention.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.intermediate.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.0.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.query.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.key.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.self.value.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.1.attention.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.intermediate.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.dense.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.1.output.LayerNorm.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.query.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.key.bias +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.weight +2024-07-01 01:32:16,289 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.self.value.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.attention.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.intermediate.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.2.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.query.bias +2024-07-01 01:32:16,290 
- INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.key.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.self.value.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.attention.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.intermediate.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.3.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.query.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.key.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.self.value.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.attention.output.LayerNorm.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.weight +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.intermediate.dense.bias +2024-07-01 01:32:16,290 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.4.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.query.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.key.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.self.value.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.attention.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.intermediate.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.5.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.query.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.weight +2024-07-01 01:32:16,291 - INFO 
- allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.key.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.self.value.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.attention.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.intermediate.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.dense.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.6.output.LayerNorm.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.query.bias +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.weight +2024-07-01 01:32:16,291 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.key.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.self.value.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.attention.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.7.intermediate.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.7.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.query.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.key.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.self.value.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.attention.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.intermediate.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.8.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.query.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.key.bias +2024-07-01 01:32:16,292 - INFO - 
allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.self.value.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.dense.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.attention.output.LayerNorm.bias +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.weight +2024-07-01 01:32:16,292 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.intermediate.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.9.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.query.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.key.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.self.value.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.attention.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.intermediate.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - 
embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.10.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.query.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.key.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.self.value.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.attention.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.intermediate.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.encoder.layer.11.output.LayerNorm.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.weight +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - embedder._matched_embedder.transformer_model.pooler.dense.bias +2024-07-01 01:32:16,293 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - lemma_rule_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.weight 
+2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - pos_feats_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_dep_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_head_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_dep_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_head_mlp.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_ud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_ud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.arc_attention_eud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._weight_matrix +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - dependency_classifier.rel_attention_eud._bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - misc_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semslot_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.4.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - semclass_classifier.classifier.4.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - null_classifier.classifier.1.weight +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - null_classifier.classifier.1.bias +2024-07-01 01:32:16,294 - INFO - allennlp.common.util - null_classifier.classifier.4.weight +2024-07-01 01:32:16,295 - INFO - allennlp.common.util - null_classifier.classifier.4.bias +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.type = 
slanted_triangular +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.cut_frac = 0 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.ratio = 32 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.last_epoch = -1 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.gradual_unfreezing = True +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.discriminative_fine_tuning = True +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.learning_rate_scheduler.decay_factor = 0.001 +2024-07-01 01:32:16,295 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing. Training only the top 1 layers. +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - type = default +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - save_completed_epochs = True +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - save_every_num_seconds = None +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - save_every_num_batches = None +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - keep_most_recent_by_count = 2 +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - keep_most_recent_by_age = None +2024-07-01 01:32:16,295 - INFO - allennlp.common.params - trainer.callbacks.0.type = tensorboard +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.summary_interval = 100 +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.distribution_interval = None +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.batch_size_interval = None +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_parameter_statistics = False +2024-07-01 01:32:16,296 - INFO - allennlp.common.params - trainer.callbacks.0.should_log_learning_rate = True +2024-07-01 01:32:16,297 - WARNING - allennlp.training.gradient_descent_trainer - You provided a validation dataset but patience was set to None, meaning that early stopping is disabled +2024-07-01 01:32:16,298 - INFO - allennlp.training.gradient_descent_trainer - Beginning training. 
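Note on the scheduler block logged above: every learning_rate_scheduler value (type = slanted_triangular, cut_frac = 0, ratio = 32, gradual_unfreezing = True, discriminative_fine_tuning = True, decay_factor = 0.001) is dumped by allennlp.common.params, and the "Gradual unfreezing. Training only the top 1 layers." message comes from allennlp.training.learning_rate_schedulers.slanted_triangular. A minimal sketch of the corresponding trainer fragment, written as a plain Python dict, follows; the learning_rate_scheduler and tensorboard callback values are copied from the log, num_epochs is read off the "Epoch 0/9" counter, and everything else (optimizer choice, checkpointer, etc.) is deliberately left out rather than guessed. This is an illustration, not the original experiment config.

# Illustrative sketch only: a trainer config fragment consistent with the
# parameter dump above; values are taken from the log lines, nothing else.
trainer_fragment = {
    "num_epochs": 10,  # the trainer counts epochs 0 through 9
    "learning_rate_scheduler": {
        "type": "slanted_triangular",
        "cut_frac": 0,
        "ratio": 32,
        "gradual_unfreezing": True,          # epoch 0 trains only the top layer group
        "discriminative_fine_tuning": True,  # per-layer-group LRs scaled by decay_factor
        "decay_factor": 0.001,
    },
    "callbacks": [
        {
            "type": "tensorboard",
            "summary_interval": 100,
            "should_log_parameter_statistics": False,
            "should_log_learning_rate": True,
        }
    ],
}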
+2024-07-01 01:32:16,298 - INFO - allennlp.training.gradient_descent_trainer - Epoch 0/9 +2024-07-01 01:32:16,299 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.5G +2024-07-01 01:32:16,299 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 1.1G +2024-07-01 01:32:16,300 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 01:32:16,300 - INFO - tqdm - 0%| | 0/1147 [00:00") -2024-06-29 19:18:34,811 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 48) -tensor([[ 0, 8, 0, ..., 0, 0, 0], - [ 0, 13, 0, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], +2024-07-01 01:32:16,713 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "") +2024-07-01 01:32:16,713 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 37) +tensor([[ 0, 0, 0, ..., 0, 0, 0], + [ 0, 27, 11, ..., 0, 0, 0], + [ 0, 0, 5, ..., 0, 0, 0], ..., - [ 0, 9, 0, ..., 0, 0, 0], - [ 0, 0, 33, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,813 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 48) -tensor([[143, 5, 16, ..., 0, 0, 0], - [ 48, 24, 2, ..., 0, 0, 0], - [ 7, 0, 1, ..., 0, 0, 0], + [ 5, 0, 0, ..., 0, 0, 0], + [ 0, 0, 4, ..., 0, 0, 0], + [ 0, 4, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,714 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 37) +tensor([[ 8, 74, 91, ..., 0, 0, 0], + [ 1, 222, 144, ..., 0, 0, 0], + [ 2, 0, 152, ..., 0, 0, 0], ..., - [ 1, 31, 1, ..., 0, 0, 0], - [ 43, 1, 167, ..., 0, 0, 0], - [ 24, 14, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,814 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 48 x 48) -tensor([[[-1, 3, -1, ..., -1, -1, -1], - [-1, 5, -1, ..., -1, -1, -1], - [-1, 8, -1, ..., -1, -1, -1], + [ 95, 38, 1, ..., 0, 0, 0], + [ 2, 22, 18, ..., 0, 0, 0], + [ 48, 131, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,715 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 37 x 37) +tensor([[[-1, -1, -1, ..., -1, -1, -1], + [ 1, -1, -1, ..., -1, -1, -1], + [-1, 29, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], + [-1, -1, 4, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [ 0, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], @@ -523,7 +523,7 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], ..., - [[-1, 2, -1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -531,40 +531,40 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 11, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 
-1, ..., -1, -1, -1], + [[-1, 2, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:18:34,820 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 48 x 48) -tensor([[[-1, 3, -1, ..., -1, -1, -1], - [-1, 2, -1, ..., -1, -1, -1], - [-1, 7, -1, ..., -1, -1, -1], +2024-07-01 01:32:16,722 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 37 x 37) +tensor([[[-1, -1, -1, ..., -1, -1, -1], + [ 4, -1, -1, ..., -1, -1, -1], + [-1, 44, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], + [-1, -1, 22, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [ 0, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], @@ -573,7 +573,7 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], ..., - [[-1, 1, -1, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], ..., @@ -581,76 +581,76 @@ tensor([[[-1, 3, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 11, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 4, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[ 2, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:18:34,826 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 48) +2024-07-01 01:32:16,728 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 37) tensor([[0, 0, 0, ..., 0, 0, 0], - [0, 2, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0], [1, 0, 0, ..., 0, 0, 0], ..., + [0, 2, 0, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0], - [0, 0, 0, ..., 0, 0, 0], - [2, 1, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,827 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 48) -tensor([[12, 1, 3, ..., 0, 0, 0], - [21, 1, 35, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 37) +tensor([[ 2, 3, 0, ..., 0, 0, 0], + [ 0, 25, 1, ..., 0, 0, 0], + [19, 0, 7, ..., 0, 0, 0], ..., - [ 0, 7, 0, ..., 0, 0, 0], - [65, 0, 13, ..., 0, 0, 0], - [ 1, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,828 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 48) -tensor([[ 2, 143, 35, ..., 0, 0, 0], - [ 2, 8, 11, ..., 0, 0, 0], - [ 7, 0, 1, ..., 0, 0, 0], + [13, 21, 0, ..., 0, 0, 0], + [23, 2, 12, ..., 0, 0, 0], + [ 0, 23, 0, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:32:16,731 - INFO - 
allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 37) +tensor([[189, 20, 20, ..., 0, 0, 0], + [ 1, 15, 19, ..., 0, 0, 0], + [ 11, 0, 9, ..., 0, 0, 0], ..., - [ 1, 10, 1, ..., 0, 0, 0], - [ 11, 1, 3, ..., 0, 0, 0], - [115, 9, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:18:34,829 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") -2024-06-29 19:18:44,411 - INFO - tqdm - NullAccuracy: 0.9800, NullF1: 0.0752, Lemma: 0.7294, PosFeats: 0.6473, UD-UAS: 0.4483, UD-LAS: 0.4317, EUD-UAS: 0.1246, EUD-LAS: 0.1064, Misc: 0.8976, SS: 0.6661, SC: 0.6283, Avg: 0.5200, batch_loss: 8.0180, loss: 11.2210 ||: 16%|#5 | 181/1147 [00:10<00:51, 18.59it/s] -2024-06-29 19:18:54,439 - INFO - tqdm - NullAccuracy: 0.9836, NullF1: 0.1334, Lemma: 0.7855, PosFeats: 0.7376, UD-UAS: 0.5092, UD-LAS: 0.4962, EUD-UAS: 0.1905, EUD-LAS: 0.1709, Misc: 0.9121, SS: 0.7062, SC: 0.6972, Avg: 0.5783, batch_loss: 5.6264, loss: 8.8108 ||: 32%|###1 | 365/1147 [00:20<00:40, 19.33it/s] -2024-06-29 19:19:04,496 - INFO - tqdm - NullAccuracy: 0.9852, NullF1: 0.1735, Lemma: 0.8120, PosFeats: 0.7771, UD-UAS: 0.5424, UD-LAS: 0.5309, EUD-UAS: 0.2229, EUD-LAS: 0.2035, Misc: 0.9195, SS: 0.7247, SC: 0.7284, Avg: 0.6068, batch_loss: 5.9012, loss: 7.7643 ||: 48%|####7 | 550/1147 [00:30<00:33, 18.03it/s] -2024-06-29 19:19:14,508 - INFO - tqdm - NullAccuracy: 0.9860, NullF1: 0.2011, Lemma: 0.8282, PosFeats: 0.8002, UD-UAS: 0.5663, UD-LAS: 0.5556, EUD-UAS: 0.2468, EUD-LAS: 0.2276, Misc: 0.9244, SS: 0.7367, SC: 0.7480, Avg: 0.6260, batch_loss: 5.3222, loss: 7.1057 ||: 64%|######4 | 737/1147 [00:40<00:22, 18.18it/s] -2024-06-29 19:19:24,619 - INFO - tqdm - NullAccuracy: 0.9867, NullF1: 0.2289, Lemma: 0.8405, PosFeats: 0.8165, UD-UAS: 0.5840, UD-LAS: 0.5741, EUD-UAS: 0.2648, EUD-LAS: 0.2455, Misc: 0.9275, SS: 0.7457, SC: 0.7636, Avg: 0.6403, batch_loss: 4.6858, loss: 6.6559 ||: 81%|######## | 925/1147 [00:50<00:12, 18.26it/s] -2024-06-29 19:19:34,697 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2439, Lemma: 0.8494, PosFeats: 0.8288, UD-UAS: 0.5970, UD-LAS: 0.5879, EUD-UAS: 0.2785, EUD-LAS: 0.2598, Misc: 0.9299, SS: 0.7529, SC: 0.7748, Avg: 0.6510, batch_loss: 4.3227, loss: 6.3229 ||: 97%|#########6| 1112/1147 [01:00<00:01, 18.52it/s] -2024-06-29 19:19:36,422 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2466, Lemma: 0.8507, PosFeats: 0.8306, UD-UAS: 0.5986, UD-LAS: 0.5898, EUD-UAS: 0.2808, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7540, SC: 0.7765, Avg: 0.6526, batch_loss: 4.1319, loss: 6.2726 ||: 100%|#########9| 1143/1147 [01:02<00:00, 18.05it/s] -2024-06-29 19:19:36,533 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8508, PosFeats: 0.8307, UD-UAS: 0.5987, UD-LAS: 0.5899, EUD-UAS: 0.2809, EUD-LAS: 0.2622, Misc: 0.9302, SS: 0.7541, SC: 0.7766, Avg: 0.6527, batch_loss: 4.6516, loss: 6.2695 ||: 100%|#########9| 1145/1147 [01:02<00:00, 18.05it/s] -2024-06-29 19:19:36,624 - INFO - tqdm - NullAccuracy: 0.9870, NullF1: 0.2468, Lemma: 0.8509, PosFeats: 0.8308, UD-UAS: 0.5989, UD-LAS: 0.5900, EUD-UAS: 0.2810, EUD-LAS: 0.2624, Misc: 0.9302, SS: 0.7541, SC: 0.7767, Avg: 0.6528, batch_loss: 5.1362, loss: 6.2673 ||: 100%|##########| 1147/1147 [01:02<00:00, 18.44it/s] -2024-06-29 19:19:36,624 - INFO - allennlp.training.gradient_descent_trainer - Validating -2024-06-29 19:19:36,625 - INFO - tqdm - 0%| | 0/287 [00:00") +2024-07-01 01:32:26,396 - INFO - tqdm - NullAccuracy: 0.9818, NullF1: 0.0769, Lemma: 0.7297, PosFeats: 0.6471, UD-UAS: 0.4263, UD-LAS: 
0.4118, EUD-UAS: 0.1424, EUD-LAS: 0.1220, Misc: 0.8961, SS: 0.6627, SC: 0.6433, Avg: 0.5201, batch_loss: 7.6293, loss: 11.3000 ||: 16%|#5 | 179/1147 [00:10<00:53, 17.95it/s] +2024-07-01 01:32:36,457 - INFO - tqdm - NullAccuracy: 0.9848, NullF1: 0.1274, Lemma: 0.7833, PosFeats: 0.7376, UD-UAS: 0.4983, UD-LAS: 0.4867, EUD-UAS: 0.2040, EUD-LAS: 0.1826, Misc: 0.9107, SS: 0.7025, SC: 0.7055, Avg: 0.5790, batch_loss: 5.4980, loss: 8.8952 ||: 32%|###1 | 362/1147 [00:20<00:45, 17.12it/s] +2024-07-01 01:32:46,538 - INFO - tqdm - NullAccuracy: 0.9857, NullF1: 0.1675, Lemma: 0.8084, PosFeats: 0.7764, UD-UAS: 0.5376, UD-LAS: 0.5274, EUD-UAS: 0.2393, EUD-LAS: 0.2183, Misc: 0.9185, SS: 0.7222, SC: 0.7351, Avg: 0.6093, batch_loss: 4.8169, loss: 7.8232 ||: 48%|####7 | 547/1147 [00:30<00:30, 19.42it/s] +2024-07-01 01:32:56,590 - INFO - tqdm - NullAccuracy: 0.9864, NullF1: 0.1917, Lemma: 0.8242, PosFeats: 0.7990, UD-UAS: 0.5641, UD-LAS: 0.5552, EUD-UAS: 0.2649, EUD-LAS: 0.2447, Misc: 0.9236, SS: 0.7351, SC: 0.7546, Avg: 0.6295, batch_loss: 5.2682, loss: 7.1602 ||: 64%|######3 | 730/1147 [00:40<00:23, 17.81it/s] +2024-07-01 01:33:06,690 - INFO - tqdm - NullAccuracy: 0.9869, NullF1: 0.2129, Lemma: 0.8366, PosFeats: 0.8144, UD-UAS: 0.5816, UD-LAS: 0.5735, EUD-UAS: 0.2811, EUD-LAS: 0.2618, Misc: 0.9269, SS: 0.7444, SC: 0.7686, Avg: 0.6432, batch_loss: 5.2146, loss: 6.7194 ||: 79%|#######9 | 911/1147 [00:50<00:13, 18.11it/s] +2024-07-01 01:33:16,753 - INFO - tqdm - NullAccuracy: 0.9873, NullF1: 0.2327, Lemma: 0.8466, PosFeats: 0.8269, UD-UAS: 0.5948, UD-LAS: 0.5873, EUD-UAS: 0.2932, EUD-LAS: 0.2747, Misc: 0.9296, SS: 0.7514, SC: 0.7794, Avg: 0.6538, batch_loss: 4.8879, loss: 6.3822 ||: 95%|#########4| 1089/1147 [01:00<00:03, 17.51it/s] +2024-07-01 01:33:19,729 - INFO - tqdm - NullAccuracy: 0.9874, NullF1: 0.2370, Lemma: 0.8490, PosFeats: 0.8300, UD-UAS: 0.5989, UD-LAS: 0.5916, EUD-UAS: 0.2972, EUD-LAS: 0.2789, Misc: 0.9304, SS: 0.7534, SC: 0.7824, Avg: 0.6569, batch_loss: 4.7072, loss: 6.2913 ||: 100%|#########9| 1143/1147 [01:03<00:00, 17.37it/s] +2024-07-01 01:33:19,839 - INFO - tqdm - NullAccuracy: 0.9874, NullF1: 0.2372, Lemma: 0.8490, PosFeats: 0.8301, UD-UAS: 0.5991, UD-LAS: 0.5917, EUD-UAS: 0.2973, EUD-LAS: 0.2790, Misc: 0.9304, SS: 0.7535, SC: 0.7825, Avg: 0.6570, batch_loss: 4.8284, loss: 6.2886 ||: 100%|#########9| 1145/1147 [01:03<00:00, 17.62it/s] +2024-07-01 01:33:19,925 - INFO - tqdm - NullAccuracy: 0.9874, NullF1: 0.2375, Lemma: 0.8491, PosFeats: 0.8302, UD-UAS: 0.5993, UD-LAS: 0.5919, EUD-UAS: 0.2975, EUD-LAS: 0.2792, Misc: 0.9305, SS: 0.7536, SC: 0.7826, Avg: 0.6571, batch_loss: 3.4145, loss: 6.2846 ||: 100%|##########| 1147/1147 [01:03<00:00, 18.03it/s] +2024-07-01 01:33:19,925 - INFO - allennlp.training.gradient_descent_trainer - Validating +2024-07-01 01:33:19,926 - INFO - tqdm - 0%| | 0/287 [00:00") -2024-06-29 19:19:36,726 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 41) -tensor([[ 0, 0, 64, ..., 0, 0, 0], - [ 0, 0, 0, ..., 0, 0, 0], - [ 3, 16, 4, ..., 0, 0, 0], + [ 0, 0], + [ 0, 0], + [ 0, 0]]], device='cuda:0') +2024-07-01 01:33:20,023 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/sentences" : (Length 24 of type "") +2024-07-01 01:33:20,023 - INFO - allennlp.training.callbacks.console_logger - batch_input/lemma_rule_labels (Shape: 24 x 35) +tensor([[ 8, 0, 0, ..., 0, 0, 0], + [ 0, 8, 0, ..., 0, 0, 0], + [ 0, 13, 62, ..., 0, 0, 0], ..., - [ 0, 13, 0, ..., 0, 0, 0], - [32, 0, 2, ..., 0, 0, 0], - [ 0, 17, 4, 
..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,727 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 41) -tensor([[198, 12, 80, ..., 0, 0, 0], - [ 8, 0, 51, ..., 0, 0, 0], - [ 52, 154, 18, ..., 0, 0, 0], + [ 0, 11, 0, ..., 0, 0, 0], + [ 0, 0, 13, ..., 0, 0, 0], + [124, 41, 36, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:33:20,024 - INFO - allennlp.training.callbacks.console_logger - batch_input/pos_feats_labels (Shape: 24 x 35) +tensor([[158, 0, 34, ..., 0, 0, 0], + [ 14, 41, 12, ..., 0, 0, 0], + [197, 24, 116, ..., 0, 0, 0], ..., - [ 2, 24, 30, ..., 0, 0, 0], - [152, 38, 41, ..., 0, 0, 0], - [ 1, 323, 53, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,729 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 41 x 41) -tensor([[[-1, 6, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 5, ..., -1, -1, -1], + [ 8, 33, 2, ..., 22, 0, 0], + [ 0, 12, 24, ..., 0, 0, 0], + [721, 94, 57, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:33:20,025 - INFO - allennlp.training.callbacks.console_logger - batch_input/deprel_labels (Shape: 24 x 35 x 35) +tensor([[[ 5, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[ 5, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 13, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], + [-1, 3, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 17, ..., -1, -1, -1], - [ 1, -1, -1, ..., -1, -1, -1], + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -760,49 +760,49 @@ tensor([[[-1, 6, -1, ..., -1, -1, -1], ..., - [[ 5, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], - [ 4, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, 5, -1, ..., -1, -1, -1], + [-1, 7, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 3, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 2, ..., -1, -1, -1], - [-1, -1, 6, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 10, -1, ..., -1, -1, -1], + [-1, -1, 21, ..., -1, -1, -1], + [-1, -1, 5, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:19:36,735 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 41 x 41) -tensor([[[-1, 5, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 2, ..., -1, -1, -1], +2024-07-01 01:33:20,032 - INFO - allennlp.training.callbacks.console_logger - batch_input/deps_labels (Shape: 24 x 35 x 35) +tensor([[[ 2, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[ 2, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 14, -1, ..., -1, -1, -1], [-1, -1, -1, 
..., -1, -1, -1], + [-1, 3, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, -1, ..., -1, -1, -1], - [-1, -1, 17, ..., -1, -1, -1], - [ 4, -1, -1, ..., -1, -1, -1], + [[-1, -1, 3, ..., -1, -1, -1], + [-1, -1, 21, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], @@ -810,560 +810,555 @@ tensor([[[-1, 5, -1, ..., -1, -1, -1], ..., - [[ 2, -1, -1, ..., -1, -1, -1], - [21, -1, -1, ..., -1, -1, -1], - [25, -1, -1, ..., -1, -1, -1], + [[-1, 3, -1, ..., -1, -1, -1], + [-1, 2, -1, ..., -1, -1, -1], + [-1, 6, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [-1, 0, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, 9, -1, ..., -1, -1, -1], - [-1, -1, 3, ..., -1, -1, -1], - [-1, -1, 2, ..., -1, -1, -1], + [[-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], + [-1, -1, -1, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]], - [[-1, -1, 1, ..., -1, -1, -1], - [-1, -1, 5, ..., -1, -1, -1], - [-1, -1, -1, ..., -1, -1, -1], + [[-1, 9, -1, ..., -1, -1, -1], + [-1, -1, 24, ..., -1, -1, -1], + [-1, -1, 2, ..., -1, -1, -1], ..., [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1], [-1, -1, -1, ..., -1, -1, -1]]], device='cuda:0') -2024-06-29 19:19:36,742 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 41) -tensor([[0, 0, 0, ..., 0, 0, 0], - [1, 0, 0, ..., 0, 0, 0], - [0, 0, 3, ..., 0, 0, 0], - ..., - [0, 2, 0, ..., 0, 0, 0], +2024-07-01 01:33:20,038 - INFO - allennlp.training.callbacks.console_logger - batch_input/misc_labels (Shape: 24 x 35) +tensor([[1, 0, 0, ..., 0, 0, 0], + [0, 0, 0, ..., 0, 0, 0], [0, 2, 0, ..., 0, 0, 0], + ..., + [0, 0, 0, ..., 1, 0, 0], + [1, 0, 2, ..., 0, 0, 0], [0, 0, 0, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,743 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 41) -tensor([[14, 2, 1, ..., 0, 0, 0], - [18, 0, 3, ..., 0, 0, 0], - [ 3, 11, 17, ..., 0, 0, 0], +2024-07-01 01:33:20,039 - INFO - allennlp.training.callbacks.console_logger - batch_input/semslot_labels (Shape: 24 x 35) +tensor([[ 1, 0, 10, ..., 0, 0, 0], + [ 0, 19, 20, ..., 0, 0, 0], + [ 3, 1, 15, ..., 0, 0, 0], ..., - [15, 1, 32, ..., 0, 0, 0], - [13, 2, 1, ..., 0, 0, 0], - [ 0, 7, 32, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,744 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 41) -tensor([[ 3, 23, 30, ..., 0, 0, 0], - [ 36, 0, 5, ..., 0, 0, 0], - [ 91, 28, 2, ..., 0, 0, 0], + [ 2, 1, 23, ..., 82, 0, 0], + [ 0, 3, 1, ..., 0, 0, 0], + [13, 31, 1, ..., 0, 0, 0]], device='cuda:0') +2024-07-01 01:33:20,041 - INFO - allennlp.training.callbacks.console_logger - batch_input/semclass_labels (Shape: 24 x 35) +tensor([[ 28, 0, 373, ..., 0, 0, 0], + [ 10, 6, 60, ..., 0, 0, 0], + [ 24, 8, 12, ..., 0, 0, 0], ..., - [ 12, 8, 33, ..., 0, 0, 0], - [ 3, 16, 30, ..., 0, 0, 0], - [ 1, 10, 579, ..., 0, 0, 0]], device='cuda:0') -2024-06-29 19:19:36,746 - INFO - allennlp.training.callbacks.console_logger - Field : "batch_input/metadata" : (Length 24 of type "") -2024-06-29 19:19:46,640 - INFO - tqdm - NullAccuracy: 0.9895, NullF1: 0.3769, Lemma: 0.9290, PosFeats: 0.9248, UD-UAS: 0.7698, UD-LAS: 0.7705, EUD-UAS: 0.3969, EUD-LAS: 0.3773, Misc: 0.9597, SS: 0.8138, SC: 0.8693, Avg: 0.7568, batch_loss: 
3.2383, loss: 3.4567 ||: 47%|####7 | 135/287 [00:10<00:10, 14.32it/s] -2024-06-29 19:19:56,700 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3635, Lemma: 0.9323, PosFeats: 0.9290, UD-UAS: 0.7774, UD-LAS: 0.7774, EUD-UAS: 0.4038, EUD-LAS: 0.3829, Misc: 0.9609, SS: 0.8188, SC: 0.8762, Avg: 0.7621, batch_loss: 2.9325, loss: 3.3026 ||: 98%|#########7| 281/287 [00:20<00:00, 13.32it/s] -2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.49it/s] -2024-06-29 19:19:57,107 - INFO - tqdm - NullAccuracy: 0.9899, NullF1: 0.3634, Lemma: 0.9321, PosFeats: 0.9290, UD-UAS: 0.7776, UD-LAS: 0.7776, EUD-UAS: 0.4039, EUD-LAS: 0.3830, Misc: 0.9610, SS: 0.8186, SC: 0.8761, Avg: 0.7621, batch_loss: 4.3368, loss: 3.3027 ||: 100%|##########| 287/287 [00:20<00:00, 14.01it/s] -2024-06-29 19:19:57,107 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. -2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Training | Validation -2024-06-29 19:19:57,110 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.653 | 0.762 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.262 | 0.383 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.281 | 0.404 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.851 | 0.932 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.930 | 0.961 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.987 | 0.990 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.247 | 0.363 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.831 | 0.929 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SC | 0.777 | 0.876 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.819 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.590 | 0.778 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.599 | 0.778 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1099.266 | N/A -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - loss | 6.267 | 3.303 -2024-06-29 19:19:57,111 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4585.449 | N/A -2024-06-29 19:19:58,764 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:01:24.359503 -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:24 -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G -2024-06-29 19:19:58,765 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 3.2G -2024-06-29 19:19:58,766 - INFO - allennlp.training.gradient_descent_trainer - Training -2024-06-29 19:19:58,766 - INFO - tqdm - 0%| | 0/1147 [00:00") +2024-07-01 01:33:29,991 - INFO - tqdm - NullAccuracy: 
0.9893, NullF1: 0.3847, Lemma: 0.9299, PosFeats: 0.9255, UD-UAS: 0.7652, UD-LAS: 0.7662, EUD-UAS: 0.4128, EUD-LAS: 0.3992, Misc: 0.9596, SS: 0.8146, SC: 0.8776, Avg: 0.7612, batch_loss: 3.0156, loss: 3.4289 ||: 45%|####5 | 130/287 [00:10<00:10, 14.94it/s] +2024-07-01 01:33:40,019 - INFO - tqdm - NullAccuracy: 0.9896, NullF1: 0.3925, Lemma: 0.9335, PosFeats: 0.9291, UD-UAS: 0.7714, UD-LAS: 0.7721, EUD-UAS: 0.4227, EUD-LAS: 0.4091, Misc: 0.9610, SS: 0.8174, SC: 0.8809, Avg: 0.7663, batch_loss: 3.4157, loss: 3.2924 ||: 95%|#########4| 272/287 [00:20<00:01, 13.98it/s] +2024-07-01 01:33:40,964 - INFO - tqdm - NullAccuracy: 0.9896, NullF1: 0.3936, Lemma: 0.9336, PosFeats: 0.9295, UD-UAS: 0.7720, UD-LAS: 0.7727, EUD-UAS: 0.4236, EUD-LAS: 0.4098, Misc: 0.9609, SS: 0.8173, SC: 0.8810, Avg: 0.7667, batch_loss: 2.6802, loss: 3.2845 ||: 100%|#########9| 286/287 [00:21<00:00, 14.80it/s] +2024-07-01 01:33:41,017 - INFO - tqdm - NullAccuracy: 0.9896, NullF1: 0.3941, Lemma: 0.9336, PosFeats: 0.9296, UD-UAS: 0.7722, UD-LAS: 0.7729, EUD-UAS: 0.4236, EUD-LAS: 0.4098, Misc: 0.9609, SS: 0.8173, SC: 0.8810, Avg: 0.7668, batch_loss: 2.8979, loss: 3.2831 ||: 100%|##########| 287/287 [00:21<00:00, 13.61it/s] +2024-07-01 01:33:41,017 - INFO - allennlp.training.learning_rate_schedulers.slanted_triangular - Gradual unfreezing finished. Training all layers. +2024-07-01 01:33:41,020 - INFO - allennlp.training.callbacks.console_logger - Training | Validation +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - Avg | 0.657 | 0.767 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - EUD-LAS | 0.279 | 0.410 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - EUD-UAS | 0.298 | 0.424 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - Lemma | 0.849 | 0.934 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - Misc | 0.930 | 0.961 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - NullAccuracy | 0.987 | 0.990 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - NullF1 | 0.237 | 0.394 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - PosFeats | 0.830 | 0.930 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - SC | 0.783 | 0.881 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - SS | 0.754 | 0.817 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - UD-LAS | 0.592 | 0.773 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - UD-UAS | 0.599 | 0.772 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - gpu_0_memory_MB | 1096.931 | N/A +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - loss | 6.285 | 3.283 +2024-07-01 01:33:41,021 - INFO - allennlp.training.callbacks.console_logger - worker_0_memory_MB | 4585.289 | N/A +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Epoch duration: 0:01:26.265534 +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Estimated training time remaining: 0:12:42 +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Epoch 1/9 +2024-07-01 01:33:42,564 - INFO - allennlp.training.gradient_descent_trainer - Worker 0 memory usage: 4.9G +2024-07-01 01:33:42,565 - INFO - allennlp.training.gradient_descent_trainer - GPU 0 memory usage: 3.0G +2024-07-01 
01:33:42,566 - INFO - allennlp.training.gradient_descent_trainer - Training +2024-07-01 01:33:42,566 - INFO - tqdm - 0%| | 0/1147 [00:00
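Both runs in this diff report progress through the same format of tqdm lines: a comma-separated list of "Name: value" metric pairs (NullAccuracy, NullF1, Lemma, PosFeats, UD-UAS, UD-LAS, EUD-UAS, EUD-LAS, Misc, SS, SC, Avg, batch_loss, loss) followed by the progress bar. Below is a small self-contained sketch for extracting those series from a diff like this one, for example to compare the old ("-") and new ("+") run side by side; the file name and the choice to print Avg are assumptions made only for illustration.

import re
from collections import defaultdict

# Matches "Name: 0.1234" pairs inside a tqdm progress line, e.g.
# "Lemma: 0.8490, PosFeats: 0.8302, ..., loss: 6.2913".
# Timestamps such as "01:33:19,925" are skipped because they have no
# space after the colon.
METRIC_RE = re.compile(r"([A-Za-z0-9_-]+): ([0-9.]+)")

def parse_metric_series(path="out.log.diff"):  # hypothetical file name
    """Collect metric series separately for the old ('-') and new ('+') run."""
    runs = {"-": defaultdict(list), "+": defaultdict(list)}
    with open(path, encoding="utf-8") as fh:
        for line in fh:
            sign = line[:1]
            # keep only diff lines that carry the per-batch metric summary
            if sign not in runs or "Avg:" not in line:
                continue
            for name, value in METRIC_RE.findall(line):
                runs[sign][name].append(float(value))
    return runs

if __name__ == "__main__":
    runs = parse_metric_series()
    for sign, label in (("-", "old run"), ("+", "new run")):
        series = runs[sign].get("Avg", [])
        if series:
            print(f"{label}: first Avg = {series[0]:.4f}, last Avg = {series[-1]:.4f}")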