# Install

In [2]:
%pip install uv

Note: you may need to restart the kernel to use updated packages.


In [None]:
!uv pip install dagshub setuptools accelerate toml torch torchvision transformers mlflow datasets ipywidgets python-dotenv evaluate

# Setup

In [1]:
import os
import toml
import torch
import mlflow
import dagshub
import datasets
import evaluate
from dotenv import load_dotenv
from torchvision.transforms import v2
from transformers import AutoImageProcessor, AutoModelForImageClassification, TrainingArguments, Trainer

ENV_PATH = "/Users/andrewmayes/Openclassroom/CanineNet/.env"
CONFIG_PATH = "/Users/andrewmayes/Openclassroom/CanineNet/code/config.toml"
CONFIG = toml.load(CONFIG_PATH)

load_dotenv(ENV_PATH)

dagshub.init(repo_name=os.environ['MLFLOW_TRACKING_PROJECTNAME'], repo_owner=os.environ['MLFLOW_TRACKING_USERNAME'], mlflow=True, dvc=True)

os.environ['MLFLOW_TRACKING_USERNAME'] = "amaye15"

mlflow.set_tracking_uri(f'https://dagshub.com/' + os.environ['MLFLOW_TRACKING_USERNAME']
                         + '/' + os.environ['MLFLOW_TRACKING_PROJECTNAME'] + '.mlflow')

CREATE_DATASET = True
ORIGINAL_DATASET = "Alanox/stanford-dogs"
MODIFIED_DATASET = "amaye15/stanford-dogs"
REMOVE_COLUMNS = ["name", "annotations"]
RENAME_COLUMNS = {"image":"pixel_values", "target":"label"}
SPLIT = 0.2

METRICS = ["accuracy", "f1", "precision", "recall"]
# MODELS = 'google/vit-base-patch16-224'
# MODELS = "google/siglip-base-patch16-224"



# Dataset

In [2]:
if CREATE_DATASET:
    ds = datasets.load_dataset(ORIGINAL_DATASET, token=os.getenv("HF_TOKEN"), split="full", trust_remote_code=True)
    ds = ds.remove_columns(REMOVE_COLUMNS).rename_columns(RENAME_COLUMNS)

    labels = ds.select_columns("label").to_pandas().sort_values("label").get("label").unique().tolist()
    numbers = range(len(labels))
    label2int = dict(zip(labels, numbers))
    int2label = dict(zip(numbers, labels))

    for key, val in label2int.items():
        print(f"{key}: {val}")

    ds = ds.class_encode_column("label")
    ds = ds.align_labels_with_mapping(label2int, "label")

    ds = ds.train_test_split(test_size=SPLIT, stratify_by_column = "label")
    #ds.push_to_hub(MODIFIED_DATASET, token=os.getenv("HF_TOKEN"))

    CONFIG["label2int"] = str(label2int)
    CONFIG["int2label"] = str(int2label)

    # with open("output.toml", "w") as toml_file:
    #     toml.dump(toml.dumps(CONFIG), toml_file)

    #ds = datasets.load_dataset(MODIFIED_DATASET, token=os.getenv("HF_TOKEN"), trust_remote_code=True, streaming=True)

Affenpinscher: 0
Afghan Hound: 1
African Hunting Dog: 2
Airedale: 3
American Staffordshire Terrier: 4
Appenzeller: 5
Australian Terrier: 6
Basenji: 7
Basset: 8
Beagle: 9
Bedlington Terrier: 10
Bernese Mountain Dog: 11
Black And Tan Coonhound: 12
Blenheim Spaniel: 13
Bloodhound: 14
Bluetick: 15
Border Collie: 16
Border Terrier: 17
Borzoi: 18
Boston Bull: 19
Bouvier Des Flandres: 20
Boxer: 21
Brabancon Griffon: 22
Briard: 23
Brittany Spaniel: 24
Bull Mastiff: 25
Cairn: 26
Cardigan: 27
Chesapeake Bay Retriever: 28
Chihuahua: 29
Chow: 30
Clumber: 31
Cocker Spaniel: 32
Collie: 33
Curly Coated Retriever: 34
Dandie Dinmont: 35
Dhole: 36
Dingo: 37
Doberman: 38
English Foxhound: 39
English Setter: 40
English Springer: 41
Entlebucher: 42
Eskimo Dog: 43
Flat Coated Retriever: 44
French Bulldog: 45
German Shepherd: 46
German Short Haired Pointer: 47
Giant Schnauzer: 48
Golden Retriever: 49
Gordon Setter: 50
Great Dane: 51
Great Pyrenees: 52
Greater Swiss Mountain Dog: 53
Groenendael: 54
Ibizan Hou

In [3]:
metrics = {metric: evaluate.load(metric) for metric in METRICS}
for lr in [5e-3, 5e-4, 5e-5]: # 5e-5
    for batch in [32, 64, 128, 32]: # 32
        for model_name in ["google/vit-base-patch16-224", "microsoft/swinv2-base-patch4-window16-256", "google/siglip-base-patch16-224"]: # "facebook/dinov2-base"
            if ((lr != 5e-5) & (batch != 32)) or ((lr != 5e-5) & (batch != 64) & (model_name != "google/vit-base-patch16-224")):
                image_processor = AutoImageProcessor.from_pretrained(model_name)
                model = AutoModelForImageClassification.from_pretrained(
                model_name,
                num_labels=len(label2int),
                id2label=int2label,
                label2id=label2int,
                ignore_mismatched_sizes=True,
                )

                # Then, in your transformations:
                def train_transform(examples, num_ops=10, magnitude=9, num_magnitude_bins=31):

                    transformation = v2.Compose(
                        [
                            v2.RandAugment(
                                num_ops=num_ops,
                                magnitude=magnitude,
                                num_magnitude_bins=num_magnitude_bins,
                            )
                        ]
                    )
                    # Ensure each image has three dimensions (in this case, ensure it's RGB)
                    examples["pixel_values"] = [
                        image.convert("RGB") for image in examples["pixel_values"]
                    ]
                    # Apply transformations
                    examples["pixel_values"] = [
                        image_processor(transformation(image), return_tensors="pt")[
                            "pixel_values"
                        ].squeeze()
                        for image in examples["pixel_values"]
                    ]
                    return examples


                def test_transform(examples):
                    # Ensure each image is RGB
                    examples["pixel_values"] = [
                        image.convert("RGB") for image in examples["pixel_values"]
                    ]
                    # Apply processing
                    examples["pixel_values"] = [
                        image_processor(image, return_tensors="pt")["pixel_values"].squeeze()
                        for image in examples["pixel_values"]
                    ]
                    return examples


                def compute_metrics(eval_pred):
                    predictions, labels = eval_pred
                    # predictions = np.argmax(logits, axis=-1)
                    results = {}
                    for key, val in metrics.items():
                        if "accuracy" == key:
                            result = next(
                                iter(val.compute(predictions=predictions, references=labels).items())
                            )
                        if "accuracy" != key:
                            result = next(
                                iter(
                                    val.compute(
                                        predictions=predictions, references=labels, average="macro"
                                    ).items()
                                )
                            )
                        results[result[0]] = result[1]
                    return results


                def collate_fn(examples):
                    pixel_values = torch.stack([example["pixel_values"] for example in examples])
                    labels = torch.tensor([example["label"] for example in examples])
                    return {"pixel_values": pixel_values, "labels": labels}


                def preprocess_logits_for_metrics(logits, labels):
                    """
                    Original Trainer may have a memory leak.
                    This is a workaround to avoid storing too many tensors that are not needed.
                    """
                    pred_ids = torch.argmax(logits, dim=-1)
                    return pred_ids

                ds["train"].set_transform(train_transform)
                ds["test"].set_transform(test_transform)

                training_args = TrainingArguments(**CONFIG["training_args"])
                training_args.per_device_train_batch_size = batch
                training_args.per_device_eval_batch_size = batch
                training_args.hub_model_id = f"amaye15/{model_name.replace('/','-')}-batch{batch}-lr{lr}-standford-dogs"

                mlflow.start_run(run_name=f"{model_name.replace('/','-')}-batch{batch}-lr{lr}")

                trainer = Trainer(
                    model=model,
                    args=training_args,
                    train_dataset=ds["train"],
                    eval_dataset=ds["test"],
                    tokenizer=image_processor,
                    data_collator=collate_fn,
                    compute_metrics=compute_metrics,
                    # callbacks=[early_stopping_callback],
                    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
                )

                # Train the model
                trainer.train()

                trainer.push_to_hub()

                mlflow.end_run()

Some weights of Swinv2ForImageClassification were not initialized from the model checkpoint at microsoft/swinv2-base-patch4-window16-256 and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([1000, 1024]) in the checkpoint and torch.Size([120, 1024]) in the model instantiated
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([120]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
max_steps is given, it will override any value given in num_train_epochs


  0%|          | 0/1000 [00:00<?, ?it/s]



{'loss': 4.7451, 'grad_norm': 7.392679691314697, 'learning_rate': 4.9500000000000004e-05, 'epoch': 0.08}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 4.6182661056518555, 'eval_accuracy': 0.07167152575315841, 'eval_f1': 0.06175833391362469, 'eval_precision': 0.06693832428157515, 'eval_recall': 0.06805161257279692, 'eval_runtime': 118.9863, 'eval_samples_per_second': 34.592, 'eval_steps_per_second': 1.084, 'epoch': 0.08}




{'loss': 4.5204, 'grad_norm': 17.792699813842773, 'learning_rate': 4.9e-05, 'epoch': 0.16}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 4.324223518371582, 'eval_accuracy': 0.20238095238095238, 'eval_f1': 0.14930686647738964, 'eval_precision': 0.18576762567564203, 'eval_recall': 0.1827081374434747, 'eval_runtime': 119.253, 'eval_samples_per_second': 34.515, 'eval_steps_per_second': 1.082, 'epoch': 0.16}




{'loss': 4.2163, 'grad_norm': 27.42233657836914, 'learning_rate': 4.85e-05, 'epoch': 0.23}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 3.85136079788208, 'eval_accuracy': 0.3816812439261419, 'eval_f1': 0.3107746196937497, 'eval_precision': 0.3754529363764648, 'eval_recall': 0.3598087581195972, 'eval_runtime': 116.3653, 'eval_samples_per_second': 35.371, 'eval_steps_per_second': 1.109, 'epoch': 0.23}




{'loss': 3.5996, 'grad_norm': 39.71358871459961, 'learning_rate': 4.8e-05, 'epoch': 0.31}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 2.993649959564209, 'eval_accuracy': 0.6025267249757046, 'eval_f1': 0.5396699491275935, 'eval_precision': 0.5985274492543927, 'eval_recall': 0.5851786479410009, 'eval_runtime': 116.097, 'eval_samples_per_second': 35.453, 'eval_steps_per_second': 1.111, 'epoch': 0.31}




{'loss': 2.7565, 'grad_norm': 35.98662567138672, 'learning_rate': 4.75e-05, 'epoch': 0.39}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 1.8901652097702026, 'eval_accuracy': 0.7738095238095238, 'eval_f1': 0.7419140762746178, 'eval_precision': 0.80018778271747, 'eval_recall': 0.759897512543306, 'eval_runtime': 115.975, 'eval_samples_per_second': 35.49, 'eval_steps_per_second': 1.112, 'epoch': 0.39}




{'loss': 1.9695, 'grad_norm': 69.04105377197266, 'learning_rate': 4.7e-05, 'epoch': 0.47}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 1.2026917934417725, 'eval_accuracy': 0.8644314868804664, 'eval_f1': 0.851211822050322, 'eval_precision': 0.8810420986229455, 'eval_recall': 0.8584653488560972, 'eval_runtime': 116.5016, 'eval_samples_per_second': 35.33, 'eval_steps_per_second': 1.107, 'epoch': 0.47}




{'loss': 1.4292, 'grad_norm': 35.54287338256836, 'learning_rate': 4.6500000000000005e-05, 'epoch': 0.54}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.8375147581100464, 'eval_accuracy': 0.8901846452866861, 'eval_f1': 0.8767897170357445, 'eval_precision': 0.9034288303237173, 'eval_recall': 0.8852697852467155, 'eval_runtime': 115.813, 'eval_samples_per_second': 35.54, 'eval_steps_per_second': 1.114, 'epoch': 0.54}




{'loss': 1.1191, 'grad_norm': 36.4020881652832, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.62}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.5400001406669617, 'eval_accuracy': 0.9139941690962099, 'eval_f1': 0.9084552011702601, 'eval_precision': 0.9209161574159062, 'eval_recall': 0.9114069285375231, 'eval_runtime': 117.1034, 'eval_samples_per_second': 35.148, 'eval_steps_per_second': 1.102, 'epoch': 0.62}




{'loss': 0.9249, 'grad_norm': 37.288429260253906, 'learning_rate': 4.55e-05, 'epoch': 0.7}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.41832435131073, 'eval_accuracy': 0.9193391642371235, 'eval_f1': 0.913590633986456, 'eval_precision': 0.9283911611083989, 'eval_recall': 0.9169393031418399, 'eval_runtime': 116.4522, 'eval_samples_per_second': 35.345, 'eval_steps_per_second': 1.108, 'epoch': 0.7}




{'loss': 0.7701, 'grad_norm': 36.70619201660156, 'learning_rate': 4.5e-05, 'epoch': 0.78}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 0.3423353433609009, 'eval_accuracy': 0.923712342079689, 'eval_f1': 0.9167317908696295, 'eval_precision': 0.9265458073145066, 'eval_recall': 0.9207090056403667, 'eval_runtime': 116.9797, 'eval_samples_per_second': 35.186, 'eval_steps_per_second': 1.103, 'epoch': 0.78}




{'loss': 0.7036, 'grad_norm': 30.09217643737793, 'learning_rate': 4.4500000000000004e-05, 'epoch': 0.85}


  0%|          | 0/129 [00:00<?, ?it/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'eval_loss': 0.3141166865825653, 'eval_accuracy': 0.9258989310009719, 'eval_f1': 0.9199295975579974, 'eval_precision': 0.9270010191480259, 'eval_recall': 0.9228040093614015, 'eval_runtime': 116.9652, 'eval_samples_per_second': 35.19, 'eval_steps_per_second': 1.103, 'epoch': 0.85}




{'loss': 0.7279, 'grad_norm': 25.417821884155273, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.93}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2813890278339386, 'eval_accuracy': 0.9261418853255587, 'eval_f1': 0.9200313705925975, 'eval_precision': 0.9300531792015672, 'eval_recall': 0.9234535506220213, 'eval_runtime': 116.2712, 'eval_samples_per_second': 35.4, 'eval_steps_per_second': 1.109, 'epoch': 0.93}




{'loss': 0.6732, 'grad_norm': 37.04437255859375, 'learning_rate': 4.35e-05, 'epoch': 1.01}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2583376467227936, 'eval_accuracy': 0.9278425655976676, 'eval_f1': 0.9257827956681505, 'eval_precision': 0.9336659702817446, 'eval_recall': 0.9264134713855884, 'eval_runtime': 116.0872, 'eval_samples_per_second': 35.456, 'eval_steps_per_second': 1.111, 'epoch': 1.01}




{'loss': 0.5251, 'grad_norm': 29.01424789428711, 'learning_rate': 4.3e-05, 'epoch': 1.09}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.24332156777381897, 'eval_accuracy': 0.9387755102040817, 'eval_f1': 0.9343495020560317, 'eval_precision': 0.9400195362137737, 'eval_recall': 0.9364590575108477, 'eval_runtime': 118.0813, 'eval_samples_per_second': 34.857, 'eval_steps_per_second': 1.092, 'epoch': 1.09}




{'loss': 0.506, 'grad_norm': 24.758153915405273, 'learning_rate': 4.25e-05, 'epoch': 1.17}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2486099898815155, 'eval_accuracy': 0.9293002915451894, 'eval_f1': 0.923673480868727, 'eval_precision': 0.9392627242326875, 'eval_recall': 0.9284078296144684, 'eval_runtime': 117.2673, 'eval_samples_per_second': 35.099, 'eval_steps_per_second': 1.1, 'epoch': 1.17}




{'loss': 0.4941, 'grad_norm': 30.309085845947266, 'learning_rate': 4.2e-05, 'epoch': 1.24}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.24892301857471466, 'eval_accuracy': 0.9295432458697764, 'eval_f1': 0.9275860219532449, 'eval_precision': 0.9340224064275343, 'eval_recall': 0.9276101852749326, 'eval_runtime': 116.9083, 'eval_samples_per_second': 35.207, 'eval_steps_per_second': 1.103, 'epoch': 1.24}




{'loss': 0.493, 'grad_norm': 31.011947631835938, 'learning_rate': 4.15e-05, 'epoch': 1.32}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.22557905316352844, 'eval_accuracy': 0.9361030126336248, 'eval_f1': 0.9336891277999396, 'eval_precision': 0.9401808677720161, 'eval_recall': 0.9344423711587737, 'eval_runtime': 118.5567, 'eval_samples_per_second': 34.718, 'eval_steps_per_second': 1.088, 'epoch': 1.32}




{'loss': 0.4975, 'grad_norm': 31.7733154296875, 'learning_rate': 4.1e-05, 'epoch': 1.4}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2236272543668747, 'eval_accuracy': 0.9390184645286687, 'eval_f1': 0.9352200153615517, 'eval_precision': 0.9430236036085446, 'eval_recall': 0.9376522647842572, 'eval_runtime': 114.3593, 'eval_samples_per_second': 35.992, 'eval_steps_per_second': 1.128, 'epoch': 1.4}




{'loss': 0.4742, 'grad_norm': 44.587825775146484, 'learning_rate': 4.05e-05, 'epoch': 1.48}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.22906745970249176, 'eval_accuracy': 0.9390184645286687, 'eval_f1': 0.9348672227662737, 'eval_precision': 0.9443221376291268, 'eval_recall': 0.9368278532845314, 'eval_runtime': 114.4144, 'eval_samples_per_second': 35.974, 'eval_steps_per_second': 1.127, 'epoch': 1.48}




{'loss': 0.4788, 'grad_norm': 51.10420608520508, 'learning_rate': 4e-05, 'epoch': 1.55}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.21873104572296143, 'eval_accuracy': 0.9385325558794947, 'eval_f1': 0.9348021784060464, 'eval_precision': 0.9429207736971593, 'eval_recall': 0.9358784149851511, 'eval_runtime': 114.5674, 'eval_samples_per_second': 35.926, 'eval_steps_per_second': 1.126, 'epoch': 1.55}




{'loss': 0.4817, 'grad_norm': 29.57049560546875, 'learning_rate': 3.9500000000000005e-05, 'epoch': 1.63}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2193629890680313, 'eval_accuracy': 0.9382896015549077, 'eval_f1': 0.9365648816530497, 'eval_precision': 0.9438250894351351, 'eval_recall': 0.9370270150771032, 'eval_runtime': 114.6083, 'eval_samples_per_second': 35.914, 'eval_steps_per_second': 1.126, 'epoch': 1.63}




{'loss': 0.425, 'grad_norm': 20.471195220947266, 'learning_rate': 3.9000000000000006e-05, 'epoch': 1.71}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.21454617381095886, 'eval_accuracy': 0.9395043731778425, 'eval_f1': 0.9365201289942966, 'eval_precision': 0.9419287910877474, 'eval_recall': 0.9373934984216807, 'eval_runtime': 114.4861, 'eval_samples_per_second': 35.952, 'eval_steps_per_second': 1.127, 'epoch': 1.71}




{'loss': 0.4392, 'grad_norm': 30.670284271240234, 'learning_rate': 3.85e-05, 'epoch': 1.79}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.21058662235736847, 'eval_accuracy': 0.9404761904761905, 'eval_f1': 0.9367420698571843, 'eval_precision': 0.9472776166631209, 'eval_recall': 0.9389728915687214, 'eval_runtime': 114.8313, 'eval_samples_per_second': 35.844, 'eval_steps_per_second': 1.123, 'epoch': 1.79}




{'loss': 0.4295, 'grad_norm': 23.996227264404297, 'learning_rate': 3.8e-05, 'epoch': 1.86}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20313242077827454, 'eval_accuracy': 0.9426627793974732, 'eval_f1': 0.9415231910984045, 'eval_precision': 0.9460733827301256, 'eval_recall': 0.9419062362633588, 'eval_runtime': 114.4366, 'eval_samples_per_second': 35.968, 'eval_steps_per_second': 1.127, 'epoch': 1.86}




{'loss': 0.447, 'grad_norm': 29.65323257446289, 'learning_rate': 3.7500000000000003e-05, 'epoch': 1.94}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20734070241451263, 'eval_accuracy': 0.9373177842565598, 'eval_f1': 0.934085851023942, 'eval_precision': 0.9405809641296471, 'eval_recall': 0.935499789776851, 'eval_runtime': 114.6373, 'eval_samples_per_second': 35.905, 'eval_steps_per_second': 1.125, 'epoch': 1.94}




{'loss': 0.4718, 'grad_norm': 25.023006439208984, 'learning_rate': 3.7e-05, 'epoch': 2.02}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20732612907886505, 'eval_accuracy': 0.9416909620991254, 'eval_f1': 0.939770108961802, 'eval_precision': 0.9436139851292384, 'eval_recall': 0.9395972540954411, 'eval_runtime': 114.8017, 'eval_samples_per_second': 35.853, 'eval_steps_per_second': 1.124, 'epoch': 2.02}




{'loss': 0.4528, 'grad_norm': 29.38068199157715, 'learning_rate': 3.65e-05, 'epoch': 2.1}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20109473168849945, 'eval_accuracy': 0.9426627793974732, 'eval_f1': 0.9402546407567826, 'eval_precision': 0.9447143456273354, 'eval_recall': 0.9401327475466873, 'eval_runtime': 115.0331, 'eval_samples_per_second': 35.781, 'eval_steps_per_second': 1.121, 'epoch': 2.1}




{'loss': 0.3958, 'grad_norm': 19.475000381469727, 'learning_rate': 3.6e-05, 'epoch': 2.17}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1979122757911682, 'eval_accuracy': 0.9438775510204082, 'eval_f1': 0.9401571777817359, 'eval_precision': 0.9466559993183812, 'eval_recall': 0.941823625617955, 'eval_runtime': 114.5986, 'eval_samples_per_second': 35.917, 'eval_steps_per_second': 1.126, 'epoch': 2.17}




{'loss': 0.4325, 'grad_norm': 23.977619171142578, 'learning_rate': 3.55e-05, 'epoch': 2.25}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19925996661186218, 'eval_accuracy': 0.9421768707482994, 'eval_f1': 0.9396204404158489, 'eval_precision': 0.9448251772503703, 'eval_recall': 0.940379301213978, 'eval_runtime': 114.6027, 'eval_samples_per_second': 35.915, 'eval_steps_per_second': 1.126, 'epoch': 2.25}




{'loss': 0.3228, 'grad_norm': 26.77207374572754, 'learning_rate': 3.5e-05, 'epoch': 2.33}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20252813398838043, 'eval_accuracy': 0.9397473275024295, 'eval_f1': 0.9371514375373243, 'eval_precision': 0.9415118336037184, 'eval_recall': 0.9374533753415454, 'eval_runtime': 114.9424, 'eval_samples_per_second': 35.809, 'eval_steps_per_second': 1.122, 'epoch': 2.33}




{'loss': 0.383, 'grad_norm': 18.35658073425293, 'learning_rate': 3.45e-05, 'epoch': 2.41}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20318132638931274, 'eval_accuracy': 0.9424198250728864, 'eval_f1': 0.9395786141795599, 'eval_precision': 0.9471288830591912, 'eval_recall': 0.9406682184567607, 'eval_runtime': 114.5097, 'eval_samples_per_second': 35.945, 'eval_steps_per_second': 1.127, 'epoch': 2.41}




{'loss': 0.4147, 'grad_norm': 27.020387649536133, 'learning_rate': 3.4000000000000007e-05, 'epoch': 2.49}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19750656187534332, 'eval_accuracy': 0.9433916423712342, 'eval_f1': 0.9401031149800597, 'eval_precision': 0.9466297083232625, 'eval_recall': 0.9418430619246869, 'eval_runtime': 114.3341, 'eval_samples_per_second': 36.0, 'eval_steps_per_second': 1.128, 'epoch': 2.49}




{'loss': 0.3587, 'grad_norm': 32.080196380615234, 'learning_rate': 3.35e-05, 'epoch': 2.56}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20484071969985962, 'eval_accuracy': 0.9429057337220602, 'eval_f1': 0.941181947588637, 'eval_precision': 0.9453028458596549, 'eval_recall': 0.9415019462749606, 'eval_runtime': 114.7942, 'eval_samples_per_second': 35.855, 'eval_steps_per_second': 1.124, 'epoch': 2.56}




{'loss': 0.3481, 'grad_norm': 21.583600997924805, 'learning_rate': 3.3e-05, 'epoch': 2.64}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.21101877093315125, 'eval_accuracy': 0.9416909620991254, 'eval_f1': 0.9409328461528574, 'eval_precision': 0.9453296950566307, 'eval_recall': 0.9414241081371865, 'eval_runtime': 115.2273, 'eval_samples_per_second': 35.721, 'eval_steps_per_second': 1.12, 'epoch': 2.64}




{'loss': 0.4007, 'grad_norm': 30.03070640563965, 'learning_rate': 3.2500000000000004e-05, 'epoch': 2.72}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19447678327560425, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9415138633931484, 'eval_precision': 0.9469802948132661, 'eval_recall': 0.942949195133524, 'eval_runtime': 115.0467, 'eval_samples_per_second': 35.777, 'eval_steps_per_second': 1.121, 'epoch': 2.72}




{'loss': 0.3719, 'grad_norm': 19.006587982177734, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.8}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20250166952610016, 'eval_accuracy': 0.9414480077745384, 'eval_f1': 0.9404145495231127, 'eval_precision': 0.9447405846663819, 'eval_recall': 0.9408034205466518, 'eval_runtime': 115.0173, 'eval_samples_per_second': 35.786, 'eval_steps_per_second': 1.122, 'epoch': 2.8}




{'loss': 0.3993, 'grad_norm': 29.12856101989746, 'learning_rate': 3.15e-05, 'epoch': 2.87}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2011735588312149, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9419080758511604, 'eval_precision': 0.9485110507596467, 'eval_recall': 0.9429995946279425, 'eval_runtime': 114.9695, 'eval_samples_per_second': 35.801, 'eval_steps_per_second': 1.122, 'epoch': 2.87}




{'loss': 0.3745, 'grad_norm': 31.063690185546875, 'learning_rate': 3.1e-05, 'epoch': 2.95}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19235630333423615, 'eval_accuracy': 0.9450923226433431, 'eval_f1': 0.9414846823446322, 'eval_precision': 0.9499272592883472, 'eval_recall': 0.9434571156744745, 'eval_runtime': 115.3893, 'eval_samples_per_second': 35.671, 'eval_steps_per_second': 1.118, 'epoch': 2.95}




{'loss': 0.3638, 'grad_norm': 17.190593719482422, 'learning_rate': 3.05e-05, 'epoch': 3.03}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19399064779281616, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9424039415864353, 'eval_precision': 0.9477598282320199, 'eval_recall': 0.9424056698043153, 'eval_runtime': 114.892, 'eval_samples_per_second': 35.825, 'eval_steps_per_second': 1.123, 'epoch': 3.03}




{'loss': 0.3421, 'grad_norm': 34.20246505737305, 'learning_rate': 3e-05, 'epoch': 3.11}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1897164136171341, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9441019525393687, 'eval_precision': 0.9496324245782034, 'eval_recall': 0.9445579251653037, 'eval_runtime': 114.9933, 'eval_samples_per_second': 35.793, 'eval_steps_per_second': 1.122, 'epoch': 3.11}




{'loss': 0.2906, 'grad_norm': 39.98542404174805, 'learning_rate': 2.95e-05, 'epoch': 3.18}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18932002782821655, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9456754034759556, 'eval_precision': 0.9494168814742135, 'eval_recall': 0.9456967112313741, 'eval_runtime': 115.4616, 'eval_samples_per_second': 35.648, 'eval_steps_per_second': 1.117, 'epoch': 3.18}




{'loss': 0.3455, 'grad_norm': 22.710445404052734, 'learning_rate': 2.9e-05, 'epoch': 3.26}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18017922341823578, 'eval_accuracy': 0.9484936831875608, 'eval_f1': 0.94714050801556, 'eval_precision': 0.9499007284588198, 'eval_recall': 0.9474784961404308, 'eval_runtime': 114.5961, 'eval_samples_per_second': 35.917, 'eval_steps_per_second': 1.126, 'epoch': 3.26}




{'loss': 0.3338, 'grad_norm': 27.539039611816406, 'learning_rate': 2.8499999999999998e-05, 'epoch': 3.34}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19262713193893433, 'eval_accuracy': 0.9441205053449951, 'eval_f1': 0.9414006658184005, 'eval_precision': 0.9472613482110482, 'eval_recall': 0.9424492875208347, 'eval_runtime': 114.3881, 'eval_samples_per_second': 35.983, 'eval_steps_per_second': 1.128, 'epoch': 3.34}




{'loss': 0.3307, 'grad_norm': 31.13157844543457, 'learning_rate': 2.8000000000000003e-05, 'epoch': 3.42}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20202496647834778, 'eval_accuracy': 0.9419339164237124, 'eval_f1': 0.9407488062667215, 'eval_precision': 0.9447014610743715, 'eval_recall': 0.9408760495815661, 'eval_runtime': 114.2398, 'eval_samples_per_second': 36.029, 'eval_steps_per_second': 1.129, 'epoch': 3.42}




{'loss': 0.367, 'grad_norm': 35.75934982299805, 'learning_rate': 2.7500000000000004e-05, 'epoch': 3.5}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19336122274398804, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9452244096779967, 'eval_precision': 0.9486806069204384, 'eval_recall': 0.945357240364433, 'eval_runtime': 114.8563, 'eval_samples_per_second': 35.836, 'eval_steps_per_second': 1.123, 'epoch': 3.5}




{'loss': 0.3248, 'grad_norm': 23.628128051757812, 'learning_rate': 2.7000000000000002e-05, 'epoch': 3.57}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.2003922015428543, 'eval_accuracy': 0.9419339164237124, 'eval_f1': 0.9392921765894248, 'eval_precision': 0.9442515458934876, 'eval_recall': 0.9401493961700677, 'eval_runtime': 114.5631, 'eval_samples_per_second': 35.928, 'eval_steps_per_second': 1.126, 'epoch': 3.57}




{'loss': 0.3366, 'grad_norm': 42.56528854370117, 'learning_rate': 2.6500000000000004e-05, 'epoch': 3.65}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19236095249652863, 'eval_accuracy': 0.9431486880466472, 'eval_f1': 0.9409643735150478, 'eval_precision': 0.9466555090439602, 'eval_recall': 0.9415082502284738, 'eval_runtime': 114.6786, 'eval_samples_per_second': 35.892, 'eval_steps_per_second': 1.125, 'epoch': 3.65}




{'loss': 0.3342, 'grad_norm': 19.98875617980957, 'learning_rate': 2.6000000000000002e-05, 'epoch': 3.73}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19377027451992035, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9436307861283311, 'eval_precision': 0.9467901159978599, 'eval_recall': 0.943816355738806, 'eval_runtime': 114.9849, 'eval_samples_per_second': 35.796, 'eval_steps_per_second': 1.122, 'epoch': 3.73}




{'loss': 0.3386, 'grad_norm': 21.59819221496582, 'learning_rate': 2.5500000000000003e-05, 'epoch': 3.81}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.20178444683551788, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9428295179020412, 'eval_precision': 0.9469455644494296, 'eval_recall': 0.9430155349321213, 'eval_runtime': 114.6708, 'eval_samples_per_second': 35.894, 'eval_steps_per_second': 1.125, 'epoch': 3.81}




{'loss': 0.3841, 'grad_norm': 21.11665153503418, 'learning_rate': 2.5e-05, 'epoch': 3.88}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.193311870098114, 'eval_accuracy': 0.9433916423712342, 'eval_f1': 0.9414012627871041, 'eval_precision': 0.9458002923284925, 'eval_recall': 0.94184618952753, 'eval_runtime': 114.9776, 'eval_samples_per_second': 35.798, 'eval_steps_per_second': 1.122, 'epoch': 3.88}




{'loss': 0.3174, 'grad_norm': 18.7007999420166, 'learning_rate': 2.45e-05, 'epoch': 3.96}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19022010266780853, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9437864796087925, 'eval_precision': 0.9466327980959414, 'eval_recall': 0.9435855550407289, 'eval_runtime': 115.2406, 'eval_samples_per_second': 35.717, 'eval_steps_per_second': 1.119, 'epoch': 3.96}




{'loss': 0.2996, 'grad_norm': 17.607086181640625, 'learning_rate': 2.4e-05, 'epoch': 4.04}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18879003822803497, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9453809424654497, 'eval_precision': 0.9497376727045288, 'eval_recall': 0.9459956280754142, 'eval_runtime': 115.2126, 'eval_samples_per_second': 35.725, 'eval_steps_per_second': 1.12, 'epoch': 4.04}




{'loss': 0.2879, 'grad_norm': 34.3960075378418, 'learning_rate': 2.35e-05, 'epoch': 4.12}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18851810693740845, 'eval_accuracy': 0.9441205053449951, 'eval_f1': 0.9427902851201815, 'eval_precision': 0.9464331604533499, 'eval_recall': 0.9428457088438701, 'eval_runtime': 115.3928, 'eval_samples_per_second': 35.669, 'eval_steps_per_second': 1.118, 'epoch': 4.12}




{'loss': 0.3035, 'grad_norm': 27.399072647094727, 'learning_rate': 2.3000000000000003e-05, 'epoch': 4.19}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1908504068851471, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9434051369389914, 'eval_precision': 0.9474717636272078, 'eval_recall': 0.9437481173747684, 'eval_runtime': 114.8582, 'eval_samples_per_second': 35.835, 'eval_steps_per_second': 1.123, 'epoch': 4.19}




{'loss': 0.2574, 'grad_norm': 21.96457290649414, 'learning_rate': 2.25e-05, 'epoch': 4.27}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18855735659599304, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9427031026358901, 'eval_precision': 0.9475830137825754, 'eval_recall': 0.9438414557692797, 'eval_runtime': 114.8411, 'eval_samples_per_second': 35.841, 'eval_steps_per_second': 1.123, 'epoch': 4.27}




{'loss': 0.3219, 'grad_norm': 41.36893081665039, 'learning_rate': 2.2000000000000003e-05, 'epoch': 4.35}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18892067670822144, 'eval_accuracy': 0.9433916423712342, 'eval_f1': 0.9411438682134082, 'eval_precision': 0.9461780674309639, 'eval_recall': 0.9417372009351842, 'eval_runtime': 115.4308, 'eval_samples_per_second': 35.658, 'eval_steps_per_second': 1.118, 'epoch': 4.35}




{'loss': 0.2827, 'grad_norm': 31.3144588470459, 'learning_rate': 2.15e-05, 'epoch': 4.43}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18959985673427582, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9434954931712027, 'eval_precision': 0.9463643348204727, 'eval_recall': 0.9433684998867641, 'eval_runtime': 115.1487, 'eval_samples_per_second': 35.745, 'eval_steps_per_second': 1.12, 'epoch': 4.43}




{'loss': 0.2869, 'grad_norm': 20.795242309570312, 'learning_rate': 2.1e-05, 'epoch': 4.5}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1945854127407074, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9429882368776638, 'eval_precision': 0.9458759338467805, 'eval_recall': 0.9427150271390136, 'eval_runtime': 115.0128, 'eval_samples_per_second': 35.787, 'eval_steps_per_second': 1.122, 'epoch': 4.5}




{'loss': 0.3442, 'grad_norm': 22.8905029296875, 'learning_rate': 2.05e-05, 'epoch': 4.58}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18711623549461365, 'eval_accuracy': 0.9458211856171039, 'eval_f1': 0.9443975552551526, 'eval_precision': 0.9477411298392244, 'eval_recall': 0.9444669404930629, 'eval_runtime': 115.0342, 'eval_samples_per_second': 35.781, 'eval_steps_per_second': 1.121, 'epoch': 4.58}




{'loss': 0.2739, 'grad_norm': 18.282894134521484, 'learning_rate': 2e-05, 'epoch': 4.66}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1880841702222824, 'eval_accuracy': 0.9441205053449951, 'eval_f1': 0.9415259481526164, 'eval_precision': 0.9469780431226001, 'eval_recall': 0.9421132519003071, 'eval_runtime': 114.6389, 'eval_samples_per_second': 35.904, 'eval_steps_per_second': 1.125, 'epoch': 4.66}




{'loss': 0.3067, 'grad_norm': 17.205751419067383, 'learning_rate': 1.9500000000000003e-05, 'epoch': 4.74}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19253447651863098, 'eval_accuracy': 0.9475218658892128, 'eval_f1': 0.9455549198929877, 'eval_precision': 0.9498783733589569, 'eval_recall': 0.9455746598929079, 'eval_runtime': 114.8075, 'eval_samples_per_second': 35.851, 'eval_steps_per_second': 1.124, 'epoch': 4.74}




{'loss': 0.2674, 'grad_norm': 29.793489456176758, 'learning_rate': 1.9e-05, 'epoch': 4.82}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19188550114631653, 'eval_accuracy': 0.9429057337220602, 'eval_f1': 0.9405066366498914, 'eval_precision': 0.9458056670586158, 'eval_recall': 0.9407893783001804, 'eval_runtime': 114.8903, 'eval_samples_per_second': 35.825, 'eval_steps_per_second': 1.123, 'epoch': 4.82}




{'loss': 0.3029, 'grad_norm': 40.88832473754883, 'learning_rate': 1.85e-05, 'epoch': 4.89}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1870385855436325, 'eval_accuracy': 0.9446064139941691, 'eval_f1': 0.9419978871151995, 'eval_precision': 0.9467945730172027, 'eval_recall': 0.9425046516642015, 'eval_runtime': 115.0825, 'eval_samples_per_second': 35.766, 'eval_steps_per_second': 1.121, 'epoch': 4.89}




{'loss': 0.293, 'grad_norm': 25.430564880371094, 'learning_rate': 1.8e-05, 'epoch': 4.97}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19138064980506897, 'eval_accuracy': 0.9421768707482994, 'eval_f1': 0.9398439773363093, 'eval_precision': 0.9443712235347143, 'eval_recall': 0.9402050138864034, 'eval_runtime': 114.8654, 'eval_samples_per_second': 35.833, 'eval_steps_per_second': 1.123, 'epoch': 4.97}




{'loss': 0.3242, 'grad_norm': 16.76936149597168, 'learning_rate': 1.75e-05, 'epoch': 5.05}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1905665248632431, 'eval_accuracy': 0.9443634596695821, 'eval_f1': 0.9428469199634004, 'eval_precision': 0.9462565822668318, 'eval_recall': 0.9428831333486722, 'eval_runtime': 114.8538, 'eval_samples_per_second': 35.837, 'eval_steps_per_second': 1.123, 'epoch': 5.05}




{'loss': 0.3302, 'grad_norm': 30.843332290649414, 'learning_rate': 1.7000000000000003e-05, 'epoch': 5.13}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18933725357055664, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9436571818715118, 'eval_precision': 0.9467108586574963, 'eval_recall': 0.9438527768448465, 'eval_runtime': 115.1185, 'eval_samples_per_second': 35.754, 'eval_steps_per_second': 1.121, 'epoch': 5.13}




{'loss': 0.2754, 'grad_norm': 20.851177215576172, 'learning_rate': 1.65e-05, 'epoch': 5.2}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1859486699104309, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9452360577527517, 'eval_precision': 0.948884107004187, 'eval_recall': 0.9453068178693637, 'eval_runtime': 114.9667, 'eval_samples_per_second': 35.802, 'eval_steps_per_second': 1.122, 'epoch': 5.2}




{'loss': 0.2794, 'grad_norm': 32.60487747192383, 'learning_rate': 1.6000000000000003e-05, 'epoch': 5.28}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18755145370960236, 'eval_accuracy': 0.9458211856171039, 'eval_f1': 0.9441062093968957, 'eval_precision': 0.9472886051353733, 'eval_recall': 0.9442015299233748, 'eval_runtime': 114.8785, 'eval_samples_per_second': 35.829, 'eval_steps_per_second': 1.123, 'epoch': 5.28}




{'loss': 0.3015, 'grad_norm': 23.714696884155273, 'learning_rate': 1.55e-05, 'epoch': 5.36}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1870153546333313, 'eval_accuracy': 0.9463070942662779, 'eval_f1': 0.9450064660883716, 'eval_precision': 0.9480599743441597, 'eval_recall': 0.9451215682039763, 'eval_runtime': 115.2137, 'eval_samples_per_second': 35.725, 'eval_steps_per_second': 1.12, 'epoch': 5.36}




{'loss': 0.2741, 'grad_norm': 26.103408813476562, 'learning_rate': 1.5e-05, 'epoch': 5.44}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1891375035047531, 'eval_accuracy': 0.9426627793974732, 'eval_f1': 0.941466297473656, 'eval_precision': 0.9446859437408165, 'eval_recall': 0.9414389371088902, 'eval_runtime': 114.8799, 'eval_samples_per_second': 35.829, 'eval_steps_per_second': 1.123, 'epoch': 5.44}




{'loss': 0.2856, 'grad_norm': 24.357542037963867, 'learning_rate': 1.45e-05, 'epoch': 5.51}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18975676596164703, 'eval_accuracy': 0.9455782312925171, 'eval_f1': 0.943872222215918, 'eval_precision': 0.946982737270694, 'eval_recall': 0.9438607151950139, 'eval_runtime': 114.6348, 'eval_samples_per_second': 35.905, 'eval_steps_per_second': 1.125, 'epoch': 5.51}




{'loss': 0.2869, 'grad_norm': 17.85304069519043, 'learning_rate': 1.4000000000000001e-05, 'epoch': 5.59}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1900128573179245, 'eval_accuracy': 0.9463070942662779, 'eval_f1': 0.9449003574138755, 'eval_precision': 0.9485287191510329, 'eval_recall': 0.9447589201115735, 'eval_runtime': 115.1085, 'eval_samples_per_second': 35.758, 'eval_steps_per_second': 1.121, 'epoch': 5.59}




{'loss': 0.2874, 'grad_norm': 23.500152587890625, 'learning_rate': 1.3500000000000001e-05, 'epoch': 5.67}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19259242713451385, 'eval_accuracy': 0.9458211856171039, 'eval_f1': 0.9434055925300381, 'eval_precision': 0.9488706777885639, 'eval_recall': 0.9439478824344302, 'eval_runtime': 114.9474, 'eval_samples_per_second': 35.808, 'eval_steps_per_second': 1.122, 'epoch': 5.67}




{'loss': 0.1988, 'grad_norm': 22.595317840576172, 'learning_rate': 1.3000000000000001e-05, 'epoch': 5.75}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1882985234260559, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9426854260794502, 'eval_precision': 0.9469339918302618, 'eval_recall': 0.9433031282032139, 'eval_runtime': 115.1264, 'eval_samples_per_second': 35.752, 'eval_steps_per_second': 1.121, 'epoch': 5.75}




{'loss': 0.2644, 'grad_norm': 28.592859268188477, 'learning_rate': 1.25e-05, 'epoch': 5.83}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18950645625591278, 'eval_accuracy': 0.9472789115646258, 'eval_f1': 0.9448028291783778, 'eval_precision': 0.9493876702823437, 'eval_recall': 0.9454785004855722, 'eval_runtime': 115.0675, 'eval_samples_per_second': 35.77, 'eval_steps_per_second': 1.121, 'epoch': 5.83}




{'loss': 0.2641, 'grad_norm': 31.456523895263672, 'learning_rate': 1.2e-05, 'epoch': 5.9}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1931435912847519, 'eval_accuracy': 0.9438775510204082, 'eval_f1': 0.941433386425573, 'eval_precision': 0.9465851411488619, 'eval_recall': 0.9421179108709055, 'eval_runtime': 114.3837, 'eval_samples_per_second': 35.984, 'eval_steps_per_second': 1.128, 'epoch': 5.9}




{'loss': 0.2391, 'grad_norm': 36.5975227355957, 'learning_rate': 1.1500000000000002e-05, 'epoch': 5.98}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19245532155036926, 'eval_accuracy': 0.9438775510204082, 'eval_f1': 0.9413665363647609, 'eval_precision': 0.9459785586190361, 'eval_recall': 0.942074546074078, 'eval_runtime': 114.3684, 'eval_samples_per_second': 35.989, 'eval_steps_per_second': 1.128, 'epoch': 5.98}




{'loss': 0.2601, 'grad_norm': 42.9268913269043, 'learning_rate': 1.1000000000000001e-05, 'epoch': 6.06}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19217662513256073, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9446123481197052, 'eval_precision': 0.9485415624396621, 'eval_recall': 0.9450066690417543, 'eval_runtime': 114.6574, 'eval_samples_per_second': 35.898, 'eval_steps_per_second': 1.125, 'epoch': 6.06}




{'loss': 0.2499, 'grad_norm': 32.35667419433594, 'learning_rate': 1.05e-05, 'epoch': 6.14}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19207020103931427, 'eval_accuracy': 0.9460641399416909, 'eval_f1': 0.9442995122333805, 'eval_precision': 0.9479958854083244, 'eval_recall': 0.9443226322813668, 'eval_runtime': 114.6419, 'eval_samples_per_second': 35.903, 'eval_steps_per_second': 1.125, 'epoch': 6.14}




{'loss': 0.264, 'grad_norm': 37.09341049194336, 'learning_rate': 1e-05, 'epoch': 6.21}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18772649765014648, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9449748257567703, 'eval_precision': 0.9479318204339404, 'eval_recall': 0.9450885190747927, 'eval_runtime': 115.0287, 'eval_samples_per_second': 35.782, 'eval_steps_per_second': 1.121, 'epoch': 6.21}




{'loss': 0.2523, 'grad_norm': 23.960466384887695, 'learning_rate': 9.5e-06, 'epoch': 6.29}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18751277029514313, 'eval_accuracy': 0.9467930029154519, 'eval_f1': 0.9452585905069442, 'eval_precision': 0.9482655680231242, 'eval_recall': 0.9454911591930526, 'eval_runtime': 115.3611, 'eval_samples_per_second': 35.679, 'eval_steps_per_second': 1.118, 'epoch': 6.29}




{'loss': 0.2406, 'grad_norm': 24.072086334228516, 'learning_rate': 9e-06, 'epoch': 6.37}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18802115321159363, 'eval_accuracy': 0.9494655004859086, 'eval_f1': 0.9477070574852993, 'eval_precision': 0.9515929762382033, 'eval_recall': 0.9480766180643243, 'eval_runtime': 114.8757, 'eval_samples_per_second': 35.83, 'eval_steps_per_second': 1.123, 'epoch': 6.37}




{'loss': 0.2749, 'grad_norm': 20.991914749145508, 'learning_rate': 8.500000000000002e-06, 'epoch': 6.45}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1885104477405548, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9447709867884129, 'eval_precision': 0.9482607163831029, 'eval_recall': 0.9450556842845805, 'eval_runtime': 114.6381, 'eval_samples_per_second': 35.904, 'eval_steps_per_second': 1.125, 'epoch': 6.45}




{'loss': 0.2702, 'grad_norm': 72.34149932861328, 'learning_rate': 8.000000000000001e-06, 'epoch': 6.52}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1884743869304657, 'eval_accuracy': 0.9467930029154519, 'eval_f1': 0.9450964819057274, 'eval_precision': 0.9482242985135603, 'eval_recall': 0.9454769319797258, 'eval_runtime': 115.2162, 'eval_samples_per_second': 35.724, 'eval_steps_per_second': 1.12, 'epoch': 6.52}




{'loss': 0.2482, 'grad_norm': 17.251073837280273, 'learning_rate': 7.5e-06, 'epoch': 6.6}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18626774847507477, 'eval_accuracy': 0.9475218658892128, 'eval_f1': 0.9460705692945173, 'eval_precision': 0.949297813063323, 'eval_recall': 0.9463759071528689, 'eval_runtime': 114.6516, 'eval_samples_per_second': 35.9, 'eval_steps_per_second': 1.125, 'epoch': 6.6}




{'loss': 0.2403, 'grad_norm': 19.474308013916016, 'learning_rate': 7.000000000000001e-06, 'epoch': 6.68}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1897239238023758, 'eval_accuracy': 0.9470359572400389, 'eval_f1': 0.9450683372184614, 'eval_precision': 0.9496556504600578, 'eval_recall': 0.9453010975584315, 'eval_runtime': 114.474, 'eval_samples_per_second': 35.956, 'eval_steps_per_second': 1.127, 'epoch': 6.68}




{'loss': 0.2509, 'grad_norm': 33.18558883666992, 'learning_rate': 6.5000000000000004e-06, 'epoch': 6.76}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.19059084355831146, 'eval_accuracy': 0.9482507288629738, 'eval_f1': 0.9461830780670489, 'eval_precision': 0.9507805867016828, 'eval_recall': 0.9464756298884152, 'eval_runtime': 114.5318, 'eval_samples_per_second': 35.938, 'eval_steps_per_second': 1.126, 'epoch': 6.76}




{'loss': 0.2689, 'grad_norm': 18.298492431640625, 'learning_rate': 6e-06, 'epoch': 6.83}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18674452602863312, 'eval_accuracy': 0.9484936831875608, 'eval_f1': 0.9459116119918208, 'eval_precision': 0.9506477204257867, 'eval_recall': 0.9466156759052429, 'eval_runtime': 114.3375, 'eval_samples_per_second': 35.999, 'eval_steps_per_second': 1.128, 'epoch': 6.83}




{'loss': 0.2159, 'grad_norm': 18.049999237060547, 'learning_rate': 5.500000000000001e-06, 'epoch': 6.91}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18661876022815704, 'eval_accuracy': 0.9484936831875608, 'eval_f1': 0.946438620230781, 'eval_precision': 0.9503522868270984, 'eval_recall': 0.9467846203446506, 'eval_runtime': 115.019, 'eval_samples_per_second': 35.785, 'eval_steps_per_second': 1.122, 'epoch': 6.91}




{'loss': 0.2488, 'grad_norm': 35.76055908203125, 'learning_rate': 5e-06, 'epoch': 6.99}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1865723431110382, 'eval_accuracy': 0.9460641399416909, 'eval_f1': 0.9434604111462372, 'eval_precision': 0.9481526440603574, 'eval_recall': 0.9443130519259704, 'eval_runtime': 114.4672, 'eval_samples_per_second': 35.958, 'eval_steps_per_second': 1.127, 'epoch': 6.99}




{'loss': 0.2366, 'grad_norm': 42.35475158691406, 'learning_rate': 4.5e-06, 'epoch': 7.07}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.1870640069246292, 'eval_accuracy': 0.9448493683187561, 'eval_f1': 0.9422096792950079, 'eval_precision': 0.9463818135151401, 'eval_recall': 0.9429973410538078, 'eval_runtime': 114.4117, 'eval_samples_per_second': 35.975, 'eval_steps_per_second': 1.128, 'epoch': 7.07}




{'loss': 0.2602, 'grad_norm': 16.04122543334961, 'learning_rate': 4.000000000000001e-06, 'epoch': 7.15}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18540120124816895, 'eval_accuracy': 0.9465500485908649, 'eval_f1': 0.9440799651911433, 'eval_precision': 0.9482822745243968, 'eval_recall': 0.9447404023533209, 'eval_runtime': 115.0761, 'eval_samples_per_second': 35.768, 'eval_steps_per_second': 1.121, 'epoch': 7.15}




{'loss': 0.2236, 'grad_norm': 19.42647361755371, 'learning_rate': 3.5000000000000004e-06, 'epoch': 7.22}


  0%|          | 0/129 [00:00<?, ?it/s]

{'eval_loss': 0.18591512739658356, 'eval_accuracy': 0.9453352769679301, 'eval_f1': 0.9429143190138815, 'eval_precision': 0.9467375634808105, 'eval_recall': 0.9436323198742707, 'eval_runtime': 122.2756, 'eval_samples_per_second': 33.662, 'eval_steps_per_second': 1.055, 'epoch': 7.22}




In [1]:
mlflow.end_run()

NameError: name 'mlflow' is not defined

In [None]:
# training_args = TrainingArguments(**CONFIG["training_args"])

# image_processor = AutoImageProcessor.from_pretrained(MODELS)
# model = AutoModelForImageClassification.from_pretrained(
# MODELS,
# num_labels=len(CONFIG["label2int"]),
# id2label=CONFIG["label2int"],
# label2id=CONFIG["int2label"],
# ignore_mismatched_sizes=True,
# )


# training_args = TrainingArguments(**CONFIG["training_args"])

# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=ds["train"],
#     eval_dataset=ds["test"],
#     tokenizer=image_processor,
#     data_collator=collate_fn,
#     compute_metrics=compute_metrics,
#     # callbacks=[early_stopping_callback],
#     preprocess_logits_for_metrics=preprocess_logits_for_metrics,
# )

# # Train the model
# trainer.train()

# mlflow.end_run()