End of training
787bb85
verified
-
attn_norm=None, attn_projector=mlp, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=None, attn_projector=mlp, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=None, attn_projector=mlp, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=None, attn_projector=mlp, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 115000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0.2
End of training
-
attn_norm=None, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0004, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 60000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=None, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0004, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=2, warmup_ratio=0
End of training
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0.2
End of training
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=2, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=2, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=mlp, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=2, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=2, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 85000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=25, learning_rate=0.0004, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=2, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0001, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=2, warmup_ratio=0
Training in progress, step 198000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0.2
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0002, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 99000
-
attn_norm=layernorm, attn_projector=orthogonal, attn_weight=5, learning_rate=0.0004, per_device_train_batch_size=4, warmup_ratio=0
Training in progress, step 35000