- attn_layer_mapper=all, attn_loss_fn=cos, attn_projector=orthogonal, attn_weight=5
- attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=25.0
- attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=5
- attn_layer_mapper=all, attn_loss_fn=raw_mse, attn_weight=5, projector=orthogonal
- attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=25.0
- attn_layer_mapper=last, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=5
- attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=25.0
- attn_layer_mapper=last_k_2, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=5
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=25.0
- attn_layer_mapper=layer-2, attn_loss_fn=raw_mse, attn_projector=orthogonal, attn_weight=5