|
--- |
|
library_name: peft |
|
base_model: meta-llama/Llama-2-13b-chat-hf |
|
--- |
|
|
|
# Model Card for a LoRA Adapter of meta-llama/Llama-2-13b-chat-hf
|
|
|
<!-- Provide a quick summary of what the model is/does. --> |
|
|
|
This repository contains a LoRA adapter (PEFT) for `meta-llama/Llama-2-13b-chat-hf`, trained for causal language modeling. The adapter configuration (`adapter_config.json`) is reproduced below:

```json
|
{ |
|
"alpha_pattern": {}, |
|
"auto_mapping": null, |
|
"base_model_name_or_path": "meta-llama/Llama-2-13b-chat-hf", |
|
"bias": "none", |
|
"fan_in_fan_out": false, |
|
"inference_mode": true, |
|
"init_lora_weights": true, |
|
"layers_pattern": null, |
|
"layers_to_transform": null, |
|
"lora_alpha": 16, |
|
"lora_dropout": 0.05, |
|
"modules_to_save": null, |
|
"peft_type": "LORA", |
|
"r": 8, |
|
"rank_pattern": {}, |
|
"revision": null, |
|
"target_modules": [ |
|
"q_proj", |
|
"v_proj" |
|
], |
|
"task_type": "CAUSAL_LM" |
|
} |
|
``` |
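
For readability, here is a minimal sketch of the equivalent PEFT `LoraConfig` (values taken from the JSON above; illustrative, not the original training script):

```python
from peft import LoraConfig

# LoRA settings matching the adapter_config.json above.
lora_config = LoraConfig(
    r=8,                                  # LoRA rank
    lora_alpha=16,                        # scaling factor
    lora_dropout=0.05,                    # dropout applied to LoRA layers
    target_modules=["q_proj", "v_proj"],  # attention projections receiving LoRA updates
    bias="none",                          # bias terms are not trained
    task_type="CAUSAL_LM",                # causal language modeling
)
```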
|
|
|
The trainer state (`trainer_state.json`) from the fine-tuning run is reproduced below. Training ran for 40 epochs (80,440 steps); training loss fell from 0.79 to 0.06, and the best `eval_loss` of 0.0573 was reached at the final checkpoint (step 80,440):

```json
|
{ |
|
"best_metric": 0.05725787207484245, |
|
"best_model_checkpoint": "./Lora-Meta-Llama2-13b-chat-hf-QandA_2g_v01-v07\\checkpoint-80440", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 80440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.875e-05, |
|
"loss": 0.7946, |
|
"step": 2011 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.6483868360519409, |
|
"eval_runtime": 69.9926, |
|
"eval_samples_per_second": 5.758, |
|
"eval_steps_per_second": 0.729, |
|
"step": 2011 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5829, |
|
"step": 4022 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5402843356132507, |
|
"eval_runtime": 70.0086, |
|
"eval_samples_per_second": 5.756, |
|
"eval_steps_per_second": 0.728, |
|
"step": 4022 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 0.4918, |
|
"step": 6033 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.45011985301971436, |
|
"eval_runtime": 70.1392, |
|
"eval_samples_per_second": 5.746, |
|
"eval_steps_per_second": 0.727, |
|
"step": 6033 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.419, |
|
"step": 8044 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.38660600781440735, |
|
"eval_runtime": 70.1321, |
|
"eval_samples_per_second": 5.746, |
|
"eval_steps_per_second": 0.727, |
|
"step": 8044 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.3644, |
|
"step": 10055 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.33983489871025085, |
|
"eval_runtime": 69.9916, |
|
"eval_samples_per_second": 5.758, |
|
"eval_steps_per_second": 0.729, |
|
"step": 10055 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3211, |
|
"step": 12066 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.3004438281059265, |
|
"eval_runtime": 70.0768, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.728, |
|
"step": 12066 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.2854, |
|
"step": 14077 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.2634061574935913, |
|
"eval_runtime": 70.2069, |
|
"eval_samples_per_second": 5.74, |
|
"eval_steps_per_second": 0.726, |
|
"step": 14077 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.255, |
|
"step": 16088 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.23876915872097015, |
|
"eval_runtime": 70.1721, |
|
"eval_samples_per_second": 5.743, |
|
"eval_steps_per_second": 0.727, |
|
"step": 16088 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.2281, |
|
"step": 18099 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.21539266407489777, |
|
"eval_runtime": 70.1355, |
|
"eval_samples_per_second": 5.746, |
|
"eval_steps_per_second": 0.727, |
|
"step": 18099 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2052, |
|
"step": 20110 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.18904653191566467, |
|
"eval_runtime": 70.1349, |
|
"eval_samples_per_second": 5.746, |
|
"eval_steps_per_second": 0.727, |
|
"step": 20110 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.625e-05, |
|
"loss": 0.1853, |
|
"step": 22121 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.17202098667621613, |
|
"eval_runtime": 69.9829, |
|
"eval_samples_per_second": 5.759, |
|
"eval_steps_per_second": 0.729, |
|
"step": 22121 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1673, |
|
"step": 24132 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.15875761210918427, |
|
"eval_runtime": 70.1596, |
|
"eval_samples_per_second": 5.744, |
|
"eval_steps_per_second": 0.727, |
|
"step": 24132 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.375000000000001e-05, |
|
"loss": 0.1526, |
|
"step": 26143 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.14447805285453796, |
|
"eval_runtime": 70.1252, |
|
"eval_samples_per_second": 5.747, |
|
"eval_steps_per_second": 0.727, |
|
"step": 26143 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.1398, |
|
"step": 28154 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.13342420756816864, |
|
"eval_runtime": 70.1196, |
|
"eval_samples_per_second": 5.747, |
|
"eval_steps_per_second": 0.727, |
|
"step": 28154 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.1285, |
|
"step": 30165 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.12114470452070236, |
|
"eval_runtime": 70.2112, |
|
"eval_samples_per_second": 5.74, |
|
"eval_steps_per_second": 0.726, |
|
"step": 30165 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1187, |
|
"step": 32176 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.11447372287511826, |
|
"eval_runtime": 70.1257, |
|
"eval_samples_per_second": 5.747, |
|
"eval_steps_per_second": 0.727, |
|
"step": 32176 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 2.8749999999999997e-05, |
|
"loss": 0.1104, |
|
"step": 34187 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.10539893060922623, |
|
"eval_runtime": 70.1826, |
|
"eval_samples_per_second": 5.742, |
|
"eval_steps_per_second": 0.727, |
|
"step": 34187 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.1038, |
|
"step": 36198 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.09906744956970215, |
|
"eval_runtime": 70.117, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.727, |
|
"step": 36198 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 2.625e-05, |
|
"loss": 0.0974, |
|
"step": 38209 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.09452048689126968, |
|
"eval_runtime": 70.1925, |
|
"eval_samples_per_second": 5.741, |
|
"eval_steps_per_second": 0.727, |
|
"step": 38209 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0927, |
|
"step": 40220 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.09014962613582611, |
|
"eval_runtime": 69.9849, |
|
"eval_samples_per_second": 5.758, |
|
"eval_steps_per_second": 0.729, |
|
"step": 40220 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2.375e-05, |
|
"loss": 0.0878, |
|
"step": 42231 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.08503083884716034, |
|
"eval_runtime": 70.1728, |
|
"eval_samples_per_second": 5.743, |
|
"eval_steps_per_second": 0.727, |
|
"step": 42231 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0838, |
|
"step": 44242 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.0820975974202156, |
|
"eval_runtime": 70.0791, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.728, |
|
"step": 44242 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 2.125e-05, |
|
"loss": 0.0801, |
|
"step": 46253 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.0777197927236557, |
|
"eval_runtime": 69.9961, |
|
"eval_samples_per_second": 5.757, |
|
"eval_steps_per_second": 0.729, |
|
"step": 46253 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0775, |
|
"step": 48264 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.0748789981007576, |
|
"eval_runtime": 69.905, |
|
"eval_samples_per_second": 5.765, |
|
"eval_steps_per_second": 0.73, |
|
"step": 48264 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.0751, |
|
"step": 50275 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.0729849636554718, |
|
"eval_runtime": 70.0915, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.728, |
|
"step": 50275 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0727, |
|
"step": 52286 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.0698952004313469, |
|
"eval_runtime": 70.0781, |
|
"eval_samples_per_second": 5.751, |
|
"eval_steps_per_second": 0.728, |
|
"step": 52286 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 0.0706, |
|
"step": 54297 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.06760543584823608, |
|
"eval_runtime": 69.9618, |
|
"eval_samples_per_second": 5.76, |
|
"eval_steps_per_second": 0.729, |
|
"step": 54297 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0691, |
|
"step": 56308 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.06610006093978882, |
|
"eval_runtime": 70.1085, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.727, |
|
"step": 56308 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 0.0678, |
|
"step": 58319 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.06433883309364319, |
|
"eval_runtime": 70.1363, |
|
"eval_samples_per_second": 5.746, |
|
"eval_steps_per_second": 0.727, |
|
"step": 58319 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0666, |
|
"step": 60330 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.06277326494455338, |
|
"eval_runtime": 70.0925, |
|
"eval_samples_per_second": 5.75, |
|
"eval_steps_per_second": 0.728, |
|
"step": 60330 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 1.125e-05, |
|
"loss": 0.0652, |
|
"step": 62341 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 0.06192418932914734, |
|
"eval_runtime": 69.9357, |
|
"eval_samples_per_second": 5.762, |
|
"eval_steps_per_second": 0.729, |
|
"step": 62341 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0644, |
|
"step": 64352 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.0610126368701458, |
|
"eval_runtime": 70.061, |
|
"eval_samples_per_second": 5.752, |
|
"eval_steps_per_second": 0.728, |
|
"step": 64352 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.0635, |
|
"step": 66363 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 0.060028236359357834, |
|
"eval_runtime": 69.9253, |
|
"eval_samples_per_second": 5.763, |
|
"eval_steps_per_second": 0.729, |
|
"step": 66363 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0629, |
|
"step": 68374 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.05925382673740387, |
|
"eval_runtime": 69.9042, |
|
"eval_samples_per_second": 5.765, |
|
"eval_steps_per_second": 0.73, |
|
"step": 68374 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.0622, |
|
"step": 70385 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 0.05860263481736183, |
|
"eval_runtime": 69.8706, |
|
"eval_samples_per_second": 5.768, |
|
"eval_steps_per_second": 0.73, |
|
"step": 70385 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0616, |
|
"step": 72396 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.05808304622769356, |
|
"eval_runtime": 69.9999, |
|
"eval_samples_per_second": 5.757, |
|
"eval_steps_per_second": 0.729, |
|
"step": 72396 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 3.75e-06, |
|
"loss": 0.061, |
|
"step": 74407 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.057825859636068344, |
|
"eval_runtime": 69.9835, |
|
"eval_samples_per_second": 5.758, |
|
"eval_steps_per_second": 0.729, |
|
"step": 74407 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0605, |
|
"step": 76418 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.057523321360349655, |
|
"eval_runtime": 69.9943, |
|
"eval_samples_per_second": 5.758, |
|
"eval_steps_per_second": 0.729, |
|
"step": 76418 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.06, |
|
"step": 78429 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 0.05731285735964775, |
|
"eval_runtime": 70.0036, |
|
"eval_samples_per_second": 5.757, |
|
"eval_steps_per_second": 0.729, |
|
"step": 78429 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0595, |
|
"step": 80440 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.05725787207484245, |
|
"eval_runtime": 69.9176, |
|
"eval_samples_per_second": 5.764, |
|
"eval_steps_per_second": 0.729, |
|
"step": 80440 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 80440, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 9.118285061492736e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
``` |
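
As an optional convenience, a small sketch for extracting the per-epoch evaluation loss from a `trainer_state.json` like the one above (the file path is a placeholder):

```python
import json

# Load the trainer state dumped by the Hugging Face Trainer (path is a placeholder).
with open("trainer_state.json") as f:
    state = json.load(f)

# log_history interleaves training and evaluation records; keep the eval ones.
eval_history = [
    (entry["epoch"], entry["eval_loss"])
    for entry in state["log_history"]
    if "eval_loss" in entry
]

for epoch, eval_loss in eval_history:
    print(f"epoch {epoch:>4.1f}  eval_loss {eval_loss:.4f}")
```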
|
|
|
## Model Details |
|
|
|
### Model Description |
|
|
|
<!-- Provide a longer summary of what this model is. --> |
|
|
|
|
|
|
|
- **Developed by:** [More Information Needed] |
|
- **Funded by [optional]:** [More Information Needed] |
|
- **Shared by [optional]:** [More Information Needed] |
|
- **Model type:** Causal language model (LoRA adapter via PEFT)
|
- **Language(s) (NLP):** [More Information Needed] |
|
- **License:** [More Information Needed] |
|
- **Finetuned from model [optional]:** meta-llama/Llama-2-13b-chat-hf
|
|
|
### Model Sources [optional] |
|
|
|
<!-- Provide the basic links for the model. --> |
|
|
|
- **Repository:** [More Information Needed] |
|
- **Paper [optional]:** [More Information Needed] |
|
- **Demo [optional]:** [More Information Needed] |
|
|
|
## Uses |
|
|
|
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. --> |
|
|
|
### Direct Use |
|
|
|
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. --> |
|
|
|
[More Information Needed] |
|
|
|
### Downstream Use [optional] |
|
|
|
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app --> |
|
|
|
[More Information Needed] |
|
|
|
### Out-of-Scope Use |
|
|
|
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. --> |
|
|
|
[More Information Needed] |
|
|
|
## Bias, Risks, and Limitations |
|
|
|
<!-- This section is meant to convey both technical and sociotechnical limitations. --> |
|
|
|
[More Information Needed] |
|
|
|
### Recommendations |
|
|
|
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. --> |
|
|
|
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. |
|
|
|
## How to Get Started with the Model |
|
|
|
Use the code below to get started with the model. |
|
|
|
[More Information Needed] |
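
In the absence of author-provided instructions, the following is a minimal sketch for loading this adapter on top of the 4-bit quantized base model. The adapter identifier (`path/to/this-adapter`) and the example prompt are placeholders; the quantization settings mirror the `bitsandbytes` config listed under *Training procedure* below.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "meta-llama/Llama-2-13b-chat-hf"
adapter_id = "path/to/this-adapter"  # placeholder: local path or Hub repo id of this adapter

# 4-bit NF4 quantization with double quantization and float16 compute,
# matching the bitsandbytes config used during training.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(base_model, adapter_id)

prompt = "Question: What does this adapter do?\nAnswer:"  # placeholder prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```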
|
|
|
## Training Details |
|
|
|
### Training Data |
|
|
|
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. --> |
|
|
|
[More Information Needed] |
|
|
|
### Training Procedure |
|
|
|
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. --> |
|
|
|
#### Preprocessing [optional] |
|
|
|
[More Information Needed] |
|
|
|
|
|
#### Training Hyperparameters |
|
|
|
- **Training regime:** 4-bit NF4 quantized base model via `bitsandbytes` (double quantization, `float16` compute dtype) with LoRA adapters (r=8, alpha=16, dropout 0.05) on `q_proj` and `v_proj`

- **Epochs:** 40 (80,440 optimizer steps)

- **Learning rate schedule:** linear decay to 0, starting at approximately 5e-05 (consistent with the per-epoch values in the training log above)
|
|
|
#### Speeds, Sizes, Times [optional] |
|
|
|
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. --> |
|
|
|
[More Information Needed] |
|
|
|
## Evaluation |
|
|
|
<!-- This section describes the evaluation protocols and provides the results. --> |
|
|
|
### Testing Data, Factors & Metrics |
|
|
|
#### Testing Data |
|
|
|
<!-- This should link to a Dataset Card if possible. --> |
|
|
|
[More Information Needed] |
|
|
|
#### Factors |
|
|
|
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. --> |
|
|
|
[More Information Needed] |
|
|
|
#### Metrics |
|
|
|
<!-- These are the evaluation metrics being used, ideally with a description of why. --> |
|
|
|
[More Information Needed] |
|
|
|
### Results |
|
|
|
[More Information Needed] |
|
|
|
#### Summary |
|
|
|
|
|
|
|
## Model Examination [optional] |
|
|
|
<!-- Relevant interpretability work for the model goes here --> |
|
|
|
[More Information Needed] |
|
|
|
## Environmental Impact |
|
|
|
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly --> |
|
|
|
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). |
|
|
|
- **Hardware Type:** [More Information Needed] |
|
- **Hours used:** [More Information Needed] |
|
- **Cloud Provider:** [More Information Needed] |
|
- **Compute Region:** [More Information Needed] |
|
- **Carbon Emitted:** [More Information Needed] |
|
|
|
## Technical Specifications [optional] |
|
|
|
### Model Architecture and Objective |
|
|
|
[More Information Needed] |
|
|
|
### Compute Infrastructure |
|
|
|
[More Information Needed] |
|
|
|
#### Hardware |
|
|
|
[More Information Needed] |
|
|
|
#### Software |
|
|
|
[More Information Needed] |
|
|
|
## Citation [optional] |
|
|
|
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. --> |
|
|
|
**BibTeX:** |
|
|
|
[More Information Needed] |
|
|
|
**APA:** |
|
|
|
[More Information Needed] |
|
|
|
## Glossary [optional] |
|
|
|
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. --> |
|
|
|
[More Information Needed] |
|
|
|
## More Information [optional] |
|
|
|
[More Information Needed] |
|
|
|
## Model Card Authors [optional] |
|
|
|
[More Information Needed] |
|
|
|
## Model Card Contact |
|
|
|
[More Information Needed] |
|
|
|
|
|
## Training procedure |
|
|
|
|
|
The following `bitsandbytes` quantization config was used during training (an equivalent `BitsAndBytesConfig` sketch follows the list):
|
- quant_method: bitsandbytes |
|
- load_in_8bit: False |
|
- load_in_4bit: True |
|
- llm_int8_threshold: 6.0 |
|
- llm_int8_skip_modules: None |
|
- llm_int8_enable_fp32_cpu_offload: False |
|
- llm_int8_has_fp16_weight: False |
|
- bnb_4bit_quant_type: nf4 |
|
- bnb_4bit_use_double_quant: True |
|
- bnb_4bit_compute_dtype: float16 |
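
A minimal `transformers.BitsAndBytesConfig` equivalent of the list above, for reference (illustrative only):

```python
import torch
from transformers import BitsAndBytesConfig

# Mirrors the quantization settings listed above.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
```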
|
|
|
### Framework versions |
|
|
|
|
|
- PEFT 0.6.2 |
|
|