---
library_name: peft
base_model: meta-llama/Llama-2-13b-chat-hf
---
# Model Card for a LoRA Adapter of meta-llama/Llama-2-13b-chat-hf

This repository holds a PEFT LoRA adapter fine-tuned from `meta-llama/Llama-2-13b-chat-hf` on a question-and-answer dataset (the training output directory is named `Lora-Meta-Llama2-13b-chat-hf-QandA_2g_v01-v07`). The base model was quantized to 4-bit NF4 during training; see the [Training procedure](#training-procedure) section below.

The adapter configuration (`adapter_config.json`):
```json
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "meta-llama/Llama-2-13b-chat-hf",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layers_pattern": null,
  "layers_to_transform": null,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "q_proj",
    "v_proj"
  ],
  "task_type": "CAUSAL_LM"
}
```
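For reference, here is a minimal sketch of the same configuration expressed with the standard `peft` API (values copied from the JSON above; this code is illustrative, not shipped with the repo):

```python
# Minimal sketch: adapter_config.json above, expressed as a peft LoraConfig.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                                  # LoRA rank
    lora_alpha=16,                        # scaling factor (effective scale alpha/r = 2)
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],  # attention query/value projections only
    task_type="CAUSAL_LM",
)
```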
Training progress (from `trainer_state.json`): the run spanned 40 epochs (2011 steps per epoch, 80440 steps total), with train and eval loss logged at each epoch boundary. The best eval loss, 0.05726, was reached at the final checkpoint (`checkpoint-80440`). The logged learning rates are consistent with a linear decay from an initial 5e-05 to 0, and evaluation runtime held steady at roughly 70 s per pass (~5.75 samples/s, ~0.73 steps/s). This was a single run, not a hyperparameter search. Eval losses below are rounded to four decimals.

| Epoch | Step  | Learning rate | Train loss | Eval loss |
|------:|------:|--------------:|-----------:|----------:|
| 1     | 2011  | 4.875e-05     | 0.7946     | 0.6484    |
| 2     | 4022  | 4.75e-05      | 0.5829     | 0.5403    |
| 3     | 6033  | 4.625e-05     | 0.4918     | 0.4501    |
| 4     | 8044  | 4.5e-05       | 0.4190     | 0.3866    |
| 5     | 10055 | 4.375e-05     | 0.3644     | 0.3398    |
| 6     | 12066 | 4.25e-05      | 0.3211     | 0.3004    |
| 7     | 14077 | 4.125e-05     | 0.2854     | 0.2634    |
| 8     | 16088 | 4.0e-05       | 0.2550     | 0.2388    |
| 9     | 18099 | 3.875e-05     | 0.2281     | 0.2154    |
| 10    | 20110 | 3.75e-05      | 0.2052     | 0.1890    |
| 11    | 22121 | 3.625e-05     | 0.1853     | 0.1720    |
| 12    | 24132 | 3.5e-05       | 0.1673     | 0.1588    |
| 13    | 26143 | 3.375e-05     | 0.1526     | 0.1445    |
| 14    | 28154 | 3.25e-05      | 0.1398     | 0.1334    |
| 15    | 30165 | 3.125e-05     | 0.1285     | 0.1211    |
| 16    | 32176 | 3.0e-05       | 0.1187     | 0.1145    |
| 17    | 34187 | 2.875e-05     | 0.1104     | 0.1054    |
| 18    | 36198 | 2.75e-05      | 0.1038     | 0.0991    |
| 19    | 38209 | 2.625e-05     | 0.0974     | 0.0945    |
| 20    | 40220 | 2.5e-05       | 0.0927     | 0.0901    |
| 21    | 42231 | 2.375e-05     | 0.0878     | 0.0850    |
| 22    | 44242 | 2.25e-05      | 0.0838     | 0.0821    |
| 23    | 46253 | 2.125e-05     | 0.0801     | 0.0777    |
| 24    | 48264 | 2.0e-05       | 0.0775     | 0.0749    |
| 25    | 50275 | 1.875e-05     | 0.0751     | 0.0730    |
| 26    | 52286 | 1.75e-05      | 0.0727     | 0.0699    |
| 27    | 54297 | 1.625e-05     | 0.0706     | 0.0676    |
| 28    | 56308 | 1.5e-05       | 0.0691     | 0.0661    |
| 29    | 58319 | 1.375e-05     | 0.0678     | 0.0643    |
| 30    | 60330 | 1.25e-05      | 0.0666     | 0.0628    |
| 31    | 62341 | 1.125e-05     | 0.0652     | 0.0619    |
| 32    | 64352 | 1.0e-05       | 0.0644     | 0.0610    |
| 33    | 66363 | 8.75e-06      | 0.0635     | 0.0600    |
| 34    | 68374 | 7.5e-06       | 0.0629     | 0.0593    |
| 35    | 70385 | 6.25e-06      | 0.0622     | 0.0586    |
| 36    | 72396 | 5.0e-06       | 0.0616     | 0.0581    |
| 37    | 74407 | 3.75e-06      | 0.0610     | 0.0578    |
| 38    | 76418 | 2.5e-06       | 0.0605     | 0.0575    |
| 39    | 78429 | 1.25e-06      | 0.0600     | 0.0573    |
| 40    | 80440 | 0.0           | 0.0595     | 0.0573    |

Additional trainer state: `logging_steps` 500, `eval_steps` 500, `save_steps` 500, `max_steps` 80440, `num_train_epochs` 40, `total_flos` ≈ 9.118e+17.
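The exact `TrainingArguments` are not recorded in this card; the sketch below is a hedged reconstruction consistent with the logged state (40 epochs, linear decay from 5e-05, logging/eval/save every 500 steps). Anything not present in the state above (batch sizes, precision flags, evaluation strategy) is an assumption.

```python
# Hedged sketch of TrainingArguments consistent with trainer_state.json;
# values not found in the logged state are assumptions, not recorded facts.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./Lora-Meta-Llama2-13b-chat-hf-QandA_2g_v01-v07",  # from best_model_checkpoint
    num_train_epochs=40,
    learning_rate=5e-5,           # inferred initial LR (linear decay to 0 over 40 epochs)
    lr_scheduler_type="linear",
    logging_steps=500,
    eval_steps=500,
    save_steps=500,
    evaluation_strategy="steps",  # an assumption; only eval_steps=500 was logged
)
```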
## Model Details
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** [More Information Needed]
- **Funded by [optional]:** [More Information Needed]
- **Shared by [optional]:** [More Information Needed]
- **Model type:** [More Information Needed]
- **Language(s) (NLP):** [More Information Needed]
- **License:** [More Information Needed]
- **Finetuned from model [optional]:** [More Information Needed]
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
### Direct Use
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
[More Information Needed]
### Downstream Use [optional]
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
[More Information Needed]
### Out-of-Scope Use
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
[More Information Needed]
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
[More Information Needed]
### Recommendations
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed for further recommendations.
## How to Get Started with the Model
Use the sketch below to load the adapter on top of the 4-bit quantized base model. The Hub repo id of this adapter is not stated in the card, so `ADAPTER_ID` is a placeholder you must replace.
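```python
# Hedged loading sketch: ADAPTER_ID is a placeholder for this adapter's Hub repo id,
# which is not stated in the card. The quantization config mirrors the one used in training.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

BASE_ID = "meta-llama/Llama-2-13b-chat-hf"
ADAPTER_ID = "<this-adapter-repo-id>"  # placeholder, replace with the actual repo id

# 4-bit NF4 quantization, matching the bitsandbytes config in "Training procedure"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(BASE_ID)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_ID, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
model.eval()

prompt = "Your question here"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```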
## Training Details
### Training Data
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
[More Information Needed]
### Training Procedure
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
#### Preprocessing [optional]
[More Information Needed]
#### Training Hyperparameters
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
#### Speeds, Sizes, Times [optional]
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
[More Information Needed]
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Testing Data, Factors & Metrics
#### Testing Data
<!-- This should link to a Dataset Card if possible. -->
[More Information Needed]
#### Factors
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
[More Information Needed]
#### Metrics
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
[More Information Needed]
### Results
[More Information Needed]
#### Summary
## Model Examination [optional]
<!-- Relevant interpretability work for the model goes here -->
[More Information Needed]
## Environmental Impact
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
- **Hardware Type:** [More Information Needed]
- **Hours used:** [More Information Needed]
- **Cloud Provider:** [More Information Needed]
- **Compute Region:** [More Information Needed]
- **Carbon Emitted:** [More Information Needed]
## Technical Specifications [optional]
### Model Architecture and Objective
[More Information Needed]
### Compute Infrastructure
[More Information Needed]
#### Hardware
[More Information Needed]
#### Software
[More Information Needed]
## Citation [optional]
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
**BibTeX:**
[More Information Needed]
**APA:**
[More Information Needed]
## Glossary [optional]
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
[More Information Needed]
## More Information [optional]
[More Information Needed]
## Model Card Authors [optional]
[More Information Needed]
## Model Card Contact
[More Information Needed]
## Training procedure
The following `bitsandbytes` quantization config was used during training:
- quant_method: bitsandbytes
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: nf4
- bnb_4bit_use_double_quant: True
- bnb_4bit_compute_dtype: float16
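The same settings, expressed as a `transformers` `BitsAndBytesConfig` (this is the config also used in the loading sketch in "How to Get Started with the Model"):

```python
# The bitsandbytes settings above, expressed as a BitsAndBytesConfig object.
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)
```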
### Framework versions
- PEFT 0.6.2