Update README.md
Browse files
README.md
CHANGED
@@ -52,7 +52,7 @@ Here is the table summarizing the architecture used for training, along with the
|
|
52 |
| Hyperparameter | Value |
|
53 |
|:---------------------:|:----------:|
|
54 |
| label smoothing | 0.05 |
|
55 |
-
|
|
56 |
| betas | 0.9, 0.999 |
|
57 |
| learning rate | 1e-5 |
|
58 |
| anneal strategy | cos |
|
@@ -108,6 +108,6 @@ Citation
|
|
108 |
AUTHOR = {Cyrile Delestre},
|
109 |
URL = {https://huggingface.co/cmarkea/bloomz-3b-sft-chat},
|
110 |
YEAR = {2023},
|
111 |
-
KEYWORDS = {NLP ; Transformers ; Bloomz},
|
112 |
}
|
113 |
```
|
|
|
52 |
| Hyperparameter | Value |
|
53 |
|:---------------------:|:----------:|
|
54 |
| label smoothing | 0.05 |
|
55 |
+
| optimizer | AdamW |
|
56 |
| betas | 0.9, 0.999 |
|
57 |
| learning rate | 1e-5 |
|
58 |
| anneal strategy | cos |
|
|
|
108 |
AUTHOR = {Cyrile Delestre},
|
109 |
URL = {https://huggingface.co/cmarkea/bloomz-3b-sft-chat},
|
110 |
YEAR = {2023},
|
111 |
+
KEYWORDS = {NLP ; Transformers ; LLM ; Bloomz},
|
112 |
}
|
113 |
```
|