nicholasKluge committed · 8e775fb · Parent(s): d996c32
Upload AIRA_FineTuning.ipynb

AIRA_FineTuning.ipynb (CHANGED, +12 -12)
@@ -758,14 +758,14 @@
 "source": [
 "from transformers import GPT2Tokenizer\n",
 "\n",
-"model = \"pierreguillou/gpt2-small-portuguese\"
-"model_size = \"PT-124M\"
+"model = \"pierreguillou/gpt2-small-portuguese\" \n",
+"model_size = \"PT-124M\" \n",
 "\n",
 "tokenizer = GPT2Tokenizer.from_pretrained(model,\n",
-" bos_token='<|startofinstruction|>',
+" bos_token='<|startofinstruction|>', \n",
 " sep_token = '<|endofinstruction|>',\n",
-" eos_token='<|endofcompletion|>',
-" pad_token='<|pad|>')
+" eos_token='<|endofcompletion|>', \n",
+" pad_token='<|pad|>') \n",
 "\n",
 "df['demonstrations'] = tokenizer.bos_token + df['prompt'] + tokenizer.sep_token + df['completion'] + tokenizer.eos_token\n",
 "\n",
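For readers skimming the diff, here is the cell this hunk touches, reconstructed as a runnable sketch. The model id, the special tokens, and the `demonstrations` column come straight from the lines above; the one-row DataFrame is a made-up placeholder for the notebook's real instruction data.

```python
import pandas as pd
from transformers import GPT2Tokenizer

model = "pierreguillou/gpt2-small-portuguese"
model_size = "PT-124M"

tokenizer = GPT2Tokenizer.from_pretrained(
    model,
    bos_token='<|startofinstruction|>',
    sep_token='<|endofinstruction|>',
    eos_token='<|endofcompletion|>',
    pad_token='<|pad|>',
)

# Placeholder row; the notebook loads a real instruction dataset.
df = pd.DataFrame({
    "prompt": ["Olá, tudo bem?"],
    "completion": ["Tudo ótimo, e você?"],
})

# Wrap each (prompt, completion) pair in the special tokens so the model
# can learn where an instruction ends and a completion begins.
df["demonstrations"] = (
    tokenizer.bos_token
    + df["prompt"]
    + tokenizer.sep_token
    + df["completion"]
    + tokenizer.eos_token
)
```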
@@ -830,7 +830,7 @@
 "id": "vSqKuRjIe8ru"
 },
 "source": [
-""
+"7. Create the `DataLoaders` and specify the `batch_size`."
 ]
 },
 {
@@ -846,7 +846,7 @@
 "dataloader = DataLoader(\n",
 " dataset,\n",
 " sampler=RandomSampler(dataset),\n",
-" batch_size=24,
+" batch_size=24, \n",
 " )"
 ]
 },
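Step 7 in context: a minimal sketch of the cell around this hunk, assuming `dataset` is any `torch.utils.data.Dataset`. The `TensorDataset` of random token ids is a stand-in for the notebook's tokenized demonstrations; only `batch_size=24` is confirmed by the diff.

```python
import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# Stand-in for the tokenized demonstrations: 96 sequences of 128 token ids.
dataset = TensorDataset(torch.randint(0, 50257, (96, 128)))

dataloader = DataLoader(
    dataset,
    sampler=RandomSampler(dataset),  # reshuffle the demonstrations every epoch
    batch_size=24,                   # the value set in this hunk
)

(input_ids,) = next(iter(dataloader))  # input_ids has shape (24, 128)
```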
@@ -856,7 +856,7 @@
 "id": "0vxvcTIHe8rv"
 },
 "source": [
-""
+"8. Load the base model (`GPT2LMHeadModel`)."
 ]
 },
 {
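Step 8 likely pairs the checkpoint load with an embedding resize, since four special tokens were added to the tokenizer earlier; the `resize_token_embeddings` call is the usual `transformers` idiom for that, not a line confirmed by this diff.

```python
from transformers import GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained("pierreguillou/gpt2-small-portuguese")

# Grow the embedding matrix to cover the four added special tokens;
# `tokenizer` is the GPT2Tokenizer built in the earlier cell.
model.resize_token_embeddings(len(tokenizer))
```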
@@ -961,7 +961,7 @@
 "id": "XAoXD7GVYMN_"
 },
 "source": [
-""
+"9. Freeze some of the layers for constrained fine-tuning. This allows the model to retain some of its original capabilities after the tuning."
 ]
 },
 {
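One way step 9's constrained fine-tuning can be realized is by turning off gradients for the embeddings and the lower transformer blocks, so only the upper blocks and the LM head are updated. The cutoff of 8 of GPT-2 small's 12 blocks is an illustrative choice, not a value taken from the notebook.

```python
# Illustrative cutoff: freeze 8 of GPT-2 small's 12 transformer blocks.
N_FROZEN_BLOCKS = 8

# Token and position embeddings.
for param in model.transformer.wte.parameters():
    param.requires_grad = False
for param in model.transformer.wpe.parameters():
    param.requires_grad = False

# Lower blocks; the upper blocks and the LM head stay trainable.
for block in model.transformer.h[:N_FROZEN_BLOCKS]:
    for param in block.parameters():
        param.requires_grad = False
```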
@@ -1020,7 +1020,7 @@
 "id": "GJ1kH3uSe8rw"
 },
 "source": [
-""
+"10. Set the training hyperparameters."
 ]
 },
 {
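A sketch of the hyperparameter cell step 10 refers to; every value here is an illustrative placeholder. Passing only parameters with `requires_grad=True` keeps the optimizer consistent with the freezing step above.

```python
import torch
from transformers import get_linear_schedule_with_warmup

epochs = 3            # placeholder
learning_rate = 5e-5  # placeholder

# Optimize only the parameters left trainable by the freezing step.
optimizer = torch.optim.AdamW(
    (p for p in model.parameters() if p.requires_grad),
    lr=learning_rate,
)

scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=len(dataloader) * epochs,
)
```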
@@ -1059,7 +1059,7 @@
 "id": "nz1a9lxXe8rw"
 },
 "source": [
-""
+"11. Training/Validation loop. Track the carbon emissions of your work by using `codecarbon`. 🌱"
 ]
 },
 {
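The `codecarbon` pattern step 11 mentions, wrapped around a schematic training loop; padding, attention masks, and the validation pass that the real notebook would include are omitted here for brevity.

```python
from codecarbon import EmissionsTracker

tracker = EmissionsTracker()
tracker.start()
try:
    for epoch in range(epochs):
        model.train()
        for (input_ids,) in dataloader:
            # Causal LM loss: labels are shifted against the inputs
            # inside the model's forward pass.
            loss = model(input_ids=input_ids, labels=input_ids).loss
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
finally:
    emissions = tracker.stop()  # estimated kg of CO2-eq
```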
@@ -1790,7 +1790,7 @@
 "id": "h48iOsqie8rx"
 },
 "source": [
-""
+"12. Check the training stats and plot the learning curves."
 ]
 },
 {
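Finally, a sketch of step 12 with hypothetical list names and placeholder loss values; the notebook's actual bookkeeping may differ.

```python
import matplotlib.pyplot as plt

# Hypothetical per-epoch averages; the training loop above would have
# collected the real values.
train_losses = [2.91, 2.47, 2.30]
valid_losses = [2.65, 2.42, 2.38]

plt.plot(train_losses, label="training loss")
plt.plot(valid_losses, label="validation loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.title("Aira fine-tuning learning curves")
plt.show()
```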