fine-tuning-Phi2-with-webglm-qa-with-lora

Browse files

Files changed (3) hide show

README.md +51 -51
adapter_config.json +3 -3
adapter_model.safetensors +1 -1

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1016
 ## Model description
@@ -51,56 +51,56 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 0.2   | 10   | 8.0315          |
-| No log        | 0.4   | 20   | 6.2055          |
-| No log        | 0.6   | 30   | 2.7735          |
-| No log        | 0.8   | 40   | 0.6055          |
-| 4.4745        | 1.0   | 50   | 0.5323          |
-| 4.4745        | 1.2   | 60   | 0.4631          |
-| 4.4745        | 1.39  | 70   | 0.4075          |
-| 4.4745        | 1.59  | 80   | 0.3566          |
-| 4.4745        | 1.79  | 90   | 0.3155          |
-| 0.3331        | 1.99  | 100  | 0.2869          |
-| 0.3331        | 2.19  | 110  | 0.2624          |
-| 0.3331        | 2.39  | 120  | 0.2453          |
-| 0.3331        | 2.59  | 130  | 0.2288          |
-| 0.3331        | 2.79  | 140  | 0.2095          |
-| 0.1947        | 2.99  | 150  | 0.1978          |
-| 0.1947        | 3.19  | 160  | 0.1886          |
-| 0.1947        | 3.39  | 170  | 0.1766          |
-| 0.1947        | 3.59  | 180  | 0.1691          |
-| 0.1947        | 3.78  | 190  | 0.1626          |
-| 0.1486        | 3.98  | 200  | 0.1562          |
-| 0.1486        | 4.18  | 210  | 0.1510          |
-| 0.1486        | 4.38  | 220  | 0.1489          |
-| 0.1486        | 4.58  | 230  | 0.1439          |
-| 0.1486        | 4.78  | 240  | 0.1364          |
-| 0.1232        | 4.98  | 250  | 0.1314          |
-| 0.1232        | 5.18  | 260  | 0.1306          |
-| 0.1232        | 5.38  | 270  | 0.1295          |
-| 0.1232        | 5.58  | 280  | 0.1256          |
-| 0.1232        | 5.78  | 290  | 0.1228          |
-| 0.1084        | 5.98  | 300  | 0.1195          |
-| 0.1084        | 6.18  | 310  | 0.1165          |
-| 0.1084        | 6.37  | 320  | 0.1156          |
-| 0.1084        | 6.57  | 330  | 0.1147          |
-| 0.1084        | 6.77  | 340  | 0.1120          |
-| 0.0964        | 6.97  | 350  | 0.1100          |
-| 0.0964        | 7.17  | 360  | 0.1100          |
-| 0.0964        | 7.37  | 370  | 0.1087          |
-| 0.0964        | 7.57  | 380  | 0.1080          |
-| 0.0964        | 7.77  | 390  | 0.1071          |
-| 0.0905        | 7.97  | 400  | 0.1065          |
-| 0.0905        | 8.17  | 410  | 0.1061          |
-| 0.0905        | 8.37  | 420  | 0.1053          |
-| 0.0905        | 8.57  | 430  | 0.1044          |
-| 0.0905        | 8.76  | 440  | 0.1036          |
-| 0.0843        | 8.96  | 450  | 0.1028          |
-| 0.0843        | 9.16  | 460  | 0.1021          |
-| 0.0843        | 9.36  | 470  | 0.1019          |
-| 0.0843        | 9.56  | 480  | 0.1018          |
-| 0.0843        | 9.76  | 490  | 0.1016          |
-| 0.0819        | 9.96  | 500  | 0.1016          |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1032
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| No log        | 0.2   | 10   | 8.0361          |
+| No log        | 0.4   | 20   | 6.2064          |
+| No log        | 0.6   | 30   | 2.7739          |
+| No log        | 0.8   | 40   | 0.6071          |
+| 4.4774        | 1.0   | 50   | 0.5329          |
+| 4.4774        | 1.2   | 60   | 0.4635          |
+| 4.4774        | 1.39  | 70   | 0.4081          |
+| 4.4774        | 1.59  | 80   | 0.3576          |
+| 4.4774        | 1.79  | 90   | 0.3173          |
+| 0.3338        | 1.99  | 100  | 0.2889          |
+| 0.3338        | 2.19  | 110  | 0.2645          |
+| 0.3338        | 2.39  | 120  | 0.2471          |
+| 0.3338        | 2.59  | 130  | 0.2301          |
+| 0.3338        | 2.79  | 140  | 0.2121          |
+| 0.1964        | 2.99  | 150  | 0.1992          |
+| 0.1964        | 3.19  | 160  | 0.1913          |
+| 0.1964        | 3.39  | 170  | 0.1793          |
+| 0.1964        | 3.59  | 180  | 0.1713          |
+| 0.1964        | 3.78  | 190  | 0.1642          |
+| 0.1501        | 3.98  | 200  | 0.1579          |
+| 0.1501        | 4.18  | 210  | 0.1531          |
+| 0.1501        | 4.38  | 220  | 0.1511          |
+| 0.1501        | 4.58  | 230  | 0.1455          |
+| 0.1501        | 4.78  | 240  | 0.1379          |
+| 0.1248        | 4.98  | 250  | 0.1333          |
+| 0.1248        | 5.18  | 260  | 0.1313          |
+| 0.1248        | 5.38  | 270  | 0.1308          |
+| 0.1248        | 5.58  | 280  | 0.1271          |
+| 0.1248        | 5.78  | 290  | 0.1244          |
+| 0.1097        | 5.98  | 300  | 0.1208          |
+| 0.1097        | 6.18  | 310  | 0.1178          |
+| 0.1097        | 6.37  | 320  | 0.1164          |
+| 0.1097        | 6.57  | 330  | 0.1155          |
+| 0.1097        | 6.77  | 340  | 0.1125          |
+| 0.0976        | 6.97  | 350  | 0.1108          |
+| 0.0976        | 7.17  | 360  | 0.1109          |
+| 0.0976        | 7.37  | 370  | 0.1093          |
+| 0.0976        | 7.57  | 380  | 0.1085          |
+| 0.0976        | 7.77  | 390  | 0.1079          |
+| 0.0917        | 7.97  | 400  | 0.1072          |
+| 0.0917        | 8.17  | 410  | 0.1064          |
+| 0.0917        | 8.37  | 420  | 0.1058          |
+| 0.0917        | 8.57  | 430  | 0.1054          |
+| 0.0917        | 8.76  | 440  | 0.1047          |
+| 0.0855        | 8.96  | 450  | 0.1040          |
+| 0.0855        | 9.16  | 460  | 0.1034          |
+| 0.0855        | 9.36  | 470  | 0.1032          |
+| 0.0855        | 9.56  | 480  | 0.1032          |
+| 0.0855        | 9.76  | 490  | 0.1032          |
+| 0.0833        | 9.96  | 500  | 0.1032          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,12 +19,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "k_proj",
     "v_proj",
-    "fc2",
     "dense",
-    "fc1"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "v_proj",
     "dense",
+    "fc1",
+    "q_proj",
+    "fc2"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb85b33189faa8d8a482db2171584cc114b99d6eab73450f77c75ee1eff60b10
 size 94422368

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2940117eb1f424c3485633ca11ec57876aa73e4dcf297c5726a207484ed4272
 size 94422368