fine-tuning-Phi2-with-webglm-qa-with-lora

Browse files

Files changed (3) hide show

README.md +51 -51
adapter_config.json +4 -4
adapter_model.safetensors +1 -1

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the WebGLM-QA dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.1008
 ## Model description
@@ -51,56 +51,56 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 0.2   | 10   | 7.9734          |
-| No log        | 0.4   | 20   | 6.2577          |
-| No log        | 0.6   | 30   | 2.8966          |
-| No log        | 0.8   | 40   | 0.5650          |
-| 4.4766        | 1.0   | 50   | 0.5106          |
-| 4.4766        | 1.2   | 60   | 0.4513          |
-| 4.4766        | 1.39  | 70   | 0.4008          |
-| 4.4766        | 1.59  | 80   | 0.3539          |
-| 4.4766        | 1.79  | 90   | 0.3156          |
-| 0.3251        | 1.99  | 100  | 0.2877          |
-| 0.3251        | 2.19  | 110  | 0.2631          |
-| 0.3251        | 2.39  | 120  | 0.2464          |
-| 0.3251        | 2.59  | 130  | 0.2303          |
-| 0.3251        | 2.79  | 140  | 0.2117          |
-| 0.1953        | 2.99  | 150  | 0.1982          |
-| 0.1953        | 3.19  | 160  | 0.1892          |
-| 0.1953        | 3.39  | 170  | 0.1767          |
-| 0.1953        | 3.59  | 180  | 0.1687          |
-| 0.1953        | 3.78  | 190  | 0.1616          |
-| 0.1469        | 3.98  | 200  | 0.1559          |
-| 0.1469        | 4.18  | 210  | 0.1507          |
-| 0.1469        | 4.38  | 220  | 0.1484          |
-| 0.1469        | 4.58  | 230  | 0.1421          |
-| 0.1469        | 4.78  | 240  | 0.1353          |
-| 0.1212        | 4.98  | 250  | 0.1309          |
-| 0.1212        | 5.18  | 260  | 0.1292          |
-| 0.1212        | 5.38  | 270  | 0.1267          |
-| 0.1212        | 5.58  | 280  | 0.1231          |
-| 0.1212        | 5.78  | 290  | 0.1218          |
-| 0.1059        | 5.98  | 300  | 0.1177          |
-| 0.1059        | 6.18  | 310  | 0.1154          |
-| 0.1059        | 6.37  | 320  | 0.1151          |
-| 0.1059        | 6.57  | 330  | 0.1144          |
-| 0.1059        | 6.77  | 340  | 0.1114          |
-| 0.0936        | 6.97  | 350  | 0.1098          |
-| 0.0936        | 7.17  | 360  | 0.1093          |
-| 0.0936        | 7.37  | 370  | 0.1071          |
-| 0.0936        | 7.57  | 380  | 0.1063          |
-| 0.0936        | 7.77  | 390  | 0.1060          |
-| 0.0881        | 7.97  | 400  | 0.1049          |
-| 0.0881        | 8.17  | 410  | 0.1042          |
-| 0.0881        | 8.37  | 420  | 0.1035          |
-| 0.0881        | 8.57  | 430  | 0.1032          |
-| 0.0881        | 8.76  | 440  | 0.1028          |
-| 0.0819        | 8.96  | 450  | 0.1019          |
-| 0.0819        | 9.16  | 460  | 0.1014          |
-| 0.0819        | 9.36  | 470  | 0.1012          |
-| 0.0819        | 9.56  | 480  | 0.1010          |
-| 0.0819        | 9.76  | 490  | 0.1008          |
-| 0.079         | 9.96  | 500  | 0.1008          |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the WebGLM-QA dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1016
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| No log        | 0.2   | 10   | 8.0315          |
+| No log        | 0.4   | 20   | 6.2055          |
+| No log        | 0.6   | 30   | 2.7735          |
+| No log        | 0.8   | 40   | 0.6055          |
+| 4.4745        | 1.0   | 50   | 0.5323          |
+| 4.4745        | 1.2   | 60   | 0.4631          |
+| 4.4745        | 1.39  | 70   | 0.4075          |
+| 4.4745        | 1.59  | 80   | 0.3566          |
+| 4.4745        | 1.79  | 90   | 0.3155          |
+| 0.3331        | 1.99  | 100  | 0.2869          |
+| 0.3331        | 2.19  | 110  | 0.2624          |
+| 0.3331        | 2.39  | 120  | 0.2453          |
+| 0.3331        | 2.59  | 130  | 0.2288          |
+| 0.3331        | 2.79  | 140  | 0.2095          |
+| 0.1947        | 2.99  | 150  | 0.1978          |
+| 0.1947        | 3.19  | 160  | 0.1886          |
+| 0.1947        | 3.39  | 170  | 0.1766          |
+| 0.1947        | 3.59  | 180  | 0.1691          |
+| 0.1947        | 3.78  | 190  | 0.1626          |
+| 0.1486        | 3.98  | 200  | 0.1562          |
+| 0.1486        | 4.18  | 210  | 0.1510          |
+| 0.1486        | 4.38  | 220  | 0.1489          |
+| 0.1486        | 4.58  | 230  | 0.1439          |
+| 0.1486        | 4.78  | 240  | 0.1364          |
+| 0.1232        | 4.98  | 250  | 0.1314          |
+| 0.1232        | 5.18  | 260  | 0.1306          |
+| 0.1232        | 5.38  | 270  | 0.1295          |
+| 0.1232        | 5.58  | 280  | 0.1256          |
+| 0.1232        | 5.78  | 290  | 0.1228          |
+| 0.1084        | 5.98  | 300  | 0.1195          |
+| 0.1084        | 6.18  | 310  | 0.1165          |
+| 0.1084        | 6.37  | 320  | 0.1156          |
+| 0.1084        | 6.57  | 330  | 0.1147          |
+| 0.1084        | 6.77  | 340  | 0.1120          |
+| 0.0964        | 6.97  | 350  | 0.1100          |
+| 0.0964        | 7.17  | 360  | 0.1100          |
+| 0.0964        | 7.37  | 370  | 0.1087          |
+| 0.0964        | 7.57  | 380  | 0.1080          |
+| 0.0964        | 7.77  | 390  | 0.1071          |
+| 0.0905        | 7.97  | 400  | 0.1065          |
+| 0.0905        | 8.17  | 410  | 0.1061          |
+| 0.0905        | 8.37  | 420  | 0.1053          |
+| 0.0905        | 8.57  | 430  | 0.1044          |
+| 0.0905        | 8.76  | 440  | 0.1036          |
+| 0.0843        | 8.96  | 450  | 0.1028          |
+| 0.0843        | 9.16  | 460  | 0.1021          |
+| 0.0843        | 9.36  | 470  | 0.1019          |
+| 0.0843        | 9.56  | 480  | 0.1018          |
+| 0.0843        | 9.76  | 490  | 0.1016          |
+| 0.0819        | 9.96  | 500  | 0.1016          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,12 +19,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "q_proj",
-    "fc2",
     "v_proj",
-    "fc1",
-    "dense"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
+    "k_proj",
     "v_proj",
+    "fc2",
+    "dense",
+    "fc1"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0893ceadbbb8b438bd07e6464913f985f2fa6107ff3d249ce2e1a812c4cd4e1c
 size 94422368

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb85b33189faa8d8a482db2171584cc114b99d6eab73450f77c75ee1eff60b10
 size 94422368