Upload 9 files

Files changed (9) hide show

benchmark_scores/result_jaqket_v2-0.2-0.2.json ADDED Viewed

+{
+  "results": {
+    "jaqket_v2-0.2-0.2": {
+      "exact_match": 80.58419243986255,
+      "f1": 84.86990863279517
+    }
+  },
+  "versions": {
+    "jaqket_v2-0.2-0.2": 0.2
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      1
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_jcola.json ADDED Viewed

+{
+  "results": {
+    "jcola": {
+      "balanced_acc": 0.5841310422736191,
+      "mcc": 0.13714598200194028,
+      "mcc_stderr": 0.03691630655102646,
+      "macro_f1": 0.5548345406141655
+    }
+  },
+  "versions": {
+    "jcola": 0.2
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      5
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_jcommonsenseqa-1.1-0.2.1.json ADDED Viewed

+{
+  "results": {
+    "jcommonsenseqa-1.1-0.2.1": {
+      "acc": 0.9097408400357462,
+      "acc_stderr": 0.008570054575466593,
+      "acc_norm": 0.8302055406613047,
+      "acc_norm_stderr": 0.011228808341801134
+    }
+  },
+  "versions": {
+    "jcommonsenseqa-1.1-0.2.1": 1.1
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      3
+    ],
+    "batch_size": 1,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_jnli-1.3-0.2.json ADDED Viewed

+{
+    "results": {
+      "jnli-1.3-0.2": {
+        "acc": 0.5875102711585867,
+        "acc_stderr": 0.009980291973884942,
+        "acc_norm": 0.5875102711585867,
+        "acc_norm_stderr": 0.009980291973884942,
+        "balanced_acc": 0.7118954352211451,
+        "mcc": 0.48341356747473024,
+        "mcc_stderr": 0.011948217496149275,
+        "macro_f1": 0.601774322498386
+      }
+    },
+    "versions": {
+      "jnli-1.3-0.2": 1.3
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        3
+      ],
+      "batch_size": 8,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}

benchmark_scores/result_jsquad-1.2-0.2.json ADDED Viewed

+{
+    "results": {
+      "jsquad-1.2-0.2": {
+        "exact_match": 80.14407924358397,
+        "f1": 91.31291346456423
+      }
+    },
+    "versions": {
+      "jsquad-1.2-0.2": 1.2
+    },
+    "config": {
+      "model": "hf-causal-experimental",
+      "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+      "num_fewshot": [
+        2
+      ],
+      "batch_size": 1,
+      "device": "cuda",
+      "no_cache": false,
+      "limit": null,
+      "bootstrap_iters": 100000,
+      "description_dict": null
+    }
+}

benchmark_scores/result_marc_ja-1.1-0.2.json ADDED Viewed

+{
+  "results": {
+    "marc_ja-1.1-0.2": {
+      "acc": 0.9757693668199505,
+      "acc_stderr": 0.002045109088212568,
+      "acc_norm": 0.9757693668199505,
+      "acc_norm_stderr": 0.002045109088212568,
+      "balanced_acc": 0.9595742998823737,
+      "mcc": 0.9043294397096614,
+      "mcc_stderr": 0.00802664025288833,
+      "macro_f1": 0.9520408004687209
+    }
+  },
+  "versions": {
+    "marc_ja-1.1-0.2": 1.1
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      0
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_mgsm.json ADDED Viewed

+{
+  "results": {
+    "mgsm": {
+      "acc": 0.232,
+      "acc_stderr": 0.026750070374865167
+    }
+  },
+  "versions": {
+    "mgsm": 1.0
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      5
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_xlsum_ja.json ADDED Viewed

+{
+  "results": {
+    "xlsum_ja": {
+      "rouge2": 25.447876004263104
+    }
+  },
+  "versions": {
+    "xlsum_ja": 1.0
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      1
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}

benchmark_scores/result_xwinograd_ja.json ADDED Viewed

+{
+  "results": {
+    "xwinograd_ja": {
+      "acc": 0.8143899895724713,
+      "acc_stderr": 0.012561287517973916
+    }
+  },
+  "versions": {
+    "xwinograd_ja": 1.0
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=HachiML/youri-2x7b_v0.2,tokenizer=HachiML/youri-2x7b_v0.2,use_accelerate=True,dtype=auto",
+    "num_fewshot": [
+      0
+    ],
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": null
+  }
+}