fix compatibility issue for transformers 4.46+
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- 20241205_033944/configs/20241205_033944_98865.py +0 -0
- 20241205_033944/logs/eval/internvl-chat-20b/C3.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2013_English_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Biology_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chemistry_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Modern_Lit.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_English_Fill_in_Blanks.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_English_Reading_Comp.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Geography_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_History_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_II_Fill-in-the-Blank.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_II_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_I_Fill-in-the-Blank.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_I_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Physics_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Political_Science_MCQs.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2012-2022_English_Cloze_Test.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2014-2022_English_Language_Cloze_Passage.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/IFEval.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/TheoremQA.out +0 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-boolean_expressions.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-causal_judgement.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-date_understanding.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-disambiguation_qa.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-dyck_languages.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-formal_fallacies.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-geometric_shapes.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-hyperbaton.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-logical_deduction_five_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-logical_deduction_seven_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-logical_deduction_three_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-movie_recommendation.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-multistep_arithmetic_two.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-navigate.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-object_counting.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-penguins_in_a_table.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-reasoning_about_colored_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-ruin_names.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-salient_translation_error_detection.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-snarks.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-sports_understanding.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-temporal_sequences.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-tracking_shuffled_objects_five_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-tracking_shuffled_objects_seven_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-tracking_shuffled_objects_three_objects.out +8 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-web_of_lies.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/bbh-word_sorting.out +6 -0
- 20241205_033944/logs/eval/internvl-chat-20b/ceval-accountant.out +6 -0
20241205_033944/configs/20241205_033944_98865.py
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20241205_033944/logs/eval/internvl-chat-20b/C3.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090296 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:11 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/C3]: {'accuracy': 76.87671232876713}
|
6 |
+
12/05 03:49:11 - OpenCompass - [4m[97mINFO[0m - time elapsed: 15.30s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2013_English_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090278 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:05 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2013_English_MCQs]: {'score': 55.23809523809524}
|
6 |
+
12/05 03:49:05 - OpenCompass - [4m[97mINFO[0m - time elapsed: 11.80s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Biology_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090429 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:21 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Biology_MCQs]: {'score': 50.0}
|
6 |
+
12/05 03:49:21 - OpenCompass - [4m[97mINFO[0m - time elapsed: 18.30s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chemistry_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090456 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Chemistry_MCQs]: {'score': 30.64516129032258}
|
6 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 18.23s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090416 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:21 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs]: {'score': 23.75}
|
6 |
+
12/05 03:49:21 - OpenCompass - [4m[97mINFO[0m - time elapsed: 18.76s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090340 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation]: {'score': 0}
|
6 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.39s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Modern_Lit.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090279 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:05 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Chinese_Modern_Lit]: {'score': 9.195402298850574}
|
6 |
+
12/05 03:49:05 - OpenCompass - [4m[97mINFO[0m - time elapsed: 11.22s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_English_Fill_in_Blanks.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090339 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_English_Fill_in_Blanks]: {'score': 0.0}
|
6 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.32s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_English_Reading_Comp.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090346 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_English_Reading_Comp]: {'score': 12.553191489361701}
|
6 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.31s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Geography_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090436 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Geography_MCQs]: {'score': 44.21052631578947}
|
6 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 18.57s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_History_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090415 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_History_MCQs]: {'score': 68.6411149825784}
|
6 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 18.77s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_II_Fill-in-the-Blank.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090312 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Math_II_Fill-in-the-Blank]: {'score': 0}
|
6 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - time elapsed: 15.85s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_II_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090323 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Math_II_MCQs]: {'score': 13.302752293577983}
|
6 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - time elapsed: 15.92s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_I_Fill-in-the-Blank.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090394 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:17 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Math_I_Fill-in-the-Blank]: {'score': 0}
|
6 |
+
12/05 03:49:17 - OpenCompass - [4m[97mINFO[0m - time elapsed: 16.37s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Math_I_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090419 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Math_I_MCQs]: {'score': 10.2803738317757}
|
6 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 18.55s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Physics_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090320 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Physics_MCQs]: {'score': 10.9375}
|
6 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - time elapsed: 16.43s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2010-2022_Political_Science_MCQs.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090420 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:21 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2010-2022_Political_Science_MCQs]: {'score': 73.75}
|
6 |
+
12/05 03:49:21 - OpenCompass - [4m[97mINFO[0m - time elapsed: 17.63s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2012-2022_English_Cloze_Test.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090317 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2012-2022_English_Cloze_Test]: {'score': 0.0}
|
6 |
+
12/05 03:49:14 - OpenCompass - [4m[97mINFO[0m - time elapsed: 15.85s
|
20241205_033944/logs/eval/internvl-chat-20b/GaokaoBench_2014-2022_English_Language_Cloze_Passage.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090287 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/GaokaoBench_2014-2022_English_Language_Cloze_Passage]: {'score': 0}
|
6 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - time elapsed: 15.13s
|
20241205_033944/logs/eval/internvl-chat-20b/IFEval.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090535 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:35 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/IFEval]: {'Prompt-level-strict-accuracy': 19.77818853974122, 'Inst-level-strict-accuracy': 31.894484412470025, 'Prompt-level-loose-accuracy': 22.920517560073936, 'Inst-level-loose-accuracy': 35.13189448441247}
|
6 |
+
12/05 03:49:35 - OpenCompass - [4m[97mINFO[0m - time elapsed: 14.98s
|
20241205_033944/logs/eval/internvl-chat-20b/TheoremQA.out
ADDED
The diff for this file is too large to render.
See raw diff
|
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-boolean_expressions.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090516 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:24 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-boolean_expressions]: {'score': 56.39999999999999}
|
6 |
+
12/05 03:49:24 - OpenCompass - [4m[97mINFO[0m - time elapsed: 11.15s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-causal_judgement.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090523 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:27 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-causal_judgement]: {'score': 48.1283422459893}
|
6 |
+
12/05 03:49:27 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.68s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-date_understanding.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090337 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f230a482950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-date_understanding]: {'score': 54.800000000000004}
|
8 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.54s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-disambiguation_qa.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090338 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f443bc32950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-disambiguation_qa]: {'score': 51.6}
|
8 |
+
12/05 03:49:10 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.59s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-dyck_languages.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090539 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:33 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-dyck_languages]: {'score': 0.0}
|
6 |
+
12/05 03:49:33 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.01s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-formal_fallacies.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090542 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-formal_fallacies]: {'score': 48.0}
|
6 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - time elapsed: 9.92s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-geometric_shapes.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090513 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7fc13523a950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:20 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-geometric_shapes]: {'score': 24.8}
|
8 |
+
12/05 03:49:20 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.89s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-hyperbaton.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090518 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7fda1fcaa950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:25 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-hyperbaton]: {'score': 58.8}
|
8 |
+
12/05 03:49:25 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.79s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-logical_deduction_five_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090529 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f4280c6a950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-logical_deduction_five_objects]: {'score': 21.6}
|
8 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.53s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-logical_deduction_seven_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090541 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7fafc6daa950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-logical_deduction_seven_objects]: {'score': 17.2}
|
8 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.53s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-logical_deduction_three_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090514 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f1d0a64a950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-logical_deduction_three_objects]: {'score': 38.0}
|
8 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.64s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-movie_recommendation.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090528 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7fae0592e950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-movie_recommendation]: {'score': 53.2}
|
8 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.87s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-multistep_arithmetic_two.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090520 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:26 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-multistep_arithmetic_two]: {'score': 9.6}
|
6 |
+
12/05 03:49:26 - OpenCompass - [4m[97mINFO[0m - time elapsed: 11.22s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-navigate.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090530 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-navigate]: {'score': 58.8}
|
6 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.34s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-object_counting.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090531 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-object_counting]: {'score': 51.2}
|
6 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.39s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-penguins_in_a_table.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090517 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f38bccc28c0> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:24 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-penguins_in_a_table]: {'score': 32.87671232876712}
|
8 |
+
12/05 03:49:24 - OpenCompass - [4m[97mINFO[0m - time elapsed: 11.15s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-reasoning_about_colored_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090515 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f8aa82f2950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-reasoning_about_colored_objects]: {'score': 52.0}
|
8 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.69s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-ruin_names.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090525 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f9b772fa950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:28 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-ruin_names]: {'score': 26.8}
|
8 |
+
12/05 03:49:28 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.89s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-salient_translation_error_detection.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090540 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f0cda62a950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-salient_translation_error_detection]: {'score': 16.8}
|
8 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.55s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-snarks.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090522 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7fa12dc0e8c0> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:27 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-snarks]: {'score': 48.87640449438202}
|
8 |
+
12/05 03:49:27 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.78s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-sports_understanding.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090526 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:28 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-sports_understanding]: {'score': 56.00000000000001}
|
6 |
+
12/05 03:49:28 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.37s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-temporal_sequences.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090401 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f499b756950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-temporal_sequences]: {'score': 40.8}
|
8 |
+
12/05 03:49:22 - OpenCompass - [4m[97mINFO[0m - time elapsed: 19.32s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-tracking_shuffled_objects_five_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090534 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f8c55906950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-tracking_shuffled_objects_five_objects]: {'score': 19.6}
|
8 |
+
12/05 03:49:32 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.71s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-tracking_shuffled_objects_seven_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090519 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7fdeda40e950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:25 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-tracking_shuffled_objects_seven_objects]: {'score': 12.8}
|
8 |
+
12/05 03:49:25 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.71s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-tracking_shuffled_objects_three_objects.out
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090509 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
Parameter 'function'=<function OpenICLEvalTask._score.<locals>.postprocess at 0x7f2d74d22950> of the transform datasets.arrow_dataset.Dataset._map_single couldn't be hashed properly, a random hash was used instead. Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. This warning is only showed once. Subsequent hashing failures won't be showed.
|
6 |
+
|
7 |
+
12/05 03:49:19 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-tracking_shuffled_objects_three_objects]: {'score': 30.8}
|
8 |
+
12/05 03:49:19 - OpenCompass - [4m[97mINFO[0m - time elapsed: 12.49s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-web_of_lies.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090527 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-web_of_lies]: {'score': 48.4}
|
6 |
+
12/05 03:49:29 - OpenCompass - [4m[97mINFO[0m - time elapsed: 10.90s
|
20241205_033944/logs/eval/internvl-chat-20b/bbh-word_sorting.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090521 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:26 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/bbh-word_sorting]: {'score': 2.8000000000000003}
|
6 |
+
12/05 03:49:26 - OpenCompass - [4m[97mINFO[0m - time elapsed: 11.19s
|
20241205_033944/logs/eval/internvl-chat-20b/ceval-accountant.out
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
srun: Job 4090414 scheduled successfully!
|
2 |
+
Current QUOTA_TYPE is [reserved], which means the job has occupied quota in RESERVED_TOTAL under your partition.
|
3 |
+
Current PHX_PRIORITY is P0
|
4 |
+
|
5 |
+
12/05 03:49:20 - OpenCompass - [4m[97mINFO[0m - Task [internvl-chat-20b/ceval-accountant]: {'accuracy': 30.612244897959183}
|
6 |
+
12/05 03:49:20 - OpenCompass - [4m[97mINFO[0m - time elapsed: 16.82s
|