cuierfei commited on
Commit
b537a0f
Β·
verified Β·
1 Parent(s): f245b6d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. Β  See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. caption-coco.log +0 -0
  3. eval_milebench/ALFRED.log +107 -0
  4. eval_milebench/ALFRED/ALFRED_240803234459.json +0 -0
  5. eval_milebench/ALFRED/eval.json +1 -0
  6. eval_milebench/ALFRED/eval_score.json +802 -0
  7. eval_milebench/ALFRED/pred.json +0 -0
  8. eval_milebench/ActionLocalization.log +296 -0
  9. eval_milebench/ActionLocalization/ActionLocalization_240803233707.json +0 -0
  10. eval_milebench/ActionLocalization/ActionLocalization_240803234615.json +0 -0
  11. eval_milebench/ActionLocalization/eval.json +1 -0
  12. eval_milebench/ActionLocalization/eval_score.json +802 -0
  13. eval_milebench/ActionLocalization/pred.json +0 -0
  14. eval_milebench/ActionLocalization/pred_with_extracted.json +0 -0
  15. eval_milebench/ActionPrediction.log +366 -0
  16. eval_milebench/ActionPrediction/ActionPrediction_240803234615.json +0 -0
  17. eval_milebench/ActionPrediction/eval.json +1 -0
  18. eval_milebench/ActionPrediction/eval_score.json +802 -0
  19. eval_milebench/ActionPrediction/pred.json +0 -0
  20. eval_milebench/ActionPrediction/pred_with_extracted.json +0 -0
  21. eval_milebench/ActionSequence.log +338 -0
  22. eval_milebench/ActionSequence/ActionSequence_240803234618.json +0 -0
  23. eval_milebench/ActionSequence/eval.json +1 -0
  24. eval_milebench/ActionSequence/eval_score.json +802 -0
  25. eval_milebench/ActionSequence/pred.json +0 -0
  26. eval_milebench/ActionSequence/pred_with_extracted.json +0 -0
  27. eval_milebench/CLEVR-Change.log +86 -0
  28. eval_milebench/CLEVR-Change/CLEVR-Change_240803234510.json +0 -0
  29. eval_milebench/CLEVR-Change/eval.json +1 -0
  30. eval_milebench/CLEVR-Change/eval_score.json +802 -0
  31. eval_milebench/CLEVR-Change/pred.json +0 -0
  32. eval_milebench/CharacterOrder.log +261 -0
  33. eval_milebench/CharacterOrder/CharacterOrder_240803234555.json +0 -0
  34. eval_milebench/CharacterOrder/eval.json +1 -0
  35. eval_milebench/CharacterOrder/eval_score.json +802 -0
  36. eval_milebench/CharacterOrder/pred.json +0 -0
  37. eval_milebench/CharacterOrder/pred_with_extracted.json +0 -0
  38. eval_milebench/CounterfactualInference.log +86 -0
  39. eval_milebench/CounterfactualInference/CounterfactualInference_240803234437.json +0 -0
  40. eval_milebench/CounterfactualInference/eval.json +1 -0
  41. eval_milebench/CounterfactualInference/eval_score.json +802 -0
  42. eval_milebench/CounterfactualInference/pred.json +0 -0
  43. eval_milebench/CounterfactualInference/pred_with_extracted.json +0 -0
  44. eval_milebench/DocVQA.log +114 -0
  45. eval_milebench/DocVQA/DocVQA_240803234442.json +0 -0
  46. eval_milebench/DocVQA/eval.json +1 -0
  47. eval_milebench/DocVQA/eval_score.json +802 -0
  48. eval_milebench/DocVQA/pred.json +0 -0
  49. eval_milebench/DocVQA/pred_with_extracted.json +0 -0
  50. eval_milebench/EgocentricNavigation.log +401 -0
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  examples/red-panda.mp4 filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  examples/red-panda.mp4 filter=lfs diff=lfs merge=lfs -text
37
+ eval_milebench/TextNeedleInAHaystack/TextNeedleInAHaystack_240803235133.json filter=lfs diff=lfs merge=lfs -text
38
+ eval_milebench/TextNeedleInAHaystack/pred.json filter=lfs diff=lfs merge=lfs -text
caption-coco.log ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ALFRED.log ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/9 [00:00<?, ?it/s]
1
  11%|β–ˆ | 1/9 [00:02<00:17, 2.25s/it]
2
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:10, 1.48s/it]
3
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:07, 1.23s/it]
4
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:05, 1.12s/it]
5
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:06<00:04, 1.06s/it]
6
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:06<00:02, 1.16it/s]
7
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:07<00:01, 1.12it/s]
8
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:08<00:00, 1.09it/s]
 
 
 
9
  0%| | 0/9 [00:00<?, ?it/s]
10
  11%|β–ˆ | 1/9 [00:02<00:22, 2.78s/it]
11
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:12, 1.73s/it]
12
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.37s/it]
13
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:06, 1.23s/it]
14
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:07<00:05, 1.25s/it]
15
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:08<00:03, 1.16s/it]
16
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:08<00:02, 1.10s/it]
17
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:10<00:01, 1.20s/it]
 
 
18
  0%| | 0/9 [00:00<?, ?it/s]
19
  11%|β–ˆ | 1/9 [00:02<00:21, 2.73s/it]
20
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:12, 1.75s/it]
21
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.41s/it]
22
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:06, 1.24s/it]
23
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:06<00:04, 1.13s/it]
24
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:07<00:03, 1.08s/it]
25
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:08<00:02, 1.04s/it]
26
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:10<00:01, 1.33s/it]
 
 
27
  0%| | 0/15 [00:00<?, ?it/s]
28
  7%|β–‹ | 1/15 [00:01<00:17, 1.25s/it]
29
  13%|β–ˆβ–Ž | 2/15 [00:02<00:13, 1.05s/it]
30
  20%|β–ˆβ–ˆ | 3/15 [00:03<00:11, 1.01it/s]
31
  27%|β–ˆβ–ˆβ–‹ | 4/15 [00:04<00:11, 1.06s/it]
32
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 5/15 [00:05<00:10, 1.01s/it]
33
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 6/15 [00:06<00:08, 1.01it/s]
34
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/15 [00:07<00:08, 1.07s/it]
35
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/15 [00:08<00:07, 1.03s/it]
36
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/15 [00:08<00:05, 1.17it/s]
37
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/15 [00:09<00:04, 1.06it/s]
38
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 11/15 [00:10<00:03, 1.06it/s]
39
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 12/15 [00:11<00:02, 1.05it/s]
40
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/15 [00:12<00:01, 1.06it/s]
41
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 14/15 [00:13<00:00, 1.06it/s]
 
 
 
42
  0%| | 0/15 [00:00<?, ?it/s]
43
  7%|β–‹ | 1/15 [00:01<00:17, 1.26s/it]
44
  13%|β–ˆβ–Ž | 2/15 [00:02<00:16, 1.25s/it]
45
  20%|β–ˆβ–ˆ | 3/15 [00:03<00:12, 1.06s/it]
46
  27%|β–ˆβ–ˆβ–‹ | 4/15 [00:04<00:11, 1.02s/it]
47
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 5/15 [00:05<00:10, 1.04s/it]
48
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 6/15 [00:06<00:09, 1.00s/it]
49
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/15 [00:07<00:07, 1.02it/s]
50
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/15 [00:08<00:06, 1.03it/s]
51
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/15 [00:09<00:05, 1.04it/s]
52
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/15 [00:10<00:04, 1.05it/s]
53
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 11/15 [00:11<00:03, 1.05it/s]
54
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 12/15 [00:11<00:02, 1.05it/s]
55
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/15 [00:12<00:01, 1.05it/s]
56
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 14/15 [00:13<00:00, 1.10it/s]
 
 
57
  0%| | 0/16 [00:00<?, ?it/s]
58
  6%|β–‹ | 1/16 [00:01<00:19, 1.32s/it]
59
  12%|β–ˆβ–Ž | 2/16 [00:02<00:15, 1.10s/it]
60
  19%|β–ˆβ–‰ | 3/16 [00:03<00:12, 1.05it/s]
61
  25%|β–ˆβ–ˆβ–Œ | 4/16 [00:04<00:12, 1.03s/it]
62
  31%|β–ˆβ–ˆβ–ˆβ– | 5/16 [00:05<00:11, 1.02s/it]
63
  38%|β–ˆβ–ˆβ–ˆβ–Š | 6/16 [00:06<00:09, 1.00it/s]
64
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 7/16 [00:07<00:08, 1.02it/s]
65
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/16 [00:07<00:07, 1.10it/s]
66
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 9/16 [00:08<00:06, 1.08it/s]
67
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/16 [00:09<00:05, 1.07it/s]
68
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 11/16 [00:10<00:04, 1.07it/s]
69
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 12/16 [00:11<00:03, 1.06it/s]
70
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 13/16 [00:12<00:02, 1.05it/s]
71
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 14/16 [00:13<00:02, 1.03s/it]
72
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 15/16 [00:14<00:00, 1.10it/s]
 
 
73
  0%| | 0/13 [00:00<?, ?it/s]
74
  8%|β–Š | 1/13 [00:01<00:13, 1.12s/it]
75
  15%|β–ˆβ–Œ | 2/13 [00:01<00:08, 1.28it/s]
76
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:08, 1.13it/s]
77
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:03<00:08, 1.09it/s]
78
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:04<00:07, 1.09it/s]
79
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:05<00:06, 1.08it/s]
80
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:06<00:05, 1.19it/s]
81
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:07<00:04, 1.16it/s]
82
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:07<00:03, 1.20it/s]
83
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:08<00:02, 1.17it/s]
84
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:09<00:01, 1.14it/s]
85
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:10<00:00, 1.13it/s]
 
 
 
86
  0%| | 0/12 [00:00<?, ?it/s]
87
  8%|β–Š | 1/12 [00:00<00:09, 1.15it/s]
88
  17%|β–ˆβ–‹ | 2/12 [00:01<00:09, 1.10it/s]
89
  25%|β–ˆβ–ˆβ–Œ | 3/12 [00:02<00:07, 1.19it/s]
90
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 4/12 [00:03<00:07, 1.14it/s]
91
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [00:04<00:06, 1.12it/s]
92
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/12 [00:05<00:05, 1.14it/s]
93
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [00:05<00:04, 1.22it/s]
94
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/12 [00:06<00:02, 1.35it/s]
95
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 9/12 [00:07<00:02, 1.24it/s]
96
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/12 [00:08<00:01, 1.19it/s]
97
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 11/12 [00:09<00:00, 1.15it/s]
 
 
98
  0%| | 0/13 [00:00<?, ?it/s]
99
  8%|β–Š | 1/13 [00:01<00:14, 1.22s/it]
100
  15%|β–ˆβ–Œ | 2/13 [00:01<00:10, 1.05it/s]
101
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:09, 1.07it/s]
102
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:03<00:08, 1.07it/s]
103
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:04<00:07, 1.07it/s]
104
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:05<00:06, 1.16it/s]
105
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:05<00:04, 1.37it/s]
106
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:06<00:03, 1.26it/s]
107
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:07<00:03, 1.19it/s]
108
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:08<00:02, 1.25it/s]
109
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:09<00:01, 1.21it/s]
110
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:10<00:00, 1.17it/s]
 
 
111
  0%| | 0/13 [00:00<?, ?it/s]
112
  8%|β–Š | 1/13 [00:01<00:14, 1.19s/it]
113
  15%|β–ˆβ–Œ | 2/13 [00:01<00:08, 1.33it/s]
114
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:08, 1.23it/s]
115
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:02<00:05, 1.56it/s]
116
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:03<00:05, 1.43it/s]
117
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:04<00:04, 1.64it/s]
118
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:05<00:04, 1.42it/s]
119
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:05<00:03, 1.30it/s]
120
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:06<00:03, 1.23it/s]
121
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:07<00:02, 1.36it/s]
122
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:07<00:01, 1.57it/s]
123
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:08<00:00, 1.38it/s]
 
124
  0%| | 0/12 [00:00<?, ?it/s]
125
  8%|β–Š | 1/12 [00:00<00:07, 1.45it/s]
126
  17%|β–ˆβ–‹ | 2/12 [00:01<00:08, 1.23it/s]
127
  25%|β–ˆβ–ˆβ–Œ | 3/12 [00:02<00:07, 1.17it/s]
128
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 4/12 [00:03<00:06, 1.15it/s]
129
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [00:03<00:05, 1.36it/s]
130
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/12 [00:04<00:04, 1.36it/s]
131
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [00:05<00:03, 1.27it/s]
132
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/12 [00:05<00:02, 1.49it/s]
133
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 9/12 [00:06<00:02, 1.35it/s]
134
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/12 [00:07<00:01, 1.41it/s]
135
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 11/12 [00:08<00:00, 1.52it/s]
 
136
  0%| | 0/13 [00:00<?, ?it/s]
137
  8%|β–Š | 1/13 [00:01<00:14, 1.20s/it]
138
  15%|β–ˆβ–Œ | 2/13 [00:01<00:09, 1.21it/s]
139
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:08, 1.16it/s]
140
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:03<00:07, 1.14it/s]
141
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:04<00:05, 1.39it/s]
142
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:04<00:04, 1.49it/s]
143
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:05<00:04, 1.34it/s]
144
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:06<00:03, 1.25it/s]
145
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:07<00:03, 1.33it/s]
146
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:07<00:02, 1.43it/s]
147
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:08<00:01, 1.31it/s]
148
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:08<00:00, 1.57it/s]
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ALFRED, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
62
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
63
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ALFRED, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
64
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
65
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ALFRED, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
66
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
67
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ALFRED, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
68
+ Initialization Finished
69
+ Predicting ALFRED Using internvl
70
+ Proceeding 5-length images samples | Num: 36
71
+ Initialization Finished
72
+ Predicting ALFRED Using internvl
73
+ Proceeding 5-length images samples | Num: 36
74
+ Initialization Finished
75
+ Predicting ALFRED Using internvl
76
+ Proceeding 5-length images samples | Num: 36
77
+ Initialization Finished
78
+ Predicting ALFRED Using internvl
79
+ Proceeding 5-length images samples | Num: 36
80
+
81
  0%| | 0/9 [00:00<?, ?it/s]
82
  11%|β–ˆ | 1/9 [00:02<00:17, 2.25s/it]
83
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:10, 1.48s/it]
84
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:07, 1.23s/it]
85
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:05, 1.12s/it]
86
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:06<00:04, 1.06s/it]
87
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:06<00:02, 1.16it/s]
88
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:07<00:01, 1.12it/s]
89
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:08<00:00, 1.09it/s]
90
+ Proceeding 4-length images samples | Num: 62
91
+ Proceeding 4-length images samples | Num: 62
92
+
93
  0%| | 0/9 [00:00<?, ?it/s]
94
  11%|β–ˆ | 1/9 [00:02<00:22, 2.78s/it]
95
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:12, 1.73s/it]
96
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.37s/it]
97
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:06, 1.23s/it]
98
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:07<00:05, 1.25s/it]
99
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:08<00:03, 1.16s/it]
100
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:08<00:02, 1.10s/it]
101
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:10<00:01, 1.20s/it]
102
+ Proceeding 4-length images samples | Num: 62
103
+
104
  0%| | 0/9 [00:00<?, ?it/s]
105
  11%|β–ˆ | 1/9 [00:02<00:21, 2.73s/it]
106
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:12, 1.75s/it]
107
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.41s/it]
108
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:06, 1.24s/it]
109
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:06<00:04, 1.13s/it]
110
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:07<00:03, 1.08s/it]
111
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:08<00:02, 1.04s/it]
112
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:10<00:01, 1.33s/it]
113
+ Proceeding 4-length images samples | Num: 62
114
+
115
  0%| | 0/15 [00:00<?, ?it/s]
116
  7%|β–‹ | 1/15 [00:01<00:17, 1.25s/it]
117
  13%|β–ˆβ–Ž | 2/15 [00:02<00:13, 1.05s/it]
118
  20%|β–ˆβ–ˆ | 3/15 [00:03<00:11, 1.01it/s]
119
  27%|β–ˆβ–ˆβ–‹ | 4/15 [00:04<00:11, 1.06s/it]
120
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 5/15 [00:05<00:10, 1.01s/it]
121
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 6/15 [00:06<00:08, 1.01it/s]
122
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/15 [00:07<00:08, 1.07s/it]
123
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/15 [00:08<00:07, 1.03s/it]
124
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/15 [00:08<00:05, 1.17it/s]
125
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/15 [00:09<00:04, 1.06it/s]
126
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 11/15 [00:10<00:03, 1.06it/s]
127
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 12/15 [00:11<00:02, 1.05it/s]
128
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/15 [00:12<00:01, 1.06it/s]
129
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 14/15 [00:13<00:00, 1.06it/s]
130
+ Proceeding 3-length images samples | Num: 51
131
+ Proceeding 3-length images samples | Num: 51
132
+
133
  0%| | 0/15 [00:00<?, ?it/s]
134
  7%|β–‹ | 1/15 [00:01<00:17, 1.26s/it]
135
  13%|β–ˆβ–Ž | 2/15 [00:02<00:16, 1.25s/it]
136
  20%|β–ˆβ–ˆ | 3/15 [00:03<00:12, 1.06s/it]
137
  27%|β–ˆβ–ˆβ–‹ | 4/15 [00:04<00:11, 1.02s/it]
138
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 5/15 [00:05<00:10, 1.04s/it]
139
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 6/15 [00:06<00:09, 1.00s/it]
140
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/15 [00:07<00:07, 1.02it/s]
141
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/15 [00:08<00:06, 1.03it/s]
142
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/15 [00:09<00:05, 1.04it/s]
143
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/15 [00:10<00:04, 1.05it/s]
144
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 11/15 [00:11<00:03, 1.05it/s]
145
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 12/15 [00:11<00:02, 1.05it/s]
146
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/15 [00:12<00:01, 1.05it/s]
147
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 14/15 [00:13<00:00, 1.10it/s]
148
+ Proceeding 3-length images samples | Num: 51
149
+
150
  0%| | 0/16 [00:00<?, ?it/s]
151
  6%|β–‹ | 1/16 [00:01<00:19, 1.32s/it]
152
  12%|β–ˆβ–Ž | 2/16 [00:02<00:15, 1.10s/it]
153
  19%|β–ˆβ–‰ | 3/16 [00:03<00:12, 1.05it/s]
154
  25%|β–ˆβ–ˆβ–Œ | 4/16 [00:04<00:12, 1.03s/it]
155
  31%|β–ˆβ–ˆβ–ˆβ– | 5/16 [00:05<00:11, 1.02s/it]
156
  38%|β–ˆβ–ˆβ–ˆβ–Š | 6/16 [00:06<00:09, 1.00it/s]
157
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 7/16 [00:07<00:08, 1.02it/s]
158
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/16 [00:07<00:07, 1.10it/s]
159
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 9/16 [00:08<00:06, 1.08it/s]
160
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/16 [00:09<00:05, 1.07it/s]
161
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 11/16 [00:10<00:04, 1.07it/s]
162
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 12/16 [00:11<00:03, 1.06it/s]
163
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 13/16 [00:12<00:02, 1.05it/s]
164
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 14/16 [00:13<00:02, 1.03s/it]
165
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 15/16 [00:14<00:00, 1.10it/s]
166
+ Proceeding 3-length images samples | Num: 51
167
+
168
  0%| | 0/13 [00:00<?, ?it/s]
169
  8%|β–Š | 1/13 [00:01<00:13, 1.12s/it]
170
  15%|β–ˆβ–Œ | 2/13 [00:01<00:08, 1.28it/s]
171
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:08, 1.13it/s]
172
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:03<00:08, 1.09it/s]
173
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:04<00:07, 1.09it/s]
174
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:05<00:06, 1.08it/s]
175
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:06<00:05, 1.19it/s]
176
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:07<00:04, 1.16it/s]
177
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:07<00:03, 1.20it/s]
178
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:08<00:02, 1.17it/s]
179
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:09<00:01, 1.14it/s]
180
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:10<00:00, 1.13it/s]
181
+ Proceeding 2-length images samples | Num: 51
182
+ Proceeding 2-length images samples | Num: 51
183
+
184
  0%| | 0/12 [00:00<?, ?it/s]
185
  8%|β–Š | 1/12 [00:00<00:09, 1.15it/s]
186
  17%|β–ˆβ–‹ | 2/12 [00:01<00:09, 1.10it/s]
187
  25%|β–ˆβ–ˆβ–Œ | 3/12 [00:02<00:07, 1.19it/s]
188
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 4/12 [00:03<00:07, 1.14it/s]
189
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [00:04<00:06, 1.12it/s]
190
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/12 [00:05<00:05, 1.14it/s]
191
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [00:05<00:04, 1.22it/s]
192
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/12 [00:06<00:02, 1.35it/s]
193
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 9/12 [00:07<00:02, 1.24it/s]
194
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/12 [00:08<00:01, 1.19it/s]
195
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 11/12 [00:09<00:00, 1.15it/s]
196
+ Proceeding 2-length images samples | Num: 51
197
+
198
  0%| | 0/13 [00:00<?, ?it/s]
199
  8%|β–Š | 1/13 [00:01<00:14, 1.22s/it]
200
  15%|β–ˆβ–Œ | 2/13 [00:01<00:10, 1.05it/s]
201
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:09, 1.07it/s]
202
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:03<00:08, 1.07it/s]
203
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:04<00:07, 1.07it/s]
204
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:05<00:06, 1.16it/s]
205
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:05<00:04, 1.37it/s]
206
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:06<00:03, 1.26it/s]
207
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:07<00:03, 1.19it/s]
208
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:08<00:02, 1.25it/s]
209
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:09<00:01, 1.21it/s]
210
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:10<00:00, 1.17it/s]
211
+ Proceeding 2-length images samples | Num: 51
212
+
213
  0%| | 0/13 [00:00<?, ?it/s]
214
  8%|β–Š | 1/13 [00:01<00:14, 1.19s/it]
215
  15%|β–ˆβ–Œ | 2/13 [00:01<00:08, 1.33it/s]
216
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:08, 1.23it/s]
217
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:02<00:05, 1.56it/s]
218
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:03<00:05, 1.43it/s]
219
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:04<00:04, 1.64it/s]
220
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:05<00:04, 1.42it/s]
221
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:05<00:03, 1.30it/s]
222
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:06<00:03, 1.23it/s]
223
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:07<00:02, 1.36it/s]
224
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:07<00:01, 1.57it/s]
225
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:08<00:00, 1.38it/s]
226
+
227
  0%| | 0/12 [00:00<?, ?it/s]
228
  8%|β–Š | 1/12 [00:00<00:07, 1.45it/s]
229
  17%|β–ˆβ–‹ | 2/12 [00:01<00:08, 1.23it/s]
230
  25%|β–ˆβ–ˆβ–Œ | 3/12 [00:02<00:07, 1.17it/s]
231
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 4/12 [00:03<00:06, 1.15it/s]
232
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [00:03<00:05, 1.36it/s]
233
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/12 [00:04<00:04, 1.36it/s]
234
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [00:05<00:03, 1.27it/s]
235
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/12 [00:05<00:02, 1.49it/s]
236
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 9/12 [00:06<00:02, 1.35it/s]
237
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/12 [00:07<00:01, 1.41it/s]
238
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 11/12 [00:08<00:00, 1.52it/s]
239
+
240
  0%| | 0/13 [00:00<?, ?it/s]
241
  8%|β–Š | 1/13 [00:01<00:14, 1.20s/it]
242
  15%|β–ˆβ–Œ | 2/13 [00:01<00:09, 1.21it/s]
243
  23%|β–ˆβ–ˆβ–Ž | 3/13 [00:02<00:08, 1.16it/s]
244
  31%|β–ˆβ–ˆβ–ˆ | 4/13 [00:03<00:07, 1.14it/s]
245
  38%|β–ˆβ–ˆβ–ˆβ–Š | 5/13 [00:04<00:05, 1.39it/s]
246
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/13 [00:04<00:04, 1.49it/s]
247
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 7/13 [00:05<00:04, 1.34it/s]
248
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 8/13 [00:06<00:03, 1.25it/s]
249
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/13 [00:07<00:03, 1.33it/s]
250
  77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 10/13 [00:07<00:02, 1.43it/s]
251
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/13 [00:08<00:01, 1.31it/s]
252
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 12/13 [00:08<00:00, 1.57it/s]
253
+ evaluating ALFRED ...
254
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/ALFRED/ALFRED_240803234459.json
255
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset ALFRED --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/ALFRED
256
+ internvl: ALFRED: {'Rouge-L f': 0.2537993395427049, 'image_quantity_level-Accuracy': {'Few': 0.2537993395427049, 'Medium': 0, 'Many': 0}, 'image_quantity_level-Result': {'Few': [50.759867908540976, 200], 'Medium': [0, 0], 'Many': [0, 0]}}
eval_milebench/ALFRED/ALFRED_240803234459.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ALFRED/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Rouge-L f": 0.2537993395427049, "image_quantity_level-Accuracy": {"Few": 0.2537993395427049, "Medium": 0, "Many": 0}, "image_quantity_level-Result": {"Few": [50.759867908540976, 200], "Medium": [0, 0], "Many": [0, 0]}}
eval_milebench/ALFRED/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "0.129"
5
+ },
6
+ {
7
+ "id": "6",
8
+ "score": "0.364"
9
+ },
10
+ {
11
+ "id": "11",
12
+ "score": "0.286"
13
+ },
14
+ {
15
+ "id": "26",
16
+ "score": "0.2"
17
+ },
18
+ {
19
+ "id": "29",
20
+ "score": "0.273"
21
+ },
22
+ {
23
+ "id": "46",
24
+ "score": "0.194"
25
+ },
26
+ {
27
+ "id": "51",
28
+ "score": "0.062"
29
+ },
30
+ {
31
+ "id": "54",
32
+ "score": "0.25"
33
+ },
34
+ {
35
+ "id": "56",
36
+ "score": "0.333"
37
+ },
38
+ {
39
+ "id": "1",
40
+ "score": "0.19"
41
+ },
42
+ {
43
+ "id": "4",
44
+ "score": "0.261"
45
+ },
46
+ {
47
+ "id": "13",
48
+ "score": "0.091"
49
+ },
50
+ {
51
+ "id": "14",
52
+ "score": "0.2"
53
+ },
54
+ {
55
+ "id": "18",
56
+ "score": "0.069"
57
+ },
58
+ {
59
+ "id": "19",
60
+ "score": "0.296"
61
+ },
62
+ {
63
+ "id": "23",
64
+ "score": "0.207"
65
+ },
66
+ {
67
+ "id": "24",
68
+ "score": "0.16"
69
+ },
70
+ {
71
+ "id": "25",
72
+ "score": "0.296"
73
+ },
74
+ {
75
+ "id": "27",
76
+ "score": "0.148"
77
+ },
78
+ {
79
+ "id": "28",
80
+ "score": "0.205"
81
+ },
82
+ {
83
+ "id": "31",
84
+ "score": "0.08"
85
+ },
86
+ {
87
+ "id": "33",
88
+ "score": "0.174"
89
+ },
90
+ {
91
+ "id": "34",
92
+ "score": "0.345"
93
+ },
94
+ {
95
+ "id": "39",
96
+ "score": "0.375"
97
+ },
98
+ {
99
+ "id": "43",
100
+ "score": "0.286"
101
+ },
102
+ {
103
+ "id": "2",
104
+ "score": "0.286"
105
+ },
106
+ {
107
+ "id": "7",
108
+ "score": "0.333"
109
+ },
110
+ {
111
+ "id": "8",
112
+ "score": "0.625"
113
+ },
114
+ {
115
+ "id": "9",
116
+ "score": "0.5"
117
+ },
118
+ {
119
+ "id": "10",
120
+ "score": "0.25"
121
+ },
122
+ {
123
+ "id": "12",
124
+ "score": "0.387"
125
+ },
126
+ {
127
+ "id": "15",
128
+ "score": "0.385"
129
+ },
130
+ {
131
+ "id": "21",
132
+ "score": "0.593"
133
+ },
134
+ {
135
+ "id": "22",
136
+ "score": "0.187"
137
+ },
138
+ {
139
+ "id": "35",
140
+ "score": "0.357"
141
+ },
142
+ {
143
+ "id": "40",
144
+ "score": "0.286"
145
+ },
146
+ {
147
+ "id": "41",
148
+ "score": "0.444"
149
+ },
150
+ {
151
+ "id": "42",
152
+ "score": "0.276"
153
+ },
154
+ {
155
+ "id": "3",
156
+ "score": "0.231"
157
+ },
158
+ {
159
+ "id": "5",
160
+ "score": "0.24"
161
+ },
162
+ {
163
+ "id": "16",
164
+ "score": "0.2"
165
+ },
166
+ {
167
+ "id": "17",
168
+ "score": "0.385"
169
+ },
170
+ {
171
+ "id": "20",
172
+ "score": "0.174"
173
+ },
174
+ {
175
+ "id": "30",
176
+ "score": "0.308"
177
+ },
178
+ {
179
+ "id": "32",
180
+ "score": "0.148"
181
+ },
182
+ {
183
+ "id": "36",
184
+ "score": "0.0"
185
+ },
186
+ {
187
+ "id": "37",
188
+ "score": "0.267"
189
+ },
190
+ {
191
+ "id": "38",
192
+ "score": "0.222"
193
+ },
194
+ {
195
+ "id": "44",
196
+ "score": "0.095"
197
+ },
198
+ {
199
+ "id": "47",
200
+ "score": "0.133"
201
+ },
202
+ {
203
+ "id": "57",
204
+ "score": "0.182"
205
+ },
206
+ {
207
+ "id": "58",
208
+ "score": "0.429"
209
+ },
210
+ {
211
+ "id": "62",
212
+ "score": "0.133"
213
+ },
214
+ {
215
+ "id": "63",
216
+ "score": "0.214"
217
+ },
218
+ {
219
+ "id": "66",
220
+ "score": "0.2"
221
+ },
222
+ {
223
+ "id": "71",
224
+ "score": "0.5"
225
+ },
226
+ {
227
+ "id": "79",
228
+ "score": "0.133"
229
+ },
230
+ {
231
+ "id": "80",
232
+ "score": "0.143"
233
+ },
234
+ {
235
+ "id": "83",
236
+ "score": "0.424"
237
+ },
238
+ {
239
+ "id": "85",
240
+ "score": "0.412"
241
+ },
242
+ {
243
+ "id": "45",
244
+ "score": "0.194"
245
+ },
246
+ {
247
+ "id": "48",
248
+ "score": "0.083"
249
+ },
250
+ {
251
+ "id": "52",
252
+ "score": "0.214"
253
+ },
254
+ {
255
+ "id": "53",
256
+ "score": "0.211"
257
+ },
258
+ {
259
+ "id": "55",
260
+ "score": "0.235"
261
+ },
262
+ {
263
+ "id": "61",
264
+ "score": "0.074"
265
+ },
266
+ {
267
+ "id": "74",
268
+ "score": "0.067"
269
+ },
270
+ {
271
+ "id": "75",
272
+ "score": "0.154"
273
+ },
274
+ {
275
+ "id": "84",
276
+ "score": "0.16"
277
+ },
278
+ {
279
+ "id": "86",
280
+ "score": "0.357"
281
+ },
282
+ {
283
+ "id": "87",
284
+ "score": "0.176"
285
+ },
286
+ {
287
+ "id": "91",
288
+ "score": "0.19"
289
+ },
290
+ {
291
+ "id": "92",
292
+ "score": "0.235"
293
+ },
294
+ {
295
+ "id": "94",
296
+ "score": "0.08"
297
+ },
298
+ {
299
+ "id": "96",
300
+ "score": "0.25"
301
+ },
302
+ {
303
+ "id": "100",
304
+ "score": "0.133"
305
+ },
306
+ {
307
+ "id": "49",
308
+ "score": "0.348"
309
+ },
310
+ {
311
+ "id": "50",
312
+ "score": "0.375"
313
+ },
314
+ {
315
+ "id": "67",
316
+ "score": "0.6"
317
+ },
318
+ {
319
+ "id": "68",
320
+ "score": "0.258"
321
+ },
322
+ {
323
+ "id": "69",
324
+ "score": "0.455"
325
+ },
326
+ {
327
+ "id": "81",
328
+ "score": "0.467"
329
+ },
330
+ {
331
+ "id": "88",
332
+ "score": "0.211"
333
+ },
334
+ {
335
+ "id": "89",
336
+ "score": "0.414"
337
+ },
338
+ {
339
+ "id": "93",
340
+ "score": "0.214"
341
+ },
342
+ {
343
+ "id": "97",
344
+ "score": "0.083"
345
+ },
346
+ {
347
+ "id": "101",
348
+ "score": "0.4"
349
+ },
350
+ {
351
+ "id": "106",
352
+ "score": "0.182"
353
+ },
354
+ {
355
+ "id": "108",
356
+ "score": "0.385"
357
+ },
358
+ {
359
+ "id": "59",
360
+ "score": "0.148"
361
+ },
362
+ {
363
+ "id": "60",
364
+ "score": "0.476"
365
+ },
366
+ {
367
+ "id": "64",
368
+ "score": "0.08"
369
+ },
370
+ {
371
+ "id": "65",
372
+ "score": "0.125"
373
+ },
374
+ {
375
+ "id": "70",
376
+ "score": "0.118"
377
+ },
378
+ {
379
+ "id": "72",
380
+ "score": "0.148"
381
+ },
382
+ {
383
+ "id": "73",
384
+ "score": "0.167"
385
+ },
386
+ {
387
+ "id": "76",
388
+ "score": "0.133"
389
+ },
390
+ {
391
+ "id": "77",
392
+ "score": "0.2"
393
+ },
394
+ {
395
+ "id": "78",
396
+ "score": "0.471"
397
+ },
398
+ {
399
+ "id": "82",
400
+ "score": "0.171"
401
+ },
402
+ {
403
+ "id": "90",
404
+ "score": "0.308"
405
+ },
406
+ {
407
+ "id": "95",
408
+ "score": "0.0"
409
+ },
410
+ {
411
+ "id": "98",
412
+ "score": "0.133"
413
+ },
414
+ {
415
+ "id": "105",
416
+ "score": "0.333"
417
+ },
418
+ {
419
+ "id": "119",
420
+ "score": "0.167"
421
+ },
422
+ {
423
+ "id": "127",
424
+ "score": "0.385"
425
+ },
426
+ {
427
+ "id": "130",
428
+ "score": "0.111"
429
+ },
430
+ {
431
+ "id": "131",
432
+ "score": "0.333"
433
+ },
434
+ {
435
+ "id": "135",
436
+ "score": "0.519"
437
+ },
438
+ {
439
+ "id": "154",
440
+ "score": "0.2"
441
+ },
442
+ {
443
+ "id": "155",
444
+ "score": "0.387"
445
+ },
446
+ {
447
+ "id": "103",
448
+ "score": "0.057"
449
+ },
450
+ {
451
+ "id": "110",
452
+ "score": "0.148"
453
+ },
454
+ {
455
+ "id": "112",
456
+ "score": "0.08"
457
+ },
458
+ {
459
+ "id": "115",
460
+ "score": "0.16"
461
+ },
462
+ {
463
+ "id": "117",
464
+ "score": "0.091"
465
+ },
466
+ {
467
+ "id": "120",
468
+ "score": "0.138"
469
+ },
470
+ {
471
+ "id": "122",
472
+ "score": "0.091"
473
+ },
474
+ {
475
+ "id": "123",
476
+ "score": "0.074"
477
+ },
478
+ {
479
+ "id": "126",
480
+ "score": "0.222"
481
+ },
482
+ {
483
+ "id": "137",
484
+ "score": "0.194"
485
+ },
486
+ {
487
+ "id": "139",
488
+ "score": "0.25"
489
+ },
490
+ {
491
+ "id": "143",
492
+ "score": "0.167"
493
+ },
494
+ {
495
+ "id": "147",
496
+ "score": "0.148"
497
+ },
498
+ {
499
+ "id": "148",
500
+ "score": "0.069"
501
+ },
502
+ {
503
+ "id": "149",
504
+ "score": "0.174"
505
+ },
506
+ {
507
+ "id": "114",
508
+ "score": "0.32"
509
+ },
510
+ {
511
+ "id": "118",
512
+ "score": "0.4"
513
+ },
514
+ {
515
+ "id": "125",
516
+ "score": "0.286"
517
+ },
518
+ {
519
+ "id": "128",
520
+ "score": "0.385"
521
+ },
522
+ {
523
+ "id": "132",
524
+ "score": "0.4"
525
+ },
526
+ {
527
+ "id": "136",
528
+ "score": "0.258"
529
+ },
530
+ {
531
+ "id": "141",
532
+ "score": "0.696"
533
+ },
534
+ {
535
+ "id": "145",
536
+ "score": "0.333"
537
+ },
538
+ {
539
+ "id": "146",
540
+ "score": "0.348"
541
+ },
542
+ {
543
+ "id": "150",
544
+ "score": "0.286"
545
+ },
546
+ {
547
+ "id": "152",
548
+ "score": "0.364"
549
+ },
550
+ {
551
+ "id": "153",
552
+ "score": "0.308"
553
+ },
554
+ {
555
+ "id": "158",
556
+ "score": "0.533"
557
+ },
558
+ {
559
+ "id": "99",
560
+ "score": "0.303"
561
+ },
562
+ {
563
+ "id": "102",
564
+ "score": "0.333"
565
+ },
566
+ {
567
+ "id": "104",
568
+ "score": "0.16"
569
+ },
570
+ {
571
+ "id": "107",
572
+ "score": "0.286"
573
+ },
574
+ {
575
+ "id": "109",
576
+ "score": "0.19"
577
+ },
578
+ {
579
+ "id": "111",
580
+ "score": "0.353"
581
+ },
582
+ {
583
+ "id": "113",
584
+ "score": "0.182"
585
+ },
586
+ {
587
+ "id": "116",
588
+ "score": "0.08"
589
+ },
590
+ {
591
+ "id": "121",
592
+ "score": "0.182"
593
+ },
594
+ {
595
+ "id": "124",
596
+ "score": "0.19"
597
+ },
598
+ {
599
+ "id": "129",
600
+ "score": "0.0"
601
+ },
602
+ {
603
+ "id": "133",
604
+ "score": "0.4"
605
+ },
606
+ {
607
+ "id": "134",
608
+ "score": "0.0"
609
+ },
610
+ {
611
+ "id": "156",
612
+ "score": "0.24"
613
+ },
614
+ {
615
+ "id": "159",
616
+ "score": "0.545"
617
+ },
618
+ {
619
+ "id": "163",
620
+ "score": "0.545"
621
+ },
622
+ {
623
+ "id": "170",
624
+ "score": "0.48"
625
+ },
626
+ {
627
+ "id": "175",
628
+ "score": "0.556"
629
+ },
630
+ {
631
+ "id": "176",
632
+ "score": "0.455"
633
+ },
634
+ {
635
+ "id": "184",
636
+ "score": "0.353"
637
+ },
638
+ {
639
+ "id": "189",
640
+ "score": "0.195"
641
+ },
642
+ {
643
+ "id": "193",
644
+ "score": "0.348"
645
+ },
646
+ {
647
+ "id": "151",
648
+ "score": "0.231"
649
+ },
650
+ {
651
+ "id": "157",
652
+ "score": "0.222"
653
+ },
654
+ {
655
+ "id": "164",
656
+ "score": "0.143"
657
+ },
658
+ {
659
+ "id": "165",
660
+ "score": "0.071"
661
+ },
662
+ {
663
+ "id": "172",
664
+ "score": "0.1"
665
+ },
666
+ {
667
+ "id": "177",
668
+ "score": "0.129"
669
+ },
670
+ {
671
+ "id": "179",
672
+ "score": "0.24"
673
+ },
674
+ {
675
+ "id": "182",
676
+ "score": "0.167"
677
+ },
678
+ {
679
+ "id": "183",
680
+ "score": "0.095"
681
+ },
682
+ {
683
+ "id": "188",
684
+ "score": "0.182"
685
+ },
686
+ {
687
+ "id": "190",
688
+ "score": "0.077"
689
+ },
690
+ {
691
+ "id": "194",
692
+ "score": "0.324"
693
+ },
694
+ {
695
+ "id": "195",
696
+ "score": "0.16"
697
+ },
698
+ {
699
+ "id": "196",
700
+ "score": "0.111"
701
+ },
702
+ {
703
+ "id": "198",
704
+ "score": "0.267"
705
+ },
706
+ {
707
+ "id": "160",
708
+ "score": "0.476"
709
+ },
710
+ {
711
+ "id": "166",
712
+ "score": "0.348"
713
+ },
714
+ {
715
+ "id": "169",
716
+ "score": "0.444"
717
+ },
718
+ {
719
+ "id": "171",
720
+ "score": "0.48"
721
+ },
722
+ {
723
+ "id": "173",
724
+ "score": "0.267"
725
+ },
726
+ {
727
+ "id": "174",
728
+ "score": "0.37"
729
+ },
730
+ {
731
+ "id": "178",
732
+ "score": "0.455"
733
+ },
734
+ {
735
+ "id": "181",
736
+ "score": "0.333"
737
+ },
738
+ {
739
+ "id": "185",
740
+ "score": "0.222"
741
+ },
742
+ {
743
+ "id": "187",
744
+ "score": "0.16"
745
+ },
746
+ {
747
+ "id": "192",
748
+ "score": "0.462"
749
+ },
750
+ {
751
+ "id": "199",
752
+ "score": "0.375"
753
+ },
754
+ {
755
+ "id": "138",
756
+ "score": "0.125"
757
+ },
758
+ {
759
+ "id": "140",
760
+ "score": "0.231"
761
+ },
762
+ {
763
+ "id": "142",
764
+ "score": "0.167"
765
+ },
766
+ {
767
+ "id": "144",
768
+ "score": "0.414"
769
+ },
770
+ {
771
+ "id": "161",
772
+ "score": "0.111"
773
+ },
774
+ {
775
+ "id": "162",
776
+ "score": "0.16"
777
+ },
778
+ {
779
+ "id": "167",
780
+ "score": "0.133"
781
+ },
782
+ {
783
+ "id": "168",
784
+ "score": "0.235"
785
+ },
786
+ {
787
+ "id": "180",
788
+ "score": "0.296"
789
+ },
790
+ {
791
+ "id": "186",
792
+ "score": "0.4"
793
+ },
794
+ {
795
+ "id": "191",
796
+ "score": "0.19"
797
+ },
798
+ {
799
+ "id": "197",
800
+ "score": "0.158"
801
+ }
802
+ ]
eval_milebench/ALFRED/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionLocalization.log ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/10 [00:00<?, ?it/s]
1
  10%|β–ˆ | 1/10 [00:04<00:43, 4.83s/it]
2
  20%|β–ˆβ–ˆ | 2/10 [00:06<00:25, 3.24s/it]
3
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:07<00:15, 2.19s/it]
4
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:10<00:13, 2.27s/it]
5
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:13<00:13, 2.73s/it]
6
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:17<00:11, 2.99s/it]
7
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:19<00:08, 2.73s/it]
8
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:21<00:04, 2.39s/it]
9
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:22<00:01, 1.99s/it]
 
 
 
10
  0%| | 0/10 [00:00<?, ?it/s]
11
  10%|β–ˆ | 1/10 [00:04<00:43, 4.85s/it]
12
  20%|β–ˆβ–ˆ | 2/10 [00:07<00:28, 3.56s/it]
13
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:09<00:21, 3.06s/it]
14
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:13<00:18, 3.10s/it]
15
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:14<00:11, 2.35s/it]
16
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:17<00:10, 2.59s/it]
17
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:19<00:07, 2.58s/it]
18
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:21<00:04, 2.31s/it]
19
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:22<00:01, 1.98s/it]
 
 
20
  0%| | 0/10 [00:00<?, ?it/s]
21
  10%|β–ˆ | 1/10 [00:04<00:44, 4.94s/it]
22
  20%|β–ˆβ–ˆ | 2/10 [00:07<00:28, 3.60s/it]
23
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:10<00:21, 3.12s/it]
24
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:13<00:19, 3.30s/it]
25
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:16<00:16, 3.25s/it]
26
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:17<00:09, 2.43s/it]
27
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:19<00:07, 2.35s/it]
28
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:21<00:04, 2.15s/it]
29
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:23<00:01, 1.98s/it]
 
 
30
  0%| | 0/6 [00:00<?, ?it/s]
31
  17%|β–ˆβ–‹ | 1/6 [00:03<00:16, 3.27s/it]
32
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:06<00:14, 3.51s/it]
33
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:08<00:08, 2.81s/it]
34
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:09<00:04, 2.00s/it]
35
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:10<00:01, 1.73s/it]
 
 
 
36
  0%| | 0/6 [00:00<?, ?it/s]
37
  17%|β–ˆβ–‹ | 1/6 [00:03<00:18, 3.65s/it]
38
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:07<00:14, 3.50s/it]
39
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:09<00:08, 2.86s/it]
40
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:10<00:04, 2.26s/it]
41
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:13<00:02, 2.36s/it]
 
 
42
  0%| | 0/6 [00:00<?, ?it/s]
43
  17%|β–ˆβ–‹ | 1/6 [00:03<00:18, 3.74s/it]
44
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:07<00:14, 3.53s/it]
45
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:09<00:08, 2.86s/it]
46
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:10<00:04, 2.27s/it]
47
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:13<00:02, 2.52s/it]
 
 
 
48
  0%| | 0/1 [00:00<?, ?it/s]
 
 
49
  0%| | 0/1 [00:00<?, ?it/s]
 
 
50
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
51
  0%| | 0/3 [00:00<?, ?it/s]
52
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.29s/it]
53
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.72s/it]
 
 
54
  0%| | 0/3 [00:00<?, ?it/s]
55
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.50s/it]
56
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.76s/it]
 
 
57
  0%| | 0/3 [00:00<?, ?it/s]
58
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.76s/it]
59
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:01, 1.94s/it]
 
 
60
  0%| | 0/1 [00:00<?, ?it/s]
 
 
61
  0%| | 0/1 [00:00<?, ?it/s]
 
 
62
  0%| | 0/1 [00:00<?, ?it/s]
 
 
63
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
64
  0%| | 0/1 [00:00<?, ?it/s]
 
 
65
  0%| | 0/1 [00:00<?, ?it/s]
 
 
66
  0%| | 0/2 [00:00<?, ?it/s]
67
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.88s/it]
 
 
 
 
 
68
  0%| | 0/2 [00:00<?, ?it/s]
69
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.64s/it]
 
 
 
 
70
  0%| | 0/3 [00:00<?, ?it/s]
71
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:07, 3.82s/it]
72
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.26s/it]
 
 
73
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
74
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
75
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
76
  0%| | 0/1 [00:00<?, ?it/s]
 
 
77
  0%| | 0/1 [00:00<?, ?it/s]
 
 
78
  0%| | 0/2 [00:00<?, ?it/s]
79
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.22s/it]
 
 
 
 
 
80
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
81
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
82
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
83
  0%| | 0/2 [00:00<?, ?it/s]
84
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.92s/it]
 
 
85
  0%| | 0/2 [00:00<?, ?it/s]
86
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.95s/it]
 
 
 
 
 
 
87
  0%| | 0/2 [00:00<?, ?it/s]
88
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.50s/it]
 
 
 
 
 
 
 
 
 
 
89
  0%| | 0/1 [00:00<?, ?it/s]
 
 
90
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
91
  0%| | 0/4 [00:00<?, ?it/s]
92
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:01<00:03, 1.21s/it]
93
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:05<00:05, 2.91s/it]
94
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:01, 1.96s/it]
 
 
95
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
96
  0%| | 0/2 [00:00<?, ?it/s]
97
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.63s/it]
 
 
98
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
99
  0%| | 0/2 [00:00<?, ?it/s]
100
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.64s/it]
 
 
 
 
 
101
  0%| | 0/2 [00:00<?, ?it/s]
102
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.84s/it]
 
 
103
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
104
  0%| | 0/4 [00:00<?, ?it/s]
105
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:03<00:09, 3.22s/it]
106
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:04<00:03, 1.82s/it]
107
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:02, 2.08s/it]
 
 
108
  0%| | 0/1 [00:00<?, ?it/s]
 
 
109
  0%| | 0/2 [00:00<?, ?it/s]
110
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 1.04it/s]
 
 
111
  0%| | 0/4 [00:00<?, ?it/s]
112
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:01<00:04, 1.59s/it]
113
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:04<00:05, 2.55s/it]
114
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:05<00:01, 1.78s/it]
 
 
 
 
 
 
115
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
116
  0%| | 0/1 [00:00<?, ?it/s]
 
 
117
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
118
  0%| | 0/1 [00:00<?, ?it/s]
 
 
119
  0%| | 0/2 [00:00<?, ?it/s]
120
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.80s/it]
 
 
121
  0%| | 0/1 [00:00<?, ?it/s]
 
 
122
  0%| | 0/1 [00:00<?, ?it/s]
 
 
123
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
124
  0%| | 0/2 [00:00<?, ?it/s]
125
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.12s/it]
 
 
 
 
 
126
  0%| | 0/1 [00:00<?, ?it/s]
 
 
127
  0%| | 0/3 [00:00<?, ?it/s]
128
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:02, 1.17s/it]
129
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:02<00:01, 1.09s/it]
 
 
 
 
 
 
 
130
  0%| | 0/2 [00:00<?, ?it/s]
131
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.09s/it]
 
 
132
  0%| | 0/2 [00:00<?, ?it/s]
133
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.25s/it]
 
 
 
 
134
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
2
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
3
+ language_model.model.layers.0 4
4
+ language_model.model.layers.1 4
5
+ language_model.model.layers.2 4
6
+ language_model.model.layers.3 4
7
+ language_model.model.layers.4 4
8
+ language_model.model.layers.5 4
9
+ language_model.model.layers.6 4
10
+ language_model.model.layers.7 4
11
+ language_model.model.layers.8 4
12
+ language_model.model.layers.9 4
13
+ language_model.model.layers.10 4
14
+ language_model.model.layers.11 4
15
+ language_model.model.layers.12 4
16
+ language_model.model.layers.13 4
17
+ language_model.model.layers.14 4
18
+ language_model.model.layers.15 4
19
+ language_model.model.layers.16 4
20
+ language_model.model.layers.17 4
21
+ language_model.model.layers.18 4
22
+ language_model.model.layers.19 4
23
+ language_model.model.layers.20 4
24
+ language_model.model.layers.21 4
25
+ language_model.model.layers.22 4
26
+ language_model.model.layers.23 4
27
+ vision_model.encoder.layers.0 0
28
+ vision_model.encoder.layers.1 0
29
+ vision_model.encoder.layers.2 0
30
+ vision_model.encoder.layers.3 0
31
+ vision_model.encoder.layers.4 0
32
+ vision_model.encoder.layers.5 0
33
+ vision_model.encoder.layers.6 0
34
+ vision_model.encoder.layers.7 0
35
+ vision_model.encoder.layers.8 0
36
+ vision_model.encoder.layers.9 0
37
+ vision_model.encoder.layers.10 0
38
+ vision_model.encoder.layers.11 0
39
+ vision_model.encoder.layers.12 0
40
+ vision_model.encoder.layers.13 0
41
+ vision_model.encoder.layers.14 0
42
+ vision_model.encoder.layers.15 0
43
+ vision_model.encoder.layers.16 0
44
+ vision_model.encoder.layers.17 0
45
+ vision_model.encoder.layers.18 0
46
+ vision_model.encoder.layers.19 0
47
+ vision_model.encoder.layers.20 0
48
+ vision_model.encoder.layers.21 0
49
+ vision_model.encoder.layers.22 0
50
+ vision_model.encoder.layers.23 0
51
+ vision_model.embeddings 0
52
+ mlp1 0
53
+ language_model.model.tok_embeddings 4
54
+ language_model.model.norm 4
55
+ language_model.output 4
56
+ language_model.model.embed_tokens 4
57
+ language_model.lm_head 4
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionLocalization, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
62
+ Initialization Finished
63
+ Predicting ActionLocalization Using internvl
64
+ Proceeding 30-length images samples | Num: 40
65
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
66
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionLocalization, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
67
+ Initialization Finished
68
+ Predicting ActionLocalization Using internvl
69
+ Proceeding 30-length images samples | Num: 40
70
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
71
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionLocalization, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
72
+ Initialization Finished
73
+ Predicting ActionLocalization Using internvl
74
+ Proceeding 30-length images samples | Num: 40
75
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
76
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionLocalization, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
77
+ Initialization Finished
78
+ Predicting ActionLocalization Using internvl
79
+ Proceeding 30-length images samples | Num: 40
80
+
81
  0%| | 0/10 [00:00<?, ?it/s]
82
  10%|β–ˆ | 1/10 [00:04<00:43, 4.83s/it]
83
  20%|β–ˆβ–ˆ | 2/10 [00:06<00:25, 3.24s/it]
84
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:07<00:15, 2.19s/it]
85
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:10<00:13, 2.27s/it]
86
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:13<00:13, 2.73s/it]
87
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:17<00:11, 2.99s/it]
88
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:19<00:08, 2.73s/it]
89
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:21<00:04, 2.39s/it]
90
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:22<00:01, 1.99s/it]
91
+ Proceeding 31-length images samples | Num: 24
92
+ Proceeding 31-length images samples | Num: 24
93
+
94
  0%| | 0/10 [00:00<?, ?it/s]
95
  10%|β–ˆ | 1/10 [00:04<00:43, 4.85s/it]
96
  20%|β–ˆβ–ˆ | 2/10 [00:07<00:28, 3.56s/it]
97
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:09<00:21, 3.06s/it]
98
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:13<00:18, 3.10s/it]
99
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:14<00:11, 2.35s/it]
100
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:17<00:10, 2.59s/it]
101
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:19<00:07, 2.58s/it]
102
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:21<00:04, 2.31s/it]
103
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:22<00:01, 1.98s/it]
104
+ Proceeding 31-length images samples | Num: 24
105
+
106
  0%| | 0/10 [00:00<?, ?it/s]
107
  10%|β–ˆ | 1/10 [00:04<00:44, 4.94s/it]
108
  20%|β–ˆβ–ˆ | 2/10 [00:07<00:28, 3.60s/it]
109
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:10<00:21, 3.12s/it]
110
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:13<00:19, 3.30s/it]
111
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:16<00:16, 3.25s/it]
112
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:17<00:09, 2.43s/it]
113
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:19<00:07, 2.35s/it]
114
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:21<00:04, 2.15s/it]
115
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:23<00:01, 1.98s/it]
116
+ Proceeding 31-length images samples | Num: 24
117
+
118
  0%| | 0/6 [00:00<?, ?it/s]
119
  17%|β–ˆβ–‹ | 1/6 [00:03<00:16, 3.27s/it]
120
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:06<00:14, 3.51s/it]
121
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:08<00:08, 2.81s/it]
122
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:09<00:04, 2.00s/it]
123
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:10<00:01, 1.73s/it]
124
+ Proceeding 42-length images samples | Num: 4
125
+ Proceeding 42-length images samples | Num: 4
126
+
127
  0%| | 0/6 [00:00<?, ?it/s]
128
  17%|β–ˆβ–‹ | 1/6 [00:03<00:18, 3.65s/it]
129
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:07<00:14, 3.50s/it]
130
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:09<00:08, 2.86s/it]
131
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:10<00:04, 2.26s/it]
132
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:13<00:02, 2.36s/it]
133
+ Proceeding 42-length images samples | Num: 4
134
+
135
  0%| | 0/6 [00:00<?, ?it/s]
136
  17%|β–ˆβ–‹ | 1/6 [00:03<00:18, 3.74s/it]
137
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:07<00:14, 3.53s/it]
138
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:09<00:08, 2.86s/it]
139
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:10<00:04, 2.27s/it]
140
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:13<00:02, 2.52s/it]
141
+ Proceeding 42-length images samples | Num: 4
142
+ Proceeding 33-length images samples | Num: 12
143
+
144
  0%| | 0/1 [00:00<?, ?it/s]
145
+ Proceeding 33-length images samples | Num: 12
146
+
147
  0%| | 0/1 [00:00<?, ?it/s]
148
+ Proceeding 33-length images samples | Num: 12
149
+
150
  0%| | 0/1 [00:00<?, ?it/s]
151
+ Proceeding 33-length images samples | Num: 12
152
+ Proceeding 41-length images samples | Num: 5
153
+
154
  0%| | 0/3 [00:00<?, ?it/s]
155
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.29s/it]
156
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.72s/it]
157
+ Proceeding 41-length images samples | Num: 5
158
+
159
  0%| | 0/3 [00:00<?, ?it/s]
160
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.50s/it]
161
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.76s/it]
162
+ Proceeding 41-length images samples | Num: 5
163
+
164
  0%| | 0/3 [00:00<?, ?it/s]
165
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.76s/it]
166
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:01, 1.94s/it]
167
+ Proceeding 41-length images samples | Num: 5
168
+
169
  0%| | 0/1 [00:00<?, ?it/s]
170
+ Proceeding 37-length images samples | Num: 5
171
+
172
  0%| | 0/1 [00:00<?, ?it/s]
173
+ Proceeding 37-length images samples | Num: 5
174
+
175
  0%| | 0/1 [00:00<?, ?it/s]
176
+ Proceeding 37-length images samples | Num: 5
177
+
178
  0%| | 0/1 [00:00<?, ?it/s]
179
+ Proceeding 35-length images samples | Num: 10
180
+ Proceeding 37-length images samples | Num: 5
181
+
182
  0%| | 0/1 [00:00<?, ?it/s]
183
+ Proceeding 35-length images samples | Num: 10
184
+
185
  0%| | 0/1 [00:00<?, ?it/s]
186
+ Proceeding 35-length images samples | Num: 10
187
+
188
  0%| | 0/2 [00:00<?, ?it/s]
189
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.88s/it]
190
+ Proceeding 58-length images samples | Num: 2
191
+ Proceeding 35-length images samples | Num: 10
192
+
193
+ Proceeding 39-length images samples | Num: 5
194
+
195
  0%| | 0/2 [00:00<?, ?it/s]
196
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.64s/it]
197
+ Proceeding 58-length images samples | Num: 2
198
+
199
+ Proceeding 39-length images samples | Num: 5
200
+
201
  0%| | 0/3 [00:00<?, ?it/s]
202
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:07, 3.82s/it]
203
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.26s/it]
204
+ Proceeding 58-length images samples | Num: 2
205
+
206
  0%| | 0/1 [00:00<?, ?it/s]
207
+ Proceeding 40-length images samples | Num: 3
208
+
209
+ Proceeding 43-length images samples | Num: 3
210
+
211
  0%| | 0/1 [00:00<?, ?it/s]
212
+ Proceeding 40-length images samples | Num: 3
213
+
214
+ Proceeding 36-length images samples | Num: 9
215
+
216
  0%| | 0/1 [00:00<?, ?it/s]
217
+ Proceeding 39-length images samples | Num: 5
218
+ Proceeding 58-length images samples | Num: 2
219
+
220
  0%| | 0/1 [00:00<?, ?it/s]
221
+ Proceeding 40-length images samples | Num: 3
222
+
223
  0%| | 0/1 [00:00<?, ?it/s]
224
+ Proceeding 43-length images samples | Num: 3
225
+
226
  0%| | 0/2 [00:00<?, ?it/s]
227
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.22s/it]
228
+ Proceeding 48-length images samples | Num: 1
229
+ Proceeding 39-length images samples | Num: 5
230
+
231
+ Proceeding 44-length images samples | Num: 2
232
+
233
  0%| | 0/1 [00:00<?, ?it/s]
234
+ Proceeding 43-length images samples | Num: 3
235
+
236
+ Proceeding 46-length images samples | Num: 1
237
+
238
+ Proceeding 38-length images samples | Num: 3
239
+
240
  0%| | 0/1 [00:00<?, ?it/s]
241
+ Proceeding 36-length images samples | Num: 9
242
+
243
+ Proceeding 34-length images samples | Num: 9
244
+
245
  0%| | 0/1 [00:00<?, ?it/s]
246
+ Proceeding 36-length images samples | Num: 9
247
+ Proceeding 40-length images samples | Num: 3
248
+
249
  0%| | 0/2 [00:00<?, ?it/s]
250
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.92s/it]
251
+ Proceeding 48-length images samples | Num: 1
252
+
253
  0%| | 0/2 [00:00<?, ?it/s]
254
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.95s/it]
255
+ Proceeding 50-length images samples | Num: 1
256
+
257
+ Proceeding 44-length images samples | Num: 2
258
+
259
+ Proceeding 32-length images samples | Num: 17
260
+
261
  0%| | 0/2 [00:00<?, ?it/s]
262
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.50s/it]
263
+ Proceeding 48-length images samples | Num: 1
264
+
265
+ Proceeding 46-length images samples | Num: 1
266
+ Proceeding 43-length images samples | Num: 3
267
+
268
+ Proceeding 44-length images samples | Num: 2
269
+
270
+ Proceeding 38-length images samples | Num: 3
271
+ Proceeding 36-length images samples | Num: 9
272
+
273
  0%| | 0/1 [00:00<?, ?it/s]
274
+ Proceeding 34-length images samples | Num: 9
275
+
276
  0%| | 0/1 [00:00<?, ?it/s]
277
+ Proceeding 46-length images samples | Num: 1
278
+
279
+ Proceeding 38-length images samples | Num: 3
280
+
281
  0%| | 0/4 [00:00<?, ?it/s]
282
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:01<00:03, 1.21s/it]
283
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:05<00:05, 2.91s/it]
284
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:01, 1.96s/it]
285
+ Proceeding 26-length images samples | Num: 3
286
+
287
  0%| | 0/1 [00:00<?, ?it/s]
288
+ Proceeding 34-length images samples | Num: 9
289
+
290
+ Proceeding 27-length images samples | Num: 4
291
+
292
  0%| | 0/2 [00:00<?, ?it/s]
293
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.63s/it]
294
+ Proceeding 50-length images samples | Num: 1
295
+
296
  0%| | 0/1 [00:00<?, ?it/s]
297
+ Proceeding 20-length images samples | Num: 2
298
+ Proceeding 48-length images samples | Num: 1
299
+
300
+ Proceeding 32-length images samples | Num: 17
301
+
302
+ Proceeding 45-length images samples | Num: 2
303
+
304
+ Proceeding 29-length images samples | Num: 10
305
+
306
  0%| | 0/2 [00:00<?, ?it/s]
307
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.64s/it]
308
+ Proceeding 50-length images samples | Num: 1
309
+
310
+ Proceeding 32-length images samples | Num: 17
311
+ Proceeding 44-length images samples | Num: 2
312
+
313
  0%| | 0/2 [00:00<?, ?it/s]
314
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.84s/it]
315
+ Proceeding 28-length images samples | Num: 6
316
+
317
  0%| | 0/1 [00:00<?, ?it/s]
318
+ Proceeding 23-length images samples | Num: 8
319
+ Proceeding 46-length images samples | Num: 1
320
+
321
  0%| | 0/4 [00:00<?, ?it/s]
322
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:03<00:09, 3.22s/it]
323
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:04<00:03, 1.82s/it]
324
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:02, 2.08s/it]
325
+ Proceeding 26-length images samples | Num: 3
326
+
327
  0%| | 0/1 [00:00<?, ?it/s]
328
+ Proceeding 27-length images samples | Num: 4
329
+
330
  0%| | 0/2 [00:00<?, ?it/s]
331
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:00<00:00, 1.04it/s]
332
+ Proceeding 22-length images samples | Num: 1
333
+
334
  0%| | 0/4 [00:00<?, ?it/s]
335
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:01<00:04, 1.59s/it]
336
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:04<00:05, 2.55s/it]
337
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:05<00:01, 1.78s/it]
338
+ Proceeding 26-length images samples | Num: 3
339
+
340
+ Proceeding 21-length images samples | Num: 5
341
+ Proceeding 38-length images samples | Num: 3
342
+ Proceeding 20-length images samples | Num: 2
343
+
344
  0%| | 0/1 [00:00<?, ?it/s]
345
+
346
+ Proceeding 45-length images samples | Num: 2
347
+
348
+ Proceeding 29-length images samples | Num: 10
349
+
350
  0%| | 0/1 [00:00<?, ?it/s]
351
+ Proceeding 19-length images samples | Num: 1
352
+
353
  0%| | 0/1 [00:00<?, ?it/s]
354
+ Proceeding 27-length images samples | Num: 4
355
+
356
+ Proceeding 17-length images samples | Num: 1
357
+ Proceeding 34-length images samples | Num: 9
358
+
359
+ Proceeding 24-length images samples | Num: 1
360
+
361
+
362
  0%| | 0/1 [00:00<?, ?it/s]
363
+ Proceeding 20-length images samples | Num: 2
364
+
365
  0%| | 0/2 [00:00<?, ?it/s]
366
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.80s/it]
367
+ Proceeding 28-length images samples | Num: 6
368
+
369
  0%| | 0/1 [00:00<?, ?it/s]
370
+ Proceeding 45-length images samples | Num: 2
371
+
372
  0%| | 0/1 [00:00<?, ?it/s]
373
+ Proceeding 23-length images samples | Num: 8
374
+
375
  0%| | 0/1 [00:00<?, ?it/s]
376
+ Proceeding 29-length images samples | Num: 10
377
+ Proceeding 50-length images samples | Num: 1
378
+
379
  0%| | 0/2 [00:00<?, ?it/s]
380
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.12s/it]
381
+ Proceeding 22-length images samples | Num: 1
382
+
383
+ Proceeding 21-length images samples | Num: 5
384
+ Proceeding 32-length images samples | Num: 17
385
+
386
  0%| | 0/1 [00:00<?, ?it/s]
387
+ Proceeding 19-length images samples | Num: 1
388
+
389
  0%| | 0/3 [00:00<?, ?it/s]
390
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:02, 1.17s/it]
391
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:02<00:01, 1.09s/it]
392
+ Proceeding 28-length images samples | Num: 6
393
+
394
+ Proceeding 17-length images samples | Num: 1
395
+
396
+ Proceeding 24-length images samples | Num: 1
397
+
398
+
399
  0%| | 0/2 [00:00<?, ?it/s]
400
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.09s/it]
401
+ Proceeding 23-length images samples | Num: 8
402
+
403
  0%| | 0/2 [00:00<?, ?it/s]
404
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.25s/it]
405
+ Proceeding 22-length images samples | Num: 1
406
+
407
+ Proceeding 21-length images samples | Num: 5
408
+
409
  0%| | 0/1 [00:00<?, ?it/s]
410
+ Proceeding 19-length images samples | Num: 1
411
+
412
+ Proceeding 17-length images samples | Num: 1
413
+ Proceeding 26-length images samples | Num: 3
414
+
415
+ Proceeding 24-length images samples | Num: 1
416
+
417
+ Proceeding 27-length images samples | Num: 4
418
+ Proceeding 20-length images samples | Num: 2
419
+ Proceeding 45-length images samples | Num: 2
420
+ Proceeding 29-length images samples | Num: 10
421
+ Proceeding 28-length images samples | Num: 6
422
+ Proceeding 23-length images samples | Num: 8
423
+ Proceeding 22-length images samples | Num: 1
424
+ Proceeding 21-length images samples | Num: 5
425
+ Proceeding 19-length images samples | Num: 1
426
+ Proceeding 17-length images samples | Num: 1
427
+ Proceeding 24-length images samples | Num: 1
428
+ evaluating ActionLocalization ...
429
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/ActionLocalization/ActionLocalization_240803234615.json
430
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset ActionLocalization --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/ActionLocalization
431
+ internvl: ActionLocalization: {'Accuracy': 0.255, 'image_quantity_level-Accuracy': {'Few': 0, 'Medium': 0.330188679245283, 'Many': 0.1702127659574468}, 'image_quantity_level-Result': {'Few': [0, 0], 'Medium': [35, 106], 'Many': [16, 94]}}
eval_milebench/ActionLocalization/ActionLocalization_240803233707.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionLocalization/ActionLocalization_240803234615.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionLocalization/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Accuracy": 0.255, "image_quantity_level-Accuracy": {"Few": 0, "Medium": 0.330188679245283, "Many": 0.1702127659574468}, "image_quantity_level-Result": {"Few": [0, 0], "Medium": [35, 106], "Many": [16, 94]}}
eval_milebench/ActionLocalization/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "0"
5
+ },
6
+ {
7
+ "id": "19",
8
+ "score": "0"
9
+ },
10
+ {
11
+ "id": "37",
12
+ "score": "0"
13
+ },
14
+ {
15
+ "id": "39",
16
+ "score": "0"
17
+ },
18
+ {
19
+ "id": "55",
20
+ "score": "0"
21
+ },
22
+ {
23
+ "id": "61",
24
+ "score": "0"
25
+ },
26
+ {
27
+ "id": "62",
28
+ "score": "0"
29
+ },
30
+ {
31
+ "id": "67",
32
+ "score": "0"
33
+ },
34
+ {
35
+ "id": "68",
36
+ "score": "0"
37
+ },
38
+ {
39
+ "id": "69",
40
+ "score": "0"
41
+ },
42
+ {
43
+ "id": "1",
44
+ "score": "0"
45
+ },
46
+ {
47
+ "id": "2",
48
+ "score": "0"
49
+ },
50
+ {
51
+ "id": "21",
52
+ "score": "0"
53
+ },
54
+ {
55
+ "id": "35",
56
+ "score": "0"
57
+ },
58
+ {
59
+ "id": "38",
60
+ "score": "0"
61
+ },
62
+ {
63
+ "id": "60",
64
+ "score": "0"
65
+ },
66
+ {
67
+ "id": "3",
68
+ "score": "0"
69
+ },
70
+ {
71
+ "id": "4",
72
+ "score": "0"
73
+ },
74
+ {
75
+ "id": "29",
76
+ "score": "0"
77
+ },
78
+ {
79
+ "id": "31",
80
+ "score": "0"
81
+ },
82
+ {
83
+ "id": "5",
84
+ "score": "0"
85
+ },
86
+ {
87
+ "id": "6",
88
+ "score": "0"
89
+ },
90
+ {
91
+ "id": "7",
92
+ "score": "0"
93
+ },
94
+ {
95
+ "id": "8",
96
+ "score": "0"
97
+ },
98
+ {
99
+ "id": "10",
100
+ "score": "0"
101
+ },
102
+ {
103
+ "id": "18",
104
+ "score": "0"
105
+ },
106
+ {
107
+ "id": "27",
108
+ "score": "0"
109
+ },
110
+ {
111
+ "id": "11",
112
+ "score": "0"
113
+ },
114
+ {
115
+ "id": "12",
116
+ "score": "0"
117
+ },
118
+ {
119
+ "id": "22",
120
+ "score": "0"
121
+ },
122
+ {
123
+ "id": "13",
124
+ "score": "0"
125
+ },
126
+ {
127
+ "id": "14",
128
+ "score": "0"
129
+ },
130
+ {
131
+ "id": "15",
132
+ "score": "0"
133
+ },
134
+ {
135
+ "id": "26",
136
+ "score": "0"
137
+ },
138
+ {
139
+ "id": "36",
140
+ "score": "0"
141
+ },
142
+ {
143
+ "id": "16",
144
+ "score": "0"
145
+ },
146
+ {
147
+ "id": "20",
148
+ "score": "0"
149
+ },
150
+ {
151
+ "id": "23",
152
+ "score": "0"
153
+ },
154
+ {
155
+ "id": "30",
156
+ "score": "0"
157
+ },
158
+ {
159
+ "id": "33",
160
+ "score": "0"
161
+ },
162
+ {
163
+ "id": "48",
164
+ "score": "0"
165
+ },
166
+ {
167
+ "id": "63",
168
+ "score": "1"
169
+ },
170
+ {
171
+ "id": "34",
172
+ "score": "0"
173
+ },
174
+ {
175
+ "id": "42",
176
+ "score": "0"
177
+ },
178
+ {
179
+ "id": "43",
180
+ "score": "0"
181
+ },
182
+ {
183
+ "id": "59",
184
+ "score": "0"
185
+ },
186
+ {
187
+ "id": "65",
188
+ "score": "0"
189
+ },
190
+ {
191
+ "id": "72",
192
+ "score": "0"
193
+ },
194
+ {
195
+ "id": "50",
196
+ "score": "0"
197
+ },
198
+ {
199
+ "id": "64",
200
+ "score": "0"
201
+ },
202
+ {
203
+ "id": "66",
204
+ "score": "0"
205
+ },
206
+ {
207
+ "id": "71",
208
+ "score": "0"
209
+ },
210
+ {
211
+ "id": "73",
212
+ "score": "0"
213
+ },
214
+ {
215
+ "id": "89",
216
+ "score": "0"
217
+ },
218
+ {
219
+ "id": "108",
220
+ "score": "0"
221
+ },
222
+ {
223
+ "id": "80",
224
+ "score": "0"
225
+ },
226
+ {
227
+ "id": "85",
228
+ "score": "0"
229
+ },
230
+ {
231
+ "id": "88",
232
+ "score": "0"
233
+ },
234
+ {
235
+ "id": "99",
236
+ "score": "0"
237
+ },
238
+ {
239
+ "id": "94",
240
+ "score": "0"
241
+ },
242
+ {
243
+ "id": "111",
244
+ "score": "0"
245
+ },
246
+ {
247
+ "id": "123",
248
+ "score": "1"
249
+ },
250
+ {
251
+ "id": "153",
252
+ "score": "0"
253
+ },
254
+ {
255
+ "id": "165",
256
+ "score": "0"
257
+ },
258
+ {
259
+ "id": "181",
260
+ "score": "0"
261
+ },
262
+ {
263
+ "id": "76",
264
+ "score": "0"
265
+ },
266
+ {
267
+ "id": "77",
268
+ "score": "0"
269
+ },
270
+ {
271
+ "id": "87",
272
+ "score": "0"
273
+ },
274
+ {
275
+ "id": "90",
276
+ "score": "0"
277
+ },
278
+ {
279
+ "id": "97",
280
+ "score": "0"
281
+ },
282
+ {
283
+ "id": "98",
284
+ "score": "0"
285
+ },
286
+ {
287
+ "id": "104",
288
+ "score": "1"
289
+ },
290
+ {
291
+ "id": "110",
292
+ "score": "1"
293
+ },
294
+ {
295
+ "id": "112",
296
+ "score": "1"
297
+ },
298
+ {
299
+ "id": "115",
300
+ "score": "1"
301
+ },
302
+ {
303
+ "id": "84",
304
+ "score": "0"
305
+ },
306
+ {
307
+ "id": "92",
308
+ "score": "0"
309
+ },
310
+ {
311
+ "id": "100",
312
+ "score": "0"
313
+ },
314
+ {
315
+ "id": "129",
316
+ "score": "1"
317
+ },
318
+ {
319
+ "id": "130",
320
+ "score": "0"
321
+ },
322
+ {
323
+ "id": "131",
324
+ "score": "1"
325
+ },
326
+ {
327
+ "id": "44",
328
+ "score": "0"
329
+ },
330
+ {
331
+ "id": "41",
332
+ "score": "0"
333
+ },
334
+ {
335
+ "id": "53",
336
+ "score": "0"
337
+ },
338
+ {
339
+ "id": "86",
340
+ "score": "0"
341
+ },
342
+ {
343
+ "id": "25",
344
+ "score": "0"
345
+ },
346
+ {
347
+ "id": "9",
348
+ "score": "0"
349
+ },
350
+ {
351
+ "id": "49",
352
+ "score": "0"
353
+ },
354
+ {
355
+ "id": "51",
356
+ "score": "0"
357
+ },
358
+ {
359
+ "id": "54",
360
+ "score": "0"
361
+ },
362
+ {
363
+ "id": "79",
364
+ "score": "1"
365
+ },
366
+ {
367
+ "id": "28",
368
+ "score": "0"
369
+ },
370
+ {
371
+ "id": "32",
372
+ "score": "0"
373
+ },
374
+ {
375
+ "id": "17",
376
+ "score": "0"
377
+ },
378
+ {
379
+ "id": "47",
380
+ "score": "0"
381
+ },
382
+ {
383
+ "id": "58",
384
+ "score": "0"
385
+ },
386
+ {
387
+ "id": "40",
388
+ "score": "0"
389
+ },
390
+ {
391
+ "id": "56",
392
+ "score": "0"
393
+ },
394
+ {
395
+ "id": "75",
396
+ "score": "0"
397
+ },
398
+ {
399
+ "id": "83",
400
+ "score": "0"
401
+ },
402
+ {
403
+ "id": "82",
404
+ "score": "0"
405
+ },
406
+ {
407
+ "id": "102",
408
+ "score": "0"
409
+ },
410
+ {
411
+ "id": "113",
412
+ "score": "0"
413
+ },
414
+ {
415
+ "id": "121",
416
+ "score": "0"
417
+ },
418
+ {
419
+ "id": "126",
420
+ "score": "1"
421
+ },
422
+ {
423
+ "id": "78",
424
+ "score": "0"
425
+ },
426
+ {
427
+ "id": "154",
428
+ "score": "0"
429
+ },
430
+ {
431
+ "id": "81",
432
+ "score": "0"
433
+ },
434
+ {
435
+ "id": "139",
436
+ "score": "1"
437
+ },
438
+ {
439
+ "id": "145",
440
+ "score": "0"
441
+ },
442
+ {
443
+ "id": "149",
444
+ "score": "0"
445
+ },
446
+ {
447
+ "id": "95",
448
+ "score": "0"
449
+ },
450
+ {
451
+ "id": "128",
452
+ "score": "1"
453
+ },
454
+ {
455
+ "id": "103",
456
+ "score": "0"
457
+ },
458
+ {
459
+ "id": "105",
460
+ "score": "0"
461
+ },
462
+ {
463
+ "id": "148",
464
+ "score": "1"
465
+ },
466
+ {
467
+ "id": "116",
468
+ "score": "0"
469
+ },
470
+ {
471
+ "id": "118",
472
+ "score": "1"
473
+ },
474
+ {
475
+ "id": "120",
476
+ "score": "1"
477
+ },
478
+ {
479
+ "id": "122",
480
+ "score": "1"
481
+ },
482
+ {
483
+ "id": "125",
484
+ "score": "1"
485
+ },
486
+ {
487
+ "id": "127",
488
+ "score": "0"
489
+ },
490
+ {
491
+ "id": "140",
492
+ "score": "1"
493
+ },
494
+ {
495
+ "id": "141",
496
+ "score": "0"
497
+ },
498
+ {
499
+ "id": "146",
500
+ "score": "0"
501
+ },
502
+ {
503
+ "id": "147",
504
+ "score": "1"
505
+ },
506
+ {
507
+ "id": "132",
508
+ "score": "1"
509
+ },
510
+ {
511
+ "id": "135",
512
+ "score": "0"
513
+ },
514
+ {
515
+ "id": "137",
516
+ "score": "0"
517
+ },
518
+ {
519
+ "id": "151",
520
+ "score": "1"
521
+ },
522
+ {
523
+ "id": "160",
524
+ "score": "1"
525
+ },
526
+ {
527
+ "id": "161",
528
+ "score": "0"
529
+ },
530
+ {
531
+ "id": "52",
532
+ "score": "0"
533
+ },
534
+ {
535
+ "id": "93",
536
+ "score": "0"
537
+ },
538
+ {
539
+ "id": "114",
540
+ "score": "1"
541
+ },
542
+ {
543
+ "id": "158",
544
+ "score": "0"
545
+ },
546
+ {
547
+ "id": "46",
548
+ "score": "0"
549
+ },
550
+ {
551
+ "id": "45",
552
+ "score": "0"
553
+ },
554
+ {
555
+ "id": "57",
556
+ "score": "0"
557
+ },
558
+ {
559
+ "id": "106",
560
+ "score": "0"
561
+ },
562
+ {
563
+ "id": "142",
564
+ "score": "0"
565
+ },
566
+ {
567
+ "id": "124",
568
+ "score": "1"
569
+ },
570
+ {
571
+ "id": "24",
572
+ "score": "0"
573
+ },
574
+ {
575
+ "id": "74",
576
+ "score": "0"
577
+ },
578
+ {
579
+ "id": "133",
580
+ "score": "0"
581
+ },
582
+ {
583
+ "id": "70",
584
+ "score": "0"
585
+ },
586
+ {
587
+ "id": "91",
588
+ "score": "0"
589
+ },
590
+ {
591
+ "id": "107",
592
+ "score": "0"
593
+ },
594
+ {
595
+ "id": "138",
596
+ "score": "1"
597
+ },
598
+ {
599
+ "id": "150",
600
+ "score": "1"
601
+ },
602
+ {
603
+ "id": "152",
604
+ "score": "1"
605
+ },
606
+ {
607
+ "id": "168",
608
+ "score": "0"
609
+ },
610
+ {
611
+ "id": "169",
612
+ "score": "0"
613
+ },
614
+ {
615
+ "id": "134",
616
+ "score": "1"
617
+ },
618
+ {
619
+ "id": "159",
620
+ "score": "0"
621
+ },
622
+ {
623
+ "id": "174",
624
+ "score": "1"
625
+ },
626
+ {
627
+ "id": "173",
628
+ "score": "1"
629
+ },
630
+ {
631
+ "id": "136",
632
+ "score": "0"
633
+ },
634
+ {
635
+ "id": "143",
636
+ "score": "1"
637
+ },
638
+ {
639
+ "id": "171",
640
+ "score": "0"
641
+ },
642
+ {
643
+ "id": "156",
644
+ "score": "1"
645
+ },
646
+ {
647
+ "id": "163",
648
+ "score": "1"
649
+ },
650
+ {
651
+ "id": "167",
652
+ "score": "0"
653
+ },
654
+ {
655
+ "id": "175",
656
+ "score": "0"
657
+ },
658
+ {
659
+ "id": "176",
660
+ "score": "1"
661
+ },
662
+ {
663
+ "id": "179",
664
+ "score": "0"
665
+ },
666
+ {
667
+ "id": "183",
668
+ "score": "1"
669
+ },
670
+ {
671
+ "id": "189",
672
+ "score": "0"
673
+ },
674
+ {
675
+ "id": "191",
676
+ "score": "0"
677
+ },
678
+ {
679
+ "id": "193",
680
+ "score": "0"
681
+ },
682
+ {
683
+ "id": "170",
684
+ "score": "1"
685
+ },
686
+ {
687
+ "id": "177",
688
+ "score": "1"
689
+ },
690
+ {
691
+ "id": "184",
692
+ "score": "0"
693
+ },
694
+ {
695
+ "id": "187",
696
+ "score": "0"
697
+ },
698
+ {
699
+ "id": "197",
700
+ "score": "1"
701
+ },
702
+ {
703
+ "id": "198",
704
+ "score": "0"
705
+ },
706
+ {
707
+ "id": "155",
708
+ "score": "0"
709
+ },
710
+ {
711
+ "id": "164",
712
+ "score": "0"
713
+ },
714
+ {
715
+ "id": "166",
716
+ "score": "0"
717
+ },
718
+ {
719
+ "id": "192",
720
+ "score": "1"
721
+ },
722
+ {
723
+ "id": "101",
724
+ "score": "0"
725
+ },
726
+ {
727
+ "id": "96",
728
+ "score": "0"
729
+ },
730
+ {
731
+ "id": "117",
732
+ "score": "1"
733
+ },
734
+ {
735
+ "id": "180",
736
+ "score": "1"
737
+ },
738
+ {
739
+ "id": "190",
740
+ "score": "1"
741
+ },
742
+ {
743
+ "id": "144",
744
+ "score": "0"
745
+ },
746
+ {
747
+ "id": "157",
748
+ "score": "0"
749
+ },
750
+ {
751
+ "id": "109",
752
+ "score": "0"
753
+ },
754
+ {
755
+ "id": "119",
756
+ "score": "1"
757
+ },
758
+ {
759
+ "id": "178",
760
+ "score": "1"
761
+ },
762
+ {
763
+ "id": "186",
764
+ "score": "1"
765
+ },
766
+ {
767
+ "id": "194",
768
+ "score": "1"
769
+ },
770
+ {
771
+ "id": "199",
772
+ "score": "1"
773
+ },
774
+ {
775
+ "id": "162",
776
+ "score": "0"
777
+ },
778
+ {
779
+ "id": "182",
780
+ "score": "1"
781
+ },
782
+ {
783
+ "id": "185",
784
+ "score": "1"
785
+ },
786
+ {
787
+ "id": "188",
788
+ "score": "0"
789
+ },
790
+ {
791
+ "id": "195",
792
+ "score": "0"
793
+ },
794
+ {
795
+ "id": "196",
796
+ "score": "1"
797
+ },
798
+ {
799
+ "id": "172",
800
+ "score": "1"
801
+ }
802
+ ]
eval_milebench/ActionLocalization/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionLocalization/pred_with_extracted.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionPrediction.log ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/1 [00:00<?, ?it/s]
 
 
1
  0%| | 0/1 [00:00<?, ?it/s]
 
 
2
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
3
  0%| | 0/2 [00:00<?, ?it/s]
4
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.09s/it]
 
 
5
  0%| | 0/2 [00:00<?, ?it/s]
6
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.25s/it]
 
 
7
  0%| | 0/2 [00:00<?, ?it/s]
8
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.30s/it]
 
 
 
9
  0%| | 0/3 [00:00<?, ?it/s]
10
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:02, 1.02s/it]
11
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:02<00:01, 1.46s/it]
 
 
12
  0%| | 0/3 [00:00<?, ?it/s]
13
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.05s/it]
14
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:02<00:01, 1.32s/it]
 
 
15
  0%| | 0/3 [00:00<?, ?it/s]
16
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.42s/it]
17
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.50s/it]
 
 
 
18
  0%| | 0/3 [00:00<?, ?it/s]
19
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.17s/it]
20
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:05<00:02, 2.79s/it]
 
 
21
  0%| | 0/4 [00:00<?, ?it/s]
22
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:03<00:10, 3.63s/it]
23
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:06<00:05, 2.99s/it]
24
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:01, 1.99s/it]
 
 
25
  0%| | 0/4 [00:00<?, ?it/s]
26
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:02<00:08, 2.83s/it]
27
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:05<00:05, 2.80s/it]
28
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:01, 1.96s/it]
 
 
 
29
  0%| | 0/6 [00:00<?, ?it/s]
30
  17%|β–ˆβ–‹ | 1/6 [00:01<00:06, 1.20s/it]
31
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:02<00:05, 1.39s/it]
32
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:03<00:03, 1.12s/it]
33
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:04<00:02, 1.19s/it]
34
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:06<00:01, 1.39s/it]
 
 
35
  0%| | 0/6 [00:00<?, ?it/s]
36
  17%|β–ˆβ–‹ | 1/6 [00:02<00:10, 2.03s/it]
37
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:02<00:05, 1.34s/it]
38
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:05<00:05, 1.92s/it]
39
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:06<00:03, 1.58s/it]
40
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:09<00:01, 1.90s/it]
 
 
 
41
  0%| | 0/2 [00:00<?, ?it/s]
42
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.00s/it]
 
43
  0%| | 0/1 [00:00<?, ?it/s]
 
 
44
  0%| | 0/7 [00:00<?, ?it/s]
45
  14%|β–ˆβ– | 1/7 [00:02<00:17, 2.92s/it]
46
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:03<00:08, 1.63s/it]
47
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:06<00:08, 2.21s/it]
48
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:09<00:07, 2.45s/it]
49
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:10<00:03, 1.84s/it]
50
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:13<00:02, 2.25s/it]
 
 
 
 
51
  0%| | 0/2 [00:00<?, ?it/s]
52
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.73s/it]
 
 
 
53
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
54
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
55
  0%| | 0/2 [00:00<?, ?it/s]
56
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:04<00:04, 4.15s/it]
 
 
57
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
58
  0%| | 0/2 [00:00<?, ?it/s]
59
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.37s/it]
 
 
 
 
60
  0%| | 0/1 [00:00<?, ?it/s]
 
 
61
  0%| | 0/2 [00:00<?, ?it/s]
62
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.49s/it]
 
 
63
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
64
  0%| | 0/1 [00:00<?, ?it/s]
 
 
65
  0%| | 0/2 [00:00<?, ?it/s]
66
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.06s/it]
 
 
 
67
  0%| | 0/1 [00:00<?, ?it/s]
 
 
68
  0%| | 0/2 [00:00<?, ?it/s]
69
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.72s/it]
 
 
 
 
 
70
  0%| | 0/1 [00:00<?, ?it/s]
 
 
71
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
72
  0%| | 0/2 [00:00<?, ?it/s]
73
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.55s/it]
 
 
74
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
75
  0%| | 0/1 [00:00<?, ?it/s]
 
 
76
  0%| | 0/11 [00:00<?, ?it/s]
77
  9%|β–‰ | 1/11 [00:01<00:11, 1.16s/it]
78
  18%|β–ˆβ–Š | 2/11 [00:02<00:12, 1.36s/it]
79
  27%|β–ˆβ–ˆβ–‹ | 3/11 [00:03<00:08, 1.09s/it]
80
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 4/11 [00:04<00:06, 1.01it/s]
81
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/11 [00:06<00:08, 1.46s/it]
82
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 6/11 [00:08<00:07, 1.46s/it]
83
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 7/11 [00:10<00:06, 1.71s/it]
84
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/11 [00:10<00:04, 1.40s/it]
85
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 9/11 [00:13<00:03, 1.84s/it]
86
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 10/11 [00:15<00:01, 1.75s/it]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
88
  0%| | 0/11 [00:00<?, ?it/s]
89
  9%|β–‰ | 1/11 [00:03<00:36, 3.65s/it]
90
  18%|β–ˆβ–Š | 2/11 [00:05<00:23, 2.60s/it]
91
  27%|β–ˆβ–ˆβ–‹ | 3/11 [00:06<00:14, 1.78s/it]
92
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 4/11 [00:08<00:13, 1.99s/it]
93
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/11 [00:09<00:10, 1.70s/it]
94
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 6/11 [00:13<00:11, 2.28s/it]
95
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 7/11 [00:13<00:07, 1.78s/it]
96
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/11 [00:16<00:05, 1.94s/it]
97
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 9/11 [00:17<00:03, 1.71s/it]
98
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 10/11 [00:18<00:01, 1.46s/it]
 
 
 
 
99
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
102
  0%| | 0/12 [00:00<?, ?it/s]
103
  8%|β–Š | 1/12 [00:02<00:27, 2.54s/it]
104
  17%|β–ˆβ–‹ | 2/12 [00:03<00:17, 1.76s/it]
105
  25%|β–ˆβ–ˆβ–Œ | 3/12 [00:04<00:12, 1.35s/it]
106
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 4/12 [00:05<00:09, 1.17s/it]
107
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [00:06<00:07, 1.06s/it]
108
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/12 [00:07<00:05, 1.03it/s]
109
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [00:08<00:05, 1.08s/it]
110
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/12 [00:09<00:03, 1.03it/s]
111
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 9/12 [00:09<00:02, 1.14it/s]
112
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/12 [00:11<00:02, 1.15s/it]
113
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 11/12 [00:12<00:01, 1.06s/it]
 
 
114
  0%| | 0/2 [00:00<?, ?it/s]
115
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.94s/it]
 
 
116
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
118
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
120
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
121
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
122
  0%| | 0/2 [00:00<?, ?it/s]
123
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.07s/it]
 
 
 
 
 
 
 
124
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
125
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionPrediction, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
62
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
63
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionPrediction, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
64
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
65
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionPrediction, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
66
+ Initialization Finished
67
+ Predicting ActionPrediction Using internvl
68
+ Proceeding 36-length images samples | Num: 5
69
+ Initialization Finished
70
+ Predicting ActionPrediction Using internvl
71
+ Proceeding 36-length images samples | Num: 5
72
+ Initialization Finished
73
+ Predicting ActionPrediction Using internvl
74
+ Proceeding 36-length images samples | Num: 5
75
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
76
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionPrediction, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
77
+ Initialization Finished
78
+ Predicting ActionPrediction Using internvl
79
+ Proceeding 36-length images samples | Num: 5
80
+
81
  0%| | 0/1 [00:00<?, ?it/s]
82
+ Proceeding 28-length images samples | Num: 9
83
+
84
  0%| | 0/1 [00:00<?, ?it/s]
85
+ Proceeding 28-length images samples | Num: 9
86
+
87
  0%| | 0/1 [00:00<?, ?it/s]
88
+ Proceeding 28-length images samples | Num: 9
89
+ Proceeding 28-length images samples | Num: 9
90
+
91
  0%| | 0/2 [00:00<?, ?it/s]
92
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.09s/it]
93
+ Proceeding 29-length images samples | Num: 13
94
+
95
  0%| | 0/2 [00:00<?, ?it/s]
96
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.25s/it]
97
+ Proceeding 29-length images samples | Num: 13
98
+
99
  0%| | 0/2 [00:00<?, ?it/s]
100
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.30s/it]
101
+ Proceeding 29-length images samples | Num: 13
102
+ Proceeding 29-length images samples | Num: 13
103
+
104
  0%| | 0/3 [00:00<?, ?it/s]
105
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:02, 1.02s/it]
106
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:02<00:01, 1.46s/it]
107
+ Proceeding 32-length images samples | Num: 15
108
+
109
  0%| | 0/3 [00:00<?, ?it/s]
110
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.05s/it]
111
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:02<00:01, 1.32s/it]
112
+ Proceeding 32-length images samples | Num: 15
113
+
114
  0%| | 0/3 [00:00<?, ?it/s]
115
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.42s/it]
116
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.50s/it]
117
+ Proceeding 32-length images samples | Num: 15
118
+ Proceeding 32-length images samples | Num: 15
119
+
120
  0%| | 0/3 [00:00<?, ?it/s]
121
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.17s/it]
122
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:05<00:02, 2.79s/it]
123
+ Proceeding 31-length images samples | Num: 26
124
+
125
  0%| | 0/4 [00:00<?, ?it/s]
126
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:03<00:10, 3.63s/it]
127
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:06<00:05, 2.99s/it]
128
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:01, 1.99s/it]
129
+ Proceeding 31-length images samples | Num: 26
130
+
131
  0%| | 0/4 [00:00<?, ?it/s]
132
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:02<00:08, 2.83s/it]
133
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:05<00:05, 2.80s/it]
134
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:06<00:01, 1.96s/it]
135
+ Proceeding 31-length images samples | Num: 26
136
+ Proceeding 31-length images samples | Num: 26
137
+
138
  0%| | 0/6 [00:00<?, ?it/s]
139
  17%|β–ˆβ–‹ | 1/6 [00:01<00:06, 1.20s/it]
140
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:02<00:05, 1.39s/it]
141
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:03<00:03, 1.12s/it]
142
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:04<00:02, 1.19s/it]
143
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:06<00:01, 1.39s/it]
144
+ Proceeding 34-length images samples | Num: 9
145
+
146
  0%| | 0/6 [00:00<?, ?it/s]
147
  17%|β–ˆβ–‹ | 1/6 [00:02<00:10, 2.03s/it]
148
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:02<00:05, 1.34s/it]
149
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:05<00:05, 1.92s/it]
150
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:06<00:03, 1.58s/it]
151
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:09<00:01, 1.90s/it]
152
+ Proceeding 34-length images samples | Num: 9
153
+ Proceeding 37-length images samples | Num: 6
154
+
155
  0%| | 0/2 [00:00<?, ?it/s]
156
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.00s/it]
157
+
158
  0%| | 0/1 [00:00<?, ?it/s]
159
+ Proceeding 44-length images samples | Num: 2
160
+
161
  0%| | 0/7 [00:00<?, ?it/s]
162
  14%|β–ˆβ– | 1/7 [00:02<00:17, 2.92s/it]
163
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:03<00:08, 1.63s/it]
164
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:06<00:08, 2.21s/it]
165
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:09<00:07, 2.45s/it]
166
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:10<00:03, 1.84s/it]
167
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:13<00:02, 2.25s/it]
168
+ Proceeding 34-length images samples | Num: 9
169
+
170
+ Proceeding 23-length images samples | Num: 6
171
+
172
  0%| | 0/2 [00:00<?, ?it/s]
173
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.73s/it]
174
+ Proceeding 37-length images samples | Num: 6
175
+ Proceeding 34-length images samples | Num: 9
176
+
177
  0%| | 0/1 [00:00<?, ?it/s]
178
+ Proceeding 56-length images samples | Num: 1
179
+
180
+ Proceeding 17-length images samples | Num: 3
181
+ Proceeding 33-length images samples | Num: 8
182
+
183
+
184
  0%| | 0/1 [00:00<?, ?it/s]
185
+ Proceeding 44-length images samples | Num: 2
186
+
187
+ Proceeding 23-length images samples | Num: 6
188
+
189
  0%| | 0/2 [00:00<?, ?it/s]
190
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:04<00:04, 4.15s/it]
191
+ Proceeding 37-length images samples | Num: 6
192
+
193
  0%| | 0/1 [00:00<?, ?it/s]
194
+ Proceeding 56-length images samples | Num: 1
195
+
196
+ Proceeding 17-length images samples | Num: 3
197
+ Proceeding 37-length images samples | Num: 6
198
+
199
  0%| | 0/2 [00:00<?, ?it/s]
200
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.37s/it]
201
+ Proceeding 24-length images samples | Num: 3
202
+
203
+ Proceeding 35-length images samples | Num: 5
204
+
205
  0%| | 0/1 [00:00<?, ?it/s]
206
+ Proceeding 33-length images samples | Num: 8
207
+
208
  0%| | 0/2 [00:00<?, ?it/s]
209
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.49s/it]
210
+ Proceeding 44-length images samples | Num: 2
211
+
212
  0%| | 0/1 [00:00<?, ?it/s]
213
+ Proceeding 30-length images samples | Num: 46
214
+ Proceeding 44-length images samples | Num: 2
215
+
216
  0%| | 0/1 [00:00<?, ?it/s]
217
+ Proceeding 23-length images samples | Num: 6
218
+
219
  0%| | 0/2 [00:00<?, ?it/s]
220
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.06s/it]
221
+ Proceeding 24-length images samples | Num: 3
222
+ Proceeding 23-length images samples | Num: 6
223
+
224
  0%| | 0/1 [00:00<?, ?it/s]
225
+ Proceeding 35-length images samples | Num: 5
226
+
227
  0%| | 0/2 [00:00<?, ?it/s]
228
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.72s/it]
229
+ Proceeding 56-length images samples | Num: 1
230
+
231
+ Proceeding 17-length images samples | Num: 3
232
+ Proceeding 56-length images samples | Num: 1
233
+
234
  0%| | 0/1 [00:00<?, ?it/s]
235
+ Proceeding 30-length images samples | Num: 46
236
+
237
  0%| | 0/1 [00:00<?, ?it/s]
238
+ Proceeding 33-length images samples | Num: 8
239
+ Proceeding 17-length images samples | Num: 3
240
+ Proceeding 33-length images samples | Num: 8
241
+
242
  0%| | 0/2 [00:00<?, ?it/s]
243
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.55s/it]
244
+ Proceeding 24-length images samples | Num: 3
245
+
246
  0%| | 0/1 [00:00<?, ?it/s]
247
+ Proceeding 35-length images samples | Num: 5
248
+ Proceeding 24-length images samples | Num: 3
249
+
250
  0%| | 0/1 [00:00<?, ?it/s]
251
+ Proceeding 30-length images samples | Num: 46
252
+
253
  0%| | 0/11 [00:00<?, ?it/s]
254
  9%|β–‰ | 1/11 [00:01<00:11, 1.16s/it]
255
  18%|β–ˆβ–Š | 2/11 [00:02<00:12, 1.36s/it]
256
  27%|β–ˆβ–ˆβ–‹ | 3/11 [00:03<00:08, 1.09s/it]
257
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 4/11 [00:04<00:06, 1.01it/s]
258
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/11 [00:06<00:08, 1.46s/it]
259
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 6/11 [00:08<00:07, 1.46s/it]
260
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 7/11 [00:10<00:06, 1.71s/it]
261
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/11 [00:10<00:04, 1.40s/it]
262
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 9/11 [00:13<00:03, 1.84s/it]
263
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 10/11 [00:15<00:01, 1.75s/it]
264
+ Proceeding 21-length images samples | Num: 2
265
+
266
+ Proceeding 41-length images samples | Num: 1
267
+ Proceeding 59-length images samples | Num: 1
268
+
269
+
270
+ Proceeding 22-length images samples | Num: 2
271
+ Proceeding 15-length images samples | Num: 3
272
+
273
+
274
+ Proceeding 38-length images samples | Num: 1
275
+
276
+ Proceeding 25-length images samples | Num: 2
277
+
278
+ Proceeding 20-length images samples | Num: 1
279
+ Proceeding 35-length images samples | Num: 5
280
+
281
+ Proceeding 39-length images samples | Num: 1
282
+ Proceeding 16-length images samples | Num: 4
283
+
284
+
285
  0%| | 0/1 [00:00<?, ?it/s]
286
+ Proceeding 43-length images samples | Num: 1
287
+
288
+ Proceeding 40-length images samples | Num: 2
289
+ Proceeding 27-length images samples | Num: 7
290
+
291
+ Proceeding 30-length images samples | Num: 46
292
+
293
  0%| | 0/11 [00:00<?, ?it/s]
294
  9%|β–‰ | 1/11 [00:03<00:36, 3.65s/it]
295
  18%|β–ˆβ–Š | 2/11 [00:05<00:23, 2.60s/it]
296
  27%|β–ˆβ–ˆβ–‹ | 3/11 [00:06<00:14, 1.78s/it]
297
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 4/11 [00:08<00:13, 1.99s/it]
298
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/11 [00:09<00:10, 1.70s/it]
299
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 6/11 [00:13<00:11, 2.28s/it]
300
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 7/11 [00:13<00:07, 1.78s/it]
301
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 8/11 [00:16<00:05, 1.94s/it]
302
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 9/11 [00:17<00:03, 1.71s/it]
303
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 10/11 [00:18<00:01, 1.46s/it]
304
+ Proceeding 21-length images samples | Num: 2
305
+
306
+ Proceeding 41-length images samples | Num: 1
307
+
308
  0%| | 0/1 [00:00<?, ?it/s]
309
+ Proceeding 26-length images samples | Num: 1
310
+
311
+ Proceeding 59-length images samples | Num: 1
312
+
313
+ Proceeding 81-length images samples | Num: 1
314
+
315
+ Proceeding 22-length images samples | Num: 2
316
+
317
+ Proceeding 12-length images samples | Num: 2
318
+
319
+ Proceeding 46-length images samples | Num: 1
320
+
321
+ Proceeding 15-length images samples | Num: 3
322
+
323
+ Proceeding 18-length images samples | Num: 1
324
+
325
+ Proceeding 60-length images samples | Num: 1
326
+
327
+ Proceeding 45-length images samples | Num: 2
328
+
329
+ Proceeding 13-length images samples | Num: 1
330
+
331
  0%| | 0/1 [00:00<?, ?it/s]
332
+ Proceeding 38-length images samples | Num: 1
333
+
334
+ Proceeding 19-length images samples | Num: 1
335
+
336
+ Proceeding 25-length images samples | Num: 2
337
+
338
+ Proceeding 9-length images samples | Num: 1
339
+
340
+ Proceeding 20-length images samples | Num: 1
341
+
342
+ Proceeding 49-length images samples | Num: 1
343
+
344
+ Proceeding 39-length images samples | Num: 1
345
+
346
+ Proceeding 16-length images samples | Num: 4
347
+
348
+ Proceeding 54-length images samples | Num: 1
349
+
350
+ Proceeding 7-length images samples | Num: 1
351
+
352
+
353
  0%| | 0/1 [00:00<?, ?it/s]
354
+ Proceeding 43-length images samples | Num: 1
355
+ Proceeding 40-length images samples | Num: 2
356
+
357
+
358
+ Proceeding 27-length images samples | Num: 7
359
+
360
  0%| | 0/12 [00:00<?, ?it/s]
361
  8%|β–Š | 1/12 [00:02<00:27, 2.54s/it]
362
  17%|β–ˆβ–‹ | 2/12 [00:03<00:17, 1.76s/it]
363
  25%|β–ˆβ–ˆβ–Œ | 3/12 [00:04<00:12, 1.35s/it]
364
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 4/12 [00:05<00:09, 1.17s/it]
365
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 5/12 [00:06<00:07, 1.06s/it]
366
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/12 [00:07<00:05, 1.03it/s]
367
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/12 [00:08<00:05, 1.08s/it]
368
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/12 [00:09<00:03, 1.03it/s]
369
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 9/12 [00:09<00:02, 1.14it/s]
370
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 10/12 [00:11<00:02, 1.15s/it]
371
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 11/12 [00:12<00:01, 1.06s/it]
372
+ Proceeding 21-length images samples | Num: 2
373
+
374
  0%| | 0/2 [00:00<?, ?it/s]
375
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.94s/it]
376
+ Proceeding 26-length images samples | Num: 1
377
+
378
  0%| | 0/1 [00:00<?, ?it/s]
379
+ Proceeding 41-length images samples | Num: 1
380
+
381
+ Proceeding 81-length images samples | Num: 1
382
+
383
+ Proceeding 59-length images samples | Num: 1
384
+
385
+ Proceeding 12-length images samples | Num: 2
386
+
387
+ Proceeding 22-length images samples | Num: 2
388
+
389
+ Proceeding 46-length images samples | Num: 1
390
+
391
+ Proceeding 18-length images samples | Num: 1
392
+
393
+ Proceeding 60-length images samples | Num: 1
394
+
395
  0%| | 0/1 [00:00<?, ?it/s]
396
+ Proceeding 15-length images samples | Num: 3
397
+
398
+ Proceeding 45-length images samples | Num: 2
399
+ Proceeding 21-length images samples | Num: 2
400
+
401
+ Proceeding 13-length images samples | Num: 1
402
+
403
+ Proceeding 19-length images samples | Num: 1
404
+
405
  0%| | 0/1 [00:00<?, ?it/s]
406
+ Proceeding 38-length images samples | Num: 1
407
+
408
+ Proceeding 9-length images samples | Num: 1
409
+
410
+ Proceeding 25-length images samples | Num: 2
411
+ Proceeding 41-length images samples | Num: 1
412
+
413
+ Proceeding 49-length images samples | Num: 1
414
+
415
+ Proceeding 54-length images samples | Num: 1
416
+
417
+ Proceeding 7-length images samples | Num: 1
418
+
419
  0%| | 0/1 [00:00<?, ?it/s]
420
+ Proceeding 20-length images samples | Num: 1
421
+
422
+
423
+ Proceeding 39-length images samples | Num: 1
424
+ Proceeding 59-length images samples | Num: 1
425
+
426
+ Proceeding 16-length images samples | Num: 4
427
+
428
  0%| | 0/1 [00:00<?, ?it/s]
429
+ Proceeding 43-length images samples | Num: 1
430
+
431
+ Proceeding 40-length images samples | Num: 2
432
+ Proceeding 22-length images samples | Num: 2
433
+
434
  0%| | 0/1 [00:00<?, ?it/s]
435
+ Proceeding 27-length images samples | Num: 7
436
+ Proceeding 15-length images samples | Num: 3
437
+ Proceeding 38-length images samples | Num: 1
438
+
439
  0%| | 0/2 [00:00<?, ?it/s]
440
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.07s/it]
441
+ Proceeding 26-length images samples | Num: 1
442
+
443
+ Proceeding 81-length images samples | Num: 1
444
+
445
+ Proceeding 12-length images samples | Num: 2
446
+ Proceeding 25-length images samples | Num: 2
447
+
448
  0%| | 0/1 [00:00<?, ?it/s]
449
+ Proceeding 46-length images samples | Num: 1
450
+
451
+ Proceeding 18-length images samples | Num: 1
452
+ Proceeding 20-length images samples | Num: 1
453
+
454
+ Proceeding 60-length images samples | Num: 1
455
+
456
+ Proceeding 45-length images samples | Num: 2
457
+ Proceeding 39-length images samples | Num: 1
458
+
459
  0%| | 0/1 [00:00<?, ?it/s]
460
+ Proceeding 13-length images samples | Num: 1
461
+
462
+ Proceeding 19-length images samples | Num: 1
463
+ Proceeding 16-length images samples | Num: 4
464
+
465
+ Proceeding 9-length images samples | Num: 1
466
+
467
+ Proceeding 49-length images samples | Num: 1
468
+
469
+ Proceeding 54-length images samples | Num: 1
470
+ Proceeding 43-length images samples | Num: 1
471
+
472
+ Proceeding 7-length images samples | Num: 1
473
+
474
+ Proceeding 40-length images samples | Num: 2
475
+ Proceeding 27-length images samples | Num: 7
476
+ Proceeding 26-length images samples | Num: 1
477
+ Proceeding 81-length images samples | Num: 1
478
+ Proceeding 12-length images samples | Num: 2
479
+ Proceeding 46-length images samples | Num: 1
480
+ Proceeding 18-length images samples | Num: 1
481
+ Proceeding 60-length images samples | Num: 1
482
+ Proceeding 45-length images samples | Num: 2
483
+ Proceeding 13-length images samples | Num: 1
484
+ Proceeding 19-length images samples | Num: 1
485
+ Proceeding 9-length images samples | Num: 1
486
+ Proceeding 49-length images samples | Num: 1
487
+ Proceeding 54-length images samples | Num: 1
488
+ Proceeding 7-length images samples | Num: 1
489
+ evaluating ActionPrediction ...
490
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/ActionPrediction/ActionPrediction_240803234615.json
491
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset ActionPrediction --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/ActionPrediction
492
+ internvl: ActionPrediction: {'Accuracy': 0.755, 'image_quantity_level-Accuracy': {'Few': 0, 'Medium': 0.7555555555555555, 'Many': 0.7538461538461538}, 'image_quantity_level-Result': {'Few': [0, 0], 'Medium': [102, 135], 'Many': [49, 65]}}
eval_milebench/ActionPrediction/ActionPrediction_240803234615.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionPrediction/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Accuracy": 0.755, "image_quantity_level-Accuracy": {"Few": 0, "Medium": 0.7555555555555555, "Many": 0.7538461538461538}, "image_quantity_level-Result": {"Few": [0, 0], "Medium": [102, 135], "Many": [49, 65]}}
eval_milebench/ActionPrediction/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "1"
5
+ },
6
+ {
7
+ "id": "56",
8
+ "score": "1"
9
+ },
10
+ {
11
+ "id": "1",
12
+ "score": "1"
13
+ },
14
+ {
15
+ "id": "16",
16
+ "score": "0"
17
+ },
18
+ {
19
+ "id": "33",
20
+ "score": "1"
21
+ },
22
+ {
23
+ "id": "2",
24
+ "score": "1"
25
+ },
26
+ {
27
+ "id": "23",
28
+ "score": "1"
29
+ },
30
+ {
31
+ "id": "25",
32
+ "score": "1"
33
+ },
34
+ {
35
+ "id": "28",
36
+ "score": "1"
37
+ },
38
+ {
39
+ "id": "3",
40
+ "score": "1"
41
+ },
42
+ {
43
+ "id": "18",
44
+ "score": "0"
45
+ },
46
+ {
47
+ "id": "37",
48
+ "score": "1"
49
+ },
50
+ {
51
+ "id": "42",
52
+ "score": "1"
53
+ },
54
+ {
55
+ "id": "4",
56
+ "score": "1"
57
+ },
58
+ {
59
+ "id": "14",
60
+ "score": "1"
61
+ },
62
+ {
63
+ "id": "21",
64
+ "score": "1"
65
+ },
66
+ {
67
+ "id": "38",
68
+ "score": "1"
69
+ },
70
+ {
71
+ "id": "50",
72
+ "score": "1"
73
+ },
74
+ {
75
+ "id": "57",
76
+ "score": "1"
77
+ },
78
+ {
79
+ "id": "75",
80
+ "score": "0"
81
+ },
82
+ {
83
+ "id": "5",
84
+ "score": "1"
85
+ },
86
+ {
87
+ "id": "43",
88
+ "score": "1"
89
+ },
90
+ {
91
+ "id": "60",
92
+ "score": "1"
93
+ },
94
+ {
95
+ "id": "6",
96
+ "score": "0"
97
+ },
98
+ {
99
+ "id": "29",
100
+ "score": "1"
101
+ },
102
+ {
103
+ "id": "7",
104
+ "score": "1"
105
+ },
106
+ {
107
+ "id": "8",
108
+ "score": "1"
109
+ },
110
+ {
111
+ "id": "47",
112
+ "score": "1"
113
+ },
114
+ {
115
+ "id": "9",
116
+ "score": "1"
117
+ },
118
+ {
119
+ "id": "10",
120
+ "score": "1"
121
+ },
122
+ {
123
+ "id": "11",
124
+ "score": "1"
125
+ },
126
+ {
127
+ "id": "12",
128
+ "score": "1"
129
+ },
130
+ {
131
+ "id": "13",
132
+ "score": "0"
133
+ },
134
+ {
135
+ "id": "15",
136
+ "score": "0"
137
+ },
138
+ {
139
+ "id": "85",
140
+ "score": "0"
141
+ },
142
+ {
143
+ "id": "17",
144
+ "score": "0"
145
+ },
146
+ {
147
+ "id": "22",
148
+ "score": "1"
149
+ },
150
+ {
151
+ "id": "24",
152
+ "score": "1"
153
+ },
154
+ {
155
+ "id": "26",
156
+ "score": "0"
157
+ },
158
+ {
159
+ "id": "31",
160
+ "score": "1"
161
+ },
162
+ {
163
+ "id": "34",
164
+ "score": "1"
165
+ },
166
+ {
167
+ "id": "35",
168
+ "score": "1"
169
+ },
170
+ {
171
+ "id": "51",
172
+ "score": "0"
173
+ },
174
+ {
175
+ "id": "58",
176
+ "score": "1"
177
+ },
178
+ {
179
+ "id": "65",
180
+ "score": "1"
181
+ },
182
+ {
183
+ "id": "67",
184
+ "score": "1"
185
+ },
186
+ {
187
+ "id": "68",
188
+ "score": "1"
189
+ },
190
+ {
191
+ "id": "20",
192
+ "score": "0"
193
+ },
194
+ {
195
+ "id": "27",
196
+ "score": "1"
197
+ },
198
+ {
199
+ "id": "32",
200
+ "score": "1"
201
+ },
202
+ {
203
+ "id": "36",
204
+ "score": "1"
205
+ },
206
+ {
207
+ "id": "41",
208
+ "score": "1"
209
+ },
210
+ {
211
+ "id": "44",
212
+ "score": "0"
213
+ },
214
+ {
215
+ "id": "46",
216
+ "score": "1"
217
+ },
218
+ {
219
+ "id": "49",
220
+ "score": "1"
221
+ },
222
+ {
223
+ "id": "52",
224
+ "score": "1"
225
+ },
226
+ {
227
+ "id": "53",
228
+ "score": "1"
229
+ },
230
+ {
231
+ "id": "59",
232
+ "score": "0"
233
+ },
234
+ {
235
+ "id": "61",
236
+ "score": "1"
237
+ },
238
+ {
239
+ "id": "70",
240
+ "score": "1"
241
+ },
242
+ {
243
+ "id": "141",
244
+ "score": "1"
245
+ },
246
+ {
247
+ "id": "82",
248
+ "score": "1"
249
+ },
250
+ {
251
+ "id": "89",
252
+ "score": "1"
253
+ },
254
+ {
255
+ "id": "98",
256
+ "score": "1"
257
+ },
258
+ {
259
+ "id": "111",
260
+ "score": "1"
261
+ },
262
+ {
263
+ "id": "133",
264
+ "score": "1"
265
+ },
266
+ {
267
+ "id": "134",
268
+ "score": "1"
269
+ },
270
+ {
271
+ "id": "138",
272
+ "score": "0"
273
+ },
274
+ {
275
+ "id": "150",
276
+ "score": "1"
277
+ },
278
+ {
279
+ "id": "158",
280
+ "score": "1"
281
+ },
282
+ {
283
+ "id": "164",
284
+ "score": "1"
285
+ },
286
+ {
287
+ "id": "171",
288
+ "score": "0"
289
+ },
290
+ {
291
+ "id": "174",
292
+ "score": "1"
293
+ },
294
+ {
295
+ "id": "194",
296
+ "score": "0"
297
+ },
298
+ {
299
+ "id": "62",
300
+ "score": "0"
301
+ },
302
+ {
303
+ "id": "40",
304
+ "score": "1"
305
+ },
306
+ {
307
+ "id": "64",
308
+ "score": "1"
309
+ },
310
+ {
311
+ "id": "30",
312
+ "score": "1"
313
+ },
314
+ {
315
+ "id": "63",
316
+ "score": "1"
317
+ },
318
+ {
319
+ "id": "99",
320
+ "score": "1"
321
+ },
322
+ {
323
+ "id": "45",
324
+ "score": "0"
325
+ },
326
+ {
327
+ "id": "55",
328
+ "score": "1"
329
+ },
330
+ {
331
+ "id": "81",
332
+ "score": "0"
333
+ },
334
+ {
335
+ "id": "94",
336
+ "score": "0"
337
+ },
338
+ {
339
+ "id": "79",
340
+ "score": "1"
341
+ },
342
+ {
343
+ "id": "95",
344
+ "score": "0"
345
+ },
346
+ {
347
+ "id": "104",
348
+ "score": "1"
349
+ },
350
+ {
351
+ "id": "106",
352
+ "score": "0"
353
+ },
354
+ {
355
+ "id": "108",
356
+ "score": "1"
357
+ },
358
+ {
359
+ "id": "116",
360
+ "score": "1"
361
+ },
362
+ {
363
+ "id": "122",
364
+ "score": "1"
365
+ },
366
+ {
367
+ "id": "76",
368
+ "score": "1"
369
+ },
370
+ {
371
+ "id": "84",
372
+ "score": "1"
373
+ },
374
+ {
375
+ "id": "113",
376
+ "score": "1"
377
+ },
378
+ {
379
+ "id": "127",
380
+ "score": "1"
381
+ },
382
+ {
383
+ "id": "72",
384
+ "score": "0"
385
+ },
386
+ {
387
+ "id": "48",
388
+ "score": "0"
389
+ },
390
+ {
391
+ "id": "73",
392
+ "score": "0"
393
+ },
394
+ {
395
+ "id": "120",
396
+ "score": "1"
397
+ },
398
+ {
399
+ "id": "54",
400
+ "score": "1"
401
+ },
402
+ {
403
+ "id": "135",
404
+ "score": "1"
405
+ },
406
+ {
407
+ "id": "19",
408
+ "score": "1"
409
+ },
410
+ {
411
+ "id": "121",
412
+ "score": "0"
413
+ },
414
+ {
415
+ "id": "74",
416
+ "score": "1"
417
+ },
418
+ {
419
+ "id": "77",
420
+ "score": "1"
421
+ },
422
+ {
423
+ "id": "80",
424
+ "score": "1"
425
+ },
426
+ {
427
+ "id": "83",
428
+ "score": "1"
429
+ },
430
+ {
431
+ "id": "88",
432
+ "score": "0"
433
+ },
434
+ {
435
+ "id": "90",
436
+ "score": "0"
437
+ },
438
+ {
439
+ "id": "91",
440
+ "score": "0"
441
+ },
442
+ {
443
+ "id": "93",
444
+ "score": "0"
445
+ },
446
+ {
447
+ "id": "97",
448
+ "score": "0"
449
+ },
450
+ {
451
+ "id": "100",
452
+ "score": "1"
453
+ },
454
+ {
455
+ "id": "103",
456
+ "score": "1"
457
+ },
458
+ {
459
+ "id": "107",
460
+ "score": "1"
461
+ },
462
+ {
463
+ "id": "39",
464
+ "score": "1"
465
+ },
466
+ {
467
+ "id": "180",
468
+ "score": "0"
469
+ },
470
+ {
471
+ "id": "71",
472
+ "score": "1"
473
+ },
474
+ {
475
+ "id": "145",
476
+ "score": "1"
477
+ },
478
+ {
479
+ "id": "117",
480
+ "score": "1"
481
+ },
482
+ {
483
+ "id": "195",
484
+ "score": "1"
485
+ },
486
+ {
487
+ "id": "149",
488
+ "score": "0"
489
+ },
490
+ {
491
+ "id": "163",
492
+ "score": "1"
493
+ },
494
+ {
495
+ "id": "137",
496
+ "score": "0"
497
+ },
498
+ {
499
+ "id": "185",
500
+ "score": "1"
501
+ },
502
+ {
503
+ "id": "129",
504
+ "score": "0"
505
+ },
506
+ {
507
+ "id": "69",
508
+ "score": "1"
509
+ },
510
+ {
511
+ "id": "87",
512
+ "score": "1"
513
+ },
514
+ {
515
+ "id": "131",
516
+ "score": "0"
517
+ },
518
+ {
519
+ "id": "132",
520
+ "score": "1"
521
+ },
522
+ {
523
+ "id": "155",
524
+ "score": "1"
525
+ },
526
+ {
527
+ "id": "101",
528
+ "score": "1"
529
+ },
530
+ {
531
+ "id": "102",
532
+ "score": "1"
533
+ },
534
+ {
535
+ "id": "112",
536
+ "score": "1"
537
+ },
538
+ {
539
+ "id": "118",
540
+ "score": "1"
541
+ },
542
+ {
543
+ "id": "126",
544
+ "score": "0"
545
+ },
546
+ {
547
+ "id": "128",
548
+ "score": "0"
549
+ },
550
+ {
551
+ "id": "130",
552
+ "score": "1"
553
+ },
554
+ {
555
+ "id": "148",
556
+ "score": "0"
557
+ },
558
+ {
559
+ "id": "152",
560
+ "score": "1"
561
+ },
562
+ {
563
+ "id": "153",
564
+ "score": "1"
565
+ },
566
+ {
567
+ "id": "86",
568
+ "score": "1"
569
+ },
570
+ {
571
+ "id": "96",
572
+ "score": "1"
573
+ },
574
+ {
575
+ "id": "146",
576
+ "score": "1"
577
+ },
578
+ {
579
+ "id": "78",
580
+ "score": "1"
581
+ },
582
+ {
583
+ "id": "144",
584
+ "score": "1"
585
+ },
586
+ {
587
+ "id": "151",
588
+ "score": "1"
589
+ },
590
+ {
591
+ "id": "161",
592
+ "score": "1"
593
+ },
594
+ {
595
+ "id": "66",
596
+ "score": "1"
597
+ },
598
+ {
599
+ "id": "140",
600
+ "score": "1"
601
+ },
602
+ {
603
+ "id": "109",
604
+ "score": "1"
605
+ },
606
+ {
607
+ "id": "110",
608
+ "score": "0"
609
+ },
610
+ {
611
+ "id": "114",
612
+ "score": "1"
613
+ },
614
+ {
615
+ "id": "119",
616
+ "score": "1"
617
+ },
618
+ {
619
+ "id": "123",
620
+ "score": "0"
621
+ },
622
+ {
623
+ "id": "124",
624
+ "score": "0"
625
+ },
626
+ {
627
+ "id": "125",
628
+ "score": "0"
629
+ },
630
+ {
631
+ "id": "136",
632
+ "score": "0"
633
+ },
634
+ {
635
+ "id": "142",
636
+ "score": "1"
637
+ },
638
+ {
639
+ "id": "143",
640
+ "score": "1"
641
+ },
642
+ {
643
+ "id": "154",
644
+ "score": "0"
645
+ },
646
+ {
647
+ "id": "182",
648
+ "score": "1"
649
+ },
650
+ {
651
+ "id": "139",
652
+ "score": "1"
653
+ },
654
+ {
655
+ "id": "179",
656
+ "score": "1"
657
+ },
658
+ {
659
+ "id": "186",
660
+ "score": "1"
661
+ },
662
+ {
663
+ "id": "160",
664
+ "score": "1"
665
+ },
666
+ {
667
+ "id": "92",
668
+ "score": "1"
669
+ },
670
+ {
671
+ "id": "199",
672
+ "score": "0"
673
+ },
674
+ {
675
+ "id": "157",
676
+ "score": "1"
677
+ },
678
+ {
679
+ "id": "183",
680
+ "score": "0"
681
+ },
682
+ {
683
+ "id": "191",
684
+ "score": "1"
685
+ },
686
+ {
687
+ "id": "147",
688
+ "score": "1"
689
+ },
690
+ {
691
+ "id": "167",
692
+ "score": "1"
693
+ },
694
+ {
695
+ "id": "187",
696
+ "score": "1"
697
+ },
698
+ {
699
+ "id": "168",
700
+ "score": "1"
701
+ },
702
+ {
703
+ "id": "170",
704
+ "score": "1"
705
+ },
706
+ {
707
+ "id": "173",
708
+ "score": "1"
709
+ },
710
+ {
711
+ "id": "176",
712
+ "score": "1"
713
+ },
714
+ {
715
+ "id": "177",
716
+ "score": "1"
717
+ },
718
+ {
719
+ "id": "190",
720
+ "score": "1"
721
+ },
722
+ {
723
+ "id": "115",
724
+ "score": "0"
725
+ },
726
+ {
727
+ "id": "169",
728
+ "score": "1"
729
+ },
730
+ {
731
+ "id": "192",
732
+ "score": "1"
733
+ },
734
+ {
735
+ "id": "105",
736
+ "score": "1"
737
+ },
738
+ {
739
+ "id": "165",
740
+ "score": "1"
741
+ },
742
+ {
743
+ "id": "198",
744
+ "score": "1"
745
+ },
746
+ {
747
+ "id": "197",
748
+ "score": "1"
749
+ },
750
+ {
751
+ "id": "156",
752
+ "score": "1"
753
+ },
754
+ {
755
+ "id": "159",
756
+ "score": "1"
757
+ },
758
+ {
759
+ "id": "162",
760
+ "score": "1"
761
+ },
762
+ {
763
+ "id": "166",
764
+ "score": "1"
765
+ },
766
+ {
767
+ "id": "172",
768
+ "score": "1"
769
+ },
770
+ {
771
+ "id": "175",
772
+ "score": "1"
773
+ },
774
+ {
775
+ "id": "178",
776
+ "score": "0"
777
+ },
778
+ {
779
+ "id": "181",
780
+ "score": "1"
781
+ },
782
+ {
783
+ "id": "184",
784
+ "score": "1"
785
+ },
786
+ {
787
+ "id": "188",
788
+ "score": "1"
789
+ },
790
+ {
791
+ "id": "193",
792
+ "score": "1"
793
+ },
794
+ {
795
+ "id": "189",
796
+ "score": "1"
797
+ },
798
+ {
799
+ "id": "196",
800
+ "score": "1"
801
+ }
802
+ ]
eval_milebench/ActionPrediction/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionPrediction/pred_with_extracted.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionSequence.log ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/1 [00:00<?, ?it/s]
 
 
1
  0%| | 0/1 [00:00<?, ?it/s]
 
 
2
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
3
  0%| | 0/6 [00:00<?, ?it/s]
4
  17%|β–ˆβ–‹ | 1/6 [00:03<00:18, 3.68s/it]
5
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:04<00:07, 2.00s/it]
6
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:06<00:06, 2.15s/it]
7
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:08<00:03, 1.79s/it]
8
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:09<00:01, 1.58s/it]
 
 
9
  0%| | 0/6 [00:00<?, ?it/s]
10
  17%|β–ˆβ–‹ | 1/6 [00:03<00:19, 3.97s/it]
11
  33%|οΏ½οΏ½οΏ½β–ˆβ–ˆβ–Ž | 2/6 [00:06<00:12, 3.08s/it]
12
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:07<00:06, 2.06s/it]
13
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:08<00:03, 1.67s/it]
14
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:10<00:01, 1.95s/it]
 
 
15
  0%| | 0/6 [00:00<?, ?it/s]
16
  17%|β–ˆβ–‹ | 1/6 [00:04<00:20, 4.02s/it]
17
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:06<00:12, 3.12s/it]
18
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:07<00:06, 2.22s/it]
19
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:08<00:03, 1.67s/it]
20
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:10<00:01, 1.93s/it]
 
 
 
21
  0%| | 0/3 [00:00<?, ?it/s]
22
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:02, 1.19s/it]
23
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.73s/it]
 
 
24
  0%| | 0/3 [00:00<?, ?it/s]
25
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.72s/it]
26
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.21s/it]
 
 
27
  0%| | 0/3 [00:00<?, ?it/s]
28
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:04<00:08, 4.09s/it]
29
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:06<00:03, 3.01s/it]
 
 
30
  0%| | 0/1 [00:00<?, ?it/s]
 
 
31
  0%| | 0/1 [00:00<?, ?it/s]
 
 
32
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
33
  0%| | 0/1 [00:00<?, ?it/s]
 
 
34
  0%| | 0/1 [00:00<?, ?it/s]
 
 
35
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
36
  0%| | 0/1 [00:00<?, ?it/s]
 
 
37
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
38
  0%| | 0/3 [00:00<?, ?it/s]
39
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.44s/it]
40
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:01, 1.81s/it]
 
 
 
41
  0%| | 0/1 [00:00<?, ?it/s]
 
 
42
  0%| | 0/3 [00:00<?, ?it/s]
43
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:04<00:08, 4.03s/it]
44
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.13s/it]
 
 
45
  0%| | 0/1 [00:00<?, ?it/s]
 
 
46
  0%| | 0/4 [00:00<?, ?it/s]
47
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:04<00:12, 4.02s/it]
48
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:04<00:04, 2.22s/it]
49
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:05<00:01, 1.54s/it]
 
 
 
 
50
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
51
  0%| | 0/1 [00:00<?, ?it/s]
 
 
52
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
53
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
54
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
55
  0%| | 0/9 [00:00<?, ?it/s]
56
  11%|β–ˆ | 1/9 [00:02<00:22, 2.81s/it]
57
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:11, 1.63s/it]
58
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.46s/it]
59
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:05, 1.19s/it]
60
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:07<00:06, 1.57s/it]
61
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:10<00:06, 2.02s/it]
62
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:13<00:04, 2.29s/it]
63
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:14<00:01, 1.80s/it]
 
 
 
 
 
64
  0%| | 0/1 [00:00<?, ?it/s]
 
65
  0%| | 0/9 [00:00<?, ?it/s]
66
  11%|β–ˆ | 1/9 [00:01<00:13, 1.67s/it]
67
  22%|β–ˆβ–ˆβ– | 2/9 [00:04<00:14, 2.08s/it]
68
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.48s/it]
69
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:07<00:09, 1.91s/it]
70
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:10<00:09, 2.29s/it]
71
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:12<00:06, 2.09s/it]
72
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:13<00:03, 1.76s/it]
73
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:13<00:01, 1.46s/it]
 
 
 
 
74
  0%| | 0/1 [00:00<?, ?it/s]
 
 
75
  0%| | 0/2 [00:00<?, ?it/s]
76
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.46s/it]
 
 
 
 
77
  0%| | 0/9 [00:00<?, ?it/s]
78
  11%|β–ˆ | 1/9 [00:02<00:23, 2.89s/it]
79
  22%|β–ˆβ–ˆβ– | 2/9 [00:05<00:19, 2.84s/it]
80
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:07<00:13, 2.28s/it]
81
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:09<00:10, 2.09s/it]
82
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:11<00:08, 2.17s/it]
83
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:12<00:05, 1.72s/it]
84
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:14<00:03, 1.81s/it]
85
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:15<00:01, 1.61s/it]
 
 
 
 
86
  0%| | 0/2 [00:00<?, ?it/s]
87
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.41s/it]
 
 
 
 
 
 
88
  0%| | 0/1 [00:00<?, ?it/s]
 
 
89
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
90
  0%| | 0/2 [00:00<?, ?it/s]
91
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.74s/it]
 
 
92
  0%| | 0/1 [00:00<?, ?it/s]
 
 
93
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
94
  0%| | 0/1 [00:00<?, ?it/s]
 
 
95
  0%| | 0/2 [00:00<?, ?it/s]
96
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.29s/it]
 
 
 
 
 
97
  0%| | 0/2 [00:00<?, ?it/s]
98
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.57s/it]
 
 
 
99
  0%| | 0/1 [00:00<?, ?it/s]
 
100
  0%| | 0/1 [00:00<?, ?it/s]
 
 
101
  0%| | 0/1 [00:00<?, ?it/s]
 
 
102
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
103
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
104
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
105
  0%| | 0/2 [00:00<?, ?it/s]
106
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.21s/it]
 
 
107
  0%| | 0/2 [00:00<?, ?it/s]
108
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.92s/it]
 
 
 
 
109
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
110
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
111
  0%| | 0/2 [00:00<?, ?it/s]
112
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.74s/it]
 
 
 
 
 
113
  0%| | 0/1 [00:00<?, ?it/s]
 
114
  0%| | 0/1 [00:00<?, ?it/s]
 
 
115
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
116
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
117
  0%| | 0/2 [00:00<?, ?it/s]
118
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.32s/it]
 
 
 
 
 
 
 
 
 
119
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
120
  0%| | 0/2 [00:00<?, ?it/s]
121
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.38s/it]
 
 
 
122
  0%| | 0/1 [00:00<?, ?it/s]
 
 
123
  0%| | 0/1 [00:00<?, ?it/s]
 
 
124
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
125
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
126
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
127
  0%| | 0/2 [00:00<?, ?it/s]
128
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.03s/it]
 
 
 
 
129
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
130
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
131
  0%| | 0/1 [00:00<?, ?it/s]
 
 
132
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
133
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
62
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
63
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionSequence, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
64
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionSequence, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
65
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionSequence, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
66
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
67
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task ActionSequence, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
68
+ Initialization Finished
69
+ Predicting ActionSequence Using internvl
70
+ Proceeding 44-length images samples | Num: 5
71
+ Initialization Finished
72
+ Predicting ActionSequence Using internvl
73
+ Proceeding 44-length images samples | Num: 5
74
+ Initialization Finished
75
+ Predicting ActionSequence Using internvl
76
+ Proceeding 44-length images samples | Num: 5
77
+ Initialization Finished
78
+ Predicting ActionSequence Using internvl
79
+ Proceeding 44-length images samples | Num: 5
80
+
81
  0%| | 0/1 [00:00<?, ?it/s]
82
+ Proceeding 31-length images samples | Num: 24
83
+
84
  0%| | 0/1 [00:00<?, ?it/s]
85
+ Proceeding 31-length images samples | Num: 24
86
+
87
  0%| | 0/1 [00:00<?, ?it/s]
88
+ Proceeding 31-length images samples | Num: 24
89
+ Proceeding 31-length images samples | Num: 24
90
+
91
  0%| | 0/6 [00:00<?, ?it/s]
92
  17%|β–ˆβ–‹ | 1/6 [00:03<00:18, 3.68s/it]
93
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:04<00:07, 2.00s/it]
94
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:06<00:06, 2.15s/it]
95
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:08<00:03, 1.79s/it]
96
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:09<00:01, 1.58s/it]
97
+ Proceeding 32-length images samples | Num: 13
98
+
99
  0%| | 0/6 [00:00<?, ?it/s]
100
  17%|β–ˆβ–‹ | 1/6 [00:03<00:19, 3.97s/it]
101
  33%|οΏ½οΏ½οΏ½β–ˆβ–ˆβ–Ž | 2/6 [00:06<00:12, 3.08s/it]
102
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:07<00:06, 2.06s/it]
103
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:08<00:03, 1.67s/it]
104
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:10<00:01, 1.95s/it]
105
+ Proceeding 32-length images samples | Num: 13
106
+
107
  0%| | 0/6 [00:00<?, ?it/s]
108
  17%|β–ˆβ–‹ | 1/6 [00:04<00:20, 4.02s/it]
109
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:06<00:12, 3.12s/it]
110
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:07<00:06, 2.22s/it]
111
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:08<00:03, 1.67s/it]
112
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:10<00:01, 1.93s/it]
113
+ Proceeding 32-length images samples | Num: 13
114
+ Proceeding 32-length images samples | Num: 13
115
+
116
  0%| | 0/3 [00:00<?, ?it/s]
117
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:02, 1.19s/it]
118
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.73s/it]
119
+ Proceeding 16-length images samples | Num: 4
120
+
121
  0%| | 0/3 [00:00<?, ?it/s]
122
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.72s/it]
123
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.21s/it]
124
+ Proceeding 16-length images samples | Num: 4
125
+
126
  0%| | 0/3 [00:00<?, ?it/s]
127
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:04<00:08, 4.09s/it]
128
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:06<00:03, 3.01s/it]
129
+ Proceeding 16-length images samples | Num: 4
130
+
131
  0%| | 0/1 [00:00<?, ?it/s]
132
+ Proceeding 45-length images samples | Num: 4
133
+
134
  0%| | 0/1 [00:00<?, ?it/s]
135
+ Proceeding 45-length images samples | Num: 4
136
+
137
  0%| | 0/1 [00:00<?, ?it/s]
138
+ Proceeding 45-length images samples | Num: 4
139
+ Proceeding 16-length images samples | Num: 4
140
+ Proceeding 45-length images samples | Num: 4
141
+
142
  0%| | 0/1 [00:00<?, ?it/s]
143
+ Proceeding 39-length images samples | Num: 3
144
+
145
  0%| | 0/1 [00:00<?, ?it/s]
146
+ Proceeding 39-length images samples | Num: 3
147
+
148
  0%| | 0/1 [00:00<?, ?it/s]
149
+ Proceeding 39-length images samples | Num: 3
150
+ Proceeding 39-length images samples | Num: 3
151
+
152
+ Proceeding 69-length images samples | Num: 1
153
+
154
+ Proceeding 27-length images samples | Num: 14
155
+
156
  0%| | 0/1 [00:00<?, ?it/s]
157
+ Proceeding 69-length images samples | Num: 1
158
+
159
  0%| | 0/1 [00:00<?, ?it/s]
160
+ Proceeding 69-length images samples | Num: 1
161
+ Proceeding 69-length images samples | Num: 1
162
+
163
+ Proceeding 27-length images samples | Num: 14
164
+
165
+ Proceeding 27-length images samples | Num: 14
166
+
167
  0%| | 0/3 [00:00<?, ?it/s]
168
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.44s/it]
169
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:01, 1.81s/it]
170
+ Proceeding 38-length images samples | Num: 4
171
+ Proceeding 27-length images samples | Num: 14
172
+
173
  0%| | 0/1 [00:00<?, ?it/s]
174
+ Proceeding 21-length images samples | Num: 5
175
+
176
  0%| | 0/3 [00:00<?, ?it/s]
177
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:04<00:08, 4.03s/it]
178
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.13s/it]
179
+ Proceeding 38-length images samples | Num: 4
180
+
181
  0%| | 0/1 [00:00<?, ?it/s]
182
+ Proceeding 8-length images samples | Num: 1
183
+
184
  0%| | 0/4 [00:00<?, ?it/s]
185
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:04<00:12, 4.02s/it]
186
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:04<00:04, 2.22s/it]
187
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:05<00:01, 1.54s/it]
188
+ Proceeding 38-length images samples | Num: 4
189
+
190
+ Proceeding 13-length images samples | Num: 2
191
+
192
  0%| | 0/1 [00:00<?, ?it/s]
193
+ Proceeding 21-length images samples | Num: 5
194
+
195
+ Proceeding 30-length images samples | Num: 37
196
+
197
  0%| | 0/1 [00:00<?, ?it/s]
198
+ Proceeding 21-length images samples | Num: 5
199
+
200
  0%| | 0/1 [00:00<?, ?it/s]
201
+ Proceeding 8-length images samples | Num: 1
202
+ Proceeding 38-length images samples | Num: 4
203
+
204
+ Proceeding 13-length images samples | Num: 2
205
+
206
  0%| | 0/1 [00:00<?, ?it/s]
207
+ Proceeding 8-length images samples | Num: 1
208
+
209
+ Proceeding 30-length images samples | Num: 37
210
+ Proceeding 13-length images samples | Num: 2
211
+
212
+ Proceeding 21-length images samples | Num: 5
213
+
214
  0%| | 0/1 [00:00<?, ?it/s]
215
+ Proceeding 30-length images samples | Num: 37
216
+ Proceeding 8-length images samples | Num: 1
217
+ Proceeding 13-length images samples | Num: 2
218
+ Proceeding 30-length images samples | Num: 37
219
+
220
  0%| | 0/9 [00:00<?, ?it/s]
221
  11%|β–ˆ | 1/9 [00:02<00:22, 2.81s/it]
222
  22%|β–ˆβ–ˆβ– | 2/9 [00:03<00:11, 1.63s/it]
223
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.46s/it]
224
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:05<00:05, 1.19s/it]
225
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:07<00:06, 1.57s/it]
226
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:10<00:06, 2.02s/it]
227
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:13<00:04, 2.29s/it]
228
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:14<00:01, 1.80s/it]
229
+ Proceeding 36-length images samples | Num: 2
230
+
231
+ Proceeding 24-length images samples | Num: 4
232
+ Proceeding 34-length images samples | Num: 8
233
+
234
  0%| | 0/1 [00:00<?, ?it/s]
235
+
236
  0%| | 0/9 [00:00<?, ?it/s]
237
  11%|β–ˆ | 1/9 [00:01<00:13, 1.67s/it]
238
  22%|β–ˆβ–ˆβ– | 2/9 [00:04<00:14, 2.08s/it]
239
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:04<00:08, 1.48s/it]
240
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:07<00:09, 1.91s/it]
241
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:10<00:09, 2.29s/it]
242
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:12<00:06, 2.09s/it]
243
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:13<00:03, 1.76s/it]
244
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:13<00:01, 1.46s/it]
245
+ Proceeding 36-length images samples | Num: 2
246
+ Proceeding 24-length images samples | Num: 4
247
+
248
+
249
  0%| | 0/1 [00:00<?, ?it/s]
250
+ Proceeding 34-length images samples | Num: 8
251
+
252
  0%| | 0/2 [00:00<?, ?it/s]
253
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.46s/it]
254
+ Proceeding 11-length images samples | Num: 2
255
+
256
+ Proceeding 15-length images samples | Num: 2
257
+
258
  0%| | 0/9 [00:00<?, ?it/s]
259
  11%|β–ˆ | 1/9 [00:02<00:23, 2.89s/it]
260
  22%|β–ˆβ–ˆβ– | 2/9 [00:05<00:19, 2.84s/it]
261
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:07<00:13, 2.28s/it]
262
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:09<00:10, 2.09s/it]
263
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:11<00:08, 2.17s/it]
264
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:12<00:05, 1.72s/it]
265
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:14<00:03, 1.81s/it]
266
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:15<00:01, 1.61s/it]
267
+ Proceeding 36-length images samples | Num: 2
268
+
269
+ Proceeding 28-length images samples | Num: 7
270
+
271
  0%| | 0/2 [00:00<?, ?it/s]
272
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.41s/it]
273
+ Proceeding 11-length images samples | Num: 2
274
+ Proceeding 15-length images samples | Num: 2
275
+
276
+
277
+ Proceeding 28-length images samples | Num: 7
278
+
279
  0%| | 0/1 [00:00<?, ?it/s]
280
+ Proceeding 29-length images samples | Num: 7
281
+
282
  0%| | 0/1 [00:00<?, ?it/s]
283
+ Proceeding 24-length images samples | Num: 4
284
+ Proceeding 36-length images samples | Num: 2
285
+
286
  0%| | 0/2 [00:00<?, ?it/s]
287
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.74s/it]
288
+ Proceeding 29-length images samples | Num: 7
289
+
290
  0%| | 0/1 [00:00<?, ?it/s]
291
+ Proceeding 34-length images samples | Num: 8
292
+
293
  0%| | 0/1 [00:00<?, ?it/s]
294
+ Proceeding 25-length images samples | Num: 3
295
+ Proceeding 19-length images samples | Num: 4
296
+
297
+ Proceeding 24-length images samples | Num: 4
298
+
299
  0%| | 0/1 [00:00<?, ?it/s]
300
+ Proceeding 51-length images samples | Num: 1
301
+
302
  0%| | 0/2 [00:00<?, ?it/s]
303
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.29s/it]
304
+ Proceeding 25-length images samples | Num: 3
305
+
306
+ Proceeding 35-length images samples | Num: 7
307
+ Proceeding 34-length images samples | Num: 8
308
+
309
  0%| | 0/2 [00:00<?, ?it/s]
310
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.57s/it]
311
+ Proceeding 11-length images samples | Num: 2
312
+ Proceeding 19-length images samples | Num: 4
313
+
314
  0%| | 0/1 [00:00<?, ?it/s]
315
+
316
  0%| | 0/1 [00:00<?, ?it/s]
317
+ Proceeding 15-length images samples | Num: 2
318
+
319
  0%| | 0/1 [00:00<?, ?it/s]
320
+ Proceeding 51-length images samples | Num: 1
321
+
322
  0%| | 0/1 [00:00<?, ?it/s]
323
+ Proceeding 40-length images samples | Num: 2
324
+
325
+ Proceeding 35-length images samples | Num: 7
326
+
327
+ Proceeding 33-length images samples | Num: 5
328
+
329
  0%| | 0/1 [00:00<?, ?it/s]
330
+ Proceeding 28-length images samples | Num: 7
331
+ Proceeding 11-length images samples | Num: 2
332
+ Proceeding 15-length images samples | Num: 2
333
+
334
  0%| | 0/1 [00:00<?, ?it/s]
335
+ Proceeding 60-length images samples | Num: 1
336
+ Proceeding 28-length images samples | Num: 7
337
+
338
+ Proceeding 49-length images samples | Num: 2
339
+
340
+ Proceeding 23-length images samples | Num: 7
341
+
342
  0%| | 0/2 [00:00<?, ?it/s]
343
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.21s/it]
344
+ Proceeding 40-length images samples | Num: 2
345
+
346
  0%| | 0/2 [00:00<?, ?it/s]
347
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.92s/it]
348
+ Proceeding 29-length images samples | Num: 7
349
+
350
+ Proceeding 33-length images samples | Num: 5
351
+
352
  0%| | 0/1 [00:00<?, ?it/s]
353
+ Proceeding 70-length images samples | Num: 1
354
+ Proceeding 37-length images samples | Num: 3
355
+
356
+
357
+ Proceeding 22-length images samples | Num: 2
358
+
359
+ Proceeding 20-length images samples | Num: 2
360
+ Proceeding 29-length images samples | Num: 7
361
+
362
  0%| | 0/1 [00:00<?, ?it/s]
363
+ Proceeding 60-length images samples | Num: 1
364
+
365
+ Proceeding 17-length images samples | Num: 5
366
+
367
+ Proceeding 49-length images samples | Num: 2
368
+
369
  0%| | 0/2 [00:00<?, ?it/s]
370
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.74s/it]
371
+ Proceeding 25-length images samples | Num: 3
372
+
373
+ Proceeding 23-length images samples | Num: 7
374
+ Proceeding 18-length images samples | Num: 4
375
+
376
  0%| | 0/1 [00:00<?, ?it/s]
377
+
378
  0%| | 0/1 [00:00<?, ?it/s]
379
+ Proceeding 19-length images samples | Num: 4
380
+
381
  0%| | 0/1 [00:00<?, ?it/s]
382
+ Proceeding 50-length images samples | Num: 1
383
+ Proceeding 25-length images samples | Num: 3
384
+
385
  0%| | 0/1 [00:00<?, ?it/s]
386
+ Proceeding 51-length images samples | Num: 1
387
+
388
+ Proceeding 26-length images samples | Num: 1
389
+
390
  0%| | 0/2 [00:00<?, ?it/s]
391
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.32s/it]
392
+ Proceeding 70-length images samples | Num: 1
393
+
394
+ Proceeding 35-length images samples | Num: 7
395
+
396
+
397
+ Proceeding 37-length images samples | Num: 3
398
+ Proceeding 19-length images samples | Num: 4
399
+ Proceeding 51-length images samples | Num: 1
400
+
401
  0%| | 0/1 [00:00<?, ?it/s]
402
+ Proceeding 22-length images samples | Num: 2
403
+
404
+ Proceeding 20-length images samples | Num: 2
405
+
406
+ Proceeding 17-length images samples | Num: 5
407
+
408
  0%| | 0/2 [00:00<?, ?it/s]
409
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.38s/it]
410
+ Proceeding 40-length images samples | Num: 2
411
+ Proceeding 35-length images samples | Num: 7
412
+
413
  0%| | 0/1 [00:00<?, ?it/s]
414
+ Proceeding 18-length images samples | Num: 4
415
+
416
  0%| | 0/1 [00:00<?, ?it/s]
417
+ Proceeding 33-length images samples | Num: 5
418
+
419
  0%| | 0/1 [00:00<?, ?it/s]
420
+ Proceeding 50-length images samples | Num: 1
421
+
422
+ Proceeding 26-length images samples | Num: 1
423
+
424
+
425
  0%| | 0/1 [00:00<?, ?it/s]
426
+ Proceeding 60-length images samples | Num: 1
427
+ Proceeding 40-length images samples | Num: 2
428
+
429
+ Proceeding 49-length images samples | Num: 2
430
+ Proceeding 33-length images samples | Num: 5
431
+
432
  0%| | 0/1 [00:00<?, ?it/s]
433
+ Proceeding 23-length images samples | Num: 7
434
+ Proceeding 60-length images samples | Num: 1
435
+
436
  0%| | 0/2 [00:00<?, ?it/s]
437
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.03s/it]
438
+ Proceeding 70-length images samples | Num: 1
439
+
440
+ Proceeding 37-length images samples | Num: 3
441
+
442
  0%| | 0/1 [00:00<?, ?it/s]
443
+ Proceeding 22-length images samples | Num: 2
444
+ Proceeding 49-length images samples | Num: 2
445
+
446
  0%| | 0/1 [00:00<?, ?it/s]
447
+ Proceeding 20-length images samples | Num: 2
448
+ Proceeding 23-length images samples | Num: 7
449
+
450
  0%| | 0/1 [00:00<?, ?it/s]
451
+ Proceeding 17-length images samples | Num: 5
452
+
453
  0%| | 0/1 [00:00<?, ?it/s]
454
+ Proceeding 18-length images samples | Num: 4
455
+ Proceeding 70-length images samples | Num: 1
456
+
457
  0%| | 0/1 [00:00<?, ?it/s]
458
+ Proceeding 50-length images samples | Num: 1
459
+
460
+ Proceeding 26-length images samples | Num: 1
461
+
462
+ Proceeding 37-length images samples | Num: 3
463
+ Proceeding 22-length images samples | Num: 2
464
+ Proceeding 20-length images samples | Num: 2
465
+ Proceeding 17-length images samples | Num: 5
466
+ Proceeding 18-length images samples | Num: 4
467
+ Proceeding 50-length images samples | Num: 1
468
+ Proceeding 26-length images samples | Num: 1
469
+ evaluating ActionSequence ...
470
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/ActionSequence/ActionSequence_240803234618.json
471
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset ActionSequence --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/ActionSequence
472
+ internvl: ActionSequence: {'Accuracy': 0.715, 'image_quantity_level-Accuracy': {'Few': 0, 'Medium': 0.7153284671532847, 'Many': 0.7142857142857143}, 'image_quantity_level-Result': {'Few': [0, 0], 'Medium': [98, 137], 'Many': [45, 63]}}
eval_milebench/ActionSequence/ActionSequence_240803234618.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionSequence/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Accuracy": 0.715, "image_quantity_level-Accuracy": {"Few": 0, "Medium": 0.7153284671532847, "Many": 0.7142857142857143}, "image_quantity_level-Result": {"Few": [0, 0], "Medium": [98, 137], "Many": [45, 63]}}
eval_milebench/ActionSequence/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "1"
5
+ },
6
+ {
7
+ "id": "36",
8
+ "score": "1"
9
+ },
10
+ {
11
+ "id": "1",
12
+ "score": "1"
13
+ },
14
+ {
15
+ "id": "4",
16
+ "score": "1"
17
+ },
18
+ {
19
+ "id": "13",
20
+ "score": "1"
21
+ },
22
+ {
23
+ "id": "22",
24
+ "score": "1"
25
+ },
26
+ {
27
+ "id": "24",
28
+ "score": "1"
29
+ },
30
+ {
31
+ "id": "33",
32
+ "score": "0"
33
+ },
34
+ {
35
+ "id": "2",
36
+ "score": "1"
37
+ },
38
+ {
39
+ "id": "14",
40
+ "score": "1"
41
+ },
42
+ {
43
+ "id": "44",
44
+ "score": "0"
45
+ },
46
+ {
47
+ "id": "82",
48
+ "score": "1"
49
+ },
50
+ {
51
+ "id": "3",
52
+ "score": "0"
53
+ },
54
+ {
55
+ "id": "5",
56
+ "score": "1"
57
+ },
58
+ {
59
+ "id": "6",
60
+ "score": "1"
61
+ },
62
+ {
63
+ "id": "7",
64
+ "score": "1"
65
+ },
66
+ {
67
+ "id": "8",
68
+ "score": "1"
69
+ },
70
+ {
71
+ "id": "23",
72
+ "score": "0"
73
+ },
74
+ {
75
+ "id": "26",
76
+ "score": "0"
77
+ },
78
+ {
79
+ "id": "47",
80
+ "score": "1"
81
+ },
82
+ {
83
+ "id": "9",
84
+ "score": "1"
85
+ },
86
+ {
87
+ "id": "10",
88
+ "score": "1"
89
+ },
90
+ {
91
+ "id": "41",
92
+ "score": "1"
93
+ },
94
+ {
95
+ "id": "11",
96
+ "score": "0"
97
+ },
98
+ {
99
+ "id": "12",
100
+ "score": "1"
101
+ },
102
+ {
103
+ "id": "15",
104
+ "score": "0"
105
+ },
106
+ {
107
+ "id": "18",
108
+ "score": "1"
109
+ },
110
+ {
111
+ "id": "19",
112
+ "score": "0"
113
+ },
114
+ {
115
+ "id": "28",
116
+ "score": "1"
117
+ },
118
+ {
119
+ "id": "29",
120
+ "score": "1"
121
+ },
122
+ {
123
+ "id": "34",
124
+ "score": "1"
125
+ },
126
+ {
127
+ "id": "38",
128
+ "score": "1"
129
+ },
130
+ {
131
+ "id": "48",
132
+ "score": "0"
133
+ },
134
+ {
135
+ "id": "51",
136
+ "score": "0"
137
+ },
138
+ {
139
+ "id": "58",
140
+ "score": "0"
141
+ },
142
+ {
143
+ "id": "16",
144
+ "score": "1"
145
+ },
146
+ {
147
+ "id": "17",
148
+ "score": "1"
149
+ },
150
+ {
151
+ "id": "20",
152
+ "score": "1"
153
+ },
154
+ {
155
+ "id": "31",
156
+ "score": "1"
157
+ },
158
+ {
159
+ "id": "21",
160
+ "score": "1"
161
+ },
162
+ {
163
+ "id": "25",
164
+ "score": "1"
165
+ },
166
+ {
167
+ "id": "30",
168
+ "score": "1"
169
+ },
170
+ {
171
+ "id": "35",
172
+ "score": "1"
173
+ },
174
+ {
175
+ "id": "32",
176
+ "score": "1"
177
+ },
178
+ {
179
+ "id": "66",
180
+ "score": "1"
181
+ },
182
+ {
183
+ "id": "37",
184
+ "score": "1"
185
+ },
186
+ {
187
+ "id": "40",
188
+ "score": "0"
189
+ },
190
+ {
191
+ "id": "42",
192
+ "score": "0"
193
+ },
194
+ {
195
+ "id": "49",
196
+ "score": "1"
197
+ },
198
+ {
199
+ "id": "62",
200
+ "score": "1"
201
+ },
202
+ {
203
+ "id": "50",
204
+ "score": "1"
205
+ },
206
+ {
207
+ "id": "52",
208
+ "score": "1"
209
+ },
210
+ {
211
+ "id": "64",
212
+ "score": "1"
213
+ },
214
+ {
215
+ "id": "53",
216
+ "score": "1"
217
+ },
218
+ {
219
+ "id": "56",
220
+ "score": "1"
221
+ },
222
+ {
223
+ "id": "59",
224
+ "score": "1"
225
+ },
226
+ {
227
+ "id": "105",
228
+ "score": "1"
229
+ },
230
+ {
231
+ "id": "75",
232
+ "score": "0"
233
+ },
234
+ {
235
+ "id": "79",
236
+ "score": "0"
237
+ },
238
+ {
239
+ "id": "106",
240
+ "score": "0"
241
+ },
242
+ {
243
+ "id": "117",
244
+ "score": "1"
245
+ },
246
+ {
247
+ "id": "134",
248
+ "score": "1"
249
+ },
250
+ {
251
+ "id": "139",
252
+ "score": "1"
253
+ },
254
+ {
255
+ "id": "135",
256
+ "score": "1"
257
+ },
258
+ {
259
+ "id": "163",
260
+ "score": "1"
261
+ },
262
+ {
263
+ "id": "196",
264
+ "score": "1"
265
+ },
266
+ {
267
+ "id": "131",
268
+ "score": "1"
269
+ },
270
+ {
271
+ "id": "45",
272
+ "score": "1"
273
+ },
274
+ {
275
+ "id": "46",
276
+ "score": "1"
277
+ },
278
+ {
279
+ "id": "55",
280
+ "score": "0"
281
+ },
282
+ {
283
+ "id": "68",
284
+ "score": "1"
285
+ },
286
+ {
287
+ "id": "73",
288
+ "score": "1"
289
+ },
290
+ {
291
+ "id": "74",
292
+ "score": "0"
293
+ },
294
+ {
295
+ "id": "87",
296
+ "score": "1"
297
+ },
298
+ {
299
+ "id": "107",
300
+ "score": "1"
301
+ },
302
+ {
303
+ "id": "108",
304
+ "score": "1"
305
+ },
306
+ {
307
+ "id": "78",
308
+ "score": "0"
309
+ },
310
+ {
311
+ "id": "109",
312
+ "score": "1"
313
+ },
314
+ {
315
+ "id": "63",
316
+ "score": "0"
317
+ },
318
+ {
319
+ "id": "57",
320
+ "score": "1"
321
+ },
322
+ {
323
+ "id": "60",
324
+ "score": "1"
325
+ },
326
+ {
327
+ "id": "90",
328
+ "score": "1"
329
+ },
330
+ {
331
+ "id": "104",
332
+ "score": "1"
333
+ },
334
+ {
335
+ "id": "39",
336
+ "score": "1"
337
+ },
338
+ {
339
+ "id": "67",
340
+ "score": "1"
341
+ },
342
+ {
343
+ "id": "188",
344
+ "score": "1"
345
+ },
346
+ {
347
+ "id": "61",
348
+ "score": "1"
349
+ },
350
+ {
351
+ "id": "69",
352
+ "score": "1"
353
+ },
354
+ {
355
+ "id": "72",
356
+ "score": "1"
357
+ },
358
+ {
359
+ "id": "89",
360
+ "score": "0"
361
+ },
362
+ {
363
+ "id": "92",
364
+ "score": "1"
365
+ },
366
+ {
367
+ "id": "93",
368
+ "score": "0"
369
+ },
370
+ {
371
+ "id": "96",
372
+ "score": "1"
373
+ },
374
+ {
375
+ "id": "102",
376
+ "score": "1"
377
+ },
378
+ {
379
+ "id": "103",
380
+ "score": "0"
381
+ },
382
+ {
383
+ "id": "76",
384
+ "score": "1"
385
+ },
386
+ {
387
+ "id": "27",
388
+ "score": "1"
389
+ },
390
+ {
391
+ "id": "43",
392
+ "score": "0"
393
+ },
394
+ {
395
+ "id": "77",
396
+ "score": "0"
397
+ },
398
+ {
399
+ "id": "65",
400
+ "score": "1"
401
+ },
402
+ {
403
+ "id": "157",
404
+ "score": "1"
405
+ },
406
+ {
407
+ "id": "86",
408
+ "score": "0"
409
+ },
410
+ {
411
+ "id": "148",
412
+ "score": "0"
413
+ },
414
+ {
415
+ "id": "80",
416
+ "score": "1"
417
+ },
418
+ {
419
+ "id": "81",
420
+ "score": "1"
421
+ },
422
+ {
423
+ "id": "112",
424
+ "score": "1"
425
+ },
426
+ {
427
+ "id": "88",
428
+ "score": "1"
429
+ },
430
+ {
431
+ "id": "97",
432
+ "score": "1"
433
+ },
434
+ {
435
+ "id": "125",
436
+ "score": "1"
437
+ },
438
+ {
439
+ "id": "180",
440
+ "score": "1"
441
+ },
442
+ {
443
+ "id": "94",
444
+ "score": "1"
445
+ },
446
+ {
447
+ "id": "156",
448
+ "score": "1"
449
+ },
450
+ {
451
+ "id": "146",
452
+ "score": "1"
453
+ },
454
+ {
455
+ "id": "158",
456
+ "score": "1"
457
+ },
458
+ {
459
+ "id": "121",
460
+ "score": "1"
461
+ },
462
+ {
463
+ "id": "193",
464
+ "score": "1"
465
+ },
466
+ {
467
+ "id": "141",
468
+ "score": "0"
469
+ },
470
+ {
471
+ "id": "145",
472
+ "score": "1"
473
+ },
474
+ {
475
+ "id": "151",
476
+ "score": "0"
477
+ },
478
+ {
479
+ "id": "143",
480
+ "score": "1"
481
+ },
482
+ {
483
+ "id": "100",
484
+ "score": "1"
485
+ },
486
+ {
487
+ "id": "113",
488
+ "score": "1"
489
+ },
490
+ {
491
+ "id": "115",
492
+ "score": "0"
493
+ },
494
+ {
495
+ "id": "127",
496
+ "score": "1"
497
+ },
498
+ {
499
+ "id": "132",
500
+ "score": "1"
501
+ },
502
+ {
503
+ "id": "138",
504
+ "score": "1"
505
+ },
506
+ {
507
+ "id": "118",
508
+ "score": "0"
509
+ },
510
+ {
511
+ "id": "124",
512
+ "score": "1"
513
+ },
514
+ {
515
+ "id": "130",
516
+ "score": "1"
517
+ },
518
+ {
519
+ "id": "126",
520
+ "score": "1"
521
+ },
522
+ {
523
+ "id": "152",
524
+ "score": "1"
525
+ },
526
+ {
527
+ "id": "116",
528
+ "score": "0"
529
+ },
530
+ {
531
+ "id": "120",
532
+ "score": "0"
533
+ },
534
+ {
535
+ "id": "123",
536
+ "score": "1"
537
+ },
538
+ {
539
+ "id": "133",
540
+ "score": "1"
541
+ },
542
+ {
543
+ "id": "54",
544
+ "score": "1"
545
+ },
546
+ {
547
+ "id": "70",
548
+ "score": "0"
549
+ },
550
+ {
551
+ "id": "110",
552
+ "score": "1"
553
+ },
554
+ {
555
+ "id": "114",
556
+ "score": "0"
557
+ },
558
+ {
559
+ "id": "119",
560
+ "score": "0"
561
+ },
562
+ {
563
+ "id": "122",
564
+ "score": "1"
565
+ },
566
+ {
567
+ "id": "128",
568
+ "score": "1"
569
+ },
570
+ {
571
+ "id": "137",
572
+ "score": "0"
573
+ },
574
+ {
575
+ "id": "144",
576
+ "score": "1"
577
+ },
578
+ {
579
+ "id": "147",
580
+ "score": "1"
581
+ },
582
+ {
583
+ "id": "159",
584
+ "score": "1"
585
+ },
586
+ {
587
+ "id": "71",
588
+ "score": "1"
589
+ },
590
+ {
591
+ "id": "83",
592
+ "score": "1"
593
+ },
594
+ {
595
+ "id": "84",
596
+ "score": "0"
597
+ },
598
+ {
599
+ "id": "150",
600
+ "score": "1"
601
+ },
602
+ {
603
+ "id": "174",
604
+ "score": "0"
605
+ },
606
+ {
607
+ "id": "85",
608
+ "score": "1"
609
+ },
610
+ {
611
+ "id": "101",
612
+ "score": "1"
613
+ },
614
+ {
615
+ "id": "198",
616
+ "score": "0"
617
+ },
618
+ {
619
+ "id": "99",
620
+ "score": "0"
621
+ },
622
+ {
623
+ "id": "142",
624
+ "score": "1"
625
+ },
626
+ {
627
+ "id": "185",
628
+ "score": "1"
629
+ },
630
+ {
631
+ "id": "111",
632
+ "score": "1"
633
+ },
634
+ {
635
+ "id": "171",
636
+ "score": "1"
637
+ },
638
+ {
639
+ "id": "189",
640
+ "score": "0"
641
+ },
642
+ {
643
+ "id": "155",
644
+ "score": "0"
645
+ },
646
+ {
647
+ "id": "164",
648
+ "score": "1"
649
+ },
650
+ {
651
+ "id": "168",
652
+ "score": "1"
653
+ },
654
+ {
655
+ "id": "199",
656
+ "score": "1"
657
+ },
658
+ {
659
+ "id": "153",
660
+ "score": "1"
661
+ },
662
+ {
663
+ "id": "161",
664
+ "score": "1"
665
+ },
666
+ {
667
+ "id": "175",
668
+ "score": "1"
669
+ },
670
+ {
671
+ "id": "182",
672
+ "score": "0"
673
+ },
674
+ {
675
+ "id": "184",
676
+ "score": "1"
677
+ },
678
+ {
679
+ "id": "191",
680
+ "score": "0"
681
+ },
682
+ {
683
+ "id": "136",
684
+ "score": "1"
685
+ },
686
+ {
687
+ "id": "166",
688
+ "score": "0"
689
+ },
690
+ {
691
+ "id": "172",
692
+ "score": "0"
693
+ },
694
+ {
695
+ "id": "129",
696
+ "score": "0"
697
+ },
698
+ {
699
+ "id": "165",
700
+ "score": "0"
701
+ },
702
+ {
703
+ "id": "140",
704
+ "score": "1"
705
+ },
706
+ {
707
+ "id": "154",
708
+ "score": "0"
709
+ },
710
+ {
711
+ "id": "173",
712
+ "score": "1"
713
+ },
714
+ {
715
+ "id": "167",
716
+ "score": "0"
717
+ },
718
+ {
719
+ "id": "91",
720
+ "score": "1"
721
+ },
722
+ {
723
+ "id": "160",
724
+ "score": "0"
725
+ },
726
+ {
727
+ "id": "162",
728
+ "score": "0"
729
+ },
730
+ {
731
+ "id": "169",
732
+ "score": "1"
733
+ },
734
+ {
735
+ "id": "170",
736
+ "score": "1"
737
+ },
738
+ {
739
+ "id": "176",
740
+ "score": "1"
741
+ },
742
+ {
743
+ "id": "179",
744
+ "score": "1"
745
+ },
746
+ {
747
+ "id": "187",
748
+ "score": "1"
749
+ },
750
+ {
751
+ "id": "190",
752
+ "score": "1"
753
+ },
754
+ {
755
+ "id": "192",
756
+ "score": "1"
757
+ },
758
+ {
759
+ "id": "95",
760
+ "score": "1"
761
+ },
762
+ {
763
+ "id": "98",
764
+ "score": "0"
765
+ },
766
+ {
767
+ "id": "194",
768
+ "score": "0"
769
+ },
770
+ {
771
+ "id": "183",
772
+ "score": "1"
773
+ },
774
+ {
775
+ "id": "178",
776
+ "score": "1"
777
+ },
778
+ {
779
+ "id": "177",
780
+ "score": "0"
781
+ },
782
+ {
783
+ "id": "197",
784
+ "score": "1"
785
+ },
786
+ {
787
+ "id": "149",
788
+ "score": "0"
789
+ },
790
+ {
791
+ "id": "195",
792
+ "score": "1"
793
+ },
794
+ {
795
+ "id": "181",
796
+ "score": "1"
797
+ },
798
+ {
799
+ "id": "186",
800
+ "score": "1"
801
+ }
802
+ ]
eval_milebench/ActionSequence/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/ActionSequence/pred_with_extracted.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CLEVR-Change.log ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/50 [00:00<?, ?it/s]
1
  2%|▏ | 1/50 [00:02<02:03, 2.53s/it]
2
  4%|▍ | 2/50 [00:03<01:16, 1.58s/it]
3
  6%|β–Œ | 3/50 [00:04<00:59, 1.26s/it]
4
  8%|β–Š | 4/50 [00:05<00:51, 1.11s/it]
5
  10%|β–ˆ | 5/50 [00:06<00:46, 1.03s/it]
6
  12%|β–ˆβ– | 6/50 [00:06<00:43, 1.02it/s]
7
  14%|β–ˆβ– | 7/50 [00:07<00:40, 1.05it/s]
8
  16%|β–ˆβ–Œ | 8/50 [00:08<00:39, 1.05it/s]
9
  18%|β–ˆβ–Š | 9/50 [00:09<00:37, 1.08it/s]
10
  20%|β–ˆβ–ˆ | 10/50 [00:10<00:36, 1.10it/s]
11
  22%|β–ˆβ–ˆβ– | 11/50 [00:11<00:34, 1.12it/s]
12
  24%|β–ˆβ–ˆβ– | 12/50 [00:12<00:33, 1.13it/s]
13
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:13<00:32, 1.13it/s]
14
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:14<00:31, 1.14it/s]
15
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:14<00:30, 1.14it/s]
16
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:15<00:29, 1.14it/s]
17
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:16<00:28, 1.14it/s]
18
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:17<00:28, 1.14it/s]
19
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:18<00:27,
20
  0%| | 0/50 [00:00<?, ?it/s]
21
  2%|▏ | 1/50 [00:02<02:09, 2.64s/it]
22
  4%|▍ | 2/50 [00:03<01:18, 1.63s/it]
23
  6%|β–Œ | 3/50 [00:04<01:00, 1.28s/it]
24
  8%|β–Š | 4/50 [00:05<00:51, 1.12s/it]
25
  10%|β–ˆ | 5/50 [00:06<00:46, 1.04s/it]
26
  12%|���▏ | 6/50 [00:07<00:43, 1.02it/s]
27
  14%|β–ˆβ– | 7/50 [00:07<00:40, 1.06it/s]
28
  16%|β–ˆβ–Œ | 8/50 [00:08<00:39, 1.05it/s]
29
  18%|β–ˆβ–Š | 9/50 [00:09<00:38, 1.08it/s]
30
  20%|β–ˆβ–ˆ | 10/50 [00:10<00:36, 1.10it/s]
31
  22%|β–ˆβ–ˆβ– | 11/50 [00:11<00:35, 1.11it/s]
32
  24%|β–ˆβ–ˆβ– | 12/50 [00:12<00:34, 1.12it/s]
33
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:13<00:32, 1.12it/s]
34
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:14<00:31, 1.13it/s]
35
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:15<00:31, 1.13it/s]
36
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:15<00:30, 1.13it/s]
37
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:16<00:29, 1.13it/s]
38
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:17<00:28, 1.13it/s]
39
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:18<00:27,
40
  0%| | 0/50 [00:00<?, ?it/s]
41
  2%|▏ | 1/50 [00:02<02:12, 2.70s/it]
42
  4%|▍ | 2/50 [00:03<01:19, 1.65s/it]
43
  6%|β–Œ | 3/50 [00:04<01:01, 1.31s/it]
44
  8%|β–Š | 4/50 [00:05<00:52, 1.15s/it]
45
  10%|β–ˆ | 5/50 [00:06<00:47, 1.06s/it]
46
  12%|β–ˆβ– | 6/50 [00:07<00:44, 1.01s/it]
47
  14%|β–ˆβ– | 7/50 [00:08<00:41, 1.03it/s]
48
  16%|β–ˆβ–Œ | 8/50 [00:09<00:39, 1.06it/s]
49
  18%|β–ˆβ–Š | 9/50 [00:09<00:38, 1.07it/s]
50
  20%|β–ˆβ–ˆ | 10/50 [00:10<00:36, 1.08it/s]
51
  22%|β–ˆβ–ˆβ– | 11/50 [00:11<00:35, 1.09it/s]
52
  24%|β–ˆβ–ˆβ– | 12/50 [00:12<00:34, 1.10it/s]
53
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:13<00:33, 1.11it/s]
54
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:14<00:32, 1.11it/s]
55
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:15<00:31, 1.11it/s]
56
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:16<00:30, 1.11it/s]
57
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:17<00:29, 1.12it/s]
58
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:17<00:28, 1.12it/s]
59
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:18<00:27, 1.13it/s]
60
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:19<00:26, 1.13it/s]
61
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:20<00:25, 1.14it/s]
62
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:21<00:24, 1.14it/s]
63
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:21<00:23, 1.14it/s]
64
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:22<00:22, 1.14it/s]
65
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:23<00:21, 1.14it/s]
66
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:24<00:21, 1.14it/s]
67
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:25<00:20, 1.14it/s]
68
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:26<00:19, 1.13it/s]
69
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:27<00:18, 1.13it/s]
70
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:28<00:17, 1.13it/s]
71
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:28<00:16, 1.13it/s]
72
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:29<00:15, 1.13it/s]
73
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:30<00:14, 1.14it/s]
74
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:31<00:13, 1.14it/s]
75
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:32<00:13, 1.15it/s]
76
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:33<00:12, 1.1 1.13it/s]
77
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:19<00:26, 1.13it/s]
78
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:20<00:25, 1.14it/s]
79
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:21<00:24, 1.14it/s]
80
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:22<00:23, 1.14it/s]
81
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:22<00:22, 1.14it/s]
82
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:23<00:21, 1.14it/s]
83
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:24<00:21, 1.14it/s]
84
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:25<00:20, 1.14it/s]
85
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:26<00:19, 1.14it/s]
86
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:27<00:18, 1.14it/s]
87
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:28<00:17, 1.14it/s]
88
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:29<00:16, 1.14it/s]
89
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:29<00:15, 1.15it/s]
90
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:30<00:14, 1.15it/s]
91
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:31<00:13, 1.15it/s]
92
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:32<00:13, 1.15it/s]
93
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:33<00:12, 1.1 1.12it/s]
94
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:19<00:26, 1.12it/s]
95
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:20<00:25, 1.13it/s]
96
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:21<00:24, 1.13it/s]
97
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:22<00:23, 1.13it/s]
98
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:23<00:22, 1.13it/s]
99
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:24<00:22, 1.14it/s]
100
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:25<00:21, 1.14it/s]
101
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:25<00:20, 1.13it/s]
102
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:26<00:19, 1.13it/s]
103
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:27<00:18, 1.13it/s]
104
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:28<00:17, 1.12it/s]
105
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:29<00:16, 1.12it/s]
106
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:30<00:16, 1.12it/s]
107
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:31<00:15, 1.12it/s]
108
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:32<00:14, 1.12it/s]
109
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:33<00:13, 1.13it/s]
110
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:33<00:12, 1.15it/s]
111
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:34<00:11, 1.15it/s]
112
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:35<00:10, 1.15it/s]
113
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:35<00:09, 1.16it/s]
114
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:36<00:08, 1.16it/s]
115
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆοΏ½οΏ½β– | 41/50 [00:37<00:07, 1.16it/s]
116
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:38<00:06, 1.16it/s]
117
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:39<00:06, 1.16it/s]
118
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:40<00:05, 1.16it/s]
119
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:41<00:04, 1.16it/s]
120
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:41<00:03, 1.16it/s]
121
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:42<00:02, 1.16it/s]
122
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:43<00:01, 1.15it/s]
123
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:44<00:00, 1.15it/s]
 
124
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:34<00:11, 1.15it/s]
125
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:35<00:10, 1.15it/s]
126
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:36<00:09, 1.15it/s]
127
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:36<00:08, 1.15it/s]
128
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:37<00:07, 1.15it/s]
129
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:38<00:06, 1.15it/s]
130
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:39<00:06, 1.15it/s]
131
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:40<00:05, 1.15it/s]
132
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:41<00:04, 1.15it/s]
133
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:42<00:03, 1.15it/s]
134
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:42<00:02, 1.15it/s]
135
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:43<00:01, 1.15it/s]
136
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:44<00:00, 1.15it/s]
 
137
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:34<00:11, 1.12it/s]
138
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:35<00:10, 1.11it/s]
139
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:36<00:09, 1.11it/s]
140
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:37<00:08, 1.12it/s]
141
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:38<00:08, 1.11it/s]
142
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:39<00:07, 1.10it/s]
143
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:40<00:06, 1.11it/s]
144
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:41<00:05, 1.12it/s]
145
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:42<00:04, 1.12it/s]
146
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:42<00:03, 1.12it/s]
147
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:43<00:02, 1.12it/s]
148
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:44<00:01, 1.12it/s]
149
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:45<00:00, 1.12it/s]
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CLEVR-Change, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
62
+ Initialization Finished
63
+ Predicting CLEVR-Change Using internvl
64
+ Proceeding 2-length images samples | Num: 200
65
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
66
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CLEVR-Change, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
67
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
68
+ Initialization Finished
69
+ Predicting CLEVR-Change Using internvl
70
+ Proceeding 2-length images samples | Num: 200
71
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CLEVR-Change, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
72
+ Initialization Finished
73
+ Predicting CLEVR-Change Using internvl
74
+ Proceeding 2-length images samples | Num: 200
75
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
76
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CLEVR-Change, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
77
+ Initialization Finished
78
+ Predicting CLEVR-Change Using internvl
79
+ Proceeding 2-length images samples | Num: 200
80
+
81
  0%| | 0/50 [00:00<?, ?it/s]
82
  2%|▏ | 1/50 [00:02<02:03, 2.53s/it]
83
  4%|▍ | 2/50 [00:03<01:16, 1.58s/it]
84
  6%|β–Œ | 3/50 [00:04<00:59, 1.26s/it]
85
  8%|β–Š | 4/50 [00:05<00:51, 1.11s/it]
86
  10%|β–ˆ | 5/50 [00:06<00:46, 1.03s/it]
87
  12%|β–ˆβ– | 6/50 [00:06<00:43, 1.02it/s]
88
  14%|β–ˆβ– | 7/50 [00:07<00:40, 1.05it/s]
89
  16%|β–ˆβ–Œ | 8/50 [00:08<00:39, 1.05it/s]
90
  18%|β–ˆβ–Š | 9/50 [00:09<00:37, 1.08it/s]
91
  20%|β–ˆβ–ˆ | 10/50 [00:10<00:36, 1.10it/s]
92
  22%|β–ˆβ–ˆβ– | 11/50 [00:11<00:34, 1.12it/s]
93
  24%|β–ˆβ–ˆβ– | 12/50 [00:12<00:33, 1.13it/s]
94
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:13<00:32, 1.13it/s]
95
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:14<00:31, 1.14it/s]
96
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:14<00:30, 1.14it/s]
97
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:15<00:29, 1.14it/s]
98
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:16<00:28, 1.14it/s]
99
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:17<00:28, 1.14it/s]
100
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:18<00:27,
101
  0%| | 0/50 [00:00<?, ?it/s]
102
  2%|▏ | 1/50 [00:02<02:09, 2.64s/it]
103
  4%|▍ | 2/50 [00:03<01:18, 1.63s/it]
104
  6%|β–Œ | 3/50 [00:04<01:00, 1.28s/it]
105
  8%|β–Š | 4/50 [00:05<00:51, 1.12s/it]
106
  10%|β–ˆ | 5/50 [00:06<00:46, 1.04s/it]
107
  12%|���▏ | 6/50 [00:07<00:43, 1.02it/s]
108
  14%|β–ˆβ– | 7/50 [00:07<00:40, 1.06it/s]
109
  16%|β–ˆβ–Œ | 8/50 [00:08<00:39, 1.05it/s]
110
  18%|β–ˆβ–Š | 9/50 [00:09<00:38, 1.08it/s]
111
  20%|β–ˆβ–ˆ | 10/50 [00:10<00:36, 1.10it/s]
112
  22%|β–ˆβ–ˆβ– | 11/50 [00:11<00:35, 1.11it/s]
113
  24%|β–ˆβ–ˆβ– | 12/50 [00:12<00:34, 1.12it/s]
114
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:13<00:32, 1.12it/s]
115
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:14<00:31, 1.13it/s]
116
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:15<00:31, 1.13it/s]
117
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:15<00:30, 1.13it/s]
118
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:16<00:29, 1.13it/s]
119
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:17<00:28, 1.13it/s]
120
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:18<00:27,
121
  0%| | 0/50 [00:00<?, ?it/s]
122
  2%|▏ | 1/50 [00:02<02:12, 2.70s/it]
123
  4%|▍ | 2/50 [00:03<01:19, 1.65s/it]
124
  6%|β–Œ | 3/50 [00:04<01:01, 1.31s/it]
125
  8%|β–Š | 4/50 [00:05<00:52, 1.15s/it]
126
  10%|β–ˆ | 5/50 [00:06<00:47, 1.06s/it]
127
  12%|β–ˆβ– | 6/50 [00:07<00:44, 1.01s/it]
128
  14%|β–ˆβ– | 7/50 [00:08<00:41, 1.03it/s]
129
  16%|β–ˆβ–Œ | 8/50 [00:09<00:39, 1.06it/s]
130
  18%|β–ˆβ–Š | 9/50 [00:09<00:38, 1.07it/s]
131
  20%|β–ˆβ–ˆ | 10/50 [00:10<00:36, 1.08it/s]
132
  22%|β–ˆβ–ˆβ– | 11/50 [00:11<00:35, 1.09it/s]
133
  24%|β–ˆβ–ˆβ– | 12/50 [00:12<00:34, 1.10it/s]
134
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:13<00:33, 1.11it/s]
135
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:14<00:32, 1.11it/s]
136
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:15<00:31, 1.11it/s]
137
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:16<00:30, 1.11it/s]
138
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:17<00:29, 1.12it/s]
139
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:17<00:28, 1.12it/s]
140
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:18<00:27, 1.13it/s]
141
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:19<00:26, 1.13it/s]
142
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:20<00:25, 1.14it/s]
143
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:21<00:24, 1.14it/s]
144
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:21<00:23, 1.14it/s]
145
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:22<00:22, 1.14it/s]
146
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:23<00:21, 1.14it/s]
147
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:24<00:21, 1.14it/s]
148
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:25<00:20, 1.14it/s]
149
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:26<00:19, 1.13it/s]
150
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:27<00:18, 1.13it/s]
151
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:28<00:17, 1.13it/s]
152
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:28<00:16, 1.13it/s]
153
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:29<00:15, 1.13it/s]
154
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:30<00:14, 1.14it/s]
155
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:31<00:13, 1.14it/s]
156
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:32<00:13, 1.15it/s]
157
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:33<00:12, 1.1 1.13it/s]
158
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:19<00:26, 1.13it/s]
159
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:20<00:25, 1.14it/s]
160
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:21<00:24, 1.14it/s]
161
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:22<00:23, 1.14it/s]
162
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:22<00:22, 1.14it/s]
163
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:23<00:21, 1.14it/s]
164
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:24<00:21, 1.14it/s]
165
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:25<00:20, 1.14it/s]
166
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:26<00:19, 1.14it/s]
167
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:27<00:18, 1.14it/s]
168
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:28<00:17, 1.14it/s]
169
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:29<00:16, 1.14it/s]
170
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:29<00:15, 1.15it/s]
171
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:30<00:14, 1.15it/s]
172
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:31<00:13, 1.15it/s]
173
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:32<00:13, 1.15it/s]
174
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:33<00:12, 1.1 1.12it/s]
175
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:19<00:26, 1.12it/s]
176
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:20<00:25, 1.13it/s]
177
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:21<00:24, 1.13it/s]
178
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:22<00:23, 1.13it/s]
179
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:23<00:22, 1.13it/s]
180
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:24<00:22, 1.14it/s]
181
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:25<00:21, 1.14it/s]
182
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:25<00:20, 1.13it/s]
183
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:26<00:19, 1.13it/s]
184
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:27<00:18, 1.13it/s]
185
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:28<00:17, 1.12it/s]
186
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:29<00:16, 1.12it/s]
187
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:30<00:16, 1.12it/s]
188
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:31<00:15, 1.12it/s]
189
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:32<00:14, 1.12it/s]
190
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:33<00:13, 1.13it/s]
191
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:33<00:12, 1.15it/s]
192
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:34<00:11, 1.15it/s]
193
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:35<00:10, 1.15it/s]
194
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:35<00:09, 1.16it/s]
195
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:36<00:08, 1.16it/s]
196
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆοΏ½οΏ½β– | 41/50 [00:37<00:07, 1.16it/s]
197
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:38<00:06, 1.16it/s]
198
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:39<00:06, 1.16it/s]
199
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:40<00:05, 1.16it/s]
200
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:41<00:04, 1.16it/s]
201
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:41<00:03, 1.16it/s]
202
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:42<00:02, 1.16it/s]
203
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:43<00:01, 1.15it/s]
204
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:44<00:00, 1.15it/s]
205
+ 5it/s]
206
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:34<00:11, 1.15it/s]
207
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:35<00:10, 1.15it/s]
208
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:36<00:09, 1.15it/s]
209
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:36<00:08, 1.15it/s]
210
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:37<00:07, 1.15it/s]
211
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:38<00:06, 1.15it/s]
212
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:39<00:06, 1.15it/s]
213
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:40<00:05, 1.15it/s]
214
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:41<00:04, 1.15it/s]
215
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:42<00:03, 1.15it/s]
216
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:42<00:02, 1.15it/s]
217
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:43<00:01, 1.15it/s]
218
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:44<00:00, 1.15it/s]
219
+ 2it/s]
220
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:34<00:11, 1.12it/s]
221
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:35<00:10, 1.11it/s]
222
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:36<00:09, 1.11it/s]
223
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:37<00:08, 1.12it/s]
224
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:38<00:08, 1.11it/s]
225
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:39<00:07, 1.10it/s]
226
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:40<00:06, 1.11it/s]
227
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:41<00:05, 1.12it/s]
228
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:42<00:04, 1.12it/s]
229
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:42<00:03, 1.12it/s]
230
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:43<00:02, 1.12it/s]
231
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:44<00:01, 1.12it/s]
232
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:45<00:00, 1.12it/s]
233
+ evaluating CLEVR-Change ...
234
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/CLEVR-Change/CLEVR-Change_240803234510.json
235
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset CLEVR-Change --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/CLEVR-Change
236
+ internvl: CLEVR-Change: {'Rouge-L f': 0.19528275526918393, 'image_quantity_level-Accuracy': {'Few': 0.19528275526918393, 'Medium': 0, 'Many': 0}, 'image_quantity_level-Result': {'Few': [39.05655105383679, 200], 'Medium': [0, 0], 'Many': [0, 0]}}
eval_milebench/CLEVR-Change/CLEVR-Change_240803234510.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CLEVR-Change/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Rouge-L f": 0.19528275526918393, "image_quantity_level-Accuracy": {"Few": 0.19528275526918393, "Medium": 0, "Many": 0}, "image_quantity_level-Result": {"Few": [39.05655105383679, 200], "Medium": [0, 0], "Many": [0, 0]}}
eval_milebench/CLEVR-Change/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "0.35"
5
+ },
6
+ {
7
+ "id": "1",
8
+ "score": "0.222"
9
+ },
10
+ {
11
+ "id": "2",
12
+ "score": "0.08"
13
+ },
14
+ {
15
+ "id": "3",
16
+ "score": "0.118"
17
+ },
18
+ {
19
+ "id": "4",
20
+ "score": "0.238"
21
+ },
22
+ {
23
+ "id": "5",
24
+ "score": "0.304"
25
+ },
26
+ {
27
+ "id": "6",
28
+ "score": "0.182"
29
+ },
30
+ {
31
+ "id": "7",
32
+ "score": "0.311"
33
+ },
34
+ {
35
+ "id": "8",
36
+ "score": "0.244"
37
+ },
38
+ {
39
+ "id": "9",
40
+ "score": "0.222"
41
+ },
42
+ {
43
+ "id": "10",
44
+ "score": "0.163"
45
+ },
46
+ {
47
+ "id": "11",
48
+ "score": "0.244"
49
+ },
50
+ {
51
+ "id": "12",
52
+ "score": "0.085"
53
+ },
54
+ {
55
+ "id": "13",
56
+ "score": "0.341"
57
+ },
58
+ {
59
+ "id": "14",
60
+ "score": "0.24"
61
+ },
62
+ {
63
+ "id": "15",
64
+ "score": "0.25"
65
+ },
66
+ {
67
+ "id": "16",
68
+ "score": "0.14"
69
+ },
70
+ {
71
+ "id": "17",
72
+ "score": "0.19"
73
+ },
74
+ {
75
+ "id": "18",
76
+ "score": "0.08"
77
+ },
78
+ {
79
+ "id": "19",
80
+ "score": "0.115"
81
+ },
82
+ {
83
+ "id": "20",
84
+ "score": "0.227"
85
+ },
86
+ {
87
+ "id": "21",
88
+ "score": "0.208"
89
+ },
90
+ {
91
+ "id": "22",
92
+ "score": "0.222"
93
+ },
94
+ {
95
+ "id": "23",
96
+ "score": "0.091"
97
+ },
98
+ {
99
+ "id": "24",
100
+ "score": "0.12"
101
+ },
102
+ {
103
+ "id": "25",
104
+ "score": "0.208"
105
+ },
106
+ {
107
+ "id": "26",
108
+ "score": "0.2"
109
+ },
110
+ {
111
+ "id": "27",
112
+ "score": "0.186"
113
+ },
114
+ {
115
+ "id": "28",
116
+ "score": "0.182"
117
+ },
118
+ {
119
+ "id": "29",
120
+ "score": "0.19"
121
+ },
122
+ {
123
+ "id": "30",
124
+ "score": "0.195"
125
+ },
126
+ {
127
+ "id": "31",
128
+ "score": "0.25"
129
+ },
130
+ {
131
+ "id": "32",
132
+ "score": "0.222"
133
+ },
134
+ {
135
+ "id": "33",
136
+ "score": "0.093"
137
+ },
138
+ {
139
+ "id": "34",
140
+ "score": "0.154"
141
+ },
142
+ {
143
+ "id": "35",
144
+ "score": "0.205"
145
+ },
146
+ {
147
+ "id": "36",
148
+ "score": "0.083"
149
+ },
150
+ {
151
+ "id": "37",
152
+ "score": "0.125"
153
+ },
154
+ {
155
+ "id": "38",
156
+ "score": "0.154"
157
+ },
158
+ {
159
+ "id": "39",
160
+ "score": "0.286"
161
+ },
162
+ {
163
+ "id": "40",
164
+ "score": "0.205"
165
+ },
166
+ {
167
+ "id": "41",
168
+ "score": "0.233"
169
+ },
170
+ {
171
+ "id": "42",
172
+ "score": "0.217"
173
+ },
174
+ {
175
+ "id": "43",
176
+ "score": "0.061"
177
+ },
178
+ {
179
+ "id": "44",
180
+ "score": "0.244"
181
+ },
182
+ {
183
+ "id": "45",
184
+ "score": "0.244"
185
+ },
186
+ {
187
+ "id": "46",
188
+ "score": "0.227"
189
+ },
190
+ {
191
+ "id": "47",
192
+ "score": "0.208"
193
+ },
194
+ {
195
+ "id": "48",
196
+ "score": "0.128"
197
+ },
198
+ {
199
+ "id": "49",
200
+ "score": "0.125"
201
+ },
202
+ {
203
+ "id": "50",
204
+ "score": "0.17"
205
+ },
206
+ {
207
+ "id": "51",
208
+ "score": "0.222"
209
+ },
210
+ {
211
+ "id": "52",
212
+ "score": "0.286"
213
+ },
214
+ {
215
+ "id": "53",
216
+ "score": "0.286"
217
+ },
218
+ {
219
+ "id": "54",
220
+ "score": "0.286"
221
+ },
222
+ {
223
+ "id": "55",
224
+ "score": "0.273"
225
+ },
226
+ {
227
+ "id": "56",
228
+ "score": "0.178"
229
+ },
230
+ {
231
+ "id": "57",
232
+ "score": "0.1"
233
+ },
234
+ {
235
+ "id": "58",
236
+ "score": "0.17"
237
+ },
238
+ {
239
+ "id": "59",
240
+ "score": "0.227"
241
+ },
242
+ {
243
+ "id": "60",
244
+ "score": "0.208"
245
+ },
246
+ {
247
+ "id": "61",
248
+ "score": "0.174"
249
+ },
250
+ {
251
+ "id": "62",
252
+ "score": "0.273"
253
+ },
254
+ {
255
+ "id": "63",
256
+ "score": "0.292"
257
+ },
258
+ {
259
+ "id": "64",
260
+ "score": "0.157"
261
+ },
262
+ {
263
+ "id": "65",
264
+ "score": "0.255"
265
+ },
266
+ {
267
+ "id": "66",
268
+ "score": "0.182"
269
+ },
270
+ {
271
+ "id": "67",
272
+ "score": "0.167"
273
+ },
274
+ {
275
+ "id": "68",
276
+ "score": "0.25"
277
+ },
278
+ {
279
+ "id": "69",
280
+ "score": "0.1"
281
+ },
282
+ {
283
+ "id": "70",
284
+ "score": "0.105"
285
+ },
286
+ {
287
+ "id": "71",
288
+ "score": "0.082"
289
+ },
290
+ {
291
+ "id": "72",
292
+ "score": "0.111"
293
+ },
294
+ {
295
+ "id": "73",
296
+ "score": "0.14"
297
+ },
298
+ {
299
+ "id": "74",
300
+ "score": "0.195"
301
+ },
302
+ {
303
+ "id": "75",
304
+ "score": "0.178"
305
+ },
306
+ {
307
+ "id": "76",
308
+ "score": "0.178"
309
+ },
310
+ {
311
+ "id": "77",
312
+ "score": "0.216"
313
+ },
314
+ {
315
+ "id": "78",
316
+ "score": "0.095"
317
+ },
318
+ {
319
+ "id": "79",
320
+ "score": "0.216"
321
+ },
322
+ {
323
+ "id": "80",
324
+ "score": "0.089"
325
+ },
326
+ {
327
+ "id": "81",
328
+ "score": "0.136"
329
+ },
330
+ {
331
+ "id": "82",
332
+ "score": "0.192"
333
+ },
334
+ {
335
+ "id": "83",
336
+ "score": "0.122"
337
+ },
338
+ {
339
+ "id": "84",
340
+ "score": "0.098"
341
+ },
342
+ {
343
+ "id": "85",
344
+ "score": "0.082"
345
+ },
346
+ {
347
+ "id": "86",
348
+ "score": "0.267"
349
+ },
350
+ {
351
+ "id": "87",
352
+ "score": "0.133"
353
+ },
354
+ {
355
+ "id": "88",
356
+ "score": "0.136"
357
+ },
358
+ {
359
+ "id": "89",
360
+ "score": "0.14"
361
+ },
362
+ {
363
+ "id": "90",
364
+ "score": "0.158"
365
+ },
366
+ {
367
+ "id": "91",
368
+ "score": "0.186"
369
+ },
370
+ {
371
+ "id": "92",
372
+ "score": "0.213"
373
+ },
374
+ {
375
+ "id": "93",
376
+ "score": "0.1"
377
+ },
378
+ {
379
+ "id": "94",
380
+ "score": "0.298"
381
+ },
382
+ {
383
+ "id": "95",
384
+ "score": "0.133"
385
+ },
386
+ {
387
+ "id": "96",
388
+ "score": "0.227"
389
+ },
390
+ {
391
+ "id": "97",
392
+ "score": "0.217"
393
+ },
394
+ {
395
+ "id": "98",
396
+ "score": "0.089"
397
+ },
398
+ {
399
+ "id": "99",
400
+ "score": "0.174"
401
+ },
402
+ {
403
+ "id": "100",
404
+ "score": "0.211"
405
+ },
406
+ {
407
+ "id": "101",
408
+ "score": "0.17"
409
+ },
410
+ {
411
+ "id": "102",
412
+ "score": "0.204"
413
+ },
414
+ {
415
+ "id": "103",
416
+ "score": "0.136"
417
+ },
418
+ {
419
+ "id": "104",
420
+ "score": "0.205"
421
+ },
422
+ {
423
+ "id": "105",
424
+ "score": "0.286"
425
+ },
426
+ {
427
+ "id": "106",
428
+ "score": "0.146"
429
+ },
430
+ {
431
+ "id": "107",
432
+ "score": "0.24"
433
+ },
434
+ {
435
+ "id": "108",
436
+ "score": "0.245"
437
+ },
438
+ {
439
+ "id": "109",
440
+ "score": "0.15"
441
+ },
442
+ {
443
+ "id": "110",
444
+ "score": "0.273"
445
+ },
446
+ {
447
+ "id": "111",
448
+ "score": "0.083"
449
+ },
450
+ {
451
+ "id": "112",
452
+ "score": "0.255"
453
+ },
454
+ {
455
+ "id": "113",
456
+ "score": "0.17"
457
+ },
458
+ {
459
+ "id": "114",
460
+ "score": "0.109"
461
+ },
462
+ {
463
+ "id": "115",
464
+ "score": "0.261"
465
+ },
466
+ {
467
+ "id": "116",
468
+ "score": "0.293"
469
+ },
470
+ {
471
+ "id": "117",
472
+ "score": "0.098"
473
+ },
474
+ {
475
+ "id": "118",
476
+ "score": "0.273"
477
+ },
478
+ {
479
+ "id": "119",
480
+ "score": "0.217"
481
+ },
482
+ {
483
+ "id": "120",
484
+ "score": "0.17"
485
+ },
486
+ {
487
+ "id": "121",
488
+ "score": "0.255"
489
+ },
490
+ {
491
+ "id": "122",
492
+ "score": "0.196"
493
+ },
494
+ {
495
+ "id": "123",
496
+ "score": "0.12"
497
+ },
498
+ {
499
+ "id": "124",
500
+ "score": "0.278"
501
+ },
502
+ {
503
+ "id": "125",
504
+ "score": "0.298"
505
+ },
506
+ {
507
+ "id": "126",
508
+ "score": "0.151"
509
+ },
510
+ {
511
+ "id": "127",
512
+ "score": "0.218"
513
+ },
514
+ {
515
+ "id": "128",
516
+ "score": "0.186"
517
+ },
518
+ {
519
+ "id": "129",
520
+ "score": "0.053"
521
+ },
522
+ {
523
+ "id": "130",
524
+ "score": "0.143"
525
+ },
526
+ {
527
+ "id": "131",
528
+ "score": "0.085"
529
+ },
530
+ {
531
+ "id": "132",
532
+ "score": "0.2"
533
+ },
534
+ {
535
+ "id": "133",
536
+ "score": "0.24"
537
+ },
538
+ {
539
+ "id": "134",
540
+ "score": "0.095"
541
+ },
542
+ {
543
+ "id": "135",
544
+ "score": "0.292"
545
+ },
546
+ {
547
+ "id": "136",
548
+ "score": "0.227"
549
+ },
550
+ {
551
+ "id": "137",
552
+ "score": "0.245"
553
+ },
554
+ {
555
+ "id": "138",
556
+ "score": "0.167"
557
+ },
558
+ {
559
+ "id": "139",
560
+ "score": "0.133"
561
+ },
562
+ {
563
+ "id": "140",
564
+ "score": "0.14"
565
+ },
566
+ {
567
+ "id": "141",
568
+ "score": "0.133"
569
+ },
570
+ {
571
+ "id": "142",
572
+ "score": "0.233"
573
+ },
574
+ {
575
+ "id": "143",
576
+ "score": "0.255"
577
+ },
578
+ {
579
+ "id": "144",
580
+ "score": "0.128"
581
+ },
582
+ {
583
+ "id": "145",
584
+ "score": "0.298"
585
+ },
586
+ {
587
+ "id": "146",
588
+ "score": "0.143"
589
+ },
590
+ {
591
+ "id": "147",
592
+ "score": "0.083"
593
+ },
594
+ {
595
+ "id": "148",
596
+ "score": "0.077"
597
+ },
598
+ {
599
+ "id": "149",
600
+ "score": "0.17"
601
+ },
602
+ {
603
+ "id": "150",
604
+ "score": "0.261"
605
+ },
606
+ {
607
+ "id": "151",
608
+ "score": "0.14"
609
+ },
610
+ {
611
+ "id": "152",
612
+ "score": "0.113"
613
+ },
614
+ {
615
+ "id": "153",
616
+ "score": "0.17"
617
+ },
618
+ {
619
+ "id": "154",
620
+ "score": "0.261"
621
+ },
622
+ {
623
+ "id": "155",
624
+ "score": "0.227"
625
+ },
626
+ {
627
+ "id": "156",
628
+ "score": "0.208"
629
+ },
630
+ {
631
+ "id": "157",
632
+ "score": "0.178"
633
+ },
634
+ {
635
+ "id": "158",
636
+ "score": "0.213"
637
+ },
638
+ {
639
+ "id": "159",
640
+ "score": "0.186"
641
+ },
642
+ {
643
+ "id": "160",
644
+ "score": "0.244"
645
+ },
646
+ {
647
+ "id": "161",
648
+ "score": "0.255"
649
+ },
650
+ {
651
+ "id": "162",
652
+ "score": "0.13"
653
+ },
654
+ {
655
+ "id": "163",
656
+ "score": "0.14"
657
+ },
658
+ {
659
+ "id": "164",
660
+ "score": "0.12"
661
+ },
662
+ {
663
+ "id": "165",
664
+ "score": "0.308"
665
+ },
666
+ {
667
+ "id": "166",
668
+ "score": "0.244"
669
+ },
670
+ {
671
+ "id": "167",
672
+ "score": "0.326"
673
+ },
674
+ {
675
+ "id": "168",
676
+ "score": "0.267"
677
+ },
678
+ {
679
+ "id": "169",
680
+ "score": "0.195"
681
+ },
682
+ {
683
+ "id": "170",
684
+ "score": "0.192"
685
+ },
686
+ {
687
+ "id": "171",
688
+ "score": "0.286"
689
+ },
690
+ {
691
+ "id": "172",
692
+ "score": "0.213"
693
+ },
694
+ {
695
+ "id": "173",
696
+ "score": "0.233"
697
+ },
698
+ {
699
+ "id": "174",
700
+ "score": "0.211"
701
+ },
702
+ {
703
+ "id": "175",
704
+ "score": "0.267"
705
+ },
706
+ {
707
+ "id": "176",
708
+ "score": "0.267"
709
+ },
710
+ {
711
+ "id": "177",
712
+ "score": "0.25"
713
+ },
714
+ {
715
+ "id": "178",
716
+ "score": "0.326"
717
+ },
718
+ {
719
+ "id": "179",
720
+ "score": "0.227"
721
+ },
722
+ {
723
+ "id": "180",
724
+ "score": "0.383"
725
+ },
726
+ {
727
+ "id": "181",
728
+ "score": "0.208"
729
+ },
730
+ {
731
+ "id": "182",
732
+ "score": "0.222"
733
+ },
734
+ {
735
+ "id": "183",
736
+ "score": "0.17"
737
+ },
738
+ {
739
+ "id": "184",
740
+ "score": "0.174"
741
+ },
742
+ {
743
+ "id": "185",
744
+ "score": "0.118"
745
+ },
746
+ {
747
+ "id": "186",
748
+ "score": "0.213"
749
+ },
750
+ {
751
+ "id": "187",
752
+ "score": "0.19"
753
+ },
754
+ {
755
+ "id": "188",
756
+ "score": "0.227"
757
+ },
758
+ {
759
+ "id": "189",
760
+ "score": "0.213"
761
+ },
762
+ {
763
+ "id": "190",
764
+ "score": "0.196"
765
+ },
766
+ {
767
+ "id": "191",
768
+ "score": "0.238"
769
+ },
770
+ {
771
+ "id": "192",
772
+ "score": "0.178"
773
+ },
774
+ {
775
+ "id": "193",
776
+ "score": "0.318"
777
+ },
778
+ {
779
+ "id": "194",
780
+ "score": "0.318"
781
+ },
782
+ {
783
+ "id": "195",
784
+ "score": "0.25"
785
+ },
786
+ {
787
+ "id": "196",
788
+ "score": "0.128"
789
+ },
790
+ {
791
+ "id": "197",
792
+ "score": "0.17"
793
+ },
794
+ {
795
+ "id": "198",
796
+ "score": "0.261"
797
+ },
798
+ {
799
+ "id": "199",
800
+ "score": "0.213"
801
+ }
802
+ ]
eval_milebench/CLEVR-Change/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CharacterOrder.log ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/2 [00:00<?, ?it/s]
1
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.04s/it]
 
 
2
  0%| | 0/2 [00:00<?, ?it/s]
3
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.03s/it]
 
 
4
  0%| | 0/2 [00:00<?, ?it/s]
5
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.54s/it]
 
 
 
6
  0%| | 0/1 [00:00<?, ?it/s]
 
 
7
  0%| | 0/1 [00:00<?, ?it/s]
 
 
8
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
9
  0%| | 0/2 [00:00<?, ?it/s]
10
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.64s/it]
 
 
11
  0%| | 0/2 [00:00<?, ?it/s]
12
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.67s/it]
 
 
13
  0%| | 0/2 [00:00<?, ?it/s]
14
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.00s/it]
 
 
 
15
  0%| | 0/3 [00:00<?, ?it/s]
16
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.15s/it]
17
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.68s/it]
 
 
 
18
  0%| | 0/3 [00:00<?, ?it/s]
19
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.62s/it]
20
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.64s/it]
 
21
  0%| | 0/3 [00:00<?, ?it/s]
22
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.63s/it]
23
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.32s/it]
 
 
 
24
  0%| | 0/3 [00:00<?, ?it/s]
25
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.17s/it]
26
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.13s/it]
 
 
27
  0%| | 0/3 [00:00<?, ?it/s]
28
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.42s/it]
29
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.29s/it]
 
 
30
  0%| | 0/1 [00:00<?, ?it/s]
 
 
31
  0%| | 0/1 [00:00<?, ?it/s]
 
 
32
  0%| | 0/1 [00:00<?, ?it/s]
 
 
33
  0%| | 0/3 [00:00<?, ?it/s]
34
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.94s/it]
35
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.43s/it]
 
 
 
 
36
  0%| | 0/2 [00:00<?, ?it/s]
37
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.61s/it]
 
 
 
38
  0%| | 0/1 [00:00<?, ?it/s]
 
 
39
  0%| | 0/2 [00:00<?, ?it/s]
40
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.14s/it]
 
 
 
41
  0%| | 0/1 [00:00<?, ?it/s]
 
 
42
  0%| | 0/2 [00:00<?, ?it/s]
43
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.40s/it]
 
 
 
 
44
  0%| | 0/5 [00:00<?, ?it/s]
45
  20%|β–ˆβ–ˆ | 1/5 [00:03<00:12, 3.01s/it]
46
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:05<00:07, 2.61s/it]
47
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:07<00:04, 2.44s/it]
48
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:09<00:02, 2.41s/it]
 
 
49
  0%| | 0/5 [00:00<?, ?it/s]
50
  20%|β–ˆβ–ˆ | 1/5 [00:03<00:13, 3.26s/it]
51
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:05<00:07, 2.57s/it]
52
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:07<00:04, 2.37s/it]
53
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:09<00:02, 2.09s/it]
 
 
54
  0%| | 0/1 [00:00<?, ?it/s]
 
 
55
  0%| | 0/1 [00:00<?, ?it/s]
 
 
56
  0%| | 0/5 [00:00<?, ?it/s]
57
  20%|β–ˆβ–ˆ | 1/5 [00:02<00:10, 2.68s/it]
58
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:04<00:07, 2.43s/it]
59
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:07<00:04, 2.40s/it]
60
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:09<00:02, 2.35s/it]
 
 
61
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
62
  0%| | 0/2 [00:00<?, ?it/s]
63
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.96s/it]
 
 
 
64
  0%| | 0/3 [00:00<?, ?it/s]
65
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.80s/it]
66
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:02, 2.03s/it]
 
 
67
  0%| | 0/3 [00:00<?, ?it/s]
68
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.85s/it]
69
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.74s/it]
 
 
70
  0%| | 0/2 [00:00<?, ?it/s]
71
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.91s/it]
 
 
72
  0%| | 0/2 [00:00<?, ?it/s]
73
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.30s/it]
 
 
74
  0%| | 0/2 [00:00<?, ?it/s]
75
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.69s/it]
 
 
76
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
77
  0%| | 0/1 [00:00<?, ?it/s]
 
 
78
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
79
  0%| | 0/3 [00:00<?, ?it/s]
80
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.83s/it]
81
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.95s/it]
 
 
82
  0%| | 0/3 [00:00<?, ?it/s]
83
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.91s/it]
84
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.84s/it]
 
 
85
  0%| | 0/3 [00:00<?, ?it/s]
86
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.94s/it]
87
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.64s/it]
 
 
 
 
88
  0%| | 0/7 [00:00<?, ?it/s]
89
  14%|β–ˆβ– | 1/7 [00:02<00:17, 2.97s/it]
90
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:05<00:14, 2.80s/it]
91
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:08<00:10, 2.69s/it]
92
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:10<00:07, 2.47s/it]
93
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:12<00:04, 2.34s/it]
94
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:14<00:02, 2.19s/it]
 
 
 
 
95
  0%| | 0/7 [00:00<?, ?it/s]
96
  14%|β–ˆβ– | 1/7 [00:03<00:20, 3.43s/it]
97
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:05<00:13, 2.72s/it]
98
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:07<00:09, 2.49s/it]
99
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:10<00:08, 2.68s/it]
100
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:12<00:04, 2.44s/it]
101
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:15<00:02, 2.49s/it]
 
 
102
  0%| | 0/7 [00:00<?, ?it/s]
103
  14%|β–ˆβ– | 1/7 [00:02<00:12, 2.15s/it]
104
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:04<00:12, 2.47s/it]
105
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:07<00:10, 2.57s/it]
106
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:09<00:07, 2.43s/it]
107
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:11<00:04, 2.33s/it]
108
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:14<00:02, 2.35s/it]
 
 
109
  0%| | 0/1 [00:00<?, ?it/s]
 
 
110
  0%| | 0/1 [00:00<?, ?it/s]
 
 
111
  0%| | 0/2 [00:00<?, ?it/s]
112
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.87s/it]
 
 
 
 
113
  0%| | 0/2 [00:00<?, ?it/s]
114
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.75s/it]
 
 
115
  0%| | 0/2 [00:00<?, ?it/s]
116
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.52s/it]
 
 
117
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
118
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
119
  0%| | 0/3 [00:00<?, ?it/s]
120
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.83s/it]
121
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.08s/it]
 
 
 
 
 
 
 
 
 
 
 
122
  0%| | 0/3 [00:00<?, ?it/s]
123
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.50s/it]
124
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.80s/it]
 
 
125
  0%| | 0/3 [00:00<?, ?it/s]
126
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.85s/it]
127
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.94s/it]
 
 
 
128
  0%| | 0/1 [00:00<?, ?it/s]
 
 
129
  0%| | 0/2 [00:00<?, ?it/s]
130
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.28s/it]
 
 
 
 
131
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
132
  0%| | 0/1 [00:00<?, ?it/s]
 
 
133
  0%| | 0/1 [00:00<?, ?it/s]
 
 
134
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
135
  0%| | 0/1 [00:00<?, ?it/s]
 
136
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
137
  0%| | 0/2 [00:00<?, ?it/s]
138
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.75s/it]
 
 
 
 
 
139
  0%| | 0/2 [00:00<?, ?it/s]
140
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.19s/it]
 
 
 
 
 
141
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CharacterOrder, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
62
+ Initialization Finished
63
+ Predicting CharacterOrder Using internvl
64
+ Proceeding 29-length images samples | Num: 8
65
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
66
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CharacterOrder, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
67
+ Initialization Finished
68
+ Predicting CharacterOrder Using internvl
69
+ Proceeding 29-length images samples | Num: 8
70
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
71
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CharacterOrder, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
72
+ Initialization Finished
73
+ Predicting CharacterOrder Using internvl
74
+ Proceeding 29-length images samples | Num: 8
75
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
76
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CharacterOrder, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
77
+ Initialization Finished
78
+ Predicting CharacterOrder Using internvl
79
+ Proceeding 29-length images samples | Num: 8
80
+
81
  0%| | 0/2 [00:00<?, ?it/s]
82
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.04s/it]
83
+ Proceeding 27-length images samples | Num: 5
84
+
85
  0%| | 0/2 [00:00<?, ?it/s]
86
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.03s/it]
87
+ Proceeding 27-length images samples | Num: 5
88
+
89
  0%| | 0/2 [00:00<?, ?it/s]
90
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.54s/it]
91
+ Proceeding 27-length images samples | Num: 5
92
+ Proceeding 27-length images samples | Num: 5
93
+
94
  0%| | 0/1 [00:00<?, ?it/s]
95
+ Proceeding 23-length images samples | Num: 9
96
+
97
  0%| | 0/1 [00:00<?, ?it/s]
98
+ Proceeding 23-length images samples | Num: 9
99
+
100
  0%| | 0/1 [00:00<?, ?it/s]
101
+ Proceeding 23-length images samples | Num: 9
102
+ Proceeding 23-length images samples | Num: 9
103
+
104
  0%| | 0/2 [00:00<?, ?it/s]
105
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.64s/it]
106
+ Proceeding 25-length images samples | Num: 12
107
+
108
  0%| | 0/2 [00:00<?, ?it/s]
109
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.67s/it]
110
+ Proceeding 25-length images samples | Num: 12
111
+
112
  0%| | 0/2 [00:00<?, ?it/s]
113
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.00s/it]
114
+ Proceeding 25-length images samples | Num: 12
115
+ Proceeding 25-length images samples | Num: 12
116
+
117
  0%| | 0/3 [00:00<?, ?it/s]
118
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.15s/it]
119
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.68s/it]
120
+ Proceeding 28-length images samples | Num: 12
121
+ Proceeding 28-length images samples | Num: 12
122
+
123
  0%| | 0/3 [00:00<?, ?it/s]
124
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.62s/it]
125
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.64s/it]
126
+
127
  0%| | 0/3 [00:00<?, ?it/s]
128
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.63s/it]
129
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.32s/it]
130
+ Proceeding 28-length images samples | Num: 12
131
+ Proceeding 28-length images samples | Num: 12
132
+
133
  0%| | 0/3 [00:00<?, ?it/s]
134
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.17s/it]
135
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.13s/it]
136
+ Proceeding 16-length images samples | Num: 3
137
+
138
  0%| | 0/3 [00:00<?, ?it/s]
139
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.42s/it]
140
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.29s/it]
141
+ Proceeding 16-length images samples | Num: 3
142
+
143
  0%| | 0/1 [00:00<?, ?it/s]
144
+ Proceeding 20-length images samples | Num: 6
145
+
146
  0%| | 0/1 [00:00<?, ?it/s]
147
+ Proceeding 20-length images samples | Num: 6
148
+
149
  0%| | 0/1 [00:00<?, ?it/s]
150
+ Proceeding 19-length images samples | Num: 7
151
+
152
  0%| | 0/3 [00:00<?, ?it/s]
153
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.94s/it]
154
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.43s/it]
155
+ Proceeding 16-length images samples | Num: 3
156
+
157
+ Proceeding 20-length images samples | Num: 6
158
+
159
  0%| | 0/2 [00:00<?, ?it/s]
160
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.61s/it]
161
+ Proceeding 19-length images samples | Num: 7
162
+ Proceeding 16-length images samples | Num: 3
163
+
164
  0%| | 0/1 [00:00<?, ?it/s]
165
+ Proceeding 19-length images samples | Num: 7
166
+
167
  0%| | 0/2 [00:00<?, ?it/s]
168
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.14s/it]
169
+ Proceeding 35-length images samples | Num: 20
170
+ Proceeding 20-length images samples | Num: 6
171
+
172
  0%| | 0/1 [00:00<?, ?it/s]
173
+ Proceeding 35-length images samples | Num: 20
174
+
175
  0%| | 0/2 [00:00<?, ?it/s]
176
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.40s/it]
177
+ Proceeding 35-length images samples | Num: 20
178
+ Proceeding 19-length images samples | Num: 7
179
+ Proceeding 35-length images samples | Num: 20
180
+
181
  0%| | 0/5 [00:00<?, ?it/s]
182
  20%|β–ˆβ–ˆ | 1/5 [00:03<00:12, 3.01s/it]
183
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:05<00:07, 2.61s/it]
184
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:07<00:04, 2.44s/it]
185
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:09<00:02, 2.41s/it]
186
+ Proceeding 17-length images samples | Num: 4
187
+
188
  0%| | 0/5 [00:00<?, ?it/s]
189
  20%|β–ˆβ–ˆ | 1/5 [00:03<00:13, 3.26s/it]
190
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:05<00:07, 2.57s/it]
191
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:07<00:04, 2.37s/it]
192
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:09<00:02, 2.09s/it]
193
+ Proceeding 17-length images samples | Num: 4
194
+
195
  0%| | 0/1 [00:00<?, ?it/s]
196
+ Proceeding 26-length images samples | Num: 11
197
+
198
  0%| | 0/1 [00:00<?, ?it/s]
199
+ Proceeding 26-length images samples | Num: 11
200
+
201
  0%| | 0/5 [00:00<?, ?it/s]
202
  20%|β–ˆβ–ˆ | 1/5 [00:02<00:10, 2.68s/it]
203
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 2/5 [00:04<00:07, 2.43s/it]
204
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/5 [00:07<00:04, 2.40s/it]
205
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 4/5 [00:09<00:02, 2.35s/it]
206
+ Proceeding 17-length images samples | Num: 4
207
+
208
  0%| | 0/1 [00:00<?, ?it/s]
209
+ Proceeding 26-length images samples | Num: 11
210
+ Proceeding 17-length images samples | Num: 4
211
+
212
  0%| | 0/2 [00:00<?, ?it/s]
213
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.96s/it]
214
+ Proceeding 22-length images samples | Num: 9
215
+ Proceeding 26-length images samples | Num: 11
216
+
217
  0%| | 0/3 [00:00<?, ?it/s]
218
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.80s/it]
219
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:02, 2.03s/it]
220
+ Proceeding 22-length images samples | Num: 9
221
+
222
  0%| | 0/3 [00:00<?, ?it/s]
223
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.85s/it]
224
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.74s/it]
225
+ Proceeding 22-length images samples | Num: 9
226
+
227
  0%| | 0/2 [00:00<?, ?it/s]
228
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.91s/it]
229
+ Proceeding 30-length images samples | Num: 5
230
+
231
  0%| | 0/2 [00:00<?, ?it/s]
232
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.30s/it]
233
+ Proceeding 30-length images samples | Num: 5
234
+
235
  0%| | 0/2 [00:00<?, ?it/s]
236
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.69s/it]
237
+ Proceeding 30-length images samples | Num: 5
238
+
239
  0%| | 0/1 [00:00<?, ?it/s]
240
+ Proceeding 24-length images samples | Num: 12
241
+ Proceeding 22-length images samples | Num: 9
242
+
243
  0%| | 0/1 [00:00<?, ?it/s]
244
+ Proceeding 24-length images samples | Num: 12
245
+
246
  0%| | 0/1 [00:00<?, ?it/s]
247
+ Proceeding 24-length images samples | Num: 12
248
+ Proceeding 30-length images samples | Num: 5
249
+
250
  0%| | 0/3 [00:00<?, ?it/s]
251
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.83s/it]
252
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.95s/it]
253
+ Proceeding 34-length images samples | Num: 28
254
+
255
  0%| | 0/3 [00:00<?, ?it/s]
256
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.91s/it]
257
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.84s/it]
258
+ Proceeding 34-length images samples | Num: 28
259
+
260
  0%| | 0/3 [00:00<?, ?it/s]
261
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.94s/it]
262
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.64s/it]
263
+ Proceeding 34-length images samples | Num: 28
264
+ Proceeding 24-length images samples | Num: 12
265
+ Proceeding 34-length images samples | Num: 28
266
+
267
  0%| | 0/7 [00:00<?, ?it/s]
268
  14%|β–ˆβ– | 1/7 [00:02<00:17, 2.97s/it]
269
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:05<00:14, 2.80s/it]
270
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:08<00:10, 2.69s/it]
271
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:10<00:07, 2.47s/it]
272
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:12<00:04, 2.34s/it]
273
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:14<00:02, 2.19s/it]
274
+ Proceeding 10-length images samples | Num: 3
275
+
276
+ Proceeding 21-length images samples | Num: 8
277
+
278
  0%| | 0/7 [00:00<?, ?it/s]
279
  14%|β–ˆβ– | 1/7 [00:03<00:20, 3.43s/it]
280
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:05<00:13, 2.72s/it]
281
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:07<00:09, 2.49s/it]
282
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:10<00:08, 2.68s/it]
283
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:12<00:04, 2.44s/it]
284
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:15<00:02, 2.49s/it]
285
+ Proceeding 10-length images samples | Num: 3
286
+
287
  0%| | 0/7 [00:00<?, ?it/s]
288
  14%|β–ˆβ– | 1/7 [00:02<00:12, 2.15s/it]
289
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:04<00:12, 2.47s/it]
290
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:07<00:10, 2.57s/it]
291
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:09<00:07, 2.43s/it]
292
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:11<00:04, 2.33s/it]
293
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:14<00:02, 2.35s/it]
294
+ Proceeding 10-length images samples | Num: 3
295
+
296
  0%| | 0/1 [00:00<?, ?it/s]
297
+ Proceeding 21-length images samples | Num: 8
298
+
299
  0%| | 0/1 [00:00<?, ?it/s]
300
+ Proceeding 21-length images samples | Num: 8
301
+
302
  0%| | 0/2 [00:00<?, ?it/s]
303
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.87s/it]
304
+ Proceeding 12-length images samples | Num: 3
305
+
306
+ Proceeding 32-length images samples | Num: 13
307
+
308
  0%| | 0/2 [00:00<?, ?it/s]
309
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.75s/it]
310
+ Proceeding 12-length images samples | Num: 3
311
+
312
  0%| | 0/2 [00:00<?, ?it/s]
313
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.52s/it]
314
+ Proceeding 12-length images samples | Num: 3
315
+
316
  0%| | 0/1 [00:00<?, ?it/s]
317
+ Proceeding 32-length images samples | Num: 13
318
+ Proceeding 32-length images samples | Num: 13
319
+
320
  0%| | 0/1 [00:00<?, ?it/s]
321
+ Proceeding 10-length images samples | Num: 3
322
+ Proceeding 21-length images samples | Num: 8
323
+
324
  0%| | 0/3 [00:00<?, ?it/s]
325
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.83s/it]
326
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.08s/it]
327
+ Proceeding 33-length images samples | Num: 3
328
+
329
+ Proceeding 15-length images samples | Num: 3
330
+
331
+ Proceeding 14-length images samples | Num: 3
332
+
333
+ Proceeding 13-length images samples | Num: 2
334
+
335
+ Proceeding 31-length images samples | Num: 8
336
+ Proceeding 12-length images samples | Num: 3
337
+
338
  0%| | 0/3 [00:00<?, ?it/s]
339
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.50s/it]
340
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.80s/it]
341
+ Proceeding 33-length images samples | Num: 3
342
+
343
  0%| | 0/3 [00:00<?, ?it/s]
344
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:01<00:03, 1.85s/it]
345
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.94s/it]
346
+ Proceeding 33-length images samples | Num: 3
347
+ Proceeding 32-length images samples | Num: 13
348
+
349
  0%| | 0/1 [00:00<?, ?it/s]
350
+ Proceeding 15-length images samples | Num: 3
351
+
352
  0%| | 0/2 [00:00<?, ?it/s]
353
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.28s/it]
354
+ Proceeding 18-length images samples | Num: 1
355
+
356
+ Proceeding 9-length images samples | Num: 2
357
+
358
  0%| | 0/1 [00:00<?, ?it/s]
359
+ Proceeding 15-length images samples | Num: 3
360
+
361
+
362
  0%| | 0/1 [00:00<?, ?it/s]
363
+ Proceeding 14-length images samples | Num: 3
364
+
365
  0%| | 0/1 [00:00<?, ?it/s]
366
+ Proceeding 14-length images samples | Num: 3
367
+
368
  0%| | 0/1 [00:00<?, ?it/s]
369
+ Proceeding 13-length images samples | Num: 2
370
+
371
+ Proceeding 31-length images samples | Num: 8
372
+ Proceeding 13-length images samples | Num: 2
373
+
374
  0%| | 0/1 [00:00<?, ?it/s]
375
+
376
  0%| | 0/1 [00:00<?, ?it/s]
377
+ Proceeding 31-length images samples | Num: 8
378
+ Proceeding 33-length images samples | Num: 3
379
+ Proceeding 18-length images samples | Num: 1
380
+
381
  0%| | 0/2 [00:00<?, ?it/s]
382
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.75s/it]
383
+
384
+ Proceeding 9-length images samples | Num: 2
385
+
386
+ Proceeding 15-length images samples | Num: 3
387
+
388
  0%| | 0/2 [00:00<?, ?it/s]
389
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.19s/it]
390
+ Proceeding 18-length images samples | Num: 1
391
+
392
+ Proceeding 9-length images samples | Num: 2
393
+ Proceeding 14-length images samples | Num: 3
394
+
395
  0%| | 0/1 [00:00<?, ?it/s]
396
+ Proceeding 13-length images samples | Num: 2
397
+ Proceeding 31-length images samples | Num: 8
398
+ Proceeding 18-length images samples | Num: 1
399
+ Proceeding 9-length images samples | Num: 2
400
+ evaluating CharacterOrder ...
401
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/CharacterOrder/CharacterOrder_240803234555.json
402
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset CharacterOrder --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/CharacterOrder
403
+ internvl: CharacterOrder: {'Accuracy': 0.53, 'image_quantity_level-Accuracy': {'Few': 0, 'Medium': 0.5514705882352942, 'Many': 0.484375}, 'image_quantity_level-Result': {'Few': [0, 0], 'Medium': [75, 136], 'Many': [31, 64]}}
eval_milebench/CharacterOrder/CharacterOrder_240803234555.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CharacterOrder/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Accuracy": 0.53, "image_quantity_level-Accuracy": {"Few": 0, "Medium": 0.5514705882352942, "Many": 0.484375}, "image_quantity_level-Result": {"Few": [0, 0], "Medium": [75, 136], "Many": [31, 64]}}
eval_milebench/CharacterOrder/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "1"
5
+ },
6
+ {
7
+ "id": "4",
8
+ "score": "1"
9
+ },
10
+ {
11
+ "id": "1",
12
+ "score": "0"
13
+ },
14
+ {
15
+ "id": "16",
16
+ "score": "1"
17
+ },
18
+ {
19
+ "id": "2",
20
+ "score": "1"
21
+ },
22
+ {
23
+ "id": "18",
24
+ "score": "0"
25
+ },
26
+ {
27
+ "id": "30",
28
+ "score": "1"
29
+ },
30
+ {
31
+ "id": "3",
32
+ "score": "0"
33
+ },
34
+ {
35
+ "id": "7",
36
+ "score": "1"
37
+ },
38
+ {
39
+ "id": "8",
40
+ "score": "0"
41
+ },
42
+ {
43
+ "id": "5",
44
+ "score": "0"
45
+ },
46
+ {
47
+ "id": "15",
48
+ "score": "0"
49
+ },
50
+ {
51
+ "id": "43",
52
+ "score": "0"
53
+ },
54
+ {
55
+ "id": "6",
56
+ "score": "0"
57
+ },
58
+ {
59
+ "id": "10",
60
+ "score": "1"
61
+ },
62
+ {
63
+ "id": "17",
64
+ "score": "1"
65
+ },
66
+ {
67
+ "id": "12",
68
+ "score": "0"
69
+ },
70
+ {
71
+ "id": "14",
72
+ "score": "0"
73
+ },
74
+ {
75
+ "id": "13",
76
+ "score": "0"
77
+ },
78
+ {
79
+ "id": "22",
80
+ "score": "0"
81
+ },
82
+ {
83
+ "id": "31",
84
+ "score": "1"
85
+ },
86
+ {
87
+ "id": "33",
88
+ "score": "0"
89
+ },
90
+ {
91
+ "id": "46",
92
+ "score": "1"
93
+ },
94
+ {
95
+ "id": "19",
96
+ "score": "1"
97
+ },
98
+ {
99
+ "id": "21",
100
+ "score": "0"
101
+ },
102
+ {
103
+ "id": "51",
104
+ "score": "1"
105
+ },
106
+ {
107
+ "id": "54",
108
+ "score": "1"
109
+ },
110
+ {
111
+ "id": "23",
112
+ "score": "1"
113
+ },
114
+ {
115
+ "id": "26",
116
+ "score": "1"
117
+ },
118
+ {
119
+ "id": "37",
120
+ "score": "0"
121
+ },
122
+ {
123
+ "id": "24",
124
+ "score": "1"
125
+ },
126
+ {
127
+ "id": "61",
128
+ "score": "1"
129
+ },
130
+ {
131
+ "id": "25",
132
+ "score": "1"
133
+ },
134
+ {
135
+ "id": "35",
136
+ "score": "0"
137
+ },
138
+ {
139
+ "id": "39",
140
+ "score": "1"
141
+ },
142
+ {
143
+ "id": "27",
144
+ "score": "0"
145
+ },
146
+ {
147
+ "id": "29",
148
+ "score": "0"
149
+ },
150
+ {
151
+ "id": "32",
152
+ "score": "1"
153
+ },
154
+ {
155
+ "id": "49",
156
+ "score": "0"
157
+ },
158
+ {
159
+ "id": "65",
160
+ "score": "0"
161
+ },
162
+ {
163
+ "id": "74",
164
+ "score": "1"
165
+ },
166
+ {
167
+ "id": "75",
168
+ "score": "1"
169
+ },
170
+ {
171
+ "id": "28",
172
+ "score": "1"
173
+ },
174
+ {
175
+ "id": "36",
176
+ "score": "0"
177
+ },
178
+ {
179
+ "id": "50",
180
+ "score": "1"
181
+ },
182
+ {
183
+ "id": "40",
184
+ "score": "1"
185
+ },
186
+ {
187
+ "id": "44",
188
+ "score": "0"
189
+ },
190
+ {
191
+ "id": "58",
192
+ "score": "0"
193
+ },
194
+ {
195
+ "id": "69",
196
+ "score": "1"
197
+ },
198
+ {
199
+ "id": "77",
200
+ "score": "1"
201
+ },
202
+ {
203
+ "id": "52",
204
+ "score": "1"
205
+ },
206
+ {
207
+ "id": "53",
208
+ "score": "1"
209
+ },
210
+ {
211
+ "id": "66",
212
+ "score": "1"
213
+ },
214
+ {
215
+ "id": "72",
216
+ "score": "1"
217
+ },
218
+ {
219
+ "id": "84",
220
+ "score": "1"
221
+ },
222
+ {
223
+ "id": "106",
224
+ "score": "1"
225
+ },
226
+ {
227
+ "id": "103",
228
+ "score": "1"
229
+ },
230
+ {
231
+ "id": "107",
232
+ "score": "1"
233
+ },
234
+ {
235
+ "id": "20",
236
+ "score": "1"
237
+ },
238
+ {
239
+ "id": "45",
240
+ "score": "0"
241
+ },
242
+ {
243
+ "id": "79",
244
+ "score": "0"
245
+ },
246
+ {
247
+ "id": "42",
248
+ "score": "1"
249
+ },
250
+ {
251
+ "id": "138",
252
+ "score": "0"
253
+ },
254
+ {
255
+ "id": "9",
256
+ "score": "0"
257
+ },
258
+ {
259
+ "id": "11",
260
+ "score": "0"
261
+ },
262
+ {
263
+ "id": "123",
264
+ "score": "0"
265
+ },
266
+ {
267
+ "id": "64",
268
+ "score": "0"
269
+ },
270
+ {
271
+ "id": "82",
272
+ "score": "1"
273
+ },
274
+ {
275
+ "id": "105",
276
+ "score": "0"
277
+ },
278
+ {
279
+ "id": "94",
280
+ "score": "0"
281
+ },
282
+ {
283
+ "id": "34",
284
+ "score": "0"
285
+ },
286
+ {
287
+ "id": "48",
288
+ "score": "0"
289
+ },
290
+ {
291
+ "id": "38",
292
+ "score": "0"
293
+ },
294
+ {
295
+ "id": "70",
296
+ "score": "1"
297
+ },
298
+ {
299
+ "id": "47",
300
+ "score": "1"
301
+ },
302
+ {
303
+ "id": "60",
304
+ "score": "0"
305
+ },
306
+ {
307
+ "id": "63",
308
+ "score": "1"
309
+ },
310
+ {
311
+ "id": "67",
312
+ "score": "0"
313
+ },
314
+ {
315
+ "id": "76",
316
+ "score": "0"
317
+ },
318
+ {
319
+ "id": "68",
320
+ "score": "1"
321
+ },
322
+ {
323
+ "id": "71",
324
+ "score": "0"
325
+ },
326
+ {
327
+ "id": "73",
328
+ "score": "0"
329
+ },
330
+ {
331
+ "id": "80",
332
+ "score": "1"
333
+ },
334
+ {
335
+ "id": "41",
336
+ "score": "1"
337
+ },
338
+ {
339
+ "id": "59",
340
+ "score": "1"
341
+ },
342
+ {
343
+ "id": "124",
344
+ "score": "1"
345
+ },
346
+ {
347
+ "id": "55",
348
+ "score": "1"
349
+ },
350
+ {
351
+ "id": "57",
352
+ "score": "0"
353
+ },
354
+ {
355
+ "id": "83",
356
+ "score": "1"
357
+ },
358
+ {
359
+ "id": "85",
360
+ "score": "0"
361
+ },
362
+ {
363
+ "id": "86",
364
+ "score": "0"
365
+ },
366
+ {
367
+ "id": "92",
368
+ "score": "0"
369
+ },
370
+ {
371
+ "id": "101",
372
+ "score": "0"
373
+ },
374
+ {
375
+ "id": "114",
376
+ "score": "0"
377
+ },
378
+ {
379
+ "id": "120",
380
+ "score": "1"
381
+ },
382
+ {
383
+ "id": "125",
384
+ "score": "1"
385
+ },
386
+ {
387
+ "id": "144",
388
+ "score": "0"
389
+ },
390
+ {
391
+ "id": "90",
392
+ "score": "1"
393
+ },
394
+ {
395
+ "id": "126",
396
+ "score": "1"
397
+ },
398
+ {
399
+ "id": "111",
400
+ "score": "0"
401
+ },
402
+ {
403
+ "id": "78",
404
+ "score": "0"
405
+ },
406
+ {
407
+ "id": "88",
408
+ "score": "0"
409
+ },
410
+ {
411
+ "id": "98",
412
+ "score": "0"
413
+ },
414
+ {
415
+ "id": "116",
416
+ "score": "1"
417
+ },
418
+ {
419
+ "id": "95",
420
+ "score": "0"
421
+ },
422
+ {
423
+ "id": "113",
424
+ "score": "1"
425
+ },
426
+ {
427
+ "id": "152",
428
+ "score": "1"
429
+ },
430
+ {
431
+ "id": "108",
432
+ "score": "1"
433
+ },
434
+ {
435
+ "id": "115",
436
+ "score": "1"
437
+ },
438
+ {
439
+ "id": "154",
440
+ "score": "1"
441
+ },
442
+ {
443
+ "id": "56",
444
+ "score": "1"
445
+ },
446
+ {
447
+ "id": "110",
448
+ "score": "0"
449
+ },
450
+ {
451
+ "id": "81",
452
+ "score": "1"
453
+ },
454
+ {
455
+ "id": "141",
456
+ "score": "1"
457
+ },
458
+ {
459
+ "id": "146",
460
+ "score": "1"
461
+ },
462
+ {
463
+ "id": "142",
464
+ "score": "1"
465
+ },
466
+ {
467
+ "id": "161",
468
+ "score": "1"
469
+ },
470
+ {
471
+ "id": "177",
472
+ "score": "0"
473
+ },
474
+ {
475
+ "id": "132",
476
+ "score": "1"
477
+ },
478
+ {
479
+ "id": "134",
480
+ "score": "0"
481
+ },
482
+ {
483
+ "id": "145",
484
+ "score": "1"
485
+ },
486
+ {
487
+ "id": "182",
488
+ "score": "0"
489
+ },
490
+ {
491
+ "id": "62",
492
+ "score": "1"
493
+ },
494
+ {
495
+ "id": "117",
496
+ "score": "0"
497
+ },
498
+ {
499
+ "id": "151",
500
+ "score": "0"
501
+ },
502
+ {
503
+ "id": "87",
504
+ "score": "0"
505
+ },
506
+ {
507
+ "id": "97",
508
+ "score": "0"
509
+ },
510
+ {
511
+ "id": "100",
512
+ "score": "1"
513
+ },
514
+ {
515
+ "id": "127",
516
+ "score": "1"
517
+ },
518
+ {
519
+ "id": "131",
520
+ "score": "1"
521
+ },
522
+ {
523
+ "id": "91",
524
+ "score": "1"
525
+ },
526
+ {
527
+ "id": "96",
528
+ "score": "0"
529
+ },
530
+ {
531
+ "id": "102",
532
+ "score": "0"
533
+ },
534
+ {
535
+ "id": "121",
536
+ "score": "1"
537
+ },
538
+ {
539
+ "id": "104",
540
+ "score": "1"
541
+ },
542
+ {
543
+ "id": "156",
544
+ "score": "1"
545
+ },
546
+ {
547
+ "id": "140",
548
+ "score": "1"
549
+ },
550
+ {
551
+ "id": "93",
552
+ "score": "0"
553
+ },
554
+ {
555
+ "id": "109",
556
+ "score": "0"
557
+ },
558
+ {
559
+ "id": "162",
560
+ "score": "1"
561
+ },
562
+ {
563
+ "id": "128",
564
+ "score": "1"
565
+ },
566
+ {
567
+ "id": "130",
568
+ "score": "1"
569
+ },
570
+ {
571
+ "id": "137",
572
+ "score": "0"
573
+ },
574
+ {
575
+ "id": "139",
576
+ "score": "1"
577
+ },
578
+ {
579
+ "id": "148",
580
+ "score": "0"
581
+ },
582
+ {
583
+ "id": "150",
584
+ "score": "1"
585
+ },
586
+ {
587
+ "id": "159",
588
+ "score": "1"
589
+ },
590
+ {
591
+ "id": "171",
592
+ "score": "0"
593
+ },
594
+ {
595
+ "id": "143",
596
+ "score": "0"
597
+ },
598
+ {
599
+ "id": "153",
600
+ "score": "0"
601
+ },
602
+ {
603
+ "id": "160",
604
+ "score": "1"
605
+ },
606
+ {
607
+ "id": "99",
608
+ "score": "1"
609
+ },
610
+ {
611
+ "id": "118",
612
+ "score": "0"
613
+ },
614
+ {
615
+ "id": "133",
616
+ "score": "1"
617
+ },
618
+ {
619
+ "id": "122",
620
+ "score": "0"
621
+ },
622
+ {
623
+ "id": "164",
624
+ "score": "0"
625
+ },
626
+ {
627
+ "id": "170",
628
+ "score": "0"
629
+ },
630
+ {
631
+ "id": "136",
632
+ "score": "0"
633
+ },
634
+ {
635
+ "id": "157",
636
+ "score": "1"
637
+ },
638
+ {
639
+ "id": "112",
640
+ "score": "1"
641
+ },
642
+ {
643
+ "id": "119",
644
+ "score": "1"
645
+ },
646
+ {
647
+ "id": "89",
648
+ "score": "0"
649
+ },
650
+ {
651
+ "id": "163",
652
+ "score": "1"
653
+ },
654
+ {
655
+ "id": "198",
656
+ "score": "1"
657
+ },
658
+ {
659
+ "id": "188",
660
+ "score": "1"
661
+ },
662
+ {
663
+ "id": "194",
664
+ "score": "0"
665
+ },
666
+ {
667
+ "id": "196",
668
+ "score": "0"
669
+ },
670
+ {
671
+ "id": "147",
672
+ "score": "0"
673
+ },
674
+ {
675
+ "id": "189",
676
+ "score": "0"
677
+ },
678
+ {
679
+ "id": "193",
680
+ "score": "0"
681
+ },
682
+ {
683
+ "id": "181",
684
+ "score": "1"
685
+ },
686
+ {
687
+ "id": "191",
688
+ "score": "1"
689
+ },
690
+ {
691
+ "id": "135",
692
+ "score": "0"
693
+ },
694
+ {
695
+ "id": "168",
696
+ "score": "1"
697
+ },
698
+ {
699
+ "id": "178",
700
+ "score": "1"
701
+ },
702
+ {
703
+ "id": "184",
704
+ "score": "1"
705
+ },
706
+ {
707
+ "id": "192",
708
+ "score": "1"
709
+ },
710
+ {
711
+ "id": "167",
712
+ "score": "0"
713
+ },
714
+ {
715
+ "id": "129",
716
+ "score": "0"
717
+ },
718
+ {
719
+ "id": "169",
720
+ "score": "1"
721
+ },
722
+ {
723
+ "id": "165",
724
+ "score": "0"
725
+ },
726
+ {
727
+ "id": "187",
728
+ "score": "0"
729
+ },
730
+ {
731
+ "id": "197",
732
+ "score": "1"
733
+ },
734
+ {
735
+ "id": "172",
736
+ "score": "1"
737
+ },
738
+ {
739
+ "id": "174",
740
+ "score": "1"
741
+ },
742
+ {
743
+ "id": "199",
744
+ "score": "1"
745
+ },
746
+ {
747
+ "id": "166",
748
+ "score": "0"
749
+ },
750
+ {
751
+ "id": "173",
752
+ "score": "0"
753
+ },
754
+ {
755
+ "id": "175",
756
+ "score": "1"
757
+ },
758
+ {
759
+ "id": "176",
760
+ "score": "0"
761
+ },
762
+ {
763
+ "id": "180",
764
+ "score": "0"
765
+ },
766
+ {
767
+ "id": "183",
768
+ "score": "0"
769
+ },
770
+ {
771
+ "id": "190",
772
+ "score": "0"
773
+ },
774
+ {
775
+ "id": "185",
776
+ "score": "1"
777
+ },
778
+ {
779
+ "id": "186",
780
+ "score": "0"
781
+ },
782
+ {
783
+ "id": "149",
784
+ "score": "1"
785
+ },
786
+ {
787
+ "id": "155",
788
+ "score": "1"
789
+ },
790
+ {
791
+ "id": "195",
792
+ "score": "1"
793
+ },
794
+ {
795
+ "id": "158",
796
+ "score": "0"
797
+ },
798
+ {
799
+ "id": "179",
800
+ "score": "0"
801
+ }
802
+ ]
eval_milebench/CharacterOrder/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CharacterOrder/pred_with_extracted.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CounterfactualInference.log ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/50 [00:00<?, ?it/s]
1
  2%|▏ | 1/50 [00:01<01:24, 1.72s/it]
2
  4%|▍ | 2/50 [00:02<00:56, 1.18s/it]
3
  6%|β–Œ | 3/50 [00:03<00:46, 1.02it/s]
4
  8%|β–Š | 4/50 [00:03<00:36, 1.25it/s]
5
  10%|β–ˆ | 5/50 [00:04<00:28, 1.58it/s]
6
  12%|β–ˆβ– | 6/50 [00:04<00:22, 1.97it/s]
7
  14%|β–ˆβ– | 7/50 [00:04<00:19, 2.18it/s]
8
  16%|β–ˆβ–Œ | 8/50 [00:05<00:17, 2.39it/s]
9
  18%|β–ˆβ–Š | 9/50 [00:05<00:16, 2.52it/s]
10
  20%|β–ˆβ–ˆ | 10/50 [00:05<00:13, 3.00it/s]
11
  22%|β–ˆβ–ˆβ– | 11/50 [00:05<00:11, 3.50it/s]
12
  24%|β–ˆβ–ˆβ– | 12/50 [00:06<00:11, 3.23it/s]
13
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:06<00:09, 3.73it/s]
14
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:06<00:10, 3.59it/s]
15
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:06<00:08, 3.98it/s]
16
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:06<00:07, 4.43it/s]
17
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:07<00:06, 4.82it/s]
18
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:07<00:06, 4.76it/s]
19
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:07<00:06,
20
  0%| | 0/50 [00:00<?, ?it/s]
21
  2%|▏ | 1/50 [00:01<01:24, 1.72s/it]
22
  4%|▍ | 2/50 [00:02<00:55, 1.17s/it]
23
  6%|β–Œ | 3/50 [00:03<00:45, 1.03it/s]
24
  8%|β–Š | 4/50 [00:03<00:36, 1.27it/s]
25
  10%|β–ˆ | 5/50 [00:04<00:31, 1.42it/s]
26
  12%|β–ˆβ– | 6/50 [00:04<00:26, 1.66it/s]
27
  14%|β–ˆβ– | 7/50 [00:04<00:19, 2.17it/s]
28
  16%|β–ˆβ–Œ | 8/50 [00:05<00:20, 2.09it/s]
29
  18%|β–ˆβ–Š | 9/50 [00:05<00:15, 2.60it/s]
30
  20%|β–ˆβ–ˆ | 10/50 [00:05<00:12, 3.10it/s]
31
  22%|β–ˆβ–ˆβ– | 11/50 [00:06<00:13, 2.79it/s]
32
  24%|β–ˆβ–ˆβ– | 12/50 [00:06<00:13, 2.77it/s]
33
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:07<00:14, 2.51it/s]
34
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:07<00:11, 3.05it/s]
35
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:07<00:10, 3.38it/s]
36
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:08<00:13, 2.52it/s]
37
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:08<00:15, 2.10it/s]
38
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:09<00:15, 2.07it/s]
39
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:09<00:14,
40
  0%| | 0/50 [00:00<?, ?it/s]
41
  2%|▏ | 1/50 [00:01<01:24, 1.72s/it]
42
  4%|▍ | 2/50 [00:02<00:56, 1.17s/it]
43
  6%|β–Œ | 3/50 [00:03<00:46, 1.02it/s]
44
  8%|β–Š | 4/50 [00:03<00:36, 1.25it/s]
45
  10%|β–ˆ | 5/50 [00:04<00:28, 1.60it/s]
46
  12%|β–ˆβ– | 6/50 [00:04<00:28, 1.57it/s]
47
  14%|β–ˆβ– | 7/50 [00:05<00:24, 1.77it/s]
48
  16%|β–ˆβ–Œ | 8/50 [00:05<00:19, 2.11it/s]
49
  18%|β–ˆβ–Š | 9/50 [00:05<00:15, 2.61it/s]
50
  20%|β–ˆβ–ˆ | 10/50 [00:05<00:14, 2.84it/s]
51
  22%|β–ˆβ–ˆβ– | 11/50 [00:06<00:13, 3.00it/s]
52
  24%|β–ˆβ–ˆβ– | 12/50 [00:06<00:13, 2.87it/s]
53
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:07<00:18, 2.04it/s]
54
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:07<00:17, 2.02it/s]
55
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:08<00:14, 2.48it/s]
56
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:08<00:16, 2.10it/s]
57
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:09<00:15, 2.07it/s]
58
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:09<00:15, 2.13it/s]
59
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:10<00:14, 5.09it/s]
60
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:08<00:08, 3.37it/s]
61
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:08<00:11, 2.45it/s]
62
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:09<00:12, 2.28it/s]
63
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:09<00:11, 2.30it/s]
64
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:10<00:11, 2.18it/s]
65
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:10<00:10, 2.40it/s]
66
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:10<00:08, 2.88it/s]
67
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:10<00:06, 3.37it/s]
68
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:11<00:05, 3.83it/s]
69
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:11<00:04, 4.26it/s]
70
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:11<00:04, 4.66it/s]
71
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:11<00:03, 4.92it/s]
72
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:11<00:03, 5.24it/s]
73
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:11<00:03, 5.51it/s]
74
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:12<00:02, 5.71it/s]
75
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:12<00:02, 5.30it/s]
76
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:12<00:02, 5.5 2.07it/s]
77
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:10<00:12, 2.40it/s]
78
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:10<00:09, 2.93it/s]
79
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:10<00:08, 3.47it/s]
80
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:10<00:06, 3.97it/s]
81
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:11<00:05, 4.43it/s]
82
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:11<00:05, 4.78it/s]
83
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:11<00:04, 5.00it/s]
84
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:11<00:04, 5.21it/s]
85
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:11<00:04, 5.29it/s]
86
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:12<00:03, 5.40it/s]
87
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:12<00:03, 5.09it/s]
88
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:12<00:03, 5.35it/s]
89
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:12<00:03, 5.59it/s]
90
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:12<00:02, 5.76it/s]
91
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:12<00:02, 5.94it/s]
92
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:13<00:02, 5.68it/s]
93
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:13<00:02, 5.8 2.21it/s]
94
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:09<00:10, 2.73it/s]
95
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:10<00:11, 2.57it/s]
96
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:10<00:10, 2.75it/s]
97
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:10<00:08, 3.20it/s]
98
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:10<00:07, 3.68it/s]
99
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:11<00:06, 4.07it/s]
100
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:11<00:05, 4.35it/s]
101
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:11<00:05, 3.86it/s]
102
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:11<00:05, 4.26it/s]
103
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:11<00:04, 4.66it/s]
104
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:12<00:04, 4.99it/s]
105
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:12<00:03, 5.09it/s]
106
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:12<00:03, 5.26it/s]
107
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:12<00:03, 5.39it/s]
108
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:13<00:03, 4.12it/s]
109
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:13<00:03, 4.48it/s]
110
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:13<00:02, 4.86it/s]
111
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:12<00:02, 5.74it/s]
112
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:12<00:02, 5.85it/s]
113
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:12<00:01, 5.95it/s]
114
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:13<00:01, 5.88it/s]
115
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:13<00:01, 5.91it/s]
116
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:13<00:01, 5.81it/s]
117
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:13<00:01, 5.56it/s]
118
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:13<00:01, 5.56it/s]
119
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:13<00:00, 5.67it/s]
120
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:14<00:00, 5.78it/s]
121
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:14<00:00, 5.84it/s]
122
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:14<00:00, 5.88it/s]
123
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:14<00:00, 5.77it/s]
 
124
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:13<00:02, 5.72it/s]
125
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:13<00:02, 5.36it/s]
126
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:13<00:02, 5.49it/s]
127
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:13<00:01, 5.67it/s]
128
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:14<00:01, 5.80it/s]
129
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:14<00:01, 5.84it/s]
130
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:14<00:01, 5.92it/s]
131
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:14<00:01, 5.76it/s]
132
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:14<00:00, 5.69it/s]
133
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:14<00:00, 5.81it/s]
134
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:15<00:00, 5.86it/s]
135
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:15<00:00, 5.91it/s]
136
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:15<00:00, 5.91it/s]
 
137
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:13<00:02, 5.03it/s]
138
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:13<00:02, 5.31it/s]
139
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:13<00:02, 5.47it/s]
140
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:14<00:01, 5.62it/s]
141
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:14<00:01, 5.76it/s]
142
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:14<00:01, 4.13it/s]
143
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:14<00:01, 4.47it/s]
144
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:14<00:01, 4.81it/s]
145
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:15<00:00, 5.08it/s]
146
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:15<00:00, 5.30it/s]
147
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:15<00:00, 5.49it/s]
148
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:15<00:00, 5.50it/s]
149
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:15<00:00, 5.73it/s]
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
62
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CounterfactualInference, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
63
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CounterfactualInference, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
64
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
65
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CounterfactualInference, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
66
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
67
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task CounterfactualInference, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
68
+ Initialization Finished
69
+ Predicting CounterfactualInference Using internvl
70
+ Proceeding 5-length images samples | Num: 200
71
+ Initialization Finished
72
+ Predicting CounterfactualInference Using internvl
73
+ Proceeding 5-length images samples | Num: 200
74
+ Initialization Finished
75
+ Predicting CounterfactualInference Using internvl
76
+ Proceeding 5-length images samples | Num: 200
77
+ Initialization Finished
78
+ Predicting CounterfactualInference Using internvl
79
+ Proceeding 5-length images samples | Num: 200
80
+
81
  0%| | 0/50 [00:00<?, ?it/s]
82
  2%|▏ | 1/50 [00:01<01:24, 1.72s/it]
83
  4%|▍ | 2/50 [00:02<00:56, 1.18s/it]
84
  6%|β–Œ | 3/50 [00:03<00:46, 1.02it/s]
85
  8%|β–Š | 4/50 [00:03<00:36, 1.25it/s]
86
  10%|β–ˆ | 5/50 [00:04<00:28, 1.58it/s]
87
  12%|β–ˆβ– | 6/50 [00:04<00:22, 1.97it/s]
88
  14%|β–ˆβ– | 7/50 [00:04<00:19, 2.18it/s]
89
  16%|β–ˆβ–Œ | 8/50 [00:05<00:17, 2.39it/s]
90
  18%|β–ˆβ–Š | 9/50 [00:05<00:16, 2.52it/s]
91
  20%|β–ˆβ–ˆ | 10/50 [00:05<00:13, 3.00it/s]
92
  22%|β–ˆβ–ˆβ– | 11/50 [00:05<00:11, 3.50it/s]
93
  24%|β–ˆβ–ˆβ– | 12/50 [00:06<00:11, 3.23it/s]
94
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:06<00:09, 3.73it/s]
95
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:06<00:10, 3.59it/s]
96
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:06<00:08, 3.98it/s]
97
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:06<00:07, 4.43it/s]
98
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:07<00:06, 4.82it/s]
99
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:07<00:06, 4.76it/s]
100
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:07<00:06,
101
  0%| | 0/50 [00:00<?, ?it/s]
102
  2%|▏ | 1/50 [00:01<01:24, 1.72s/it]
103
  4%|▍ | 2/50 [00:02<00:55, 1.17s/it]
104
  6%|β–Œ | 3/50 [00:03<00:45, 1.03it/s]
105
  8%|β–Š | 4/50 [00:03<00:36, 1.27it/s]
106
  10%|β–ˆ | 5/50 [00:04<00:31, 1.42it/s]
107
  12%|β–ˆβ– | 6/50 [00:04<00:26, 1.66it/s]
108
  14%|β–ˆβ– | 7/50 [00:04<00:19, 2.17it/s]
109
  16%|β–ˆβ–Œ | 8/50 [00:05<00:20, 2.09it/s]
110
  18%|β–ˆβ–Š | 9/50 [00:05<00:15, 2.60it/s]
111
  20%|β–ˆβ–ˆ | 10/50 [00:05<00:12, 3.10it/s]
112
  22%|β–ˆβ–ˆβ– | 11/50 [00:06<00:13, 2.79it/s]
113
  24%|β–ˆβ–ˆβ– | 12/50 [00:06<00:13, 2.77it/s]
114
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:07<00:14, 2.51it/s]
115
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:07<00:11, 3.05it/s]
116
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:07<00:10, 3.38it/s]
117
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:08<00:13, 2.52it/s]
118
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:08<00:15, 2.10it/s]
119
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:09<00:15, 2.07it/s]
120
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:09<00:14,
121
  0%| | 0/50 [00:00<?, ?it/s]
122
  2%|▏ | 1/50 [00:01<01:24, 1.72s/it]
123
  4%|▍ | 2/50 [00:02<00:56, 1.17s/it]
124
  6%|β–Œ | 3/50 [00:03<00:46, 1.02it/s]
125
  8%|β–Š | 4/50 [00:03<00:36, 1.25it/s]
126
  10%|β–ˆ | 5/50 [00:04<00:28, 1.60it/s]
127
  12%|β–ˆβ– | 6/50 [00:04<00:28, 1.57it/s]
128
  14%|β–ˆβ– | 7/50 [00:05<00:24, 1.77it/s]
129
  16%|β–ˆβ–Œ | 8/50 [00:05<00:19, 2.11it/s]
130
  18%|β–ˆβ–Š | 9/50 [00:05<00:15, 2.61it/s]
131
  20%|β–ˆβ–ˆ | 10/50 [00:05<00:14, 2.84it/s]
132
  22%|β–ˆβ–ˆβ– | 11/50 [00:06<00:13, 3.00it/s]
133
  24%|β–ˆβ–ˆβ– | 12/50 [00:06<00:13, 2.87it/s]
134
  26%|β–ˆβ–ˆβ–Œ | 13/50 [00:07<00:18, 2.04it/s]
135
  28%|β–ˆβ–ˆβ–Š | 14/50 [00:07<00:17, 2.02it/s]
136
  30%|β–ˆβ–ˆβ–ˆ | 15/50 [00:08<00:14, 2.48it/s]
137
  32%|β–ˆβ–ˆβ–ˆβ– | 16/50 [00:08<00:16, 2.10it/s]
138
  34%|β–ˆβ–ˆβ–ˆβ– | 17/50 [00:09<00:15, 2.07it/s]
139
  36%|β–ˆβ–ˆβ–ˆβ–Œ | 18/50 [00:09<00:15, 2.13it/s]
140
  38%|β–ˆβ–ˆβ–ˆβ–Š | 19/50 [00:10<00:14, 5.09it/s]
141
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:08<00:08, 3.37it/s]
142
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:08<00:11, 2.45it/s]
143
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:09<00:12, 2.28it/s]
144
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:09<00:11, 2.30it/s]
145
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:10<00:11, 2.18it/s]
146
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:10<00:10, 2.40it/s]
147
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:10<00:08, 2.88it/s]
148
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:10<00:06, 3.37it/s]
149
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:11<00:05, 3.83it/s]
150
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:11<00:04, 4.26it/s]
151
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:11<00:04, 4.66it/s]
152
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:11<00:03, 4.92it/s]
153
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:11<00:03, 5.24it/s]
154
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:11<00:03, 5.51it/s]
155
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:12<00:02, 5.71it/s]
156
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:12<00:02, 5.30it/s]
157
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:12<00:02, 5.5 2.07it/s]
158
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:10<00:12, 2.40it/s]
159
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:10<00:09, 2.93it/s]
160
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:10<00:08, 3.47it/s]
161
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:10<00:06, 3.97it/s]
162
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:11<00:05, 4.43it/s]
163
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:11<00:05, 4.78it/s]
164
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:11<00:04, 5.00it/s]
165
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:11<00:04, 5.21it/s]
166
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:11<00:04, 5.29it/s]
167
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:12<00:03, 5.40it/s]
168
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:12<00:03, 5.09it/s]
169
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:12<00:03, 5.35it/s]
170
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:12<00:03, 5.59it/s]
171
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:12<00:02, 5.76it/s]
172
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:12<00:02, 5.94it/s]
173
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:13<00:02, 5.68it/s]
174
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:13<00:02, 5.8 2.21it/s]
175
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 20/50 [00:09<00:10, 2.73it/s]
176
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 21/50 [00:10<00:11, 2.57it/s]
177
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 22/50 [00:10<00:10, 2.75it/s]
178
  46%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/50 [00:10<00:08, 3.20it/s]
179
  48%|β–ˆβ–ˆβ–ˆβ–ˆβ–Š | 24/50 [00:10<00:07, 3.68it/s]
180
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 25/50 [00:11<00:06, 4.07it/s]
181
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 26/50 [00:11<00:05, 4.35it/s]
182
  54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 27/50 [00:11<00:05, 3.86it/s]
183
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 28/50 [00:11<00:05, 4.26it/s]
184
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 29/50 [00:11<00:04, 4.66it/s]
185
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 30/50 [00:12<00:04, 4.99it/s]
186
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 31/50 [00:12<00:03, 5.09it/s]
187
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 32/50 [00:12<00:03, 5.26it/s]
188
  66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 33/50 [00:12<00:03, 5.39it/s]
189
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 34/50 [00:13<00:03, 4.12it/s]
190
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 35/50 [00:13<00:03, 4.48it/s]
191
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 36/50 [00:13<00:02, 4.86it/s]
192
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:12<00:02, 5.74it/s]
193
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:12<00:02, 5.85it/s]
194
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:12<00:01, 5.95it/s]
195
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:13<00:01, 5.88it/s]
196
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:13<00:01, 5.91it/s]
197
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:13<00:01, 5.81it/s]
198
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:13<00:01, 5.56it/s]
199
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:13<00:01, 5.56it/s]
200
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:13<00:00, 5.67it/s]
201
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:14<00:00, 5.78it/s]
202
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:14<00:00, 5.84it/s]
203
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:14<00:00, 5.88it/s]
204
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:14<00:00, 5.77it/s]
205
+ 0it/s]
206
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:13<00:02, 5.72it/s]
207
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:13<00:02, 5.36it/s]
208
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:13<00:02, 5.49it/s]
209
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:13<00:01, 5.67it/s]
210
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:14<00:01, 5.80it/s]
211
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:14<00:01, 5.84it/s]
212
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:14<00:01, 5.92it/s]
213
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:14<00:01, 5.76it/s]
214
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:14<00:00, 5.69it/s]
215
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:14<00:00, 5.81it/s]
216
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:15<00:00, 5.86it/s]
217
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:15<00:00, 5.91it/s]
218
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:15<00:00, 5.91it/s]
219
+ 5it/s]
220
  74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/50 [00:13<00:02, 5.03it/s]
221
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 38/50 [00:13<00:02, 5.31it/s]
222
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 39/50 [00:13<00:02, 5.47it/s]
223
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 40/50 [00:14<00:01, 5.62it/s]
224
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/50 [00:14<00:01, 5.76it/s]
225
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/50 [00:14<00:01, 4.13it/s]
226
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 43/50 [00:14<00:01, 4.47it/s]
227
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 44/50 [00:14<00:01, 4.81it/s]
228
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 45/50 [00:15<00:00, 5.08it/s]
229
  92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 46/50 [00:15<00:00, 5.30it/s]
230
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 47/50 [00:15<00:00, 5.49it/s]
231
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 48/50 [00:15<00:00, 5.50it/s]
232
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 49/50 [00:15<00:00, 5.73it/s]
233
+ evaluating CounterfactualInference ...
234
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/CounterfactualInference/CounterfactualInference_240803234437.json
235
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset CounterfactualInference --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/CounterfactualInference
236
+ internvl: CounterfactualInference: {'Accuracy': 0.635, 'image_quantity_level-Accuracy': {'Few': 0.635, 'Medium': 0, 'Many': 0}, 'image_quantity_level-Result': {'Few': [127, 200], 'Medium': [0, 0], 'Many': [0, 0]}}
eval_milebench/CounterfactualInference/CounterfactualInference_240803234437.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CounterfactualInference/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Accuracy": 0.635, "image_quantity_level-Accuracy": {"Few": 0.635, "Medium": 0, "Many": 0}, "image_quantity_level-Result": {"Few": [127, 200], "Medium": [0, 0], "Many": [0, 0]}}
eval_milebench/CounterfactualInference/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "1"
5
+ },
6
+ {
7
+ "id": "1",
8
+ "score": "0"
9
+ },
10
+ {
11
+ "id": "2",
12
+ "score": "1"
13
+ },
14
+ {
15
+ "id": "3",
16
+ "score": "0"
17
+ },
18
+ {
19
+ "id": "4",
20
+ "score": "1"
21
+ },
22
+ {
23
+ "id": "5",
24
+ "score": "1"
25
+ },
26
+ {
27
+ "id": "6",
28
+ "score": "1"
29
+ },
30
+ {
31
+ "id": "7",
32
+ "score": "1"
33
+ },
34
+ {
35
+ "id": "8",
36
+ "score": "1"
37
+ },
38
+ {
39
+ "id": "9",
40
+ "score": "0"
41
+ },
42
+ {
43
+ "id": "10",
44
+ "score": "0"
45
+ },
46
+ {
47
+ "id": "11",
48
+ "score": "1"
49
+ },
50
+ {
51
+ "id": "12",
52
+ "score": "1"
53
+ },
54
+ {
55
+ "id": "13",
56
+ "score": "0"
57
+ },
58
+ {
59
+ "id": "14",
60
+ "score": "1"
61
+ },
62
+ {
63
+ "id": "15",
64
+ "score": "0"
65
+ },
66
+ {
67
+ "id": "16",
68
+ "score": "1"
69
+ },
70
+ {
71
+ "id": "17",
72
+ "score": "1"
73
+ },
74
+ {
75
+ "id": "18",
76
+ "score": "1"
77
+ },
78
+ {
79
+ "id": "19",
80
+ "score": "1"
81
+ },
82
+ {
83
+ "id": "20",
84
+ "score": "1"
85
+ },
86
+ {
87
+ "id": "21",
88
+ "score": "0"
89
+ },
90
+ {
91
+ "id": "22",
92
+ "score": "1"
93
+ },
94
+ {
95
+ "id": "23",
96
+ "score": "0"
97
+ },
98
+ {
99
+ "id": "24",
100
+ "score": "1"
101
+ },
102
+ {
103
+ "id": "25",
104
+ "score": "1"
105
+ },
106
+ {
107
+ "id": "26",
108
+ "score": "0"
109
+ },
110
+ {
111
+ "id": "27",
112
+ "score": "1"
113
+ },
114
+ {
115
+ "id": "28",
116
+ "score": "1"
117
+ },
118
+ {
119
+ "id": "29",
120
+ "score": "0"
121
+ },
122
+ {
123
+ "id": "30",
124
+ "score": "1"
125
+ },
126
+ {
127
+ "id": "31",
128
+ "score": "0"
129
+ },
130
+ {
131
+ "id": "32",
132
+ "score": "0"
133
+ },
134
+ {
135
+ "id": "33",
136
+ "score": "1"
137
+ },
138
+ {
139
+ "id": "34",
140
+ "score": "1"
141
+ },
142
+ {
143
+ "id": "35",
144
+ "score": "1"
145
+ },
146
+ {
147
+ "id": "36",
148
+ "score": "1"
149
+ },
150
+ {
151
+ "id": "37",
152
+ "score": "1"
153
+ },
154
+ {
155
+ "id": "38",
156
+ "score": "0"
157
+ },
158
+ {
159
+ "id": "39",
160
+ "score": "0"
161
+ },
162
+ {
163
+ "id": "40",
164
+ "score": "1"
165
+ },
166
+ {
167
+ "id": "41",
168
+ "score": "1"
169
+ },
170
+ {
171
+ "id": "42",
172
+ "score": "0"
173
+ },
174
+ {
175
+ "id": "43",
176
+ "score": "0"
177
+ },
178
+ {
179
+ "id": "44",
180
+ "score": "0"
181
+ },
182
+ {
183
+ "id": "45",
184
+ "score": "0"
185
+ },
186
+ {
187
+ "id": "46",
188
+ "score": "1"
189
+ },
190
+ {
191
+ "id": "47",
192
+ "score": "1"
193
+ },
194
+ {
195
+ "id": "48",
196
+ "score": "0"
197
+ },
198
+ {
199
+ "id": "49",
200
+ "score": "1"
201
+ },
202
+ {
203
+ "id": "50",
204
+ "score": "1"
205
+ },
206
+ {
207
+ "id": "51",
208
+ "score": "1"
209
+ },
210
+ {
211
+ "id": "52",
212
+ "score": "1"
213
+ },
214
+ {
215
+ "id": "53",
216
+ "score": "1"
217
+ },
218
+ {
219
+ "id": "54",
220
+ "score": "0"
221
+ },
222
+ {
223
+ "id": "55",
224
+ "score": "1"
225
+ },
226
+ {
227
+ "id": "56",
228
+ "score": "0"
229
+ },
230
+ {
231
+ "id": "57",
232
+ "score": "1"
233
+ },
234
+ {
235
+ "id": "58",
236
+ "score": "0"
237
+ },
238
+ {
239
+ "id": "59",
240
+ "score": "1"
241
+ },
242
+ {
243
+ "id": "60",
244
+ "score": "1"
245
+ },
246
+ {
247
+ "id": "61",
248
+ "score": "1"
249
+ },
250
+ {
251
+ "id": "62",
252
+ "score": "1"
253
+ },
254
+ {
255
+ "id": "63",
256
+ "score": "0"
257
+ },
258
+ {
259
+ "id": "64",
260
+ "score": "1"
261
+ },
262
+ {
263
+ "id": "65",
264
+ "score": "1"
265
+ },
266
+ {
267
+ "id": "66",
268
+ "score": "1"
269
+ },
270
+ {
271
+ "id": "67",
272
+ "score": "1"
273
+ },
274
+ {
275
+ "id": "68",
276
+ "score": "0"
277
+ },
278
+ {
279
+ "id": "69",
280
+ "score": "0"
281
+ },
282
+ {
283
+ "id": "70",
284
+ "score": "1"
285
+ },
286
+ {
287
+ "id": "71",
288
+ "score": "1"
289
+ },
290
+ {
291
+ "id": "72",
292
+ "score": "1"
293
+ },
294
+ {
295
+ "id": "73",
296
+ "score": "1"
297
+ },
298
+ {
299
+ "id": "74",
300
+ "score": "0"
301
+ },
302
+ {
303
+ "id": "75",
304
+ "score": "1"
305
+ },
306
+ {
307
+ "id": "76",
308
+ "score": "1"
309
+ },
310
+ {
311
+ "id": "77",
312
+ "score": "0"
313
+ },
314
+ {
315
+ "id": "78",
316
+ "score": "0"
317
+ },
318
+ {
319
+ "id": "79",
320
+ "score": "1"
321
+ },
322
+ {
323
+ "id": "80",
324
+ "score": "0"
325
+ },
326
+ {
327
+ "id": "81",
328
+ "score": "1"
329
+ },
330
+ {
331
+ "id": "82",
332
+ "score": "0"
333
+ },
334
+ {
335
+ "id": "83",
336
+ "score": "0"
337
+ },
338
+ {
339
+ "id": "84",
340
+ "score": "1"
341
+ },
342
+ {
343
+ "id": "85",
344
+ "score": "1"
345
+ },
346
+ {
347
+ "id": "86",
348
+ "score": "0"
349
+ },
350
+ {
351
+ "id": "87",
352
+ "score": "1"
353
+ },
354
+ {
355
+ "id": "88",
356
+ "score": "1"
357
+ },
358
+ {
359
+ "id": "89",
360
+ "score": "0"
361
+ },
362
+ {
363
+ "id": "90",
364
+ "score": "0"
365
+ },
366
+ {
367
+ "id": "91",
368
+ "score": "1"
369
+ },
370
+ {
371
+ "id": "92",
372
+ "score": "0"
373
+ },
374
+ {
375
+ "id": "93",
376
+ "score": "1"
377
+ },
378
+ {
379
+ "id": "94",
380
+ "score": "1"
381
+ },
382
+ {
383
+ "id": "95",
384
+ "score": "0"
385
+ },
386
+ {
387
+ "id": "96",
388
+ "score": "1"
389
+ },
390
+ {
391
+ "id": "97",
392
+ "score": "1"
393
+ },
394
+ {
395
+ "id": "98",
396
+ "score": "1"
397
+ },
398
+ {
399
+ "id": "99",
400
+ "score": "0"
401
+ },
402
+ {
403
+ "id": "100",
404
+ "score": "1"
405
+ },
406
+ {
407
+ "id": "101",
408
+ "score": "1"
409
+ },
410
+ {
411
+ "id": "102",
412
+ "score": "1"
413
+ },
414
+ {
415
+ "id": "103",
416
+ "score": "0"
417
+ },
418
+ {
419
+ "id": "104",
420
+ "score": "0"
421
+ },
422
+ {
423
+ "id": "105",
424
+ "score": "1"
425
+ },
426
+ {
427
+ "id": "106",
428
+ "score": "0"
429
+ },
430
+ {
431
+ "id": "107",
432
+ "score": "0"
433
+ },
434
+ {
435
+ "id": "108",
436
+ "score": "0"
437
+ },
438
+ {
439
+ "id": "109",
440
+ "score": "0"
441
+ },
442
+ {
443
+ "id": "110",
444
+ "score": "1"
445
+ },
446
+ {
447
+ "id": "111",
448
+ "score": "1"
449
+ },
450
+ {
451
+ "id": "112",
452
+ "score": "1"
453
+ },
454
+ {
455
+ "id": "113",
456
+ "score": "1"
457
+ },
458
+ {
459
+ "id": "114",
460
+ "score": "1"
461
+ },
462
+ {
463
+ "id": "115",
464
+ "score": "1"
465
+ },
466
+ {
467
+ "id": "116",
468
+ "score": "1"
469
+ },
470
+ {
471
+ "id": "117",
472
+ "score": "0"
473
+ },
474
+ {
475
+ "id": "118",
476
+ "score": "1"
477
+ },
478
+ {
479
+ "id": "119",
480
+ "score": "0"
481
+ },
482
+ {
483
+ "id": "120",
484
+ "score": "1"
485
+ },
486
+ {
487
+ "id": "121",
488
+ "score": "0"
489
+ },
490
+ {
491
+ "id": "122",
492
+ "score": "1"
493
+ },
494
+ {
495
+ "id": "123",
496
+ "score": "1"
497
+ },
498
+ {
499
+ "id": "124",
500
+ "score": "0"
501
+ },
502
+ {
503
+ "id": "125",
504
+ "score": "0"
505
+ },
506
+ {
507
+ "id": "126",
508
+ "score": "1"
509
+ },
510
+ {
511
+ "id": "127",
512
+ "score": "1"
513
+ },
514
+ {
515
+ "id": "128",
516
+ "score": "1"
517
+ },
518
+ {
519
+ "id": "129",
520
+ "score": "1"
521
+ },
522
+ {
523
+ "id": "130",
524
+ "score": "1"
525
+ },
526
+ {
527
+ "id": "131",
528
+ "score": "1"
529
+ },
530
+ {
531
+ "id": "132",
532
+ "score": "0"
533
+ },
534
+ {
535
+ "id": "133",
536
+ "score": "0"
537
+ },
538
+ {
539
+ "id": "134",
540
+ "score": "1"
541
+ },
542
+ {
543
+ "id": "135",
544
+ "score": "0"
545
+ },
546
+ {
547
+ "id": "136",
548
+ "score": "1"
549
+ },
550
+ {
551
+ "id": "137",
552
+ "score": "1"
553
+ },
554
+ {
555
+ "id": "138",
556
+ "score": "1"
557
+ },
558
+ {
559
+ "id": "139",
560
+ "score": "1"
561
+ },
562
+ {
563
+ "id": "140",
564
+ "score": "1"
565
+ },
566
+ {
567
+ "id": "141",
568
+ "score": "1"
569
+ },
570
+ {
571
+ "id": "142",
572
+ "score": "1"
573
+ },
574
+ {
575
+ "id": "143",
576
+ "score": "0"
577
+ },
578
+ {
579
+ "id": "144",
580
+ "score": "0"
581
+ },
582
+ {
583
+ "id": "145",
584
+ "score": "1"
585
+ },
586
+ {
587
+ "id": "146",
588
+ "score": "0"
589
+ },
590
+ {
591
+ "id": "147",
592
+ "score": "0"
593
+ },
594
+ {
595
+ "id": "148",
596
+ "score": "0"
597
+ },
598
+ {
599
+ "id": "149",
600
+ "score": "1"
601
+ },
602
+ {
603
+ "id": "150",
604
+ "score": "1"
605
+ },
606
+ {
607
+ "id": "151",
608
+ "score": "0"
609
+ },
610
+ {
611
+ "id": "152",
612
+ "score": "0"
613
+ },
614
+ {
615
+ "id": "153",
616
+ "score": "1"
617
+ },
618
+ {
619
+ "id": "154",
620
+ "score": "1"
621
+ },
622
+ {
623
+ "id": "155",
624
+ "score": "1"
625
+ },
626
+ {
627
+ "id": "156",
628
+ "score": "1"
629
+ },
630
+ {
631
+ "id": "157",
632
+ "score": "1"
633
+ },
634
+ {
635
+ "id": "158",
636
+ "score": "0"
637
+ },
638
+ {
639
+ "id": "159",
640
+ "score": "0"
641
+ },
642
+ {
643
+ "id": "160",
644
+ "score": "0"
645
+ },
646
+ {
647
+ "id": "161",
648
+ "score": "1"
649
+ },
650
+ {
651
+ "id": "162",
652
+ "score": "1"
653
+ },
654
+ {
655
+ "id": "163",
656
+ "score": "0"
657
+ },
658
+ {
659
+ "id": "164",
660
+ "score": "1"
661
+ },
662
+ {
663
+ "id": "165",
664
+ "score": "0"
665
+ },
666
+ {
667
+ "id": "166",
668
+ "score": "0"
669
+ },
670
+ {
671
+ "id": "167",
672
+ "score": "1"
673
+ },
674
+ {
675
+ "id": "168",
676
+ "score": "1"
677
+ },
678
+ {
679
+ "id": "169",
680
+ "score": "1"
681
+ },
682
+ {
683
+ "id": "170",
684
+ "score": "1"
685
+ },
686
+ {
687
+ "id": "171",
688
+ "score": "1"
689
+ },
690
+ {
691
+ "id": "172",
692
+ "score": "1"
693
+ },
694
+ {
695
+ "id": "173",
696
+ "score": "1"
697
+ },
698
+ {
699
+ "id": "174",
700
+ "score": "1"
701
+ },
702
+ {
703
+ "id": "175",
704
+ "score": "0"
705
+ },
706
+ {
707
+ "id": "176",
708
+ "score": "0"
709
+ },
710
+ {
711
+ "id": "177",
712
+ "score": "0"
713
+ },
714
+ {
715
+ "id": "178",
716
+ "score": "1"
717
+ },
718
+ {
719
+ "id": "179",
720
+ "score": "1"
721
+ },
722
+ {
723
+ "id": "180",
724
+ "score": "1"
725
+ },
726
+ {
727
+ "id": "181",
728
+ "score": "1"
729
+ },
730
+ {
731
+ "id": "182",
732
+ "score": "1"
733
+ },
734
+ {
735
+ "id": "183",
736
+ "score": "0"
737
+ },
738
+ {
739
+ "id": "184",
740
+ "score": "1"
741
+ },
742
+ {
743
+ "id": "185",
744
+ "score": "1"
745
+ },
746
+ {
747
+ "id": "186",
748
+ "score": "0"
749
+ },
750
+ {
751
+ "id": "187",
752
+ "score": "1"
753
+ },
754
+ {
755
+ "id": "188",
756
+ "score": "1"
757
+ },
758
+ {
759
+ "id": "189",
760
+ "score": "1"
761
+ },
762
+ {
763
+ "id": "190",
764
+ "score": "0"
765
+ },
766
+ {
767
+ "id": "191",
768
+ "score": "1"
769
+ },
770
+ {
771
+ "id": "192",
772
+ "score": "1"
773
+ },
774
+ {
775
+ "id": "193",
776
+ "score": "1"
777
+ },
778
+ {
779
+ "id": "194",
780
+ "score": "0"
781
+ },
782
+ {
783
+ "id": "195",
784
+ "score": "1"
785
+ },
786
+ {
787
+ "id": "196",
788
+ "score": "0"
789
+ },
790
+ {
791
+ "id": "197",
792
+ "score": "0"
793
+ },
794
+ {
795
+ "id": "198",
796
+ "score": "1"
797
+ },
798
+ {
799
+ "id": "199",
800
+ "score": "1"
801
+ }
802
+ ]
eval_milebench/CounterfactualInference/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/CounterfactualInference/pred_with_extracted.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/DocVQA.log ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/17 [00:00<?, ?it/s]
1
  6%|β–Œ | 1/17 [00:01<00:22, 1.43s/it]
2
  12%|β–ˆβ– | 2/17 [00:01<00:11, 1.35it/s]
3
  18%|β–ˆβ–Š | 3/17 [00:02<00:08, 1.75it/s]
4
  24%|β–ˆβ–ˆβ–Ž | 4/17 [00:02<00:05, 2.26it/s]
5
  29%|β–ˆβ–ˆβ–‰ | 5/17 [00:02<00:04, 2.78it/s]
6
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 6/17 [00:02<00:03, 3.21it/s]
7
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 7/17 [00:02<00:02, 3.60it/s]
8
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/17 [00:03<00:02, 3.89it/s]
9
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 9/17 [00:03<00:01, 4.07it/s]
10
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 10/17 [00:03<00:01, 4.28it/s]
11
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/17 [00:03<00:01, 4.35it/s]
12
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 12/17 [00:03<00:01, 4.57it/s]
13
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/17 [00:04<00:00, 4.50it/s]
14
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 14/17 [00:04<00:00, 4.44it/s]
15
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 15/17 [00:04<00:00, 4.48it/s]
16
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 16/17 [00:04<00:00, 4.44it/s]
 
 
17
  0%| | 0/18 [00:00<?, ?it/s]
18
  6%|β–Œ | 1/18 [00:01<00:24, 1.42s/it]
19
  11%|β–ˆ | 2/18 [00:01<00:11, 1.37it/s]
20
  17%|β–ˆβ–‹ | 3/18 [00:02<00:08, 1.77it/s]
21
  22%|β–ˆβ–ˆβ– | 4/18 [00:02<00:05, 2.36it/s]
22
  28%|β–ˆβ–ˆβ–Š | 5/18 [00:02<00:04, 2.82it/s]
23
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 6/18 [00:02<00:03, 3.13it/s]
24
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 7/18 [00:02<00:03, 3.44it/s]
25
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 8/18 [00:03<00:02, 4.07it/s]
26
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/18 [00:03<00:02, 4.06it/s]
27
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/18 [00:03<00:01, 4.30it/s]
28
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 11/18 [00:03<00:01, 4.36it/s]
29
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 12/18 [00:03<00:01, 4.47it/s]
30
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 13/18 [00:04<00:01, 4.38it/s]
31
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 14/18 [00:04<00:00, 4.27it/s]
32
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 15/18 [00:04<00:00, 4.40it/s]
33
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 16/18 [00:04<00:00, 3.98it/s]
34
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 17/18 [00:05<00:00, 4.14i
35
  0%| | 0/18 [00:00<?, ?it/s]
36
  6%|β–Œ | 1/18 [00:01<00:24, 1.46s/it]
37
  11%|β–ˆ | 2/18 [00:01<00:11, 1.39it/s]
38
  17%|β–ˆβ–‹ | 3/18 [00:02<00:08, 1.77it/s]
39
  22%|β–ˆβ–ˆβ– | 4/18 [00:02<00:06, 2.23it/s]
40
  28%|β–ˆβ–ˆβ–Š | 5/18 [00:02<00:04, 2.77it/s]
41
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 6/18 [00:02<00:03, 3.19it/s]
42
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 7/18 [00:02<00:03, 3.53it/s]
43
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 8/18 [00:03<00:02, 3.79it/s]
44
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/18 [00:03<00:02, 3.92it/s]
45
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/18 [00:03<00:02, 3.77it/s]
46
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 11/18 [00:03<00:01, 4.03it/s]
47
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 12/18 [00:04<00:01, 4.09it/s]
48
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 13/18 [00:04<00:01, 3.84it/s]
49
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 14/18 [00:04<00:01, 3.99it/s]
50
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 15/18 [00:04<00:00, 4.23it/s]
51
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 16/18 [00:05<00:00, 4.44it/s]
52
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 17/18 [00:05<00:00, 4.49iProceeding 5-length images samples | Num: 36
 
 
 
 
 
53
  0%| | 0/9 [00:00<?, ?it/s]
54
  11%|β–ˆ | 1/9 [00:00<00:07, 1.06it/s]
55
  22%|β–ˆβ–ˆβ– | 2/9 [00:01<00:04, 1.44it/s]
56
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:01<00:03, 1.64it/s]
57
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:02<00:02, 1.76it/s]
58
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:02<00:02, 1.84it/s]
59
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:03<00:01, 1.89it/s]
60
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:03<00:01, 1.96it/s]
61
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:04<00:00, 2.00it/s]
 
 
62
  0%| | 0/9 [00:00<?, ?it/s]
63
  11%|β–ˆ | 1/9 [00:00<00:06, 1.28it/s]
64
  22%|β–ˆβ–ˆβ– | 2/9 [00:01<00:04, 1.63it/s]
65
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:01<00:03, 1.79it/s]
66
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:02<00:02, 1.93it/s]
67
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:02<00:02, 1.93it/s]
68
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:03<00:01, 1.98it/s]
69
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:03<00:00, 2.00it/s]
70
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:04<00:00, 2.12it/s]
 
 
 
71
  0%| | 0/9 [00:00<?, ?it/s]
72
  11%|β–ˆ | 1/9 [00:00<00:06, 1.21it/s]
73
  22%|β–ˆβ–ˆβ– | 2/9 [00:01<00:04, 1.46it/s]
74
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:02<00:04, 1.41it/s]
75
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:02<00:03, 1.56it/s]
76
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:03<00:02, 1.69it/s]
77
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:03<00:01, 1.76it/s]
78
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:04<00:01, 1.83it/s]
79
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:04<00:00, 1.89it/s]
 
 
80
  0%| | 0/10 [00:00<?, ?it/s]
81
  10%|β–ˆ | 1/10 [00:00<00:07, 1.23it/s]
82
  20%|β–ˆβ–ˆ | 2/10 [00:01<00:04, 1.63it/s]
83
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:01<00:03, 2.13it/s]
84
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:01<00:02, 2.49it/s]
85
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:02<00:02, 2.28it/s]
86
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:02<00:01, 2.45it/s]
87
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:03<00:01, 2.68it/s]
88
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:03<00:00, 2.85it/s]
89
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:03<00:00, 2.88it/s]
 
 
 
90
  0%| | 0/10 [00:00<?, ?it/s]
91
  10%|β–ˆ | 1/10 [00:00<00:08, 1.10it/s]
92
  20%|β–ˆβ–ˆ | 2/10 [00:01<00:04, 1.83it/s]
93
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:01<00:02, 2.45it/s]
94
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:01<00:02, 2.71it/s]
95
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:02<00:01, 2.84it/s]
96
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:02<00:01, 2.97it/s]
97
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:02<00:00, 3.03it/s]
98
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:02<00:00, 3.14it/s]
99
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:03<00:00, 3.14it/s]
 
 
100
  0%| | 0/10 [00:00<?, ?it/s]
101
  10%|β–ˆ | 1/10 [00:01<00:09, 1.03s/it]
102
  20%|β–ˆβ–ˆ | 2/10 [00:02<00:09, 1.22s/it]
103
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:02<00:05, 1.23it/s]
104
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:03<00:03, 1.63it/s]
105
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:03<00:02, 2.00it/s]
106
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:03<00:01, 2.32it/s]
107
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:03<00:01, 2.58it/s]
108
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:04<00:00, 3.18it/s]
109
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:04<00:00, 3.21it/s]
 
 
110
  0%| | 0/6 [00:00<?, ?it/s]
111
  17%|β–ˆβ–‹ | 1/6 [00:00<00:04, 1.19it/s]
112
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:01<00:02, 1.53it/s]
113
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:01<00:01, 1.76it/s]
114
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:02<00:01, 1.76it/s]
115
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:03<00:00, 1.43it/s]
 
 
116
  0%| | 0/6 [00:00<?, ?it/s]
117
  17%|β–ˆβ–‹ | 1/6 [00:00<00:04, 1.13it/s]
118
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:01<00:02, 1.42it/s]
119
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:02<00:01, 1.53it/s]
120
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:02<00:01, 1.31it/s]
121
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:03<00:00, 1.42it/s]
 
 
 
122
  0%| | 0/6 [00:00<?, ?it/s]
123
  17%|β–ˆβ–‹ | 1/6 [00:00<00:04, 1.25it/s]
124
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:01<00:02, 1.80it/s]
125
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:01<00:01, 1.81it/s]
126
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:02<00:01, 1.42it/s]
127
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:03<00:00, 1.57it/s]
 
 
128
  0%| | 0/7 [00:00<?, ?it/s]
129
  14%|β–ˆβ– | 1/7 [00:00<00:04, 1.48it/s]
130
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:01<00:02, 1.97it/s]
131
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:01<00:01, 2.18it/s]
132
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:02<00:02, 1.30it/s]
133
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:03<00:01, 1.54it/s]
134
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:03<00:00, 1.80it/s]
 
135
  0%| | 0/7 [00:00<?, ?it/s]
136
  14%|β–ˆβ– | 1/7 [00:00<00:03, 1.63it/s]
137
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:01<00:02, 2.03it/s]
138
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:02<00:03, 1.24it/s]
139
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:02<00:01, 1.56it/s]
140
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:03<00:01, 1.72it/s]
141
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:03<00:00, 1.89it/s]
 
142
  0%| | 0/7 [00:00<?, ?it/s]
143
  14%|β–ˆβ– | 1/7 [00:00<00:04, 1.47it/s]
144
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:01<00:02, 1.73it/s]
145
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:02<00:03, 1.09it/s]
146
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:02<00:02, 1.40it/s]
147
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:03<00:01, 1.85it/s]
148
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:03<00:00, 2.04it/s]
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task DocVQA, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
62
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
63
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
64
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task DocVQA, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
65
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task DocVQA, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
66
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
67
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task DocVQA, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
68
+ Initialization Finished
69
+ Predicting DocVQA Using internvl
70
+ Proceeding 2-length images samples | Num: 71
71
+ Initialization Finished
72
+ Predicting DocVQA Using internvl
73
+ Proceeding 2-length images samples | Num: 71
74
+ Initialization Finished
75
+ Predicting DocVQA Using internvl
76
+ Proceeding 2-length images samples | Num: 71
77
+ Initialization Finished
78
+ Predicting DocVQA Using internvl
79
+ Proceeding 2-length images samples | Num: 71
80
+
81
  0%| | 0/17 [00:00<?, ?it/s]
82
  6%|β–Œ | 1/17 [00:01<00:22, 1.43s/it]
83
  12%|β–ˆβ– | 2/17 [00:01<00:11, 1.35it/s]
84
  18%|β–ˆβ–Š | 3/17 [00:02<00:08, 1.75it/s]
85
  24%|β–ˆβ–ˆβ–Ž | 4/17 [00:02<00:05, 2.26it/s]
86
  29%|β–ˆβ–ˆβ–‰ | 5/17 [00:02<00:04, 2.78it/s]
87
  35%|β–ˆβ–ˆβ–ˆβ–Œ | 6/17 [00:02<00:03, 3.21it/s]
88
  41%|β–ˆβ–ˆβ–ˆβ–ˆ | 7/17 [00:02<00:02, 3.60it/s]
89
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/17 [00:03<00:02, 3.89it/s]
90
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 9/17 [00:03<00:01, 4.07it/s]
91
  59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 10/17 [00:03<00:01, 4.28it/s]
92
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/17 [00:03<00:01, 4.35it/s]
93
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 12/17 [00:03<00:01, 4.57it/s]
94
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/17 [00:04<00:00, 4.50it/s]
95
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 14/17 [00:04<00:00, 4.44it/s]
96
  88%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 15/17 [00:04<00:00, 4.48it/s]
97
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 16/17 [00:04<00:00, 4.44it/s]
98
+ Proceeding 5-length images samples | Num: 36
99
+
100
  0%| | 0/18 [00:00<?, ?it/s]
101
  6%|β–Œ | 1/18 [00:01<00:24, 1.42s/it]
102
  11%|β–ˆ | 2/18 [00:01<00:11, 1.37it/s]
103
  17%|β–ˆβ–‹ | 3/18 [00:02<00:08, 1.77it/s]
104
  22%|β–ˆβ–ˆβ– | 4/18 [00:02<00:05, 2.36it/s]
105
  28%|β–ˆβ–ˆβ–Š | 5/18 [00:02<00:04, 2.82it/s]
106
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 6/18 [00:02<00:03, 3.13it/s]
107
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 7/18 [00:02<00:03, 3.44it/s]
108
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 8/18 [00:03<00:02, 4.07it/s]
109
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/18 [00:03<00:02, 4.06it/s]
110
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/18 [00:03<00:01, 4.30it/s]
111
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 11/18 [00:03<00:01, 4.36it/s]
112
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 12/18 [00:03<00:01, 4.47it/s]
113
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 13/18 [00:04<00:01, 4.38it/s]
114
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 14/18 [00:04<00:00, 4.27it/s]
115
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 15/18 [00:04<00:00, 4.40it/s]
116
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 16/18 [00:04<00:00, 3.98it/s]
117
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 17/18 [00:05<00:00, 4.14i
118
  0%| | 0/18 [00:00<?, ?it/s]
119
  6%|β–Œ | 1/18 [00:01<00:24, 1.46s/it]
120
  11%|β–ˆ | 2/18 [00:01<00:11, 1.39it/s]
121
  17%|β–ˆβ–‹ | 3/18 [00:02<00:08, 1.77it/s]
122
  22%|β–ˆβ–ˆβ– | 4/18 [00:02<00:06, 2.23it/s]
123
  28%|β–ˆβ–ˆβ–Š | 5/18 [00:02<00:04, 2.77it/s]
124
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 6/18 [00:02<00:03, 3.19it/s]
125
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 7/18 [00:02<00:03, 3.53it/s]
126
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 8/18 [00:03<00:02, 3.79it/s]
127
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/18 [00:03<00:02, 3.92it/s]
128
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/18 [00:03<00:02, 3.77it/s]
129
  61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 11/18 [00:03<00:01, 4.03it/s]
130
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 12/18 [00:04<00:01, 4.09it/s]
131
  72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 13/18 [00:04<00:01, 3.84it/s]
132
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 14/18 [00:04<00:01, 3.99it/s]
133
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 15/18 [00:04<00:00, 4.23it/s]
134
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 16/18 [00:05<00:00, 4.44it/s]
135
  94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 17/18 [00:05<00:00, 4.49iProceeding 5-length images samples | Num: 36
136
+ t/s]
137
+ Proceeding 5-length images samples | Num: 36
138
+ t/s]
139
+ Proceeding 5-length images samples | Num: 36
140
+
141
  0%| | 0/9 [00:00<?, ?it/s]
142
  11%|β–ˆ | 1/9 [00:00<00:07, 1.06it/s]
143
  22%|β–ˆβ–ˆβ– | 2/9 [00:01<00:04, 1.44it/s]
144
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:01<00:03, 1.64it/s]
145
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:02<00:02, 1.76it/s]
146
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:02<00:02, 1.84it/s]
147
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:03<00:01, 1.89it/s]
148
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:03<00:01, 1.96it/s]
149
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:04<00:00, 2.00it/s]
150
+ Proceeding 3-length images samples | Num: 40
151
+
152
  0%| | 0/9 [00:00<?, ?it/s]
153
  11%|β–ˆ | 1/9 [00:00<00:06, 1.28it/s]
154
  22%|β–ˆβ–ˆβ– | 2/9 [00:01<00:04, 1.63it/s]
155
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:01<00:03, 1.79it/s]
156
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:02<00:02, 1.93it/s]
157
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:02<00:02, 1.93it/s]
158
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:03<00:01, 1.98it/s]
159
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:03<00:00, 2.00it/s]
160
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:04<00:00, 2.12it/s]
161
+ Proceeding 3-length images samples | Num: 40
162
+ Proceeding 3-length images samples | Num: 40
163
+
164
  0%| | 0/9 [00:00<?, ?it/s]
165
  11%|β–ˆ | 1/9 [00:00<00:06, 1.21it/s]
166
  22%|β–ˆβ–ˆβ– | 2/9 [00:01<00:04, 1.46it/s]
167
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 3/9 [00:02<00:04, 1.41it/s]
168
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 4/9 [00:02<00:03, 1.56it/s]
169
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/9 [00:03<00:02, 1.69it/s]
170
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 6/9 [00:03<00:01, 1.76it/s]
171
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 7/9 [00:04<00:01, 1.83it/s]
172
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 8/9 [00:04<00:00, 1.89it/s]
173
+ Proceeding 3-length images samples | Num: 40
174
+
175
  0%| | 0/10 [00:00<?, ?it/s]
176
  10%|β–ˆ | 1/10 [00:00<00:07, 1.23it/s]
177
  20%|β–ˆβ–ˆ | 2/10 [00:01<00:04, 1.63it/s]
178
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:01<00:03, 2.13it/s]
179
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:01<00:02, 2.49it/s]
180
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:02<00:02, 2.28it/s]
181
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:02<00:01, 2.45it/s]
182
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:03<00:01, 2.68it/s]
183
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:03<00:00, 2.85it/s]
184
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:03<00:00, 2.88it/s]
185
+ Proceeding 6-length images samples | Num: 24
186
+ Proceeding 6-length images samples | Num: 24
187
+
188
  0%| | 0/10 [00:00<?, ?it/s]
189
  10%|β–ˆ | 1/10 [00:00<00:08, 1.10it/s]
190
  20%|β–ˆβ–ˆ | 2/10 [00:01<00:04, 1.83it/s]
191
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:01<00:02, 2.45it/s]
192
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:01<00:02, 2.71it/s]
193
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:02<00:01, 2.84it/s]
194
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:02<00:01, 2.97it/s]
195
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:02<00:00, 3.03it/s]
196
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:02<00:00, 3.14it/s]
197
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:03<00:00, 3.14it/s]
198
+ Proceeding 6-length images samples | Num: 24
199
+
200
  0%| | 0/10 [00:00<?, ?it/s]
201
  10%|β–ˆ | 1/10 [00:01<00:09, 1.03s/it]
202
  20%|β–ˆβ–ˆ | 2/10 [00:02<00:09, 1.22s/it]
203
  30%|β–ˆβ–ˆβ–ˆ | 3/10 [00:02<00:05, 1.23it/s]
204
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 4/10 [00:03<00:03, 1.63it/s]
205
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/10 [00:03<00:02, 2.00it/s]
206
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 6/10 [00:03<00:01, 2.32it/s]
207
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 7/10 [00:03<00:01, 2.58it/s]
208
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 8/10 [00:04<00:00, 3.18it/s]
209
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 9/10 [00:04<00:00, 3.21it/s]
210
+ Proceeding 6-length images samples | Num: 24
211
+
212
  0%| | 0/6 [00:00<?, ?it/s]
213
  17%|β–ˆβ–‹ | 1/6 [00:00<00:04, 1.19it/s]
214
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:01<00:02, 1.53it/s]
215
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:01<00:01, 1.76it/s]
216
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:02<00:01, 1.76it/s]
217
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:03<00:00, 1.43it/s]
218
+ Proceeding 4-length images samples | Num: 29
219
+
220
  0%| | 0/6 [00:00<?, ?it/s]
221
  17%|β–ˆβ–‹ | 1/6 [00:00<00:04, 1.13it/s]
222
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:01<00:02, 1.42it/s]
223
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:02<00:01, 1.53it/s]
224
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:02<00:01, 1.31it/s]
225
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:03<00:00, 1.42it/s]
226
+ Proceeding 4-length images samples | Num: 29
227
+ Proceeding 4-length images samples | Num: 29
228
+
229
  0%| | 0/6 [00:00<?, ?it/s]
230
  17%|β–ˆβ–‹ | 1/6 [00:00<00:04, 1.25it/s]
231
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 2/6 [00:01<00:02, 1.80it/s]
232
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/6 [00:01<00:01, 1.81it/s]
233
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/6 [00:02<00:01, 1.42it/s]
234
  83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 5/6 [00:03<00:00, 1.57it/s]
235
+ Proceeding 4-length images samples | Num: 29
236
+
237
  0%| | 0/7 [00:00<?, ?it/s]
238
  14%|β–ˆβ– | 1/7 [00:00<00:04, 1.48it/s]
239
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:01<00:02, 1.97it/s]
240
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:01<00:01, 2.18it/s]
241
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:02<00:02, 1.30it/s]
242
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:03<00:01, 1.54it/s]
243
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:03<00:00, 1.80it/s]
244
+
245
  0%| | 0/7 [00:00<?, ?it/s]
246
  14%|β–ˆβ– | 1/7 [00:00<00:03, 1.63it/s]
247
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:01<00:02, 2.03it/s]
248
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:02<00:03, 1.24it/s]
249
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:02<00:01, 1.56it/s]
250
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:03<00:01, 1.72it/s]
251
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:03<00:00, 1.89it/s]
252
+
253
  0%| | 0/7 [00:00<?, ?it/s]
254
  14%|β–ˆβ– | 1/7 [00:00<00:04, 1.47it/s]
255
  29%|β–ˆβ–ˆβ–Š | 2/7 [00:01<00:02, 1.73it/s]
256
  43%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/7 [00:02<00:03, 1.09it/s]
257
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 4/7 [00:02<00:02, 1.40it/s]
258
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 5/7 [00:03<00:01, 1.85it/s]
259
  86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/7 [00:03<00:00, 2.04it/s]
260
+ evaluating DocVQA ...
261
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/DocVQA/DocVQA_240803234442.json
262
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset DocVQA --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/DocVQA
263
+ internvl: DocVQA: {'Accuracy': 0.61, 'image_quantity_level-Accuracy': {'Few': 0.5909090909090909, 'Medium': 0.75, 'Many': 0}, 'image_quantity_level-Result': {'Few': [104, 176], 'Medium': [18, 24], 'Many': [0, 0]}}
eval_milebench/DocVQA/DocVQA_240803234442.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/DocVQA/eval.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"Accuracy": 0.61, "image_quantity_level-Accuracy": {"Few": 0.5909090909090909, "Medium": 0.75, "Many": 0}, "image_quantity_level-Result": {"Few": [104, 176], "Medium": [18, 24], "Many": [0, 0]}}
eval_milebench/DocVQA/eval_score.json ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "0",
4
+ "score": "1"
5
+ },
6
+ {
7
+ "id": "5",
8
+ "score": "1"
9
+ },
10
+ {
11
+ "id": "7",
12
+ "score": "1"
13
+ },
14
+ {
15
+ "id": "12",
16
+ "score": "1"
17
+ },
18
+ {
19
+ "id": "14",
20
+ "score": "0"
21
+ },
22
+ {
23
+ "id": "15",
24
+ "score": "1"
25
+ },
26
+ {
27
+ "id": "20",
28
+ "score": "1"
29
+ },
30
+ {
31
+ "id": "23",
32
+ "score": "1"
33
+ },
34
+ {
35
+ "id": "34",
36
+ "score": "1"
37
+ },
38
+ {
39
+ "id": "35",
40
+ "score": "0"
41
+ },
42
+ {
43
+ "id": "36",
44
+ "score": "1"
45
+ },
46
+ {
47
+ "id": "40",
48
+ "score": "1"
49
+ },
50
+ {
51
+ "id": "42",
52
+ "score": "1"
53
+ },
54
+ {
55
+ "id": "43",
56
+ "score": "0"
57
+ },
58
+ {
59
+ "id": "45",
60
+ "score": "1"
61
+ },
62
+ {
63
+ "id": "46",
64
+ "score": "1"
65
+ },
66
+ {
67
+ "id": "49",
68
+ "score": "1"
69
+ },
70
+ {
71
+ "id": "50",
72
+ "score": "1"
73
+ },
74
+ {
75
+ "id": "1",
76
+ "score": "1"
77
+ },
78
+ {
79
+ "id": "8",
80
+ "score": "1"
81
+ },
82
+ {
83
+ "id": "11",
84
+ "score": "0"
85
+ },
86
+ {
87
+ "id": "16",
88
+ "score": "0"
89
+ },
90
+ {
91
+ "id": "19",
92
+ "score": "0"
93
+ },
94
+ {
95
+ "id": "21",
96
+ "score": "0"
97
+ },
98
+ {
99
+ "id": "22",
100
+ "score": "1"
101
+ },
102
+ {
103
+ "id": "24",
104
+ "score": "1"
105
+ },
106
+ {
107
+ "id": "29",
108
+ "score": "1"
109
+ },
110
+ {
111
+ "id": "2",
112
+ "score": "1"
113
+ },
114
+ {
115
+ "id": "3",
116
+ "score": "0"
117
+ },
118
+ {
119
+ "id": "10",
120
+ "score": "1"
121
+ },
122
+ {
123
+ "id": "13",
124
+ "score": "1"
125
+ },
126
+ {
127
+ "id": "17",
128
+ "score": "0"
129
+ },
130
+ {
131
+ "id": "26",
132
+ "score": "1"
133
+ },
134
+ {
135
+ "id": "31",
136
+ "score": "0"
137
+ },
138
+ {
139
+ "id": "38",
140
+ "score": "1"
141
+ },
142
+ {
143
+ "id": "52",
144
+ "score": "0"
145
+ },
146
+ {
147
+ "id": "54",
148
+ "score": "0"
149
+ },
150
+ {
151
+ "id": "4",
152
+ "score": "1"
153
+ },
154
+ {
155
+ "id": "6",
156
+ "score": "1"
157
+ },
158
+ {
159
+ "id": "18",
160
+ "score": "1"
161
+ },
162
+ {
163
+ "id": "32",
164
+ "score": "0"
165
+ },
166
+ {
167
+ "id": "39",
168
+ "score": "0"
169
+ },
170
+ {
171
+ "id": "55",
172
+ "score": "1"
173
+ },
174
+ {
175
+ "id": "9",
176
+ "score": "1"
177
+ },
178
+ {
179
+ "id": "25",
180
+ "score": "0"
181
+ },
182
+ {
183
+ "id": "27",
184
+ "score": "1"
185
+ },
186
+ {
187
+ "id": "28",
188
+ "score": "0"
189
+ },
190
+ {
191
+ "id": "30",
192
+ "score": "0"
193
+ },
194
+ {
195
+ "id": "33",
196
+ "score": "1"
197
+ },
198
+ {
199
+ "id": "41",
200
+ "score": "1"
201
+ },
202
+ {
203
+ "id": "48",
204
+ "score": "0"
205
+ },
206
+ {
207
+ "id": "53",
208
+ "score": "0"
209
+ },
210
+ {
211
+ "id": "56",
212
+ "score": "1"
213
+ },
214
+ {
215
+ "id": "67",
216
+ "score": "0"
217
+ },
218
+ {
219
+ "id": "68",
220
+ "score": "1"
221
+ },
222
+ {
223
+ "id": "73",
224
+ "score": "1"
225
+ },
226
+ {
227
+ "id": "78",
228
+ "score": "0"
229
+ },
230
+ {
231
+ "id": "82",
232
+ "score": "1"
233
+ },
234
+ {
235
+ "id": "88",
236
+ "score": "1"
237
+ },
238
+ {
239
+ "id": "92",
240
+ "score": "1"
241
+ },
242
+ {
243
+ "id": "94",
244
+ "score": "1"
245
+ },
246
+ {
247
+ "id": "99",
248
+ "score": "1"
249
+ },
250
+ {
251
+ "id": "100",
252
+ "score": "1"
253
+ },
254
+ {
255
+ "id": "101",
256
+ "score": "1"
257
+ },
258
+ {
259
+ "id": "102",
260
+ "score": "0"
261
+ },
262
+ {
263
+ "id": "104",
264
+ "score": "1"
265
+ },
266
+ {
267
+ "id": "107",
268
+ "score": "0"
269
+ },
270
+ {
271
+ "id": "108",
272
+ "score": "0"
273
+ },
274
+ {
275
+ "id": "112",
276
+ "score": "0"
277
+ },
278
+ {
279
+ "id": "37",
280
+ "score": "0"
281
+ },
282
+ {
283
+ "id": "44",
284
+ "score": "1"
285
+ },
286
+ {
287
+ "id": "47",
288
+ "score": "0"
289
+ },
290
+ {
291
+ "id": "66",
292
+ "score": "0"
293
+ },
294
+ {
295
+ "id": "72",
296
+ "score": "0"
297
+ },
298
+ {
299
+ "id": "83",
300
+ "score": "1"
301
+ },
302
+ {
303
+ "id": "85",
304
+ "score": "0"
305
+ },
306
+ {
307
+ "id": "93",
308
+ "score": "1"
309
+ },
310
+ {
311
+ "id": "95",
312
+ "score": "1"
313
+ },
314
+ {
315
+ "id": "59",
316
+ "score": "0"
317
+ },
318
+ {
319
+ "id": "60",
320
+ "score": "1"
321
+ },
322
+ {
323
+ "id": "61",
324
+ "score": "0"
325
+ },
326
+ {
327
+ "id": "63",
328
+ "score": "1"
329
+ },
330
+ {
331
+ "id": "64",
332
+ "score": "1"
333
+ },
334
+ {
335
+ "id": "65",
336
+ "score": "1"
337
+ },
338
+ {
339
+ "id": "71",
340
+ "score": "0"
341
+ },
342
+ {
343
+ "id": "76",
344
+ "score": "1"
345
+ },
346
+ {
347
+ "id": "77",
348
+ "score": "0"
349
+ },
350
+ {
351
+ "id": "79",
352
+ "score": "1"
353
+ },
354
+ {
355
+ "id": "57",
356
+ "score": "1"
357
+ },
358
+ {
359
+ "id": "62",
360
+ "score": "1"
361
+ },
362
+ {
363
+ "id": "69",
364
+ "score": "0"
365
+ },
366
+ {
367
+ "id": "74",
368
+ "score": "1"
369
+ },
370
+ {
371
+ "id": "75",
372
+ "score": "1"
373
+ },
374
+ {
375
+ "id": "89",
376
+ "score": "1"
377
+ },
378
+ {
379
+ "id": "51",
380
+ "score": "0"
381
+ },
382
+ {
383
+ "id": "58",
384
+ "score": "0"
385
+ },
386
+ {
387
+ "id": "70",
388
+ "score": "1"
389
+ },
390
+ {
391
+ "id": "90",
392
+ "score": "1"
393
+ },
394
+ {
395
+ "id": "105",
396
+ "score": "1"
397
+ },
398
+ {
399
+ "id": "111",
400
+ "score": "0"
401
+ },
402
+ {
403
+ "id": "119",
404
+ "score": "1"
405
+ },
406
+ {
407
+ "id": "113",
408
+ "score": "0"
409
+ },
410
+ {
411
+ "id": "114",
412
+ "score": "0"
413
+ },
414
+ {
415
+ "id": "116",
416
+ "score": "1"
417
+ },
418
+ {
419
+ "id": "117",
420
+ "score": "1"
421
+ },
422
+ {
423
+ "id": "129",
424
+ "score": "0"
425
+ },
426
+ {
427
+ "id": "130",
428
+ "score": "0"
429
+ },
430
+ {
431
+ "id": "132",
432
+ "score": "1"
433
+ },
434
+ {
435
+ "id": "133",
436
+ "score": "1"
437
+ },
438
+ {
439
+ "id": "140",
440
+ "score": "0"
441
+ },
442
+ {
443
+ "id": "143",
444
+ "score": "1"
445
+ },
446
+ {
447
+ "id": "144",
448
+ "score": "1"
449
+ },
450
+ {
451
+ "id": "148",
452
+ "score": "1"
453
+ },
454
+ {
455
+ "id": "151",
456
+ "score": "0"
457
+ },
458
+ {
459
+ "id": "154",
460
+ "score": "0"
461
+ },
462
+ {
463
+ "id": "155",
464
+ "score": "0"
465
+ },
466
+ {
467
+ "id": "157",
468
+ "score": "1"
469
+ },
470
+ {
471
+ "id": "158",
472
+ "score": "1"
473
+ },
474
+ {
475
+ "id": "159",
476
+ "score": "0"
477
+ },
478
+ {
479
+ "id": "98",
480
+ "score": "0"
481
+ },
482
+ {
483
+ "id": "106",
484
+ "score": "1"
485
+ },
486
+ {
487
+ "id": "110",
488
+ "score": "1"
489
+ },
490
+ {
491
+ "id": "123",
492
+ "score": "1"
493
+ },
494
+ {
495
+ "id": "124",
496
+ "score": "0"
497
+ },
498
+ {
499
+ "id": "127",
500
+ "score": "0"
501
+ },
502
+ {
503
+ "id": "137",
504
+ "score": "1"
505
+ },
506
+ {
507
+ "id": "142",
508
+ "score": "1"
509
+ },
510
+ {
511
+ "id": "146",
512
+ "score": "1"
513
+ },
514
+ {
515
+ "id": "80",
516
+ "score": "0"
517
+ },
518
+ {
519
+ "id": "81",
520
+ "score": "0"
521
+ },
522
+ {
523
+ "id": "84",
524
+ "score": "1"
525
+ },
526
+ {
527
+ "id": "86",
528
+ "score": "1"
529
+ },
530
+ {
531
+ "id": "87",
532
+ "score": "0"
533
+ },
534
+ {
535
+ "id": "96",
536
+ "score": "1"
537
+ },
538
+ {
539
+ "id": "109",
540
+ "score": "0"
541
+ },
542
+ {
543
+ "id": "128",
544
+ "score": "1"
545
+ },
546
+ {
547
+ "id": "141",
548
+ "score": "1"
549
+ },
550
+ {
551
+ "id": "152",
552
+ "score": "0"
553
+ },
554
+ {
555
+ "id": "91",
556
+ "score": "0"
557
+ },
558
+ {
559
+ "id": "97",
560
+ "score": "1"
561
+ },
562
+ {
563
+ "id": "103",
564
+ "score": "1"
565
+ },
566
+ {
567
+ "id": "115",
568
+ "score": "1"
569
+ },
570
+ {
571
+ "id": "118",
572
+ "score": "1"
573
+ },
574
+ {
575
+ "id": "125",
576
+ "score": "1"
577
+ },
578
+ {
579
+ "id": "120",
580
+ "score": "0"
581
+ },
582
+ {
583
+ "id": "121",
584
+ "score": "1"
585
+ },
586
+ {
587
+ "id": "122",
588
+ "score": "1"
589
+ },
590
+ {
591
+ "id": "131",
592
+ "score": "1"
593
+ },
594
+ {
595
+ "id": "134",
596
+ "score": "0"
597
+ },
598
+ {
599
+ "id": "135",
600
+ "score": "0"
601
+ },
602
+ {
603
+ "id": "145",
604
+ "score": "0"
605
+ },
606
+ {
607
+ "id": "160",
608
+ "score": "1"
609
+ },
610
+ {
611
+ "id": "164",
612
+ "score": "1"
613
+ },
614
+ {
615
+ "id": "165",
616
+ "score": "0"
617
+ },
618
+ {
619
+ "id": "166",
620
+ "score": "1"
621
+ },
622
+ {
623
+ "id": "169",
624
+ "score": "1"
625
+ },
626
+ {
627
+ "id": "172",
628
+ "score": "1"
629
+ },
630
+ {
631
+ "id": "174",
632
+ "score": "1"
633
+ },
634
+ {
635
+ "id": "176",
636
+ "score": "1"
637
+ },
638
+ {
639
+ "id": "178",
640
+ "score": "1"
641
+ },
642
+ {
643
+ "id": "179",
644
+ "score": "0"
645
+ },
646
+ {
647
+ "id": "182",
648
+ "score": "0"
649
+ },
650
+ {
651
+ "id": "185",
652
+ "score": "1"
653
+ },
654
+ {
655
+ "id": "187",
656
+ "score": "1"
657
+ },
658
+ {
659
+ "id": "191",
660
+ "score": "0"
661
+ },
662
+ {
663
+ "id": "192",
664
+ "score": "0"
665
+ },
666
+ {
667
+ "id": "194",
668
+ "score": "1"
669
+ },
670
+ {
671
+ "id": "195",
672
+ "score": "1"
673
+ },
674
+ {
675
+ "id": "147",
676
+ "score": "0"
677
+ },
678
+ {
679
+ "id": "149",
680
+ "score": "0"
681
+ },
682
+ {
683
+ "id": "153",
684
+ "score": "1"
685
+ },
686
+ {
687
+ "id": "156",
688
+ "score": "0"
689
+ },
690
+ {
691
+ "id": "175",
692
+ "score": "0"
693
+ },
694
+ {
695
+ "id": "180",
696
+ "score": "0"
697
+ },
698
+ {
699
+ "id": "186",
700
+ "score": "1"
701
+ },
702
+ {
703
+ "id": "193",
704
+ "score": "0"
705
+ },
706
+ {
707
+ "id": "198",
708
+ "score": "1"
709
+ },
710
+ {
711
+ "id": "161",
712
+ "score": "1"
713
+ },
714
+ {
715
+ "id": "162",
716
+ "score": "0"
717
+ },
718
+ {
719
+ "id": "167",
720
+ "score": "1"
721
+ },
722
+ {
723
+ "id": "171",
724
+ "score": "0"
725
+ },
726
+ {
727
+ "id": "177",
728
+ "score": "1"
729
+ },
730
+ {
731
+ "id": "181",
732
+ "score": "1"
733
+ },
734
+ {
735
+ "id": "183",
736
+ "score": "0"
737
+ },
738
+ {
739
+ "id": "184",
740
+ "score": "1"
741
+ },
742
+ {
743
+ "id": "196",
744
+ "score": "1"
745
+ },
746
+ {
747
+ "id": "199",
748
+ "score": "0"
749
+ },
750
+ {
751
+ "id": "126",
752
+ "score": "1"
753
+ },
754
+ {
755
+ "id": "136",
756
+ "score": "0"
757
+ },
758
+ {
759
+ "id": "138",
760
+ "score": "1"
761
+ },
762
+ {
763
+ "id": "139",
764
+ "score": "1"
765
+ },
766
+ {
767
+ "id": "173",
768
+ "score": "0"
769
+ },
770
+ {
771
+ "id": "188",
772
+ "score": "1"
773
+ },
774
+ {
775
+ "id": "150",
776
+ "score": "1"
777
+ },
778
+ {
779
+ "id": "163",
780
+ "score": "1"
781
+ },
782
+ {
783
+ "id": "168",
784
+ "score": "1"
785
+ },
786
+ {
787
+ "id": "170",
788
+ "score": "0"
789
+ },
790
+ {
791
+ "id": "189",
792
+ "score": "1"
793
+ },
794
+ {
795
+ "id": "190",
796
+ "score": "1"
797
+ },
798
+ {
799
+ "id": "197",
800
+ "score": "1"
801
+ }
802
+ ]
eval_milebench/DocVQA/pred.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/DocVQA/pred_with_extracted.json ADDED
The diff for this file is too large to render. See raw diff
 
eval_milebench/EgocentricNavigation.log ADDED
@@ -0,0 +1,401 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/2 [00:00<?, ?it/s]
1
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:07<00:07, 7.49s/it]
 
 
 
2
  0%| | 0/2 [00:00<?, ?it/s]
3
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:08<00:08, 8.08s/it]
 
 
4
  0%| | 0/2 [00:00<?, ?it/s]
5
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:07<00:07, 7.88s/it]
 
 
 
 
 
 
6
  0%| | 0/1 [00:00<?, ?it/s]
 
 
7
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
8
  0%| | 0/1 [00:00<?, ?it/s]
 
 
9
  0%| | 0/2 [00:00<?, ?it/s]
10
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.37s/it]
 
 
11
  0%| | 0/1 [00:00<?, ?it/s]
 
 
12
  0%| | 0/2 [00:00<?, ?it/s]
13
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.41s/it]
 
 
 
14
  0%| | 0/1 [00:00<?, ?it/s]
 
 
15
  0%| | 0/1 [00:00<?, ?it/s]
 
 
16
  0%| | 0/2 [00:00<?, ?it/s]
17
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:05<00:05, 5.50s/it]
 
 
18
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
19
  0%| | 0/2 [00:00<?, ?it/s]
20
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.07s/it]
 
 
21
  0%| | 0/1 [00:00<?, ?it/s]
 
 
22
  0%| | 0/1 [00:00<?, ?it/s]
 
 
23
  0%| | 0/1 [00:00<?, ?it/s]
 
 
24
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
25
  0%| | 0/2 [00:00<?, ?it/s]
26
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:09<00:09, 9.43s/it]
 
 
27
  0%| | 0/1 [00:00<?, ?it/s]
 
 
28
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
29
  0%| | 0/2 [00:00<?, ?it/s]
30
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.78s/it]
 
31
  0%| | 0/1 [00:00<?, ?it/s]
 
 
32
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
33
  0%| | 0/1 [00:00<?, ?it/s]
 
 
34
  0%| | 0/2 [00:00<?, ?it/s]
35
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.96s/it]
 
 
36
  0%| | 0/1 [00:00<?, ?it/s]
 
 
37
  0%| | 0/2 [00:00<?, ?it/s]
38
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.62s/it]
 
 
 
39
  0%| | 0/2 [00:00<?, ?it/s]
40
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:04<00:04, 4.16s/it]
 
 
41
  0%| | 0/2 [00:00<?, ?it/s]
42
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.34s/it]
 
 
43
  0%| | 0/2 [00:00<?, ?it/s]
44
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.43s/it]
 
 
45
  0%| | 0/2 [00:00<?, ?it/s]
46
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.04s/it]
 
 
 
47
  0%| | 0/2 [00:00<?, ?it/s]
48
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.48s/it]
 
 
49
  0%| | 0/2 [00:00<?, ?it/s]
50
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.06s/it]
 
51
  0%| | 0/2 [00:00<?, ?it/s]
52
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.67s/it]
 
 
 
 
 
 
 
 
53
  0%| | 0/2 [00:00<?, ?it/s]
54
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.11s/it]
 
 
55
  0%| | 0/1 [00:00<?, ?it/s]
 
 
56
  0%| | 0/3 [00:00<?, ?it/s]
57
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.98s/it]
58
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.82s/it]
 
 
 
59
  0%| | 0/2 [00:00<?, ?it/s]
60
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.57s/it]
 
 
61
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
62
  0%| | 0/3 [00:00<?, ?it/s]
63
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.00s/it]
64
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.74s/it]
 
 
65
  0%| | 0/1 [00:00<?, ?it/s]
 
 
66
  0%| | 0/1 [00:00<?, ?it/s]
 
 
67
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
68
  0%| | 0/1 [00:00<?, ?it/s]
 
 
69
  0%| | 0/1 [00:00<?, ?it/s]
 
 
70
  0%| | 0/4 [00:00<?, ?it/s]
71
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:04<00:14, 4.72s/it]
72
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:05<00:04, 2.44s/it]
73
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:08<00:02, 2.89s/it]
 
 
74
  0%| | 0/1 [00:00<?, ?it/s]
 
 
75
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
76
  0%| | 0/1 [00:00<?, ?it/s]
 
 
77
  0%| | 0/1 [00:00<?, ?it/s]
 
 
78
  0%| | 0/2 [00:00<?, ?it/s]
79
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.41s/it]
 
 
80
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
81
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
82
  0%| | 0/2 [00:00<?, ?it/s]
83
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.59s/it]
 
 
84
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
85
  0%| | 0/4 [00:00<?, ?it/s]
86
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:01<00:05, 1.93s/it]
87
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:03<00:03, 1.59s/it]
88
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:07<00:03, 3.01s/it]
 
 
89
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
90
  0%| | 0/4 [00:00<?, ?it/s]
91
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:04<00:14, 4.93s/it]
92
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:08<00:07, 3.85s/it]
93
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:09<00:02, 2.54s/it]
 
 
94
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
95
  0%| | 0/2 [00:00<?, ?it/s]
96
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.77s/it]
 
 
 
 
97
  0%| | 0/1 [00:00<?, ?it/s]
 
 
98
  0%| | 0/1 [00:00<?, ?it/s]
 
 
99
  0%| | 0/2 [00:00<?, ?it/s]
100
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.63s/it]
 
 
 
 
 
101
  0%| | 0/1 [00:00<?, ?it/s]
 
 
102
  0%| | 0/1 [00:00<?, ?it/s]
 
 
103
  0%| | 0/2 [00:00<?, ?it/s]
104
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.94s/it]
 
 
 
 
105
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
106
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
107
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
108
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
109
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
110
  0%| | 0/1 [00:00<?, ?it/s]
 
 
111
  0%| | 0/1 [00:00<?, ?it/s]
 
 
112
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
113
  0%| | 0/3 [00:00<?, ?it/s]
114
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.22s/it]
115
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.11s/it]
 
 
116
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
117
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
118
  0%| | 0/3 [00:00<?, ?it/s]
119
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.26s/it]
120
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.45s/it]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  0%| | 0/1 [00:00<?, ?it/s]
 
 
122
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
124
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
125
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
126
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
127
  0%| | 0/1 [00:00<?, ?it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ language_model.model.layers.0 4
2
+ language_model.model.layers.1 4
3
+ language_model.model.layers.2 4
4
+ language_model.model.layers.3 4
5
+ language_model.model.layers.4 4
6
+ language_model.model.layers.5 4
7
+ language_model.model.layers.6 4
8
+ language_model.model.layers.7 4
9
+ language_model.model.layers.8 4
10
+ language_model.model.layers.9 4
11
+ language_model.model.layers.10 4
12
+ language_model.model.layers.11 4
13
+ language_model.model.layers.12 4
14
+ language_model.model.layers.13 4
15
+ language_model.model.layers.14 4
16
+ language_model.model.layers.15 4
17
+ language_model.model.layers.16 4
18
+ language_model.model.layers.17 4
19
+ language_model.model.layers.18 4
20
+ language_model.model.layers.19 4
21
+ language_model.model.layers.20 4
22
+ language_model.model.layers.21 4
23
+ language_model.model.layers.22 4
24
+ language_model.model.layers.23 4
25
+ vision_model.encoder.layers.0 0
26
+ vision_model.encoder.layers.1 0
27
+ vision_model.encoder.layers.2 0
28
+ vision_model.encoder.layers.3 0
29
+ vision_model.encoder.layers.4 0
30
+ vision_model.encoder.layers.5 0
31
+ vision_model.encoder.layers.6 0
32
+ vision_model.encoder.layers.7 0
33
+ vision_model.encoder.layers.8 0
34
+ vision_model.encoder.layers.9 0
35
+ vision_model.encoder.layers.10 0
36
+ vision_model.encoder.layers.11 0
37
+ vision_model.encoder.layers.12 0
38
+ vision_model.encoder.layers.13 0
39
+ vision_model.encoder.layers.14 0
40
+ vision_model.encoder.layers.15 0
41
+ vision_model.encoder.layers.16 0
42
+ vision_model.encoder.layers.17 0
43
+ vision_model.encoder.layers.18 0
44
+ vision_model.encoder.layers.19 0
45
+ vision_model.encoder.layers.20 0
46
+ vision_model.encoder.layers.21 0
47
+ vision_model.encoder.layers.22 0
48
+ vision_model.encoder.layers.23 0
49
+ vision_model.embeddings 0
50
+ mlp1 0
51
+ language_model.model.tok_embeddings 4
52
+ language_model.model.norm 4
53
+ language_model.output 4
54
+ language_model.model.embed_tokens 4
55
+ language_model.lm_head 4
56
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
57
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
58
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
59
+ The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
60
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
61
+ Rank [3] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task EgocentricNavigation, devices: {device(type='cuda', index=3), device(type='cuda', index=7)}
62
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
63
+ Rank [0] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task EgocentricNavigation, devices: {device(type='cuda', index=0), device(type='cuda', index=4)}
64
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
65
+ Rank [2] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task EgocentricNavigation, devices: {device(type='cuda', index=2), device(type='cuda', index=6)}
66
+ Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
67
+ Rank [1] Begin to eval model work_dirs/share_internvl/InternVL2-2B on task EgocentricNavigation, devices: {device(type='cuda', index=1), device(type='cuda', index=5)}
68
+ Initialization Finished
69
+ Predicting EgocentricNavigation Using internvl
70
+ Proceeding 52-length images samples | Num: 8
71
+ Initialization Finished
72
+ Predicting EgocentricNavigation Using internvl
73
+ Proceeding 52-length images samples | Num: 8
74
+ Initialization Finished
75
+ Predicting EgocentricNavigation Using internvl
76
+ Proceeding 52-length images samples | Num: 8
77
+ Initialization Finished
78
+ Predicting EgocentricNavigation Using internvl
79
+ Proceeding 52-length images samples | Num: 8
80
+
81
  0%| | 0/2 [00:00<?, ?it/s]
82
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:07<00:07, 7.49s/it]
83
+ Proceeding 73-length images samples | Num: 2
84
+ Proceeding 73-length images samples | Num: 2
85
+
86
  0%| | 0/2 [00:00<?, ?it/s]
87
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:08<00:08, 8.08s/it]
88
+ Proceeding 73-length images samples | Num: 2
89
+
90
  0%| | 0/2 [00:00<?, ?it/s]
91
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:07<00:07, 7.88s/it]
92
+ Proceeding 73-length images samples | Num: 2
93
+
94
+ Proceeding 48-length images samples | Num: 5
95
+
96
+ Proceeding 48-length images samples | Num: 5
97
+
98
  0%| | 0/1 [00:00<?, ?it/s]
99
+ Proceeding 38-length images samples | Num: 9
100
+
101
  0%| | 0/1 [00:00<?, ?it/s]
102
+ Proceeding 38-length images samples | Num: 9
103
+ Proceeding 48-length images samples | Num: 5
104
+
105
  0%| | 0/1 [00:00<?, ?it/s]
106
+ Proceeding 48-length images samples | Num: 5
107
+
108
  0%| | 0/2 [00:00<?, ?it/s]
109
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.37s/it]
110
+ Proceeding 54-length images samples | Num: 5
111
+
112
  0%| | 0/1 [00:00<?, ?it/s]
113
+ Proceeding 38-length images samples | Num: 9
114
+
115
  0%| | 0/2 [00:00<?, ?it/s]
116
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.41s/it]
117
+ Proceeding 54-length images samples | Num: 5
118
+ Proceeding 38-length images samples | Num: 9
119
+
120
  0%| | 0/1 [00:00<?, ?it/s]
121
+ Proceeding 30-length images samples | Num: 7
122
+
123
  0%| | 0/1 [00:00<?, ?it/s]
124
+ Proceeding 30-length images samples | Num: 7
125
+
126
  0%| | 0/2 [00:00<?, ?it/s]
127
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:05<00:05, 5.50s/it]
128
+ Proceeding 54-length images samples | Num: 5
129
+
130
  0%| | 0/1 [00:00<?, ?it/s]
131
+ Proceeding 33-length images samples | Num: 5
132
+ Proceeding 54-length images samples | Num: 5
133
+
134
  0%| | 0/2 [00:00<?, ?it/s]
135
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.07s/it]
136
+ Proceeding 33-length images samples | Num: 5
137
+
138
  0%| | 0/1 [00:00<?, ?it/s]
139
+ Proceeding 30-length images samples | Num: 7
140
+
141
  0%| | 0/1 [00:00<?, ?it/s]
142
+ Proceeding 53-length images samples | Num: 5
143
+
144
  0%| | 0/1 [00:00<?, ?it/s]
145
+ Proceeding 53-length images samples | Num: 5
146
+
147
  0%| | 0/1 [00:00<?, ?it/s]
148
+ Proceeding 44-length images samples | Num: 3
149
+
150
+ Proceeding 39-length images samples | Num: 9
151
+
152
  0%| | 0/2 [00:00<?, ?it/s]
153
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:09<00:09, 9.43s/it]
154
+ Proceeding 33-length images samples | Num: 5
155
+
156
  0%| | 0/1 [00:00<?, ?it/s]
157
+ Proceeding 53-length images samples | Num: 5
158
+
159
  0%| | 0/1 [00:00<?, ?it/s]
160
+ Proceeding 44-length images samples | Num: 3
161
+ Proceeding 30-length images samples | Num: 7
162
+ Proceeding 34-length images samples | Num: 8
163
+
164
  0%| | 0/2 [00:00<?, ?it/s]
165
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.78s/it]
166
+
167
  0%| | 0/1 [00:00<?, ?it/s]
168
+ Proceeding 44-length images samples | Num: 3
169
+
170
  0%| | 0/1 [00:00<?, ?it/s]
171
+ Proceeding 39-length images samples | Num: 9
172
+ Proceeding 33-length images samples | Num: 5
173
+
174
  0%| | 0/1 [00:00<?, ?it/s]
175
+ Proceeding 39-length images samples | Num: 9
176
+
177
  0%| | 0/2 [00:00<?, ?it/s]
178
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.96s/it]
179
+ Proceeding 31-length images samples | Num: 7
180
+
181
  0%| | 0/1 [00:00<?, ?it/s]
182
+ Proceeding 36-length images samples | Num: 10
183
+
184
  0%| | 0/2 [00:00<?, ?it/s]
185
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.62s/it]
186
+ Proceeding 34-length images samples | Num: 8
187
+ Proceeding 53-length images samples | Num: 5
188
+
189
  0%| | 0/2 [00:00<?, ?it/s]
190
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:04<00:04, 4.16s/it]
191
+ Proceeding 34-length images samples | Num: 8
192
+
193
  0%| | 0/2 [00:00<?, ?it/s]
194
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.34s/it]
195
+ Proceeding 32-length images samples | Num: 10
196
+
197
  0%| | 0/2 [00:00<?, ?it/s]
198
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.43s/it]
199
+ Proceeding 31-length images samples | Num: 7
200
+
201
  0%| | 0/2 [00:00<?, ?it/s]
202
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:02<00:02, 2.04s/it]
203
+ Proceeding 31-length images samples | Num: 7
204
+ Proceeding 44-length images samples | Num: 3
205
+
206
  0%| | 0/2 [00:00<?, ?it/s]
207
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.48s/it]
208
+ Proceeding 36-length images samples | Num: 10
209
+
210
  0%| | 0/2 [00:00<?, ?it/s]
211
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.06s/it]
212
+
213
  0%| | 0/2 [00:00<?, ?it/s]
214
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.67s/it]
215
+ Proceeding 36-length images samples | Num: 10
216
+ Proceeding 42-length images samples | Num: 3
217
+ Proceeding 39-length images samples | Num: 9
218
+
219
+ Proceeding 56-length images samples | Num: 3
220
+ Proceeding 55-length images samples | Num: 4
221
+
222
+
223
  0%| | 0/2 [00:00<?, ?it/s]
224
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.11s/it]
225
+ Proceeding 32-length images samples | Num: 10
226
+
227
  0%| | 0/1 [00:00<?, ?it/s]
228
+ Proceeding 46-length images samples | Num: 4
229
+
230
  0%| | 0/3 [00:00<?, ?it/s]
231
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:05, 2.98s/it]
232
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.82s/it]
233
+ Proceeding 32-length images samples | Num: 10
234
+ Proceeding 34-length images samples | Num: 8
235
+
236
  0%| | 0/2 [00:00<?, ?it/s]
237
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.57s/it]
238
+ Proceeding 42-length images samples | Num: 3
239
+
240
  0%| | 0/1 [00:00<?, ?it/s]
241
+ Proceeding 45-length images samples | Num: 6
242
+ Proceeding 31-length images samples | Num: 7
243
+
244
  0%| | 0/3 [00:00<?, ?it/s]
245
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.00s/it]
246
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.74s/it]
247
+ Proceeding 42-length images samples | Num: 3
248
+
249
  0%| | 0/1 [00:00<?, ?it/s]
250
+ Proceeding 56-length images samples | Num: 3
251
+
252
  0%| | 0/1 [00:00<?, ?it/s]
253
+ Proceeding 35-length images samples | Num: 16
254
+
255
  0%| | 0/1 [00:00<?, ?it/s]
256
+ Proceeding 56-length images samples | Num: 3
257
+ Proceeding 36-length images samples | Num: 10
258
+
259
  0%| | 0/1 [00:00<?, ?it/s]
260
+ Proceeding 55-length images samples | Num: 4
261
+
262
  0%| | 0/1 [00:00<?, ?it/s]
263
+ Proceeding 55-length images samples | Num: 4
264
+
265
  0%| | 0/4 [00:00<?, ?it/s]
266
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:04<00:14, 4.72s/it]
267
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:05<00:04, 2.44s/it]
268
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:08<00:02, 2.89s/it]
269
+ Proceeding 47-length images samples | Num: 8
270
+
271
  0%| | 0/1 [00:00<?, ?it/s]
272
+ Proceeding 46-length images samples | Num: 4
273
+
274
  0%| | 0/1 [00:00<?, ?it/s]
275
+ Proceeding 46-length images samples | Num: 4
276
+ Proceeding 32-length images samples | Num: 10
277
+
278
  0%| | 0/1 [00:00<?, ?it/s]
279
+ Proceeding 45-length images samples | Num: 6
280
+
281
  0%| | 0/1 [00:00<?, ?it/s]
282
+ Proceeding 45-length images samples | Num: 6
283
+
284
  0%| | 0/2 [00:00<?, ?it/s]
285
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.41s/it]
286
+ Proceeding 62-length images samples | Num: 1
287
+
288
  0%| | 0/1 [00:00<?, ?it/s]
289
+ Proceeding 35-length images samples | Num: 16
290
+
291
+ Proceeding 43-length images samples | Num: 4
292
+ Proceeding 42-length images samples | Num: 3
293
+
294
  0%| | 0/1 [00:00<?, ?it/s]
295
+ Proceeding 51-length images samples | Num: 3
296
+
297
+ Proceeding 58-length images samples | Num: 4
298
+ Proceeding 56-length images samples | Num: 3
299
+
300
  0%| | 0/2 [00:00<?, ?it/s]
301
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:01<00:01, 1.59s/it]
302
+ Proceeding 35-length images samples | Num: 16
303
+
304
  0%| | 0/1 [00:00<?, ?it/s]
305
+ Proceeding 50-length images samples | Num: 5
306
+ Proceeding 55-length images samples | Num: 4
307
+
308
  0%| | 0/4 [00:00<?, ?it/s]
309
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:01<00:05, 1.93s/it]
310
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:03<00:03, 1.59s/it]
311
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:07<00:03, 3.01s/it]
312
+ Proceeding 47-length images samples | Num: 8
313
+
314
  0%| | 0/1 [00:00<?, ?it/s]
315
+ Proceeding 41-length images samples | Num: 5
316
+ Proceeding 46-length images samples | Num: 4
317
+
318
  0%| | 0/4 [00:00<?, ?it/s]
319
  25%|β–ˆβ–ˆβ–Œ | 1/4 [00:04<00:14, 4.93s/it]
320
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:08<00:07, 3.85s/it]
321
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 3/4 [00:09<00:02, 2.54s/it]
322
+ Proceeding 47-length images samples | Num: 8
323
+
324
  0%| | 0/1 [00:00<?, ?it/s]
325
+ Proceeding 57-length images samples | Num: 5
326
+ Proceeding 45-length images samples | Num: 6
327
+
328
  0%| | 0/2 [00:00<?, ?it/s]
329
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.77s/it]
330
+ Proceeding 62-length images samples | Num: 1
331
+
332
+ Proceeding 43-length images samples | Num: 4
333
+
334
  0%| | 0/1 [00:00<?, ?it/s]
335
+ Proceeding 51-length images samples | Num: 3
336
+
337
  0%| | 0/1 [00:00<?, ?it/s]
338
+ Proceeding 37-length images samples | Num: 11
339
+
340
  0%| | 0/2 [00:00<?, ?it/s]
341
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.63s/it]
342
+ Proceeding 62-length images samples | Num: 1
343
+
344
+ Proceeding 43-length images samples | Num: 4
345
+ Proceeding 35-length images samples | Num: 16
346
+
347
  0%| | 0/1 [00:00<?, ?it/s]
348
+ Proceeding 58-length images samples | Num: 4
349
+
350
  0%| | 0/1 [00:00<?, ?it/s]
351
+ Proceeding 51-length images samples | Num: 3
352
+
353
  0%| | 0/2 [00:00<?, ?it/s]
354
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/2 [00:03<00:03, 3.94s/it]
355
+ Proceeding 80-length images samples | Num: 1
356
+
357
+ Proceeding 90-length images samples | Num: 1
358
+
359
  0%| | 0/1 [00:00<?, ?it/s]
360
+ Proceeding 50-length images samples | Num: 5
361
+
362
+ Proceeding 65-length images samples | Num: 1
363
+
364
+ Proceeding 40-length images samples | Num: 3
365
+
366
  0%| | 0/1 [00:00<?, ?it/s]
367
+ Proceeding 58-length images samples | Num: 4
368
+
369
+ Proceeding 76-length images samples | Num: 1
370
+
371
+ Proceeding 60-length images samples | Num: 1
372
+
373
+ Proceeding 49-length images samples | Num: 2
374
+ Proceeding 47-length images samples | Num: 8
375
+
376
  0%| | 0/1 [00:00<?, ?it/s]
377
+ Proceeding 41-length images samples | Num: 5
378
+
379
+ Proceeding 70-length images samples | Num: 1
380
+
381
+ Proceeding 109-length images samples | Num: 1
382
+
383
+ Proceeding 69-length images samples | Num: 1
384
+
385
+ Proceeding 59-length images samples | Num: 3
386
+
387
  0%| | 0/1 [00:00<?, ?it/s]
388
+ Proceeding 50-length images samples | Num: 5
389
+
390
+ Proceeding 68-length images samples | Num: 2
391
+
392
+ Proceeding 61-length images samples | Num: 2
393
+
394
  0%| | 0/1 [00:00<?, ?it/s]
395
+ Proceeding 57-length images samples | Num: 5
396
+ Proceeding 71-length images samples | Num: 1
397
+
398
+
399
+ Proceeding 63-length images samples | Num: 2
400
+
401
+ Proceeding 26-length images samples | Num: 1
402
+ Proceeding 77-length images samples | Num: 1
403
+
404
+
405
+ Proceeding 62-length images samples | Num: 1
406
+
407
  0%| | 0/1 [00:00<?, ?it/s]
408
+ Proceeding 41-length images samples | Num: 5
409
+
410
  0%| | 0/1 [00:00<?, ?it/s]
411
+ Proceeding 37-length images samples | Num: 11
412
+
413
  0%| | 0/1 [00:00<?, ?it/s]
414
+ Proceeding 57-length images samples | Num: 5
415
+ Proceeding 43-length images samples | Num: 4
416
+ Proceeding 51-length images samples | Num: 3
417
+
418
  0%| | 0/3 [00:00<?, ?it/s]
419
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:03<00:06, 3.22s/it]
420
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:04<00:02, 2.11s/it]
421
+ Proceeding 80-length images samples | Num: 1
422
+
423
  0%| | 0/1 [00:00<?, ?it/s]
424
+ Proceeding 37-length images samples | Num: 11
425
+
426
+ Proceeding 90-length images samples | Num: 1
427
+
428
+ Proceeding 65-length images samples | Num: 1
429
+
430
+ Proceeding 40-length images samples | Num: 3
431
+ Proceeding 58-length images samples | Num: 4
432
+
433
  0%| | 0/1 [00:00<?, ?it/s]
434
+ Proceeding 76-length images samples | Num: 1
435
+
436
+ Proceeding 60-length images samples | Num: 1
437
+ Proceeding 50-length images samples | Num: 5
438
+
439
+ Proceeding 49-length images samples | Num: 2
440
+
441
+ Proceeding 70-length images samples | Num: 1
442
+
443
  0%| | 0/3 [00:00<?, ?it/s]
444
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 1/3 [00:02<00:04, 2.26s/it]
445
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 2/3 [00:03<00:01, 1.45s/it]
446
+ Proceeding 80-length images samples | Num: 1
447
+
448
+ Proceeding 109-length images samples | Num: 1
449
+
450
+ Proceeding 90-length images samples | Num: 1
451
+
452
+ Proceeding 69-length images samples | Num: 1
453
+
454
+ Proceeding 65-length images samples | Num: 1
455
+
456
+ Proceeding 59-length images samples | Num: 3
457
+
458
+ Proceeding 40-length images samples | Num: 3
459
+ Proceeding 41-length images samples | Num: 5
460
+
461
  0%| | 0/1 [00:00<?, ?it/s]
462
+ Proceeding 76-length images samples | Num: 1
463
+
464
  0%| | 0/1 [00:00<?, ?it/s]
465
+ Proceeding 68-length images samples | Num: 2
466
+
467
+ Proceeding 60-length images samples | Num: 1
468
+
469
+ Proceeding 61-length images samples | Num: 2
470
+
471
+ Proceeding 49-length images samples | Num: 2
472
+
473
+ Proceeding 71-length images samples | Num: 1
474
+
475
+ Proceeding 63-length images samples | Num: 2
476
+
477
+ Proceeding 26-length images samples | Num: 1
478
+
479
+ Proceeding 77-length images samples | Num: 1
480
+
481
  0%| | 0/1 [00:00<?, ?it/s]
482
+ Proceeding 70-length images samples | Num: 1
483
+
484
+ Proceeding 57-length images samples | Num: 5
485
+
486
+ Proceeding 109-length images samples | Num: 1
487
+
488
+ Proceeding 69-length images samples | Num: 1
489
+
490
+ Proceeding 59-length images samples | Num: 3
491
+
492
  0%| | 0/1 [00:00<?, ?it/s]
493
+ Proceeding 68-length images samples | Num: 2
494
+ Proceeding 37-length images samples | Num: 11
495
+
496
  0%| | 0/1 [00:00<?, ?it/s]
497
+ Proceeding 61-length images samples | Num: 2
498
+ Proceeding 80-length images samples | Num: 1
499
+
500
  0%| | 0/1 [00:00<?, ?it/s]
501
+ Proceeding 71-length images samples | Num: 1
502
+
503
+ Proceeding 63-length images samples | Num: 2
504
+ Proceeding 90-length images samples | Num: 1
505
+
506
  0%| | 0/1 [00:00<?, ?it/s]
507
+ Proceeding 26-length images samples | Num: 1
508
+
509
+ Proceeding 77-length images samples | Num: 1
510
+
511
+ Proceeding 65-length images samples | Num: 1
512
+ Proceeding 40-length images samples | Num: 3
513
+ Proceeding 76-length images samples | Num: 1
514
+ Proceeding 60-length images samples | Num: 1
515
+ Proceeding 49-length images samples | Num: 2
516
+ Proceeding 70-length images samples | Num: 1
517
+ Proceeding 109-length images samples | Num: 1
518
+ Proceeding 69-length images samples | Num: 1
519
+ Proceeding 59-length images samples | Num: 3
520
+ Proceeding 68-length images samples | Num: 2
521
+ Proceeding 61-length images samples | Num: 2
522
+ Proceeding 71-length images samples | Num: 1
523
+ Proceeding 63-length images samples | Num: 2
524
+ Proceeding 26-length images samples | Num: 1
525
+ Proceeding 77-length images samples | Num: 1
526
+ evaluating EgocentricNavigation ...
527
+ Results saved to work_dirs/share_internvl/InternVL2-2B/eval_milebench/EgocentricNavigation/EgocentricNavigation_240803234751.json
528
+ python eval/milebench/evaluate.py --data-dir /mnt/inspurfs/share_data/wangweiyun/share_data/long-context-benchmark/MileBench/datasets--FreedomIntelligence--MileBench/snapshots/53c7a58051ef88bacf76541d91f03f5ba2d71e7d --dataset EgocentricNavigation --result-dir work_dirs/share_internvl/InternVL2-2B/eval_milebench/EgocentricNavigation
529
+ internvl: EgocentricNavigation: {'Accuracy': 0.33, 'image_quantity_level-Accuracy': {'Few': 0, 'Medium': 0.4, 'Many': 0.32432432432432434}, 'image_quantity_level-Result': {'Few': [0, 0], 'Medium': [6, 15], 'Many': [60, 185]}}