diff --git "a/eval_milebench/ActionLocalization/pred.json" "b/eval_milebench/ActionLocalization/pred.json" new file mode 100644--- /dev/null +++ "b/eval_milebench/ActionLocalization/pred.json" @@ -0,0 +1,2402 @@ +[ + { + "sample_id": 0, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person sitting on a couch' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 19, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'one person undresses in front of a wardrobe'?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 37, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person opens the oven door'?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 39, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person holding a pillow'?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 55, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'person opens cabinets above it' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 61, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person in the doorway walks away holding the towel' take place?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 62, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person closes the door' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 67, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person puts the cup on a table' occur?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 68, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person takes a drink from a cup' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 69, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person opens a laptop up'?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 1, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'person still smiling' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 2, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person is tidying up the cabinet' take place?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 21, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person starts laughing'?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 35, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'person closes a window' happens in the video?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 38, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person start working on their laptop'?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 60, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person takes a blanket' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 3, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person takes a lollipop from a box' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 4, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person fixes their hair in a mirror'?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 29, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person takes a picture out'?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 31, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'a person is smiling' happens in the video?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 5, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person is also watching television' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 6, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person reads a book'?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 7, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'person sits in a chair' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 8, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person picked up a phone to play with it' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 10, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person fixes their hair' take place?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 18, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'person open a bag' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 27, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person looks out the window' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 11, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person undresses (removes socks' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 12, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'person putting books' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 22, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person walking through a doorway in the living room' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 13, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person starts washing clothes'?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 14, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person puts on shoes'?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 15, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'a person is walking in a room holding towel' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 26, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person sitting on bed' occur?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 36, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person opens the closet'?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 16, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person is also dressing'?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 20, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person is sitting on a couch watching tv' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 23, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person quickly undressing'?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 30, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person begins to undress' take place?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 33, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person working on a laptop' take place?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 48, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person eating something' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 63, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person sits down at the table' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 34, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person playing a game on a laptop' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 42, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person putting on their shoes'?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 43, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'the person is sitting in the couch with the laptop' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 59, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person turns on the light' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 65, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nCan you identify when the action 'person puts the remote on a shelf' happens in the video?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 72, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nCan you identify when the action 'person close the closet door' happens in the video?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 50, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person put down broom' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 64, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person put the bottle in the garbage' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 66, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nIn the given video, when does the action 'person put on their shoes' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 71, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person puts the book down' take place?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 73, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person open book' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 89, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person laughs at a tv' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 108, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person takes a towel from a rack' take place?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 80, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person takes a sandwich from the refrigerator' occur?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 85, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person opens a box' take place?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 88, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person runs out' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 99, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person closed the book' take place?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 94, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person closes the refrigerator'?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 111, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person puts some food on a shelf' take place?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 123, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person closed the closet door' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 153, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person is sitting on floor sneezing'?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 165, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person is also playing with a phone'?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 181, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person takes a bag off of a shelf' occur?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 76, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person opening a door' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 77, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person was putting the bag into the cabinet' occur?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 87, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person puts down the box' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 90, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person takes a glass out from the refrigerator' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 97, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person throws the book down' occur?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 98, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nCan you identify when the action 'the person takes a paper towel from the shelf' happens in the video?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 104, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person runs laughing through the hall with a pillow'?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 110, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person the take shoes out of the closet stand up' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 112, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person closes a cupboard door' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 115, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person wash their hands'?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 84, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person closes the cabinet' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 92, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person begins laughing' occur?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 100, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person takes out a phone' take place?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 129, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person takes a camera from a table' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 130, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person begins tidying their clothes' take place?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 131, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'the person puts the cups onto a table' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 44, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nCan you identify when the action 'person takes out the phone' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 41, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person cooks some food on the stove' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 53, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person open a box'?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 86, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person throws their clothes onto the shelf' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 25, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person putting away groceries'?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 9, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person looks out the window' take place?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 49, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person eating a sandwich' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 51, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person opens a cabinet' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 54, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person takes a drink from a coffee cup' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 79, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person stand up again' occur?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 28, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person pour a glass of soda' take place?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 32, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person is smiling' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 17, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person eats some food' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 47, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person pour it into a glass' take place?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 58, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person eats a sandwich' take place?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 40, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person looking at a book' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 56, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person open a box' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 75, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person putting glasses on' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 83, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person put the plate on a table'?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 82, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person drinking water from a glass'?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 102, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'a person is sneezing as they dust a nearby mirror' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 113, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person puts a towel over the shoulder' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 121, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person starts closing door' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 126, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person puts the bowls on the table' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 78, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person turns off a light' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 154, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'person holding a pillow,' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 81, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person sits on a couch' occur?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 139, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person takes a pillow from the shelf' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 145, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person takes a phone' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 149, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nCan you identify when the action 'person puts the bag on the table' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 95, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person begins to eat it' occur?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 128, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person puts it down on the shelf' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 103, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person opens up s laptop' occur?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 105, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person throwing their clothes on the sofa' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 148, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person closes the door' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 116, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person was laughing'?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 118, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person throws a pair of shoes toward a sofa' take place?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 120, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person drinking from the cup' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 122, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person puts a towel on a shelf'?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 125, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person puts it down on the table' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 127, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person drinking glass' take place?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 140, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person puts the food on the desk' occur?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 141, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person close the cabinet door' take place?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 146, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person opens up the laptop' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 147, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person takes some food from a cabinet' take place?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 132, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person takes a broom' occur?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 135, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person running in place' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 137, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'another person runs in with homework' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 151, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person opens a box of cookies' take place?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 160, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person is standing in the garage holding a book' occur?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 161, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person is sneezing by the door'?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 52, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'person opens the door' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 93, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person smiling at a camera' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 114, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'a person is putting food into the refrigerator' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 158, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nCan you identify when the action 'person takes off some clothes' happens in the video?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 46, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person tidying clothes'?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 45, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person laughs to themselves'?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 57, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person close a cabinet' take place?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 106, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'a person awakens in their bathroom holding their phone' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 142, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person takes a pink laptop from a shelf opens' take place?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 124, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'next the person closes the doors to the wardrobe' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 24, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'a person is dressing in front of a mirror' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "Throughout the entire video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 74, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'person close a laptop' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 133, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person drinks from a glass'?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 70, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person closes the book' take place?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 91, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person closes the box'?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 107, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person opening the door' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 138, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person throws the bag down'?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 150, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'a person is holding a towel' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 152, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person in their recreation room is eating some food' take place?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 168, + "question": "Based on the given images, identify when does the action in the question happen You must choose your answer from the Choice List.\nCan you identify when the action 'person take some food' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 169, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'person takes a sandwich from a plate' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 134, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'person runs to the door' happens in the video?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 159, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'the person takes a drink from a cup of coffee' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 174, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person opens the closet' take place?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 173, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'a person walks through the doorway' happens in the video?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 136, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'the person puts the blanket on the floor' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 143, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person takes some food from it' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 171, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person watches themselves eat' occur?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 156, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person the individual opens another nearby closet'?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 163, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person takes dishes off a shelf' take place?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 167, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nCan you identify when the action 'person takes several bites of an unknown food item' happens in the video?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 175, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person puts a coffee cup on a shelf' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 176, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person opens a door' take place?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 179, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person puts the groceries on the table' occur?\nChoice list: \nA. In the middle of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 183, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'a person opens a closet door' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 189, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person puts a pillow down on the sofa' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 191, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person is closing the door'?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 193, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person throws a pillow at a refrigerator' take place?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 170, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person is running across a room' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 177, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person washes their hands'?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 184, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person puts on shoes' occur?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 187, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'one person propped on a pillow awakens' occur?\nChoice list: \nA. At the beginning of the video.\nB. At the end of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 197, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'the person takes a towel from the cabinet' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. In the middle of the video.\nC. At the end of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 198, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person takes another box from the same shelf' occur?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 155, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nCan you identify when the action 'a person closes a door' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 164, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person puts a plastic bag on the table' occur?\nChoice list: \nA. At the beginning of the video.\nB. Throughout the entire video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 166, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person is sneezing'?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 192, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person opens a laptop' take place?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 101, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person opens a pink laptop' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 96, + "question": "Review the supplied visuals and ascertain the timing of the action in the inquiry. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person opens the dryer door' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the end of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 117, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nIn the given video, when does the action 'person takes a glass from the desk' take place?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 180, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person walk over to a cabinet take a towel out' occur?\nChoice list: \nA. At the end of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 190, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'a person is closing the door' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. In the middle of the video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 144, + "question": "Examine the given illustrations and deduce when the action in the inquiry happens. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'person opens the door'?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 157, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nIn the given video, when does the action 'the person puts the food on the table' take place?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 109, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person sits on a chair to look at them' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 119, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nCan you identify when the action 'person sit on their bed' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "In the middle of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 178, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nCan you identify when the action 'the person is opening a box' happens in the video?\nChoice list: \nA. Throughout the entire video.\nB. At the end of the video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 186, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'person turns on a light' occur?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 194, + "question": "Evaluate the presented graphics and infer the timing of the action in the question. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person begins eating it' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 199, + "question": "Analyze the provided visuals and determine the timing of the event in question. You must choose your answer from the Choice List.\nIn the given video, when does the action 'a person is holding a bag' take place?\nChoice list: \nA. At the beginning of the video.\nB. In the middle of the video.\nC. Throughout the entire video.\nD. At the end of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 162, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nCan you identify when the action 'the person takes a tissue from a tissue box' happens in the video?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. In the middle of the video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + }, + { + "sample_id": 182, + "question": "From the images presented, ascertain the moment the action in the query occurs. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'the person sat in a hall opening a plastic bag'?\nChoice list: \nA. In the middle of the video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 185, + "question": "Given the visuals, discern the timing of the event in the query. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person is throwing the bag at the light switch'?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. Throughout the entire video.\nD. At the beginning of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 188, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person is sneezing' occur?\nChoice list: \nA. Throughout the entire video.\nB. At the beginning of the video.\nC. At the end of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "D" + }, + { + "sample_id": 195, + "question": "Inspect the presented illustrations and conclude when the action in the inquiry occurs. You must choose your answer from the Choice List.\nAt what moment in the video does the action 'person puts down a picture' occur?\nChoice list: \nA. In the middle of the video.\nB. At the end of the video.\nC. At the beginning of the video.\nD. Throughout the entire video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "A" + }, + { + "sample_id": 196, + "question": "Observe the given images and deduce when the action in the query takes place. You must choose your answer from the Choice List.\nWhen in the video sequence do we observe the action 'a person is sneezing into a phone'?\nChoice list: \nA. At the end of the video.\nB. At the beginning of the video.\nC. Throughout the entire video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "B" + }, + { + "sample_id": 172, + "question": "Using the images at hand, infer when the action in the question takes place. You must choose your answer from the Choice List.\nDuring which part of the video does the action 'the person is holding a laptop' occur?\nChoice list: \nA. At the end of the video.\nB. Throughout the entire video.\nC. At the beginning of the video.\nD. In the middle of the video.\nAnswer with the option's letter from the given choices directly.", + "gt_response": "At the beginning of the video.", + "gen_kwargs": { + "do_sample": false, + "num_beams": 1, + "max_new_tokens": 32, + "eos_token_id": 92542 + }, + "pred_response": "C" + } +] \ No newline at end of file