Suprath committed
Commit f113e41 · verified · Parent: 8964b86

Update app.py

Files changed (1): app.py (+36 -36)
app.py CHANGED
@@ -90,45 +90,45 @@ def detect_landmark(image, detector, predictor):
         coords[i] = (shape.part(i).x, shape.part(i).y)
     return coords
 
-def predict_and_save(process_video):
-    num_frames = int(cv2.VideoCapture(process_video).get(cv2.CAP_PROP_FRAME_COUNT))
-
-    tsv_cont = ["/\n", f"test-0\t{process_video}\t{None}\t{num_frames}\t{int(16_000*num_frames/25)}\n"]
-    label_cont = ["DUMMY\n"]
-    with open(f"{data_dir}/test.tsv", "w") as fo:
-        fo.write("".join(tsv_cont))
-    with open(f"{data_dir}/test.wrd", "w") as fo:
-        fo.write("".join(label_cont))
-    task.load_dataset(gen_subset, task_cfg=saved_cfg.task)
-
-    def decode_fn(x):
-        dictionary = task.target_dictionary
-        symbols_ignore = generator.symbols_to_strip_from_output
-        symbols_ignore.add(dictionary.pad())
-        return task.datasets[gen_subset].label_processors[0].decode(x, symbols_ignore)
-
-    itr = task.get_batch_iterator(dataset=task.dataset(gen_subset)).next_epoch_itr(shuffle=False)
-    sample = next(itr)
-    if torch.cuda.is_available():
-        sample = utils.move_to_cuda(sample)
-    hypos = task.inference_step(generator, models, sample)
-    ref = decode_fn(sample['target'][0].int().cpu())
-    hypo = hypos[0][0]['tokens'].int().cpu()
-    hypo = decode_fn(hypo)
+# def predict_and_save(process_video):
+#     num_frames = int(cv2.VideoCapture(process_video).get(cv2.CAP_PROP_FRAME_COUNT))
+
+#     tsv_cont = ["/\n", f"test-0\t{process_video}\t{None}\t{num_frames}\t{int(16_000*num_frames/25)}\n"]
+#     label_cont = ["DUMMY\n"]
+#     with open(f"{data_dir}/test.tsv", "w") as fo:
+#         fo.write("".join(tsv_cont))
+#     with open(f"{data_dir}/test.wrd", "w") as fo:
+#         fo.write("".join(label_cont))
+#     task.load_dataset(gen_subset, task_cfg=saved_cfg.task)
+
+#     def decode_fn(x):
+#         dictionary = task.target_dictionary
+#         symbols_ignore = generator.symbols_to_strip_from_output
+#         symbols_ignore.add(dictionary.pad())
+#         return task.datasets[gen_subset].label_processors[0].decode(x, symbols_ignore)
+
+#     itr = task.get_batch_iterator(dataset=task.dataset(gen_subset)).next_epoch_itr(shuffle=False)
+#     sample = next(itr)
+#     if torch.cuda.is_available():
+#         sample = utils.move_to_cuda(sample)
+#     hypos = task.inference_step(generator, models, sample)
+#     ref = decode_fn(sample['target'][0].int().cpu())
+#     hypo = hypos[0][0]['tokens'].int().cpu()
+#     hypo = decode_fn(hypo)
 
-    # Collect timestamps and texts
-    transcript = []
-    for i, (start, end) in enumerate(sample['net_input']['video_lengths'], 1):
-        start_time = float(start) / 16_000
-        end_time = float(end) / 16_000
-        text = hypo[i].strip()
-        transcript.append({"timestamp": [start_time, end_time], "text": text})
+#     # Collect timestamps and texts
+#     transcript = []
+#     for i, (start, end) in enumerate(sample['net_input']['video_lengths'], 1):
+#         start_time = float(start) / 16_000
+#         end_time = float(end) / 16_000
+#         text = hypo[i].strip()
+#         transcript.append({"timestamp": [start_time, end_time], "text": text})
 
-    # Save transcript to a JSON file
-    with open('speech_transcript.json', 'w') as outfile:
-        json.dump(transcript, outfile, indent=4)
+#     # Save transcript to a JSON file
+#     with open('speech_transcript.json', 'w') as outfile:
+#         json.dump(transcript, outfile, indent=4)
 
-    return hypo
+#     return hypo
 
 
 def preprocess_video(input_video_path):
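
For context, the commented-out predict_and_save first writes a fairseq-style manifest pair (test.tsv / test.wrd) describing the single input video before running inference. Below is a minimal, standalone sketch of that step; the helper name write_manifest and its arguments are illustrative, while the OpenCV frame count and the 16 kHz / 25 fps sample arithmetic come from the code in the diff.

import cv2

def write_manifest(video_path, data_dir):
    # Count the frames of the input video with OpenCV, as the original code does.
    num_frames = int(cv2.VideoCapture(video_path).get(cv2.CAP_PROP_FRAME_COUNT))
    # Audio sample count matching the video length: 16 kHz audio against 25 fps video.
    num_samples = int(16_000 * num_frames / 25)
    # fairseq-style tsv: a root line, then one tab-separated record per
    # utterance: id, video path, audio path (None here), frames, samples.
    with open(f"{data_dir}/test.tsv", "w") as fo:
        fo.write("/\n")
        fo.write(f"test-0\t{video_path}\t{None}\t{num_frames}\t{num_samples}\n")
    # Placeholder word labels; inference only needs the file to be non-empty.
    with open(f"{data_dir}/test.wrd", "w") as fo:
        fo.write("DUMMY\n")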
 
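
The speech_transcript.json that the removed code saved is a flat list of segments, each pairing a [start, end] timestamp in seconds (sample offsets divided by the 16 kHz rate) with the decoded text. A short sketch of writing that layout, with illustrative values:

import json

# Each entry mirrors the dicts the removed loop appended; the
# timestamps and text here are made-up examples, not model output.
transcript = [
    {"timestamp": [0.0, 1.5], "text": "example segment"},
    {"timestamp": [1.5, 3.2], "text": "another segment"},
]

with open("speech_transcript.json", "w") as outfile:
    json.dump(transcript, outfile, indent=4)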