Update app.py
Browse files
app.py
CHANGED
@@ -90,45 +90,45 @@ def detect_landmark(image, detector, predictor):
|
|
90 |
coords[i] = (shape.part(i).x, shape.part(i).y)
|
91 |
return coords
|
92 |
|
93 |
-
def predict_and_save(process_video):
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
|
131 |
-
|
132 |
|
133 |
|
134 |
def preprocess_video(input_video_path):
|
|
|
90 |
coords[i] = (shape.part(i).x, shape.part(i).y)
|
91 |
return coords
|
92 |
|
93 |
+
# def predict_and_save(process_video):
|
94 |
+
# num_frames = int(cv2.VideoCapture(process_video).get(cv2.CAP_PROP_FRAME_COUNT))
|
95 |
+
|
96 |
+
# tsv_cont = ["/\n", f"test-0\t{process_video}\t{None}\t{num_frames}\t{int(16_000*num_frames/25)}\n"]
|
97 |
+
# label_cont = ["DUMMY\n"]
|
98 |
+
# with open(f"{data_dir}/test.tsv", "w") as fo:
|
99 |
+
# fo.write("".join(tsv_cont))
|
100 |
+
# with open(f"{data_dir}/test.wrd", "w") as fo:
|
101 |
+
# fo.write("".join(label_cont))
|
102 |
+
# task.load_dataset(gen_subset, task_cfg=saved_cfg.task)
|
103 |
+
|
104 |
+
# def decode_fn(x):
|
105 |
+
# dictionary = task.target_dictionary
|
106 |
+
# symbols_ignore = generator.symbols_to_strip_from_output
|
107 |
+
# symbols_ignore.add(dictionary.pad())
|
108 |
+
# return task.datasets[gen_subset].label_processors[0].decode(x, symbols_ignore)
|
109 |
+
|
110 |
+
# itr = task.get_batch_iterator(dataset=task.dataset(gen_subset)).next_epoch_itr(shuffle=False)
|
111 |
+
# sample = next(itr)
|
112 |
+
# if torch.cuda.is_available():
|
113 |
+
# sample = utils.move_to_cuda(sample)
|
114 |
+
# hypos = task.inference_step(generator, models, sample)
|
115 |
+
# ref = decode_fn(sample['target'][0].int().cpu())
|
116 |
+
# hypo = hypos[0][0]['tokens'].int().cpu()
|
117 |
+
# hypo = decode_fn(hypo)
|
118 |
|
119 |
+
# # Collect timestamps and texts
|
120 |
+
# transcript = []
|
121 |
+
# for i, (start, end) in enumerate(sample['net_input']['video_lengths'], 1):
|
122 |
+
# start_time = float(start) / 16_000
|
123 |
+
# end_time = float(end) / 16_000
|
124 |
+
# text = hypo[i].strip()
|
125 |
+
# transcript.append({"timestamp": [start_time, end_time], "text": text})
|
126 |
|
127 |
+
# # Save transcript to a JSON file
|
128 |
+
# with open('speech_transcript.json', 'w') as outfile:
|
129 |
+
# json.dump(transcript, outfile, indent=4)
|
130 |
|
131 |
+
# return hypo
|
132 |
|
133 |
|
134 |
def preprocess_video(input_video_path):
|