Minor adjustments to vad.py
Browse files
- src/vad.py +10 -2
src/vad.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from abc import ABC, abstractmethod
|
2 |
from collections import Counter, deque
|
3 |
import os
|
4 |
import time
|
@@ -421,7 +421,8 @@ class AbstractTranscription(ABC):
|
|
421 |
sub_text = ""
|
422 |
sub_words = []
|
423 |
word_length = 0
|
424 |
-
|
|
|
425 |
for idx, word in enumerate(segment_words):
|
426 |
word2 = segment_words[idx + 1] if idx + 1 < len(segment_words) else None
|
427 |
# Adjust start and end
|
@@ -430,12 +431,18 @@ class AbstractTranscription(ABC):
|
|
430 |
|
431 |
if "start" not in sub_segment:
|
432 |
sub_segment["start"] = float(word["start"])
|
|
|
|
|
433 |
|
434 |
sub_text += word["word"]
|
435 |
sub_words.append(word)
|
436 |
word_length += len_wide(word["word"])
|
437 |
if (sub_text.rstrip().endswith(".") or
|
438 |
(word_length > 90 and (sub_text.rstrip().endswith(",") or sub_text.rstrip().endswith("?"))) or
|
|
|
|
|
|
|
|
|
439 |
(word_length > 120 and word2 and (word2["word"].lstrip().startswith(",") or ((word2["word"].strip() in ["and", "or", "but"])))) or
|
440 |
(word_length > 180 and sub_text.endswith(" "))):
|
441 |
sub_segment["text"] = sub_text
|
@@ -446,6 +453,7 @@ class AbstractTranscription(ABC):
|
|
446 |
sub_text = ""
|
447 |
sub_words = []
|
448 |
word_length = 0
|
|
|
449 |
if "start" in sub_segment:
|
450 |
sub_segment["text"] = sub_text
|
451 |
sub_segment["end"] = float(word["end"])
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
from collections import Counter, deque
|
3 |
import os
|
4 |
import time
|
|
|
421 |
sub_text = ""
|
422 |
sub_words = []
|
423 |
word_length = 0
|
424 |
+
is_wide = False
|
425 |
+
|
426 |
for idx, word in enumerate(segment_words):
|
427 |
word2 = segment_words[idx + 1] if idx + 1 < len(segment_words) else None
|
428 |
# Adjust start and end
|
|
|
431 |
|
432 |
if "start" not in sub_segment:
|
433 |
sub_segment["start"] = float(word["start"])
|
434 |
+
if not is_wide and len(word["word"]) > 1:
|
435 |
+
is_wide = True
|
436 |
|
437 |
sub_text += word["word"]
|
438 |
sub_words.append(word)
|
439 |
word_length += len_wide(word["word"])
|
440 |
if (sub_text.rstrip().endswith(".") or
|
441 |
(word_length > 90 and (sub_text.rstrip().endswith(",") or sub_text.rstrip().endswith("?"))) or
|
442 |
+
(word_length > 80 and is_wide and (
|
443 |
+
sub_text.rstrip().endswith(",") or sub_text.rstrip().endswith("?") or
|
444 |
+
sub_text.rstrip().endswith("、") or sub_text.rstrip().endswith("。"))) or
|
445 |
+
(word_length > 90 and is_wide and sub_text.endswith(" ")) or
|
446 |
(word_length > 120 and word2 and (word2["word"].lstrip().startswith(",") or ((word2["word"].strip() in ["and", "or", "but"])))) or
|
447 |
(word_length > 180 and sub_text.endswith(" "))):
|
448 |
sub_segment["text"] = sub_text
|
|
|
453 |
sub_text = ""
|
454 |
sub_words = []
|
455 |
word_length = 0
|
456 |
+
is_wide = False
|
457 |
if "start" in sub_segment:
|
458 |
sub_segment["text"] = sub_text
|
459 |
sub_segment["end"] = float(word["end"])
|