Moroccan-Fast-Speech-to-Text-Transcription

Sleeping

App Files Community

BounharAbdelaziz committed 24 days ago

Commit

3af0797

verified ·

1 Parent(s): 790a907

v0.1: remove the dataset-creation fallback; only load the existing dataset.

Browse files

Files changed (1) hide show

utils.py +3 -46

utils.py CHANGED Viewed

@@ -56,58 +56,15 @@ def create_html_image(image_path):
     """
     return html_string
-# ---------------------------------------------------------------------------- #
-# ---------------------------------------------------------------------------- #
-def load_or_create_dataset():
-    try:
-        dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
-        return dataset
-    except Exception as e:
-        print(f"[INFO] Dataset not found or error loading: {e}. Creating a new one.")
-        features = Features({
-            "timestamp": Value("string"),
-            "audio": Audio(sampling_rate=16000),
-            "model_used": Value("string"),
-            "transcription": Value("string")
-        })
-        dataset = Dataset.from_dict({
-            "timestamp": [],
-            "audio": [],
-            "model_used": [],
-            "transcription": []
-        }, features=features)
-        dataset = DatasetDict({
-            "train": dataset,
-        })
-        return dataset
 # ---------------------------------------------------------------------------- #
 # ---------------------------------------------------------------------------- #
 def save_to_hf_dataset(audio_signal, model_choice, transcription):
     print("[INFO] Loading dataset...")
-    try:
-        dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
-        print("[INFO] Dataset loaded successfully.")
-    except Exception as e:
-        print(f"[INFO] Dataset not found or error loading. Creating a new one.")
-        dataset = DatasetDict({
-            "train": Dataset.from_dict(
-                {
-                    "audio": [],
-                    "transcription": [],
-                    "model_used": [],
-                    "timestamp": [],
-                },
-                features=Features({
-                    "audio": Audio(sampling_rate=16000),
-                    "transcription": Value("string"),
-                    "model_used": Value("string"),
-                    "timestamp": Value("string"),
-                })
-            )
-        })
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     new_entry = {

     """
     return html_string
 # ---------------------------------------------------------------------------- #
 # ---------------------------------------------------------------------------- #
 def save_to_hf_dataset(audio_signal, model_choice, transcription):
     print("[INFO] Loading dataset...")
+    dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
+    print("[INFO] Dataset loaded successfully.")
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     new_entry = {