BounharAbdelaziz committed on
Commit
3af0797
·
verified ·
1 Parent(s): 790a907

v0.1: remove dataset creation; only load the existing dataset.

Browse files
Files changed (1) hide show
  1. utils.py +3 -46
utils.py CHANGED
@@ -56,58 +56,15 @@ def create_html_image(image_path):
56
  """
57
  return html_string
58
 
59
- # ---------------------------------------------------------------------------- #
60
- # ---------------------------------------------------------------------------- #
61
-
62
- def load_or_create_dataset():
63
- try:
64
- dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
65
- return dataset
66
- except Exception as e:
67
- print(f"[INFO] Dataset not found or error loading: {e}. Creating a new one.")
68
- features = Features({
69
- "timestamp": Value("string"),
70
- "audio": Audio(sampling_rate=16000),
71
- "model_used": Value("string"),
72
- "transcription": Value("string")
73
- })
74
- dataset = Dataset.from_dict({
75
- "timestamp": [],
76
- "audio": [],
77
- "model_used": [],
78
- "transcription": []
79
- }, features=features)
80
- dataset = DatasetDict({
81
- "train": dataset,
82
- })
83
- return dataset
84
 
85
  # ---------------------------------------------------------------------------- #
86
  # ---------------------------------------------------------------------------- #
87
 
88
  def save_to_hf_dataset(audio_signal, model_choice, transcription):
89
  print("[INFO] Loading dataset...")
90
- try:
91
- dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
92
- print("[INFO] Dataset loaded successfully.")
93
- except Exception as e:
94
- print(f"[INFO] Dataset not found or error loading. Creating a new one.")
95
- dataset = DatasetDict({
96
- "train": Dataset.from_dict(
97
- {
98
- "audio": [],
99
- "transcription": [],
100
- "model_used": [],
101
- "timestamp": [],
102
- },
103
- features=Features({
104
- "audio": Audio(sampling_rate=16000),
105
- "transcription": Value("string"),
106
- "model_used": Value("string"),
107
- "timestamp": Value("string"),
108
- })
109
- )
110
- })
111
 
112
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
113
  new_entry = {
 
56
  """
57
  return html_string
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  # ---------------------------------------------------------------------------- #
61
  # ---------------------------------------------------------------------------- #
62
 
63
  def save_to_hf_dataset(audio_signal, model_choice, transcription):
64
  print("[INFO] Loading dataset...")
65
+
66
+ dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
67
+ print("[INFO] Dataset loaded successfully.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
70
  new_entry = {