BounharAbdelaziz committed on
Commit
790a907
·
verified ·
1 Parent(s): fc7f857

v0.1: Moved the dataset to a personal space and added an access token. This avoids the error `huggingface_hub.errors.HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create`.

Browse files
Files changed (1) hide show
  1. utils.py +7 -4
utils.py CHANGED
@@ -17,7 +17,7 @@ from datasets import (
17
 
18
 
19
  # Hugging Face evaluation dataset
20
- HF_DATASET_NAME = "atlasia/Moroccan-STT-Eval-Dataset"
21
 
22
  # Models paths
23
  MODEL_PATHS = {
@@ -29,6 +29,9 @@ MODEL_PATHS = {
29
  # Access token to models
30
  STT_MODEL_TOKEN = os.environ.get("STT_MODEL_TOKEN")
31
 
 
 
 
32
  # ---------------------------------------------------------------------------- #
33
  # ---------------------------------------------------------------------------- #
34
 
@@ -58,7 +61,7 @@ def create_html_image(image_path):
58
 
59
  def load_or_create_dataset():
60
  try:
61
- dataset = load_dataset(HF_DATASET_NAME)
62
  return dataset
63
  except Exception as e:
64
  print(f"[INFO] Dataset not found or error loading: {e}. Creating a new one.")
@@ -85,7 +88,7 @@ def load_or_create_dataset():
85
  def save_to_hf_dataset(audio_signal, model_choice, transcription):
86
  print("[INFO] Loading dataset...")
87
  try:
88
- dataset = load_dataset(HF_DATASET_NAME)
89
  print("[INFO] Dataset loaded successfully.")
90
  except Exception as e:
91
  print(f"[INFO] Dataset not found or error loading. Creating a new one.")
@@ -130,7 +133,7 @@ def save_to_hf_dataset(audio_signal, model_choice, transcription):
130
  dataset["train"] = updated_train_dataset
131
 
132
  print("[INFO] Pushing the updated dataset...")
133
- dataset.push_to_hub(HF_DATASET_NAME)
134
 
135
  print("[INFO] Dataset updated and pushed successfully.")
136
 
 
17
 
18
 
19
  # Hugging Face evaluation dataset
20
+ HF_DATASET_NAME = "BounharAbdelaziz/Moroccan-STT-Eval-Dataset"
21
 
22
  # Models paths
23
  MODEL_PATHS = {
 
29
  # Access token to models
30
  STT_MODEL_TOKEN = os.environ.get("STT_MODEL_TOKEN")
31
 
32
+ # Access token to dataset
33
+ STT_EVAL_DATASET_TOKEN = os.environ.get("STT_EVAL_DATASET_TOKEN")
34
+
35
  # ---------------------------------------------------------------------------- #
36
  # ---------------------------------------------------------------------------- #
37
 
 
61
 
62
  def load_or_create_dataset():
63
  try:
64
+ dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
65
  return dataset
66
  except Exception as e:
67
  print(f"[INFO] Dataset not found or error loading: {e}. Creating a new one.")
 
88
  def save_to_hf_dataset(audio_signal, model_choice, transcription):
89
  print("[INFO] Loading dataset...")
90
  try:
91
+ dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
92
  print("[INFO] Dataset loaded successfully.")
93
  except Exception as e:
94
  print(f"[INFO] Dataset not found or error loading. Creating a new one.")
 
133
  dataset["train"] = updated_train_dataset
134
 
135
  print("[INFO] Pushing the updated dataset...")
136
+ dataset.push_to_hub(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
137
 
138
  print("[INFO] Dataset updated and pushed successfully.")
139