BounharAbdelaziz
committed on
v0.1: moved dataset to personal space and added access token. Avoids huggingface_hub.errors.HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create
Browse files
utils.py
CHANGED
@@ -17,7 +17,7 @@ from datasets import (
|
|
17 |
|
18 |
|
19 |
# Hugging Face evaluation dataset
|
20 |
-
HF_DATASET_NAME = "
|
21 |
|
22 |
# Models paths
|
23 |
MODEL_PATHS = {
|
@@ -29,6 +29,9 @@ MODEL_PATHS = {
|
|
29 |
# Access token to models
|
30 |
STT_MODEL_TOKEN = os.environ.get("STT_MODEL_TOKEN")
|
31 |
|
|
|
|
|
|
|
32 |
# ---------------------------------------------------------------------------- #
|
33 |
# ---------------------------------------------------------------------------- #
|
34 |
|
@@ -58,7 +61,7 @@ def create_html_image(image_path):
|
|
58 |
|
59 |
def load_or_create_dataset():
|
60 |
try:
|
61 |
-
dataset = load_dataset(HF_DATASET_NAME)
|
62 |
return dataset
|
63 |
except Exception as e:
|
64 |
print(f"[INFO] Dataset not found or error loading: {e}. Creating a new one.")
|
@@ -85,7 +88,7 @@ def load_or_create_dataset():
|
|
85 |
def save_to_hf_dataset(audio_signal, model_choice, transcription):
|
86 |
print("[INFO] Loading dataset...")
|
87 |
try:
|
88 |
-
dataset = load_dataset(HF_DATASET_NAME)
|
89 |
print("[INFO] Dataset loaded successfully.")
|
90 |
except Exception as e:
|
91 |
print(f"[INFO] Dataset not found or error loading. Creating a new one.")
|
@@ -130,7 +133,7 @@ def save_to_hf_dataset(audio_signal, model_choice, transcription):
|
|
130 |
dataset["train"] = updated_train_dataset
|
131 |
|
132 |
print("[INFO] Pushing the updated dataset...")
|
133 |
-
dataset.push_to_hub(HF_DATASET_NAME)
|
134 |
|
135 |
print("[INFO] Dataset updated and pushed successfully.")
|
136 |
|
|
|
17 |
|
18 |
|
19 |
# Hugging Face evaluation dataset
|
20 |
+
HF_DATASET_NAME = "BounharAbdelaziz/Moroccan-STT-Eval-Dataset"
|
21 |
|
22 |
# Models paths
|
23 |
MODEL_PATHS = {
|
|
|
29 |
# Access token to models
|
30 |
STT_MODEL_TOKEN = os.environ.get("STT_MODEL_TOKEN")
|
31 |
|
32 |
+
# Access token to dataset
|
33 |
+
STT_EVAL_DATASET_TOKEN = os.environ.get("STT_EVAL_DATASET_TOKEN")
|
34 |
+
|
35 |
# ---------------------------------------------------------------------------- #
|
36 |
# ---------------------------------------------------------------------------- #
|
37 |
|
|
|
61 |
|
62 |
def load_or_create_dataset():
|
63 |
try:
|
64 |
+
dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
|
65 |
return dataset
|
66 |
except Exception as e:
|
67 |
print(f"[INFO] Dataset not found or error loading: {e}. Creating a new one.")
|
|
|
88 |
def save_to_hf_dataset(audio_signal, model_choice, transcription):
|
89 |
print("[INFO] Loading dataset...")
|
90 |
try:
|
91 |
+
dataset = load_dataset(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
|
92 |
print("[INFO] Dataset loaded successfully.")
|
93 |
except Exception as e:
|
94 |
print(f"[INFO] Dataset not found or error loading. Creating a new one.")
|
|
|
133 |
dataset["train"] = updated_train_dataset
|
134 |
|
135 |
print("[INFO] Pushing the updated dataset...")
|
136 |
+
dataset.push_to_hub(HF_DATASET_NAME, token=STT_EVAL_DATASET_TOKEN)
|
137 |
|
138 |
print("[INFO] Dataset updated and pushed successfully.")
|
139 |
|