TheoLvs committed on
Commit
382de7c
·
1 Parent(s): 1cc5c16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -16,9 +16,15 @@ import spaces
16
  # Use dotenv to load the environment variables
17
  load_dotenv()
18
 
19
- # Get HF token from environment variable
20
  HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
21
- print(HF_TOKEN)
 
 
 
 
 
 
22
  if not HF_TOKEN:
23
  print("Warning: HF_TOKEN not found in environment variables. Submissions will not work.")
24
 
@@ -162,13 +168,13 @@ LABEL_MAPPING = {
162
 
163
  # Load and prepare the dataset
164
  print("Loading dataset...")
165
- dataset = load_dataset("QuotaClimat/frugalaichallenge-text-train")
166
 
167
  # Convert string labels to integers
168
  dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
169
 
170
  # Split dataset
171
- train_test = dataset["train"].train_test_split(test_size=0.2, seed=42)
172
  train_dataset = train_test["train"]
173
  test_dataset = train_test["test"]
174
 
 
16
  # Use dotenv to load the environment variables
17
  load_dotenv()
18
 
19
+ # Get environment variables
20
  HF_TOKEN = os.getenv("HF_TOKEN_TEXT")
21
+ DATASET_NAME = os.getenv("DATASET_NAME", "QuotaClimat/frugalaichallenge-text-train") # Default to public dataset
22
+ TEST_SIZE = float(os.getenv("TEST_SIZE", "0.2")) # Default to 20% test size
23
+ TEST_SEED = int(os.getenv("TEST_SEED", "42")) # Default seed for reproducibility
24
+
25
+ print(f"Using dataset: {DATASET_NAME}")
26
+ print(f"Test split size: {TEST_SIZE}")
27
+
28
  if not HF_TOKEN:
29
  print("Warning: HF_TOKEN not found in environment variables. Submissions will not work.")
30
 
 
168
 
169
  # Load and prepare the dataset
170
  print("Loading dataset...")
171
+ dataset = load_dataset(DATASET_NAME)
172
 
173
  # Convert string labels to integers
174
  dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
175
 
176
  # Split dataset
177
+ train_test = dataset["train"].train_test_split(test_size=TEST_SIZE, seed=TEST_SEED)
178
  train_dataset = train_test["train"]
179
  test_dataset = train_test["test"]
180