mgyigit committed on
Commit
f5e0909
·
verified ·
1 Parent(s): e589dd9

Update src/saving_utils.py

Browse files
Files changed (1) hide show
  1. src/saving_utils.py +52 -35
src/saving_utils.py CHANGED
@@ -6,6 +6,55 @@ from huggingface_hub import HfApi
6
  script_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the running script
7
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  def save_csv_locally(dataframe, file_name, save_dir="/tmp"):
10
  # Ensure the save directory exists
11
  os.makedirs(save_dir, exist_ok=True)
@@ -19,28 +68,13 @@ def save_csv_locally(dataframe, file_name, save_dir="/tmp"):
19
 
20
  return file_path
21
 
22
- def upload_to_hub(local_path, remote_path, repo_id, repo_type="space"):
23
- api = HfApi(token=os.getenv("api_key")) # Requires authentication via HF_TOKEN
24
- api.upload_file(
25
- path_or_fileobj=local_path,
26
- path_in_repo=remote_path,
27
- repo_id=repo_id,
28
- repo_type=repo_type,
29
- commit_message=f"Updating {os.path.basename(remote_path)}"
30
- )
31
- print(f"Uploaded {local_path} to {repo_id}/{remote_path}")
32
-
33
- def cleanup_local_file(file_path):
34
- if os.path.exists(file_path):
35
- os.remove(file_path)
36
- print(f"Removed local file: {file_path}")
37
 
38
  def save_similarity_output(
39
  output_dict,
40
  method_name,
41
- leaderboard_path="/home/user/app/src/data/leaderboard_results.csv",
42
- similarity_path="/home/user/app/src/data/similarity_results.csv",
43
- repo_id="mgyigit/probe3",
44
  ):
45
  # Load or initialize the DataFrames
46
  if os.path.exists(leaderboard_path):
@@ -61,16 +95,12 @@ def save_similarity_output(
61
  new_row['Method'] = method_name
62
  similarity_df = pd.concat([similarity_df, pd.DataFrame([new_row])], ignore_index=True)
63
 
64
- # Same for the leaderboard DataFrame
65
  if method_name not in leaderboard_df['Method'].values:
66
  new_row = {col: None for col in leaderboard_df.columns}
67
  new_row['Method'] = method_name
68
  leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
69
 
70
- # Initialize storage for averages
71
  averages = {}
72
-
73
- # Iterate through the datasets and calculate averages
74
  for dataset in ['sparse', '200', '500']:
75
  correlation_values = []
76
  pvalue_values = []
@@ -104,22 +134,9 @@ def save_similarity_output(
104
  similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
105
  leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
106
 
107
- # Save locally to a temporary directory
108
  leaderboard_file = save_csv_locally(leaderboard_df, "leaderboard_results.csv")
109
  similarity_file = save_csv_locally(similarity_df, "similarity_results.csv")
110
 
111
- # Upload to Hugging Face Hub
112
- try:
113
- upload_to_hub(leaderboard_file, "leaderboard_results.csv", repo_id)
114
- upload_to_hub(similarity_file, "similarity_results.csv", repo_id)
115
- except Exception as e:
116
- print(f"Failed to upload files: {e}")
117
- return -1
118
-
119
- # Clean up local files
120
- cleanup_local_file(leaderboard_file)
121
- cleanup_local_file(similarity_file)
122
-
123
  return 0
124
 
125
  def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):
 
6
  script_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the running script
7
 
8
 
9
+
10
def download_files_from_hub(benchmark_types, repo_id="mgyigit/probe-data", repo_type="space"):
    """Download the result CSVs for the given benchmarks into ``/tmp``.

    For every entry in ``benchmark_types`` plus the always-included
    ``"leaderboard"``, the file ``<name>_results.csv`` is fetched from the
    Hub repository and placed at ``/tmp/<name>_results.csv``.

    Args:
        benchmark_types: Benchmark name or iterable of benchmark names used as
            file-name prefixes. The caller's object is never modified.
        repo_id: Hub repository to download from.
        repo_type: Hub repository type passed to the download call.

    Returns:
        Always ``0``. Download failures are reported via ``print`` and do not
        stop the remaining files from being attempted (best effort).
    """
    # The upload helper reads the secret as "api_key"; this function used to
    # read "api-key". Prefer the shared spelling and keep the old one as a
    # fallback so existing deployments keep working.
    api = HfApi(token=os.getenv("api_key") or os.getenv("api-key"))

    # Build a fresh list: `benchmark_types += "leaderboard"` would extend a
    # list with the individual characters of the string (and mutate the
    # caller's argument) instead of appending one more benchmark name.
    if isinstance(benchmark_types, str):
        benchmark_types = [benchmark_types]
    benchmarks = list(benchmark_types)
    if "leaderboard" not in benchmarks:
        benchmarks.append("leaderboard")

    for benchmark in benchmarks:
        file_name = f"{benchmark}_results.csv"
        local_path = f"/tmp/{file_name}"

        try:
            # HfApi exposes `hf_hub_download` (with `filename=`); there is no
            # `download_file` method, so the previous call always failed and
            # the failure was hidden by the except clause below.
            api.hf_hub_download(
                repo_id=repo_id,
                filename=file_name,
                local_dir="/tmp",
                repo_type=repo_type,
            )
            print(f"Downloaded {file_name} from {repo_id} to {local_path}")
        except Exception as e:
            # Deliberately best effort: report and continue with the next file.
            print(f"Failed to download {file_name}: {e}")

    return 0
33
+
34
+
35
def upload_to_hub(benchmark_types, repo_id="mgyigit/probe-data", repo_type="space"):
    """Upload the result CSVs from ``/tmp`` to the Hub and delete local copies.

    For every entry in ``benchmark_types`` plus the always-included
    ``"leaderboard"``, the file ``/tmp/<name>_results.csv`` is uploaded to the
    repository root as ``<name>_results.csv`` and then removed locally.

    Args:
        benchmark_types: Benchmark name or iterable of benchmark names used as
            file-name prefixes. The caller's object is never modified.
        repo_id: Hub repository to upload to.
        repo_type: Hub repository type passed to the upload call.

    Returns:
        ``0`` once every file has been uploaded and removed.

    Raises:
        Any exception raised by ``HfApi.upload_file`` or ``os.remove``
        propagates to the caller; files processed before the failure stay
        uploaded and removed.
    """
    # The token is read from the "api_key" environment variable.
    api = HfApi(token=os.getenv("api_key"))

    # Build a fresh list: `benchmark_types += "leaderboard"` would extend a
    # list with the individual characters of the string (and mutate the
    # caller's argument) instead of appending one more benchmark name.
    if isinstance(benchmark_types, str):
        benchmark_types = [benchmark_types]
    benchmarks = list(benchmark_types)
    if "leaderboard" not in benchmarks:
        benchmarks.append("leaderboard")

    for benchmark in benchmarks:
        file_name = f"{benchmark}_results.csv"
        local_path = f"/tmp/{file_name}"

        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type=repo_type,
            commit_message=f"Updating {file_name}",
        )
        print(f"Uploaded {local_path} to {repo_id}/{file_name}")

        # Only remove the local copy after a successful upload.
        os.remove(local_path)
        # Previously referenced the undefined name `file_path` (NameError).
        print(f"Removed local file: {local_path}")

    return 0
56
+
57
+
58
  def save_csv_locally(dataframe, file_name, save_dir="/tmp"):
59
  # Ensure the save directory exists
60
  os.makedirs(save_dir, exist_ok=True)
 
68
 
69
  return file_path
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def save_similarity_output(
73
  output_dict,
74
  method_name,
75
+ leaderboard_path="/tmp/leaderboard_results.csv",
76
+ similarity_path="/tmp/similarity_results.csv",
77
+ repo_id="mgyigit/probe-data",
78
  ):
79
  # Load or initialize the DataFrames
80
  if os.path.exists(leaderboard_path):
 
95
  new_row['Method'] = method_name
96
  similarity_df = pd.concat([similarity_df, pd.DataFrame([new_row])], ignore_index=True)
97
 
 
98
  if method_name not in leaderboard_df['Method'].values:
99
  new_row = {col: None for col in leaderboard_df.columns}
100
  new_row['Method'] = method_name
101
  leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
102
 
 
103
  averages = {}
 
 
104
  for dataset in ['sparse', '200', '500']:
105
  correlation_values = []
106
  pvalue_values = []
 
134
  similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
135
  leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
136
 
 
137
  leaderboard_file = save_csv_locally(leaderboard_df, "leaderboard_results.csv")
138
  similarity_file = save_csv_locally(similarity_df, "similarity_results.csv")
139
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  return 0
141
 
142
  def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):