mgyigit committed on
Commit
b71cdae
·
verified ·
1 Parent(s): 7e80034

Update src/saving_utils.py

Browse files
Files changed (1) hide show
  1. src/saving_utils.py +26 -25
src/saving_utils.py CHANGED
@@ -1,59 +1,60 @@
1
- import pandas as pd
2
  import os
3
-
4
- import sys
5
-
6
- script_dir = os.path.dirname(os.path.abspath(__file__))
7
- sys.path.append('..')
8
- sys.path.append('.')
9
 
10
  def save_similarity_output(output_dict, method_name, leaderboard_path="/home/user/app/src/data/leaderboard_results.csv", similarity_path="/home/user/app/src/data/similarity_results.csv"):
11
  # Load or initialize the DataFrames
12
- print(script_dir)
13
  if os.path.exists(leaderboard_path):
14
  leaderboard_df = pd.read_csv(leaderboard_path)
15
  else:
16
- print("Leaderboard df has not found!")
17
  return -1
18
 
19
  if os.path.exists(similarity_path):
20
  similarity_df = pd.read_csv(similarity_path)
21
  else:
22
- print("Similarity df has not found!")
23
  return -1
24
 
25
- # Check if method exists in similarity results
26
  if method_name not in similarity_df['Method'].values:
27
- similarity_df = pd.concat([similarity_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
28
-
 
 
 
 
 
 
 
 
 
29
  # Initialize storage for averages
30
  averages = {}
31
 
32
- # Iterate through the output_dict and calculate averages if all aspects (MF, CC, BP) are present
33
  for dataset in ['sparse', '200', '500']:
34
  correlation_values = []
35
  pvalue_values = []
36
 
37
- # Check each aspect within the dataset (MF, BP, CC)
38
  for aspect in ['MF', 'BP', 'CC']:
39
  correlation_key = f"{dataset}_{aspect}_correlation"
40
  pvalue_key = f"{dataset}_{aspect}_pvalue"
41
-
42
- # Process correlation if present
43
  if correlation_key in output_dict:
44
  correlation = output_dict[correlation_key].item()
45
  correlation_values.append(correlation)
46
- similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_{aspect}_correlation"] = correlation
47
- leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_{aspect}_correlation"] = correlation
48
-
49
- # Process pvalue if present
50
  if pvalue_key in output_dict:
51
  pvalue = output_dict[pvalue_key].item()
52
  pvalue_values.append(pvalue)
53
- similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_{aspect}_pvalue"] = pvalue
54
- leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_{aspect}_pvalue"] = pvalue
55
 
56
- # Calculate averages if all three aspects (MF, BP, CC) are present
57
  if len(correlation_values) == 3:
58
  averages[f"{dataset}_Ave_correlation"] = sum(correlation_values) / 3
59
  similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_correlation"] = averages[f"{dataset}_Ave_correlation"]
@@ -65,8 +66,8 @@ def save_similarity_output(output_dict, method_name, leaderboard_path="/home/use
65
  leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
66
 
67
  # Save the updated DataFrames back to CSV
68
- leaderboard_df.to_csv(leaderboard_path, index=False)
69
  similarity_df.to_csv(similarity_path, index=False)
 
70
 
71
  return 0
72
 
 
 
1
  import os
2
+ import pandas as pd
 
 
 
 
 
3
 
4
  def save_similarity_output(output_dict, method_name, leaderboard_path="/home/user/app/src/data/leaderboard_results.csv", similarity_path="/home/user/app/src/data/similarity_results.csv"):
5
  # Load or initialize the DataFrames
 
6
  if os.path.exists(leaderboard_path):
7
  leaderboard_df = pd.read_csv(leaderboard_path)
8
  else:
9
+ print("Leaderboard file not found!")
10
  return -1
11
 
12
  if os.path.exists(similarity_path):
13
  similarity_df = pd.read_csv(similarity_path)
14
  else:
15
+ print("Similarity file not found!")
16
  return -1
17
 
18
+ # Ensure the method exists in the similarity DataFrame
19
  if method_name not in similarity_df['Method'].values:
20
+ # Create a new row for the method with default values
21
+ new_row = {col: None for col in similarity_df.columns}
22
+ new_row['Method'] = method_name
23
+ similarity_df = pd.concat([similarity_df, pd.DataFrame([new_row])], ignore_index=True)
24
+
25
+ # Same for the leaderboard DataFrame
26
+ if method_name not in leaderboard_df['Method'].values:
27
+ new_row = {col: None for col in leaderboard_df.columns}
28
+ new_row['Method'] = method_name
29
+ leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
30
+
31
  # Initialize storage for averages
32
  averages = {}
33
 
34
+ # Iterate through the datasets and calculate averages
35
  for dataset in ['sparse', '200', '500']:
36
  correlation_values = []
37
  pvalue_values = []
38
 
 
39
  for aspect in ['MF', 'BP', 'CC']:
40
  correlation_key = f"{dataset}_{aspect}_correlation"
41
  pvalue_key = f"{dataset}_{aspect}_pvalue"
42
+
43
+ # Update correlation if present
44
  if correlation_key in output_dict:
45
  correlation = output_dict[correlation_key].item()
46
  correlation_values.append(correlation)
47
+ similarity_df.loc[similarity_df['Method'] == method_name, correlation_key] = correlation
48
+ leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{correlation_key}"] = correlation
49
+
50
+ # Update p-value if present
51
  if pvalue_key in output_dict:
52
  pvalue = output_dict[pvalue_key].item()
53
  pvalue_values.append(pvalue)
54
+ similarity_df.loc[similarity_df['Method'] == method_name, pvalue_key] = pvalue
55
+ leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{pvalue_key}"] = pvalue
56
 
57
+ # Calculate averages if all three aspects are present
58
  if len(correlation_values) == 3:
59
  averages[f"{dataset}_Ave_correlation"] = sum(correlation_values) / 3
60
  similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_correlation"] = averages[f"{dataset}_Ave_correlation"]
 
66
  leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
67
 
68
  # Save the updated DataFrames back to CSV
 
69
  similarity_df.to_csv(similarity_path, index=False)
70
+ leaderboard_df.to_csv(leaderboard_path, index=False)
71
 
72
  return 0
73