mgyigit committed on
Commit
794f79d
·
verified ·
1 Parent(s): a2ca475

Update src/bin/semantic_similarity_infer.py

Browse files
src/bin/semantic_similarity_infer.py CHANGED
@@ -1,5 +1,7 @@
1
  #!/usr/bin/env python
2
  # coding: utf-8
 
 
3
 
4
  import pandas as pd
5
  import numpy as np
@@ -68,10 +70,10 @@ def calculateCorrelationforOntology(aspect,matrix_type):
68
  proteinListNew[:] = []
69
 
70
  similarityMatrixNameDict = {}
71
- similarityMatrixNameDict["All"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix.csv"
72
- similarityMatrixNameDict["500"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv"
73
- similarityMatrixNameDict["Sparse"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv"
74
- similarityMatrixNameDict["200"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_200_proteins.csv"
75
 
76
  similarityMatrixFileName = similarityMatrixNameDict[matrix_type]
77
 
@@ -84,7 +86,8 @@ def calculateCorrelationforOntology(aspect,matrix_type):
84
  proteinListNew.append(prot)
85
  if matrix_type == "Sparse":
86
  #sparsified_similarities = np.load("SparsifiedSimilarites_for_highest_500.npy")
87
- sparsified_similarity_coordinates = np.load("../data/auxilary_input/SparsifiedSimilarityCoordinates_"+aspect+"_for_highest_500.npy")
 
88
  protParamList = sparsified_similarity_coordinates
89
  else:
90
  i = range(len(proteinList))
@@ -141,13 +144,13 @@ def calculateCorrelationforOntology(aspect,matrix_type):
141
  return (cosineCorr,manhattanCorr,euclidianCorr)
142
 
143
  def report_detailed_distance_scores(representation_name,similarity_matrix_type,aspect,distance_lists):
144
- saveFileName = "../results/Semantic_sim_inference_detailed_distance_scores"+aspect+"_"+similarity_matrix_type+"_"+representation_name+".pkl"
145
  with open(saveFileName, "wb") as f:
146
  pickle.dump(distance_lists, f)
147
 
148
  def calculate_all_correlations():
149
  for similarity_matrix_type in similarity_tasks:
150
- saveFileName = "../results/Semantic_sim_inference_"+similarity_matrix_type+"_"+representation_name+".csv"
151
  buffer = "Semantic Aspect,CosineSim_Correlation,CosineSim_Correlation p-value, ManhattanSim_Correlation,ManhattanSim_Correlation p-value, EuclidianSim_Correlation,EuclidianSim_Correlation p-value \n"
152
  f = open(saveFileName,'w')
153
  f.write(buffer)
 
1
  #!/usr/bin/env python
2
  # coding: utf-8
3
+ import os
4
+ script_dir = os.path.dirname(os.path.abspath(__file__))
5
 
6
  import pandas as pd
7
  import numpy as np
 
70
  proteinListNew[:] = []
71
 
72
  similarityMatrixNameDict = {}
73
+ similarityMatrixNameDict["All"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix.csv")
74
+ similarityMatrixNameDict["500"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv")
75
+ similarityMatrixNameDict["Sparse"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv")
76
+ similarityMatrixNameDict["200"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_200_proteins.csv")
77
 
78
  similarityMatrixFileName = similarityMatrixNameDict[matrix_type]
79
 
 
86
  proteinListNew.append(prot)
87
  if matrix_type == "Sparse":
88
  #sparsified_similarities = np.load("SparsifiedSimilarites_for_highest_500.npy")
89
+ sparsified_path = os.path.join(script_dir, "../data/auxilary_input/SparsifiedSimilarityCoordinates_"+aspect+"_for_highest_500.npy")
90
+ sparsified_similarity_coordinates = np.load(sparsified_path)
91
  protParamList = sparsified_similarity_coordinates
92
  else:
93
  i = range(len(proteinList))
 
144
  return (cosineCorr,manhattanCorr,euclidianCorr)
145
 
146
  def report_detailed_distance_scores(representation_name,similarity_matrix_type,aspect,distance_lists):
147
+ saveFileName = os.path.join(script_dir, "../results/Semantic_sim_inference_detailed_distance_scores"+aspect+"_"+similarity_matrix_type+"_"+representation_name+".pkl")
148
  with open(saveFileName, "wb") as f:
149
  pickle.dump(distance_lists, f)
150
 
151
  def calculate_all_correlations():
152
  for similarity_matrix_type in similarity_tasks:
153
+ saveFileName = os.path.join(script_dir, "../results/Semantic_sim_inference_"+similarity_matrix_type+"_"+representation_name+".csv")
154
  buffer = "Semantic Aspect,CosineSim_Correlation,CosineSim_Correlation p-value, ManhattanSim_Correlation,ManhattanSim_Correlation p-value, EuclidianSim_Correlation,EuclidianSim_Correlation p-value \n"
155
  f = open(saveFileName,'w')
156
  f.write(buffer)