Spaces:
Sleeping
Sleeping
Update src/bin/semantic_similarity_infer.py
Browse files
src/bin/semantic_similarity_infer.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
# coding: utf-8
|
|
|
|
|
3 |
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
@@ -68,10 +70,10 @@ def calculateCorrelationforOntology(aspect,matrix_type):
|
|
68 |
proteinListNew[:] = []
|
69 |
|
70 |
similarityMatrixNameDict = {}
|
71 |
-
similarityMatrixNameDict["All"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix.csv"
|
72 |
-
similarityMatrixNameDict["500"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv"
|
73 |
-
similarityMatrixNameDict["Sparse"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv"
|
74 |
-
similarityMatrixNameDict["200"] = "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_200_proteins.csv"
|
75 |
|
76 |
similarityMatrixFileName = similarityMatrixNameDict[matrix_type]
|
77 |
|
@@ -84,7 +86,8 @@ def calculateCorrelationforOntology(aspect,matrix_type):
|
|
84 |
proteinListNew.append(prot)
|
85 |
if matrix_type == "Sparse":
|
86 |
#sparsified_similarities = np.load("SparsifiedSimilarites_for_highest_500.npy")
|
87 |
-
|
|
|
88 |
protParamList = sparsified_similarity_coordinates
|
89 |
else:
|
90 |
i = range(len(proteinList))
|
@@ -141,13 +144,13 @@ def calculateCorrelationforOntology(aspect,matrix_type):
|
|
141 |
return (cosineCorr,manhattanCorr,euclidianCorr)
|
142 |
|
143 |
def report_detailed_distance_scores(representation_name,similarity_matrix_type,aspect,distance_lists):
|
144 |
-
saveFileName = "../results/Semantic_sim_inference_detailed_distance_scores"+aspect+"_"+similarity_matrix_type+"_"+representation_name+".pkl"
|
145 |
with open(saveFileName, "wb") as f:
|
146 |
pickle.dump(distance_lists, f)
|
147 |
|
148 |
def calculate_all_correlations():
|
149 |
for similarity_matrix_type in similarity_tasks:
|
150 |
-
saveFileName = "../results/Semantic_sim_inference_"+similarity_matrix_type+"_"+representation_name+".csv"
|
151 |
buffer = "Semantic Aspect,CosineSim_Correlation,CosineSim_Correlation p-value, ManhattanSim_Correlation,ManhattanSim_Correlation p-value, EuclidianSim_Correlation,EuclidianSim_Correlation p-value \n"
|
152 |
f = open(saveFileName,'w')
|
153 |
f.write(buffer)
|
|
|
1 |
#!/usr/bin/env python
|
2 |
# coding: utf-8
|
3 |
+
import os
|
4 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
5 |
|
6 |
import pandas as pd
|
7 |
import numpy as np
|
|
|
70 |
proteinListNew[:] = []
|
71 |
|
72 |
similarityMatrixNameDict = {}
|
73 |
+
similarityMatrixNameDict["All"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix.csv")
|
74 |
+
similarityMatrixNameDict["500"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv")
|
75 |
+
similarityMatrixNameDict["Sparse"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_500_proteins.csv")
|
76 |
+
similarityMatrixNameDict["200"] = os.path.join(script_dir, "../data/preprocess/human_"+aspect+"_proteinSimilarityMatrix_for_highest_annotated_200_proteins.csv")
|
77 |
|
78 |
similarityMatrixFileName = similarityMatrixNameDict[matrix_type]
|
79 |
|
|
|
86 |
proteinListNew.append(prot)
|
87 |
if matrix_type == "Sparse":
|
88 |
#sparsified_similarities = np.load("SparsifiedSimilarites_for_highest_500.npy")
|
89 |
+
sparsified_path = os.path.join(script_dir, "../data/auxilary_input/SparsifiedSimilarityCoordinates_"+aspect+"_for_highest_500.npy")
|
90 |
+
sparsified_similarity_coordinates = np.load(sparsified_path)
|
91 |
protParamList = sparsified_similarity_coordinates
|
92 |
else:
|
93 |
i = range(len(proteinList))
|
|
|
144 |
return (cosineCorr,manhattanCorr,euclidianCorr)
|
145 |
|
146 |
def report_detailed_distance_scores(representation_name,similarity_matrix_type,aspect,distance_lists):
|
147 |
+
saveFileName = os.path.join(script_dir, "../results/Semantic_sim_inference_detailed_distance_scores"+aspect+"_"+similarity_matrix_type+"_"+representation_name+".pkl")
|
148 |
with open(saveFileName, "wb") as f:
|
149 |
pickle.dump(distance_lists, f)
|
150 |
|
151 |
def calculate_all_correlations():
|
152 |
for similarity_matrix_type in similarity_tasks:
|
153 |
+
saveFileName = os.path.join(script_dir, "../results/Semantic_sim_inference_"+similarity_matrix_type+"_"+representation_name+".csv")
|
154 |
buffer = "Semantic Aspect,CosineSim_Correlation,CosineSim_Correlation p-value, ManhattanSim_Correlation,ManhattanSim_Correlation p-value, EuclidianSim_Correlation,EuclidianSim_Correlation p-value \n"
|
155 |
f = open(saveFileName,'w')
|
156 |
f.write(buffer)
|