Spaces:
Sleeping
Sleeping
Update src/bin/PROBE.py
Browse files
- src/bin/PROBE.py +0 -44
src/bin/PROBE.py
CHANGED
@@ -6,16 +6,6 @@ from . import target_family_classifier as tfc
|
|
6 |
from . import function_predictor as fp
|
7 |
from . import binding_affinity_estimator as bae
|
8 |
|
9 |
-
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
10 |
-
|
11 |
-
with open('probe_config.yaml') as f:
|
12 |
-
args = yaml.load(f, Loader=yaml.FullLoader)
|
13 |
-
|
14 |
-
if args["benchmark"] not in ["similarity","family","function","affinity","all"]:
|
15 |
-
parser.error('At least one benchmark type should be selected')
|
16 |
-
|
17 |
-
print(args)
|
18 |
-
|
19 |
def load_representation(multi_col_representation_vector_file_path):
|
20 |
multi_col_representation_vector = pd.read_csv(multi_col_representation_vector_file_path)
|
21 |
vals = multi_col_representation_vector.iloc[:,1:(len(multi_col_representation_vector.columns))]
|
@@ -25,40 +15,6 @@ def load_representation(multi_col_representation_vector_file_path):
|
|
25 |
original_values_as_df.loc[index] = [multi_col_representation_vector.iloc[index]['Entry']] + [list_of_floats]
|
26 |
return original_values_as_df
|
27 |
|
28 |
-
if args["benchmark"] in ["similarity","function","all"]:
|
29 |
-
print("\nRepresentation vectors are loading...\n")
|
30 |
-
representation_dataframe = load_representation(args["representation_file_human"])
|
31 |
-
|
32 |
-
if args["benchmark"] in ["similarity","all"]:
|
33 |
-
print("\nSemantic similarity Inference Benchmark is running...\n")
|
34 |
-
ssi.representation_dataframe = representation_dataframe
|
35 |
-
ssi.representation_name = args["representation_name"]
|
36 |
-
ssi.protein_names = ssi.representation_dataframe['Entry'].tolist()
|
37 |
-
ssi.similarity_tasks = args["similarity_tasks"]
|
38 |
-
ssi.detailed_output = args["detailed_output"]
|
39 |
-
ssi.calculate_all_correlations()
|
40 |
-
if args["benchmark"] in ["function","all"]:
|
41 |
-
print("\n\nOntology-based protein function prediction benchmark is running...\n")
|
42 |
-
fp.aspect_type = args["function_prediction_aspect"]
|
43 |
-
fp.dataset_type = args["function_prediction_dataset"]
|
44 |
-
fp.representation_dataframe = representation_dataframe
|
45 |
-
fp.representation_name = args["representation_name"]
|
46 |
-
fp.detailed_output = args["detailed_output"]
|
47 |
-
fp.pred_output()
|
48 |
-
if args["benchmark"] in ["family","all"]:
|
49 |
-
print("\n\nDrug target protein family classification benchmark is running...\n")
|
50 |
-
tfc.representation_path = args["representation_file_human"]
|
51 |
-
tfc.representation_name = args["representation_name"]
|
52 |
-
tfc.detailed_output = args["detailed_output"]
|
53 |
-
for dataset in args["family_prediction_dataset"]:
|
54 |
-
tfc.score_protein_rep(dataset)
|
55 |
-
if args["benchmark"] in ["affinity","all"]:
|
56 |
-
print("\n\nProtein-protein binding affinity estimation benchmark is running...\n")
|
57 |
-
bae.skempi_vectors_path = args["representation_file_affinity"]
|
58 |
-
bae.representation_name = args["representation_name"]
|
59 |
-
bae.predict_affinities_and_report_results()
|
60 |
-
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is finished...\n")
|
61 |
-
|
62 |
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspec="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
|
63 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
64 |
|
|
|
6 |
from . import function_predictor as fp
|
7 |
from . import binding_affinity_estimator as bae
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
def load_representation(multi_col_representation_vector_file_path):
|
10 |
multi_col_representation_vector = pd.read_csv(multi_col_representation_vector_file_path)
|
11 |
vals = multi_col_representation_vector.iloc[:,1:(len(multi_col_representation_vector.columns))]
|
|
|
15 |
original_values_as_df.loc[index] = [multi_col_representation_vector.iloc[index]['Entry']] + [list_of_floats]
|
16 |
return original_values_as_df
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def run_probe(benchmarks, representation_name, representation_file_human, representation_file_affinity, similarity_tasks=["Sparse","200","500"], function_prediction_aspec="All_Aspects", function_prediction_dataset="All_Data_Sets", family_prediction_dataset=["nc","uc50","uc30","mm15"], detailed_output=False):
|
19 |
print("\n\nPROBE (Protein RepresentatiOn Benchmark) run is started...\n\n")
|
20 |
|