Spaces:
Sleeping
Sleeping
fatmacankara
committed on
Commit
·
c451111
1
Parent(s):
d6a723e
Upload 19 files
Browse files
- code/add_sasa.py +3 -5
- code/add_sequence.py +1 -1
- code/alphafold_featureVector.py +2 -2
- code/alphafold_model.py +5 -2
- code/manage_files.py +3 -1
code/add_sasa.py
CHANGED
@@ -8,7 +8,8 @@ import os
|
|
8 |
from pathlib import Path
|
9 |
import gzip
|
10 |
import shutil
|
11 |
-
|
|
|
12 |
|
13 |
def run_freesasa(infile, outfile, include_hetatms=True, outdir=None, force_rerun=False, file_type = 'gzip'):
|
14 |
if not outdir:
|
@@ -25,7 +26,6 @@ def run_freesasa(infile, outfile, include_hetatms=True, outdir=None, force_rerun
|
|
25 |
stderr=subprocess.PIPE,
|
26 |
shell=True)
|
27 |
out, err = command.communicate()
|
28 |
-
|
29 |
elif file_type == 'gzip':
|
30 |
with gzip.open(infile, 'rb') as f_in:
|
31 |
with open('file_temp.pdb', 'wb') as f_out:
|
@@ -43,8 +43,6 @@ def run_freesasa(infile, outfile, include_hetatms=True, outdir=None, force_rerun
|
|
43 |
stderr=subprocess.PIPE,
|
44 |
shell=True)
|
45 |
out, err = command.communicate()
|
46 |
-
|
47 |
-
|
48 |
return outfile
|
49 |
|
50 |
def calculate_freesasa(ID, model_num, existing_free_sasa, path_to_input,path_to_output_files, file_type = 'gzip'):
|
@@ -128,4 +126,4 @@ def sasa(source, pdbID, uniprotID, sasa_pos, wt, mode, path_to_output_files,file
|
|
128 |
return sasa
|
129 |
else:
|
130 |
sasa = 'nan'
|
131 |
-
return sasa
|
|
|
8 |
from pathlib import Path
|
9 |
import gzip
|
10 |
import shutil
|
11 |
+
|
12 |
+
|
13 |
|
14 |
def run_freesasa(infile, outfile, include_hetatms=True, outdir=None, force_rerun=False, file_type = 'gzip'):
|
15 |
if not outdir:
|
|
|
26 |
stderr=subprocess.PIPE,
|
27 |
shell=True)
|
28 |
out, err = command.communicate()
|
|
|
29 |
elif file_type == 'gzip':
|
30 |
with gzip.open(infile, 'rb') as f_in:
|
31 |
with open('file_temp.pdb', 'wb') as f_out:
|
|
|
43 |
stderr=subprocess.PIPE,
|
44 |
shell=True)
|
45 |
out, err = command.communicate()
|
|
|
|
|
46 |
return outfile
|
47 |
|
48 |
def calculate_freesasa(ID, model_num, existing_free_sasa, path_to_input,path_to_output_files, file_type = 'gzip'):
|
|
|
126 |
return sasa
|
127 |
else:
|
128 |
sasa = 'nan'
|
129 |
+
return sasa
|
code/add_sequence.py
CHANGED
@@ -41,4 +41,4 @@ def get_isoforms(protein_id):
|
|
41 |
except:
|
42 |
AttributeError
|
43 |
isoforms = {}
|
44 |
-
return isoforms
|
|
|
41 |
except:
|
42 |
AttributeError
|
43 |
isoforms = {}
|
44 |
+
return isoforms
|
code/alphafold_featureVector.py
CHANGED
@@ -339,7 +339,6 @@ def alphafold(input_set, mode, impute):
|
|
339 |
|
340 |
if get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
341 |
'gzip') != None:
|
342 |
-
|
343 |
alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
|
344 |
'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
345 |
'gzip')
|
@@ -413,11 +412,12 @@ def alphafold(input_set, mode, impute):
|
|
413 |
mod)] = list_dist_of_annots # Getting minimum of all possible models
|
414 |
# uniprot_matched.at[i, annotation_type] = minimum_position
|
415 |
else:
|
416 |
-
print('Model File Not Found')
|
417 |
|
|
|
418 |
uniprot_matched.at[i, 'sasa'] = np.NaN
|
419 |
|
420 |
|
|
|
421 |
if len(all_domain_distances) != 0:
|
422 |
uniprot_matched.at[i, 'domaindistance3D'] = min(all_domain_distances)
|
423 |
else:
|
|
|
339 |
|
340 |
if get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
341 |
'gzip') != None:
|
|
|
342 |
alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
|
343 |
'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
344 |
'gzip')
|
|
|
412 |
mod)] = list_dist_of_annots # Getting minimum of all possible models
|
413 |
# uniprot_matched.at[i, annotation_type] = minimum_position
|
414 |
else:
|
|
|
415 |
|
416 |
+
print('Model File Not Found')
|
417 |
uniprot_matched.at[i, 'sasa'] = np.NaN
|
418 |
|
419 |
|
420 |
+
|
421 |
if len(all_domain_distances) != 0:
|
422 |
uniprot_matched.at[i, 'domaindistance3D'] = min(all_domain_distances)
|
423 |
else:
|
code/alphafold_model.py
CHANGED
@@ -25,8 +25,11 @@ def which_model(position):
|
|
25 |
def modelCount(path_to_models):
|
26 |
count_list = []
|
27 |
for file in list(path_to_models.glob("*")):
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
30 |
count_dict = Counter(count_list)
|
31 |
count_dict = {';'.join(sorted(k for k in count_dict.keys() if count_dict[k] == v)): v for v in
|
32 |
set(count_dict.values())}
|
|
|
25 |
def modelCount(path_to_models):
|
26 |
count_list = []
|
27 |
for file in list(path_to_models.glob("*")):
|
28 |
+
try:
|
29 |
+
protein_id = str(file).split('-')[1]
|
30 |
+
count_list.append(protein_id)
|
31 |
+
except:
|
32 |
+
IndexError
|
33 |
count_dict = Counter(count_list)
|
34 |
count_dict = {';'.join(sorted(k for k in count_dict.keys() if count_dict[k] == v)): v for v in
|
35 |
set(count_dict.values())}
|
code/manage_files.py
CHANGED
@@ -7,7 +7,8 @@ def manage_files(mode):
|
|
7 |
swiss_model_path = path_to_input_files / 'INDEX.json'
|
8 |
fisher_path = path_to_input_files / 'significant_domains.txt'
|
9 |
path_to_interfaces = path_to_input_files / 'H_sapiens_interfacesHQ.txt'
|
10 |
-
|
|
|
11 |
path_to_output_files = Path('out_files/pdb')
|
12 |
os.makedirs(path_to_output_files / 'pdb_structures/', exist_ok=True)
|
13 |
os.makedirs(path_to_output_files / 'alignment_files/', exist_ok=True)
|
@@ -33,6 +34,7 @@ def manage_files(mode):
|
|
33 |
# Unzip before using
|
34 |
alphafold_path = Path(path_to_input_files/'alphafold_structures')
|
35 |
|
|
|
36 |
path_to_output_files = Path('out_files/alphafold')
|
37 |
os.makedirs(path_to_output_files, exist_ok=True)
|
38 |
os.makedirs(path_to_output_files / 'freesasa_files', exist_ok=True)
|
|
|
7 |
swiss_model_path = path_to_input_files / 'INDEX.json'
|
8 |
fisher_path = path_to_input_files / 'significant_domains.txt'
|
9 |
path_to_interfaces = path_to_input_files / 'H_sapiens_interfacesHQ.txt'
|
10 |
+
|
11 |
+
os.makedirs('out_files', exist_ok=True)
|
12 |
path_to_output_files = Path('out_files/pdb')
|
13 |
os.makedirs(path_to_output_files / 'pdb_structures/', exist_ok=True)
|
14 |
os.makedirs(path_to_output_files / 'alignment_files/', exist_ok=True)
|
|
|
34 |
# Unzip before using
|
35 |
alphafold_path = Path(path_to_input_files/'alphafold_structures')
|
36 |
|
37 |
+
os.makedirs('out_files', exist_ok=True)
|
38 |
path_to_output_files = Path('out_files/alphafold')
|
39 |
os.makedirs(path_to_output_files, exist_ok=True)
|
40 |
os.makedirs(path_to_output_files / 'freesasa_files', exist_ok=True)
|