Spaces:
Sleeping
Sleeping
fatmacankara
committed on
Commit
·
bacea2c
1
Parent(s):
ab1d2d6
Update code/pdb_featureVector.py
Browse files
- code/pdb_featureVector.py +21 -4
code/pdb_featureVector.py
CHANGED
@@ -163,7 +163,8 @@ def pdb(input_set, mode, impute):
|
|
163 |
data.at[i, 'wt_sequence_match'] = 'i'
|
164 |
data.at[i, 'whichIsoform'] = whichIsoform
|
165 |
break
|
166 |
-
|
|
|
167 |
data.wt_sequence_match = data.wt_sequence_match.astype('str')
|
168 |
data.replace({'': 'nan'}, inplace=True)
|
169 |
data_size = len(data.drop_duplicates(['datapoint']))
|
@@ -287,6 +288,11 @@ def pdb(input_set, mode, impute):
|
|
287 |
pdb_info.at[index, 'chain'] = chain_id
|
288 |
pdb_info.at[index, 'resolution'] = resolution
|
289 |
index += 1
|
|
|
|
|
|
|
|
|
|
|
290 |
print('PDB file processing finished..')
|
291 |
for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
|
292 |
try:
|
@@ -426,12 +432,18 @@ def pdb(input_set, mode, impute):
|
|
426 |
with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
|
427 |
with_pdb = None
|
428 |
|
429 |
-
|
|
|
|
|
|
|
430 |
print('Aligning sequences...\n')
|
431 |
aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
432 |
aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
433 |
|
434 |
-
|
|
|
|
|
|
|
435 |
|
436 |
|
437 |
# When PDB sequence is nan, it is wrongly aligned to the UniProt sequence. Fix them.
|
@@ -474,7 +486,10 @@ def pdb(input_set, mode, impute):
|
|
474 |
yes_pdb_no_match = after_up_pdb_alignment[
|
475 |
(after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
|
476 |
no_pdb = no_pdb.copy()
|
477 |
-
|
|
|
|
|
|
|
478 |
print('PDB matching is completed...\n')
|
479 |
print('SUMMARY')
|
480 |
print('-------')
|
@@ -875,6 +890,7 @@ def pdb(input_set, mode, impute):
|
|
875 |
if protein not in existing_modbase_models:
|
876 |
print('Downloading Modbase models for ', protein)
|
877 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
|
|
878 |
req = requests.get(url)
|
879 |
name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
|
880 |
with open(name, 'wb') as f:
|
@@ -1371,6 +1387,7 @@ def pdb(input_set, mode, impute):
|
|
1371 |
|
1372 |
aligner = Align.PairwiseAligner()
|
1373 |
print('Proceeding to 3D distance calculation...\n')
|
|
|
1374 |
data.domainEndonPDB = data.domainEndonPDB.astype(str)
|
1375 |
data.domainStartonPDB = data.domainStartonPDB.astype(str)
|
1376 |
|
|
|
163 |
data.at[i, 'wt_sequence_match'] = 'i'
|
164 |
data.at[i, 'whichIsoform'] = whichIsoform
|
165 |
break
|
166 |
+
print('MATCHING UNIPTOR')
|
167 |
+
print(data.to_string())
|
168 |
data.wt_sequence_match = data.wt_sequence_match.astype('str')
|
169 |
data.replace({'': 'nan'}, inplace=True)
|
170 |
data_size = len(data.drop_duplicates(['datapoint']))
|
|
|
288 |
pdb_info.at[index, 'chain'] = chain_id
|
289 |
pdb_info.at[index, 'resolution'] = resolution
|
290 |
index += 1
|
291 |
+
|
292 |
+
print('PDB INFO')
|
293 |
+
print(pdb_info.to_string())
|
294 |
+
print('PDB FASTA')
|
295 |
+
print(pdb_fasta.to_string())
|
296 |
print('PDB file processing finished..')
|
297 |
for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
|
298 |
try:
|
|
|
432 |
with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
|
433 |
with_pdb = None
|
434 |
|
435 |
+
print('dfM')
|
436 |
+
print(dfM.to_string())
|
437 |
+
print('dfNM')
|
438 |
+
print(dfNM)
|
439 |
print('Aligning sequences...\n')
|
440 |
aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
441 |
aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
|
442 |
|
443 |
+
print('aligned_m')
|
444 |
+
print(aligned_m.to_string())
|
445 |
+
print('aligned_nm')
|
446 |
+
print(aligned_nm.to_string())
|
447 |
|
448 |
|
449 |
# When PDB sequence is nan, it is wrongly aligned to the UniProt sequence. Fix them.
|
|
|
486 |
yes_pdb_no_match = after_up_pdb_alignment[
|
487 |
(after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
|
488 |
no_pdb = no_pdb.copy()
|
489 |
+
|
490 |
+
print('-----PDB ALIGNED-----')
|
491 |
+
print(pdb_aligned.to_string())
|
492 |
+
|
493 |
print('PDB matching is completed...\n')
|
494 |
print('SUMMARY')
|
495 |
print('-------')
|
|
|
890 |
if protein not in existing_modbase_models:
|
891 |
print('Downloading Modbase models for ', protein)
|
892 |
url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
|
893 |
+
print(url)
|
894 |
req = requests.get(url)
|
895 |
name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
|
896 |
with open(name, 'wb') as f:
|
|
|
1387 |
|
1388 |
aligner = Align.PairwiseAligner()
|
1389 |
print('Proceeding to 3D distance calculation...\n')
|
1390 |
+
print(data.to_string())
|
1391 |
data.domainEndonPDB = data.domainEndonPDB.astype(str)
|
1392 |
data.domainStartonPDB = data.domainStartonPDB.astype(str)
|
1393 |
|