fatmacankara committed on
Commit
bacea2c
·
1 Parent(s): ab1d2d6

Update code/pdb_featureVector.py

Browse files
Files changed (1) hide show
  1. code/pdb_featureVector.py +21 -4
code/pdb_featureVector.py CHANGED
@@ -163,7 +163,8 @@ def pdb(input_set, mode, impute):
163
  data.at[i, 'wt_sequence_match'] = 'i'
164
  data.at[i, 'whichIsoform'] = whichIsoform
165
  break
166
-
 
167
  data.wt_sequence_match = data.wt_sequence_match.astype('str')
168
  data.replace({'': 'nan'}, inplace=True)
169
  data_size = len(data.drop_duplicates(['datapoint']))
@@ -287,6 +288,11 @@ def pdb(input_set, mode, impute):
287
  pdb_info.at[index, 'chain'] = chain_id
288
  pdb_info.at[index, 'resolution'] = resolution
289
  index += 1
 
 
 
 
 
290
  print('PDB file processing finished..')
291
  for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
292
  try:
@@ -426,12 +432,18 @@ def pdb(input_set, mode, impute):
426
  with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
427
  with_pdb = None
428
 
429
-
 
 
 
430
  print('Aligning sequences...\n')
431
  aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
432
  aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
433
 
434
-
 
 
 
435
 
436
 
437
  # When PDB sequence is nan, it is wrongly aligned to the UniProt sequence. Fix them.
@@ -474,7 +486,10 @@ def pdb(input_set, mode, impute):
474
  yes_pdb_no_match = after_up_pdb_alignment[
475
  (after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
476
  no_pdb = no_pdb.copy()
477
-
 
 
 
478
  print('PDB matching is completed...\n')
479
  print('SUMMARY')
480
  print('-------')
@@ -875,6 +890,7 @@ def pdb(input_set, mode, impute):
875
  if protein not in existing_modbase_models:
876
  print('Downloading Modbase models for ', protein)
877
  url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
 
878
  req = requests.get(url)
879
  name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
880
  with open(name, 'wb') as f:
@@ -1371,6 +1387,7 @@ def pdb(input_set, mode, impute):
1371
 
1372
  aligner = Align.PairwiseAligner()
1373
  print('Proceeding to 3D distance calculation...\n')
 
1374
  data.domainEndonPDB = data.domainEndonPDB.astype(str)
1375
  data.domainStartonPDB = data.domainStartonPDB.astype(str)
1376
 
 
163
  data.at[i, 'wt_sequence_match'] = 'i'
164
  data.at[i, 'whichIsoform'] = whichIsoform
165
  break
166
+ print('MATCHING UNIPTOR')
167
+ print(data.to_string())
168
  data.wt_sequence_match = data.wt_sequence_match.astype('str')
169
  data.replace({'': 'nan'}, inplace=True)
170
  data_size = len(data.drop_duplicates(['datapoint']))
 
288
  pdb_info.at[index, 'chain'] = chain_id
289
  pdb_info.at[index, 'resolution'] = resolution
290
  index += 1
291
+
292
+ print('PDB INFO')
293
+ print(pdb_info.to_string())
294
+ print('PDB FASTA')
295
+ print(pdb_fasta.to_string())
296
  print('PDB file processing finished..')
297
  for filename in list(Path(path_to_output_files / 'pdb_structures').glob("*")):
298
  try:
 
432
  with_pdb_size = len(with_pdb.drop_duplicates(['datapoint']))
433
  with_pdb = None
434
 
435
+ print('dfM')
436
+ print(dfM.to_string())
437
+ print('dfNM')
438
+ print(dfNM)
439
  print('Aligning sequences...\n')
440
  aligned_m = final_stage(dfM, annotation_list, Path(path_to_output_files / 'alignment_files'))
441
  aligned_nm = final_stage(dfNM, annotation_list, Path(path_to_output_files / 'alignment_files'))
442
 
443
+ print('aligned_m')
444
+ print(aligned_m.to_string())
445
+ print('aligned_nm')
446
+ print(aligned_nm.to_string())
447
 
448
 
449
  # When PDB sequence is nan, it is wrongly aligned to the UniProt sequence. Fix them.
 
486
  yes_pdb_no_match = after_up_pdb_alignment[
487
  (after_up_pdb_alignment.pdbID != 'nan') & (after_up_pdb_alignment.mutationPositionOnPDB == 'nan')]
488
  no_pdb = no_pdb.copy()
489
+
490
+ print('-----PDB ALIGNED-----')
491
+ print(pdb_aligned.to_string())
492
+
493
  print('PDB matching is completed...\n')
494
  print('SUMMARY')
495
  print('-------')
 
890
  if protein not in existing_modbase_models:
891
  print('Downloading Modbase models for ', protein)
892
  url = 'https://salilab.org/modbase/retrieve/modbase/?databaseID=' + protein
893
+ print(url)
894
  req = requests.get(url)
895
  name = path_to_output_files / 'modbase_structures' / f'{protein}.txt'
896
  with open(name, 'wb') as f:
 
1387
 
1388
  aligner = Align.PairwiseAligner()
1389
  print('Proceeding to 3D distance calculation...\n')
1390
+ print(data.to_string())
1391
  data.domainEndonPDB = data.domainEndonPDB.astype(str)
1392
  data.domainStartonPDB = data.domainStartonPDB.astype(str)
1393