fatmacankara committed on
Commit
cd60b33
·
1 Parent(s): 8336e50

Update code/alphafold_featureVector.py

Browse files
Files changed (1) hide show
  1. code/alphafold_featureVector.py +3 -22
code/alphafold_featureVector.py CHANGED
@@ -607,11 +607,8 @@ def alphafold(input_set, mode, impute):
607
  pdbSequence, Path(path_to_output_files / 'alignment_files'))
608
 
609
  pdb_alignStatus = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[0]
610
- st.write('alignment_list---')
611
  info_per_model[mod]['pdb_alignStatus'] = pdb_alignStatus
612
- st.write(alignment_list)
613
- st.write(uniprot_matched.at[i, 'pos'])
614
- st.write(pdb_alignStatus)
615
  mutationPositionOnPDB = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[1]
616
  info_per_model[mod]['mutationPositionOnPDB'] = mutationPositionOnPDB
617
  startGap = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[2]
@@ -629,7 +626,6 @@ def alphafold(input_set, mode, impute):
629
  info_per_model[mod][annot] = annotation_pos_on_pdb_
630
 
631
 
632
- st.write('Downloading the model from ASCARIS dataset.')
633
  pdb_path = hf_hub_download(repo_id="HuBioDataLab/AlphafoldStructures", filename=f"AF-{uniprotID}-F{mod}-model_v4.pdb.gz",repo_type = 'dataset')
634
 
635
 
@@ -637,7 +633,6 @@ def alphafold(input_set, mode, impute):
637
  # file_content = f.read()
638
  # st.write(file_content)
639
 
640
- st.write('Download complete.')
641
 
642
 
643
  #st.write(get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
@@ -648,14 +643,11 @@ def alphafold(input_set, mode, impute):
648
 
649
 
650
  pdbSequence = convert_non_standard_amino_acids(pdbSequence)
651
- st.write(pdbSequence)
652
-
653
- st.write('Hello I am in 3Dalignment')
654
 
655
  atomSequence = ''
656
  coords = []
657
  resnums_for_sasa = []
658
- st.write('Hello I am in 3Dalignment GZIP')
659
  with gzip.open(pdb_path, mode='rb') as f:
660
 
661
  for line in f:
@@ -668,7 +660,6 @@ def alphafold(input_set, mode, impute):
668
  atomSequence += threeToOne(line[17:20].strip())
669
  coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
670
  resnums_for_sasa.append(line[22:26].strip())
671
- st.write('o-complee')
672
  #f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
673
  aligner.mode = 'local'
674
  aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
@@ -681,7 +672,6 @@ def alphafold(input_set, mode, impute):
681
  # 'gzip') != None:
682
 
683
  if alignments != None:
684
- st.write('I am here')
685
  #alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
686
  # 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
687
  # 'gzip')
@@ -693,10 +683,8 @@ def alphafold(input_set, mode, impute):
693
  #fullID = f'AF-{uniprotID}-F{mod}-model_v4.pdb.gz'
694
  #st.write(fullID)
695
  run_freesasa(pdb_path, Path(path_to_output_files / f'freesasa_files/AF-{uniprotID}-F{mod}.txt'), include_hetatms=True,outdir=None, force_rerun=False)
696
- st.write('Calculated')
697
  #calculate_freesasa(uniprotID, mod, existing_free_sasa, alphafold_path, path_to_output_files)
698
  if (mutationPositionOnPDB != 'nan'):
699
- st.write('Here1')
700
  if (int(mutationPositionOnPDB) <= 1400):
701
  try:
702
  coordMut = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[0]
@@ -707,12 +695,7 @@ def alphafold(input_set, mode, impute):
707
  coordMut = np.NaN
708
 
709
  sasa_pos = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[2]
710
- st.write('sasa_pos', sasa_pos)
711
- st.write('existing_free_sasa', existing_free_sasa)
712
- st.write('existing_free_sasa', existing_free_sasa)
713
 
714
-
715
- st.write('all', list(Path(path_to_output_files / 'freesasa_files').glob("*")))
716
 
717
 
718
  if sasa_pos != np.NaN:
@@ -720,22 +703,20 @@ def alphafold(input_set, mode, impute):
720
  for filename in list(Path(path_to_output_files / 'freesasa_files').glob("*"))[1:]:
721
 
722
  try:
723
- st.write(list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper())
724
  fname = list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper()
725
  except IndexError:
726
 
727
  st.write('IndexError')
728
  fname = ''
729
- st.write(uniprotID , fname, uniprotID == fname)
730
  if uniprotID == fname:
731
  files = open(filename, 'r')
732
  file = files.readlines()
733
  for k in file:
734
-
735
  if str(k.strip()[10:13].strip()) == str(sasa_pos):
736
  st.write('WHY ')
737
  st.write(str(k[4:7].strip()))
738
  st.write('WHY 2')
 
739
  sy.write('kk',k[4:7])
740
  residue = str(k[4:7].strip())
741
 
 
607
  pdbSequence, Path(path_to_output_files / 'alignment_files'))
608
 
609
  pdb_alignStatus = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[0]
 
610
  info_per_model[mod]['pdb_alignStatus'] = pdb_alignStatus
611
+
 
 
612
  mutationPositionOnPDB = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[1]
613
  info_per_model[mod]['mutationPositionOnPDB'] = mutationPositionOnPDB
614
  startGap = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[2]
 
626
  info_per_model[mod][annot] = annotation_pos_on_pdb_
627
 
628
 
 
629
  pdb_path = hf_hub_download(repo_id="HuBioDataLab/AlphafoldStructures", filename=f"AF-{uniprotID}-F{mod}-model_v4.pdb.gz",repo_type = 'dataset')
630
 
631
 
 
633
  # file_content = f.read()
634
  # st.write(file_content)
635
 
 
636
 
637
 
638
  #st.write(get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
 
643
 
644
 
645
  pdbSequence = convert_non_standard_amino_acids(pdbSequence)
646
+
 
 
647
 
648
  atomSequence = ''
649
  coords = []
650
  resnums_for_sasa = []
 
651
  with gzip.open(pdb_path, mode='rb') as f:
652
 
653
  for line in f:
 
660
  atomSequence += threeToOne(line[17:20].strip())
661
  coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
662
  resnums_for_sasa.append(line[22:26].strip())
 
663
  #f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
664
  aligner.mode = 'local'
665
  aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
 
672
  # 'gzip') != None:
673
 
674
  if alignments != None:
 
675
  #alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
676
  # 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
677
  # 'gzip')
 
683
  #fullID = f'AF-{uniprotID}-F{mod}-model_v4.pdb.gz'
684
  #st.write(fullID)
685
  run_freesasa(pdb_path, Path(path_to_output_files / f'freesasa_files/AF-{uniprotID}-F{mod}.txt'), include_hetatms=True,outdir=None, force_rerun=False)
 
686
  #calculate_freesasa(uniprotID, mod, existing_free_sasa, alphafold_path, path_to_output_files)
687
  if (mutationPositionOnPDB != 'nan'):
 
688
  if (int(mutationPositionOnPDB) <= 1400):
689
  try:
690
  coordMut = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[0]
 
695
  coordMut = np.NaN
696
 
697
  sasa_pos = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[2]
 
 
 
698
 
 
 
699
 
700
 
701
  if sasa_pos != np.NaN:
 
703
  for filename in list(Path(path_to_output_files / 'freesasa_files').glob("*"))[1:]:
704
 
705
  try:
 
706
  fname = list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper()
707
  except IndexError:
708
 
709
  st.write('IndexError')
710
  fname = ''
 
711
  if uniprotID == fname:
712
  files = open(filename, 'r')
713
  file = files.readlines()
714
  for k in file:
 
715
  if str(k.strip()[10:13].strip()) == str(sasa_pos):
716
  st.write('WHY ')
717
  st.write(str(k[4:7].strip()))
718
  st.write('WHY 2')
719
+ st.write('kk', k)
720
  sy.write('kk',k[4:7])
721
  residue = str(k[4:7].strip())
722