Spaces:
Sleeping
Sleeping
Commit
·
cd60b33
1
Parent(s):
8336e50
Update code/alphafold_featureVector.py
Browse files
code/alphafold_featureVector.py
CHANGED
@@ -607,11 +607,8 @@ def alphafold(input_set, mode, impute):
|
|
607 |
pdbSequence, Path(path_to_output_files / 'alignment_files'))
|
608 |
|
609 |
pdb_alignStatus = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[0]
|
610 |
-
st.write('alignment_list---')
|
611 |
info_per_model[mod]['pdb_alignStatus'] = pdb_alignStatus
|
612 |
-
|
613 |
-
st.write(uniprot_matched.at[i, 'pos'])
|
614 |
-
st.write(pdb_alignStatus)
|
615 |
mutationPositionOnPDB = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[1]
|
616 |
info_per_model[mod]['mutationPositionOnPDB'] = mutationPositionOnPDB
|
617 |
startGap = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[2]
|
@@ -629,7 +626,6 @@ def alphafold(input_set, mode, impute):
|
|
629 |
info_per_model[mod][annot] = annotation_pos_on_pdb_
|
630 |
|
631 |
|
632 |
-
st.write('Downloading the model from ASCARIS dataset.')
|
633 |
pdb_path = hf_hub_download(repo_id="HuBioDataLab/AlphafoldStructures", filename=f"AF-{uniprotID}-F{mod}-model_v4.pdb.gz",repo_type = 'dataset')
|
634 |
|
635 |
|
@@ -637,7 +633,6 @@ def alphafold(input_set, mode, impute):
|
|
637 |
# file_content = f.read()
|
638 |
# st.write(file_content)
|
639 |
|
640 |
-
st.write('Download complete.')
|
641 |
|
642 |
|
643 |
#st.write(get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
@@ -648,14 +643,11 @@ def alphafold(input_set, mode, impute):
|
|
648 |
|
649 |
|
650 |
pdbSequence = convert_non_standard_amino_acids(pdbSequence)
|
651 |
-
|
652 |
-
|
653 |
-
st.write('Hello I am in 3Dalignment')
|
654 |
|
655 |
atomSequence = ''
|
656 |
coords = []
|
657 |
resnums_for_sasa = []
|
658 |
-
st.write('Hello I am in 3Dalignment GZIP')
|
659 |
with gzip.open(pdb_path, mode='rb') as f:
|
660 |
|
661 |
for line in f:
|
@@ -668,7 +660,6 @@ def alphafold(input_set, mode, impute):
|
|
668 |
atomSequence += threeToOne(line[17:20].strip())
|
669 |
coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
|
670 |
resnums_for_sasa.append(line[22:26].strip())
|
671 |
-
st.write('o-complee')
|
672 |
#f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
|
673 |
aligner.mode = 'local'
|
674 |
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
|
@@ -681,7 +672,6 @@ def alphafold(input_set, mode, impute):
|
|
681 |
# 'gzip') != None:
|
682 |
|
683 |
if alignments != None:
|
684 |
-
st.write('I am here')
|
685 |
#alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
|
686 |
# 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
687 |
# 'gzip')
|
@@ -693,10 +683,8 @@ def alphafold(input_set, mode, impute):
|
|
693 |
#fullID = f'AF-{uniprotID}-F{mod}-model_v4.pdb.gz'
|
694 |
#st.write(fullID)
|
695 |
run_freesasa(pdb_path, Path(path_to_output_files / f'freesasa_files/AF-{uniprotID}-F{mod}.txt'), include_hetatms=True,outdir=None, force_rerun=False)
|
696 |
-
st.write('Calculated')
|
697 |
#calculate_freesasa(uniprotID, mod, existing_free_sasa, alphafold_path, path_to_output_files)
|
698 |
if (mutationPositionOnPDB != 'nan'):
|
699 |
-
st.write('Here1')
|
700 |
if (int(mutationPositionOnPDB) <= 1400):
|
701 |
try:
|
702 |
coordMut = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[0]
|
@@ -707,12 +695,7 @@ def alphafold(input_set, mode, impute):
|
|
707 |
coordMut = np.NaN
|
708 |
|
709 |
sasa_pos = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[2]
|
710 |
-
st.write('sasa_pos', sasa_pos)
|
711 |
-
st.write('existing_free_sasa', existing_free_sasa)
|
712 |
-
st.write('existing_free_sasa', existing_free_sasa)
|
713 |
|
714 |
-
|
715 |
-
st.write('all', list(Path(path_to_output_files / 'freesasa_files').glob("*")))
|
716 |
|
717 |
|
718 |
if sasa_pos != np.NaN:
|
@@ -720,22 +703,20 @@ def alphafold(input_set, mode, impute):
|
|
720 |
for filename in list(Path(path_to_output_files / 'freesasa_files').glob("*"))[1:]:
|
721 |
|
722 |
try:
|
723 |
-
st.write(list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper())
|
724 |
fname = list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper()
|
725 |
except IndexError:
|
726 |
|
727 |
st.write('IndexError')
|
728 |
fname = ''
|
729 |
-
st.write(uniprotID , fname, uniprotID == fname)
|
730 |
if uniprotID == fname:
|
731 |
files = open(filename, 'r')
|
732 |
file = files.readlines()
|
733 |
for k in file:
|
734 |
-
|
735 |
if str(k.strip()[10:13].strip()) == str(sasa_pos):
|
736 |
st.write('WHY ')
|
737 |
st.write(str(k[4:7].strip()))
|
738 |
st.write('WHY 2')
|
|
|
739 |
sy.write('kk',k[4:7])
|
740 |
residue = str(k[4:7].strip())
|
741 |
|
|
|
607 |
pdbSequence, Path(path_to_output_files / 'alignment_files'))
|
608 |
|
609 |
pdb_alignStatus = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[0]
|
|
|
610 |
info_per_model[mod]['pdb_alignStatus'] = pdb_alignStatus
|
611 |
+
|
|
|
|
|
612 |
mutationPositionOnPDB = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[1]
|
613 |
info_per_model[mod]['mutationPositionOnPDB'] = mutationPositionOnPDB
|
614 |
startGap = mutation_position_on_pdb(alignment_list, uniprot_matched.at[i, 'pos'])[2]
|
|
|
626 |
info_per_model[mod][annot] = annotation_pos_on_pdb_
|
627 |
|
628 |
|
|
|
629 |
pdb_path = hf_hub_download(repo_id="HuBioDataLab/AlphafoldStructures", filename=f"AF-{uniprotID}-F{mod}-model_v4.pdb.gz",repo_type = 'dataset')
|
630 |
|
631 |
|
|
|
633 |
# file_content = f.read()
|
634 |
# st.write(file_content)
|
635 |
|
|
|
636 |
|
637 |
|
638 |
#st.write(get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan', 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
|
|
643 |
|
644 |
|
645 |
pdbSequence = convert_non_standard_amino_acids(pdbSequence)
|
646 |
+
|
|
|
|
|
647 |
|
648 |
atomSequence = ''
|
649 |
coords = []
|
650 |
resnums_for_sasa = []
|
|
|
651 |
with gzip.open(pdb_path, mode='rb') as f:
|
652 |
|
653 |
for line in f:
|
|
|
660 |
atomSequence += threeToOne(line[17:20].strip())
|
661 |
coords.append([line[31:38].strip(), line[39:46].strip(), line[47:54].strip()])
|
662 |
resnums_for_sasa.append(line[22:26].strip())
|
|
|
663 |
#f = open(Path(path_3D_alignment / f'{identifier}_{str(model_num)}_3Dalignment.txt'),"w")
|
664 |
aligner.mode = 'local'
|
665 |
aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
|
|
|
672 |
# 'gzip') != None:
|
673 |
|
674 |
if alignments != None:
|
|
|
675 |
#alignments, coords, resnums_for_sasa = get_alignments_3D(uniprotID, mod, pdb_path, pdbSequence, 'nan',
|
676 |
# 'nan', 'nan', mode, Path(path_to_output_files / '3D_alignment'),
|
677 |
# 'gzip')
|
|
|
683 |
#fullID = f'AF-{uniprotID}-F{mod}-model_v4.pdb.gz'
|
684 |
#st.write(fullID)
|
685 |
run_freesasa(pdb_path, Path(path_to_output_files / f'freesasa_files/AF-{uniprotID}-F{mod}.txt'), include_hetatms=True,outdir=None, force_rerun=False)
|
|
|
686 |
#calculate_freesasa(uniprotID, mod, existing_free_sasa, alphafold_path, path_to_output_files)
|
687 |
if (mutationPositionOnPDB != 'nan'):
|
|
|
688 |
if (int(mutationPositionOnPDB) <= 1400):
|
689 |
try:
|
690 |
coordMut = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[0]
|
|
|
695 |
coordMut = np.NaN
|
696 |
|
697 |
sasa_pos = get_coords(mutationPositionOnPDB, alignments, coords, resnums_for_sasa, mode)[2]
|
|
|
|
|
|
|
698 |
|
|
|
|
|
699 |
|
700 |
|
701 |
if sasa_pos != np.NaN:
|
|
|
703 |
for filename in list(Path(path_to_output_files / 'freesasa_files').glob("*"))[1:]:
|
704 |
|
705 |
try:
|
|
|
706 |
fname = list(filter(None, str(filename).split('.')))[0].split('/')[-1].split('-')[1].upper()
|
707 |
except IndexError:
|
708 |
|
709 |
st.write('IndexError')
|
710 |
fname = ''
|
|
|
711 |
if uniprotID == fname:
|
712 |
files = open(filename, 'r')
|
713 |
file = files.readlines()
|
714 |
for k in file:
|
|
|
715 |
if str(k.strip()[10:13].strip()) == str(sasa_pos):
|
716 |
st.write('WHY ')
|
717 |
st.write(str(k[4:7].strip()))
|
718 |
st.write('WHY 2')
|
719 |
+
st.write('kk', k)
|
720 |
sy.write('kk',k[4:7])
|
721 |
residue = str(k[4:7].strip())
|
722 |
|