Spaces:
Runtime error
Runtime error
File size: 2,826 Bytes
753e275 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import argparse
import abnumber
from Bio import PDB
from Bio.PDB import Model, Chain, Residue, Selection
from Bio.Data import SCOPData
from typing import List, Tuple
def biopython_chain_to_sequence(chain: Chain.Chain):
    """Convert a Biopython chain into a one-letter amino-acid string.

    Args:
        chain: The chain whose residues are unfolded (level ``'R'``).

    Returns:
        A ``(seq, residue_list)`` tuple. ``seq`` has exactly one character
        per entry of ``residue_list``; residue names missing from the
        three-to-one table are written as ``'X'``.
    """
    # NOTE(review): SCOPData appears to be deprecated in newer Biopython
    # releases in favour of Bio.Data.PDBData -- confirm against the version
    # this project pins before upgrading.
    three_to_one = SCOPData.protein_letters_3to1
    residue_list = Selection.unfold_entities(chain, 'R')
    seq = ''.join(three_to_one.get(res.resname, 'X') for res in residue_list)
    return seq, residue_list
def assign_number_to_sequence(seq):
    """Assign Chothia numbering to the Fv region found inside ``seq``.

    Args:
        seq: One-letter amino-acid sequence of a whole chain.

    Returns:
        A ``(numbers, abchain)`` tuple. ``numbers`` has one entry per
        character of ``seq``: ``(resseq, icode)`` for positions covered by
        the numbered region and ``None`` everywhere else. ``abchain`` is the
        ``abnumber.Chain`` built from ``seq``.

    Raises:
        ValueError: If the sequence reported by abnumber cannot be located
            inside ``seq``.

    Any exception raised by ``abnumber.Chain`` itself propagates unchanged
    (callers in this file catch ``abnumber.ChainParseError``).
    """
    abchain = abnumber.Chain(seq, scheme='chothia')

    # str.find returns -1 on a miss. The previous str.index call raised its
    # own generic "substring not found" ValueError instead, which made the
    # check below unreachable and hid the descriptive message.
    offset = seq.find(abchain.seq)
    if offset < 0:
        raise ValueError(
            'The identified Fv sequence is not a subsequence of the original sequence.'
        )

    numbers = [None] * len(seq)
    for i, (pos, _aa) in enumerate(abchain):
        # A blank insertion code is written as a single space.
        icode = pos.letter if pos.letter else ' '
        numbers[i + offset] = (pos.number, icode)
    return numbers, abchain
def renumber_biopython_chain(chain_id, residue_list: List[Residue.Residue], numbers: List[Tuple[int, str]]):
    """Build a new chain whose residues carry the supplied numbering.

    Args:
        chain_id: Identifier given to the newly created chain.
        residue_list: Residues to copy, in order.
        numbers: Per-residue ``(resseq, icode)`` pairs, paired positionally
            with ``residue_list``. A ``None`` entry means the residue is
            left out of the new chain.

    Returns:
        A fresh ``Chain.Chain`` holding renumbered copies of the selected
        residues; the input residues are not modified.
    """
    renumbered = Chain.Chain(chain_id)
    for original, assigned in zip(residue_list, numbers):
        if assigned is not None:
            # Work on a copy so the source structure keeps its numbering.
            clone = original.copy()
            # Keep the first id field, replace the number and insertion code.
            clone.id = (clone.id[0], assigned[0], assigned[1])
            renumbered.add(clone)
    return renumbered
def renumber(in_pdb, out_pdb, return_other_chains=False):
    """Renumber the antibody chains of a PDB file and save the result.

    Every chain of the first model is passed through the numbering helpers.
    Chains for which ``abnumber.ChainParseError`` is raised are copied
    unchanged. The resulting model is written to ``out_pdb``.

    Args:
        in_pdb: Input PDB handed to ``PDBParser.get_structure``.
        out_pdb: Destination handed to ``PDBIO.save``.
        return_other_chains: When true, also return the ids of the chains
            that were copied without renumbering.

    Returns:
        ``(heavy_chains, light_chains)`` lists of chain ids, or
        ``(heavy_chains, light_chains, other_chains)`` when
        ``return_other_chains`` is true.
    """
    pdb_parser = PDB.PDBParser(QUIET=True)
    source_model = pdb_parser.get_structure(None, in_pdb)[0]
    target_model = Model.Model(0)

    heavy_chains = []
    light_chains = []
    other_chains = []
    # Chain type reported by abnumber -> list that collects matching ids.
    # Both 'K' and 'L' are recorded as light chains.
    buckets = {'H': heavy_chains, 'K': light_chains, 'L': light_chains}

    for chain in source_model:
        try:
            seq, reslist = biopython_chain_to_sequence(chain)
            numbers, abchain = assign_number_to_sequence(seq)
            chain_new = renumber_biopython_chain(chain.id, reslist, numbers)
            print(f'[INFO] Renumbered chain {chain_new.id} ({abchain.chain_type})')
            bucket = buckets.get(abchain.chain_type)
            if bucket is not None:
                bucket.append(chain_new.id)
        except abnumber.ChainParseError as e:
            print(f'[INFO] Chain {chain.id} does not contain valid Fv: {str(e)}')
            chain_new = chain.copy()
            other_chains.append(chain_new.id)
        # Renumbered or copied, every chain ends up in the output model.
        target_model.add(chain_new)

    writer = PDB.PDBIO()
    writer.set_structure(target_model)
    writer.save(out_pdb)

    if return_other_chains:
        return heavy_chains, light_chains, other_chains
    return heavy_chains, light_chains
def main():
    """Command-line entry point.

    Reads two positional arguments, ``in_pdb`` and ``out_pdb``, and passes
    them to ``renumber``.
    """
    cli = argparse.ArgumentParser()
    for positional in ('in_pdb', 'out_pdb'):
        cli.add_argument(positional, type=str)
    options = cli.parse_args()
    renumber(options.in_pdb, options.out_pdb)


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|