luost26's picture
Update
753e275
raw
history blame
2.83 kB
import argparse
import abnumber
from Bio import PDB
from Bio.PDB import Model, Chain, Residue, Selection
from Bio.Data import SCOPData
from typing import List, Tuple
def biopython_chain_to_sequence(chain: Chain.Chain):
residue_list = Selection.unfold_entities(chain, 'R')
seq = ''.join([SCOPData.protein_letters_3to1.get(r.resname, 'X') for r in residue_list])
return seq, residue_list
def assign_number_to_sequence(seq):
abchain = abnumber.Chain(seq, scheme='chothia')
offset = seq.index(abchain.seq)
if not (offset >= 0):
raise ValueError(
'The identified Fv sequence is not a subsequence of the original sequence.'
)
numbers = [None for _ in range(len(seq))]
for i, (pos, aa) in enumerate(abchain):
resseq = pos.number
icode = pos.letter if pos.letter else ' '
numbers[i+offset] = (resseq, icode)
return numbers, abchain
def renumber_biopython_chain(chain_id, residue_list: List[Residue.Residue], numbers: List[Tuple[int, str]]):
chain = Chain.Chain(chain_id)
for residue, number in zip(residue_list, numbers):
if number is None:
continue
residue = residue.copy()
new_id = (residue.id[0], number[0], number[1])
residue.id = new_id
chain.add(residue)
return chain
def renumber(in_pdb, out_pdb, return_other_chains=False):
parser = PDB.PDBParser(QUIET=True)
structure = parser.get_structure(None, in_pdb)
model = structure[0]
model_new = Model.Model(0)
heavy_chains, light_chains, other_chains = [], [], []
for chain in model:
try:
seq, reslist = biopython_chain_to_sequence(chain)
numbers, abchain = assign_number_to_sequence(seq)
chain_new = renumber_biopython_chain(chain.id, reslist, numbers)
print(f'[INFO] Renumbered chain {chain_new.id} ({abchain.chain_type})')
if abchain.chain_type == 'H':
heavy_chains.append(chain_new.id)
elif abchain.chain_type in ('K', 'L'):
light_chains.append(chain_new.id)
except abnumber.ChainParseError as e:
print(f'[INFO] Chain {chain.id} does not contain valid Fv: {str(e)}')
chain_new = chain.copy()
other_chains.append(chain_new.id)
model_new.add(chain_new)
pdb_io = PDB.PDBIO()
pdb_io.set_structure(model_new)
pdb_io.save(out_pdb)
if return_other_chains:
return heavy_chains, light_chains, other_chains
else:
return heavy_chains, light_chains
def main():
parser = argparse.ArgumentParser()
parser.add_argument('in_pdb', type=str)
parser.add_argument('out_pdb', type=str)
args = parser.parse_args()
renumber(args.in_pdb, args.out_pdb)
if __name__ == '__main__':
main()