File size: 2,826 Bytes
753e275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import argparse
import abnumber
from Bio import PDB
from Bio.PDB import Model, Chain, Residue, Selection
from Bio.Data import SCOPData
from typing import List, Tuple


def biopython_chain_to_sequence(chain: Chain.Chain):
    """Convert a Biopython chain into a one-letter amino-acid string.

    Args:
        chain: Chain whose residues are to be read.

    Returns:
        A tuple ``(seq, residue_list)``: ``seq`` holds one character per
        residue, in the same order as ``residue_list``. Residue names that
        are absent from the three-to-one lookup table are written as ``'X'``.
    """
    # Flatten the chain down to residue level ('R').
    residue_list = Selection.unfold_entities(chain, 'R')
    to_one_letter = SCOPData.protein_letters_3to1.get
    seq = ''.join(to_one_letter(res.resname, 'X') for res in residue_list)
    return seq, residue_list


def assign_number_to_sequence(seq):
    """Assign Chothia numbering to the antibody variable region of ``seq``.

    Args:
        seq: One-letter amino-acid sequence of a whole chain.

    Returns:
        A tuple ``(numbers, abchain)``. ``numbers`` has one entry per
        character of ``seq``: ``(resseq, icode)`` for residues inside the
        region numbered by ``abnumber`` and ``None`` for residues outside it
        (``icode`` is ``' '`` when the position has no insertion letter).
        ``abchain`` is the ``abnumber.Chain`` built from ``seq``.

    Raises:
        ValueError: If the sequence reported by ``abnumber`` is not a
            contiguous substring of ``seq``.

    Any exception raised by ``abnumber.Chain`` itself propagates unchanged.
    """
    abchain = abnumber.Chain(seq, scheme='chothia')
    # str.find returns -1 on a miss. str.index would raise its own generic
    # "substring not found" ValueError, which made the check below
    # unreachable and hid the descriptive message.
    offset = seq.find(abchain.seq)
    if offset < 0:
        raise ValueError(
            'The identified Fv sequence is not a subsequence of the original sequence.'
        )

    numbers = [None] * len(seq)
    # Positions are laid out consecutively starting at the match offset.
    for i, (pos, _aa) in enumerate(abchain, start=offset):
        icode = pos.letter if pos.letter else ' '
        numbers[i] = (pos.number, icode)
    return numbers, abchain


def renumber_biopython_chain(chain_id, residue_list: List[Residue.Residue], numbers: List[Tuple[int, str]]):
    """Build a new chain containing renumbered copies of the given residues.

    Args:
        chain_id: Identifier given to the newly created chain.
        residue_list: Source residues, paired positionally with ``numbers``.
        numbers: For each residue, a ``(resseq, icode)`` pair, or ``None``
            to leave that residue out of the new chain.

    Returns:
        A new ``Chain`` holding a copy of every residue that has a number.
        The input residues are not modified.
    """
    renumbered = Chain.Chain(chain_id)
    for original, assigned in zip(residue_list, numbers):
        if assigned is not None:
            duplicate = original.copy()
            # Keep the first id field as-is; replace the sequence number and
            # insertion code with the assigned ones.
            duplicate.id = (duplicate.id[0], assigned[0], assigned[1])
            renumbered.add(duplicate)
    return renumbered


def renumber(in_pdb, out_pdb, return_other_chains=False):
    """Renumber the antibody chains of a PDB file and write the result.

    Only the first model of the input structure is processed. Each chain
    that can be numbered is replaced by its renumbered copy; a chain for
    which ``abnumber.ChainParseError`` is raised is copied over unchanged.
    A progress line is printed for every chain.

    Args:
        in_pdb: Input PDB passed to the Biopython parser.
        out_pdb: Destination the resulting model is saved to.
        return_other_chains: When true, also return the ids of the chains
            that could not be numbered.

    Returns:
        ``(heavy_chains, light_chains)``, or
        ``(heavy_chains, light_chains, other_chains)`` when
        ``return_other_chains`` is true. Each element is a list of chain ids.
    """
    source_model = PDB.PDBParser(QUIET=True).get_structure(None, in_pdb)[0]
    model_new = Model.Model(0)

    heavy_chains = []
    light_chains = []
    other_chains = []

    for chain in source_model:
        try:
            seq, reslist = biopython_chain_to_sequence(chain)
            numbers, abchain = assign_number_to_sequence(seq)
            chain_new = renumber_biopython_chain(chain.id, reslist, numbers)
        except abnumber.ChainParseError as e:
            # Not an antibody chain: keep it as-is in the output.
            print(f'[INFO] Chain {chain.id} does not contain valid Fv: {str(e)}')
            chain_new = chain.copy()
            other_chains.append(chain_new.id)
        else:
            print(f'[INFO] Renumbered chain {chain_new.id} ({abchain.chain_type})')
            chain_type = abchain.chain_type
            if chain_type == 'H':
                heavy_chains.append(chain_new.id)
            elif chain_type in ('K', 'L'):
                light_chains.append(chain_new.id)
        model_new.add(chain_new)

    writer = PDB.PDBIO()
    writer.set_structure(model_new)
    writer.save(out_pdb)

    if not return_other_chains:
        return heavy_chains, light_chains
    return heavy_chains, light_chains, other_chains


def main():
    """Parse the ``in_pdb`` and ``out_pdb`` arguments and run ``renumber``."""
    cli = argparse.ArgumentParser()
    for positional in ('in_pdb', 'out_pdb'):
        cli.add_argument(positional, type=str)
    opts = cli.parse_args()

    renumber(opts.in_pdb, opts.out_pdb)

# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()