# DiffAb / abnumber / position.py
# luost26's picture
# Update
# 6d34920
import copy
from typing import List, Union
from abnumber.common import _validate_chain_type, SCHEME_POSITION_TO_REGION, SCHEME_VERNIER, POS_REGEX
class Position:
    """Numbered position using a given numbering scheme

    Used as a key to store Position -> Amino acid information.

    Position objects are sortable according to the schema simply using ``sorted()``.
    Ordering is implemented for the ``imgt``, ``chothia``, ``kabat`` and ``aho`` schemes.
    """

    def __init__(self, chain_type: str, number: int, letter: str, scheme: str):
        """Create a position

        :param chain_type: chain type code, checked using ``_validate_chain_type``
        :param number: position number, converted using ``int()``
        :param letter: letter suffix of the position (e.g. the ``A`` in ``112A``),
                       surrounding whitespace is stripped, use empty string for no letter
        :param scheme: numbering scheme name
        """
        _validate_chain_type(chain_type)
        self.chain_type: str = chain_type
        self.number: int = int(number)
        self.letter: str = letter.strip()
        self.scheme: str = scheme
        # By default the CDR definition follows the numbering scheme itself,
        # use set_cdr_definition() to override it
        self.cdr_definition: str = self.scheme
        self.cdr_definition_position: int = self.number

    def copy(self):
        """Return a shallow copy of this position"""
        return copy.copy(self)

    def _key(self):
        """Return the tuple used for equality and hashing"""
        # Note: We are not including chain_type, but just Heavy/Light flag, to keep Kappa and Lambda chain positions equal
        return self.chain_type_prefix(), self.number, self.letter, self.scheme

    def __repr__(self):
        return f'{self.chain_type_prefix()}{self.number}{self.letter} ({self.scheme})'

    def __str__(self):
        return self.format()

    def set_cdr_definition(self, cdr_definition: str, cdr_definition_position: int):
        """Set the CDR definition and the position number in that definition

        Both values are used by :meth:`get_region` and :meth:`is_in_vernier`.

        :param cdr_definition: CDR definition name, required
        :param cdr_definition_position: number of this position in the CDR definition, required
        """
        assert cdr_definition is not None, 'cdr_definition is required'
        assert cdr_definition_position is not None, 'cdr_definition_position is required'
        self.cdr_definition = cdr_definition
        self.cdr_definition_position = cdr_definition_position

    def format(self, chain_type=True, region=False, rjust=False, ljust=False, fillchar=' '):
        """Format Position to string

        :param chain_type: Add chain type prefix (H/L)
        :param region: Add region prefix (FR1, CDR1, ...)
        :param rjust: Align text to the right
        :param ljust: Align text to the left
        :param fillchar: Character to use for alignment padding
        :return: formatted string
        """
        formatted = f'{self.number}{self.letter}'
        if chain_type:
            formatted = f'{self.chain_type_prefix()}{formatted}'
        if region:
            formatted = f'{self.get_region()} {formatted}'
        # Padding width: 4 characters for number and letter,
        # plus 1 for the chain prefix and 5 for the region prefix with its separator
        just = 4 + 1 * int(chain_type) + 5 * int(region)
        if rjust:
            formatted = formatted.rjust(just, fillchar)
        if ljust:
            formatted = formatted.ljust(just, fillchar)
        return formatted

    def __hash__(self):
        return self._key().__hash__()

    def __eq__(self, other):
        return isinstance(other, Position) and self._key() == other._key()

    def __ge__(self, other):
        # ``>`` is resolved through the reflected ``__lt__`` of the other Position
        return self == other or self > other

    def __le__(self, other):
        return self == other or self < other

    def __lt__(self, other):
        """Compare two positions of the same chain (heavy/light) and the same scheme

        :raises TypeError: when ``other`` is not a Position
        """
        if not isinstance(other, Position):
            raise TypeError(f'Cannot compare Position object with {type(other)}: {other}')
        assert self.is_heavy_chain() == other.is_heavy_chain(), f'Positions do not come from the same chain: {self}, {other}'
        assert self.scheme == other.scheme, 'Comparing positions in different schemes is not implemented'
        return self._sort_key() < other._sort_key()

    def chain_type_prefix(self):
        """Get chain prefix of this position

        :return: ``"H"`` for chain type ``H``, ``"L"`` for chain types ``K`` and ``L``
        :raises NotImplementedError: for any other chain type
        """
        if self.chain_type == 'H':
            return 'H'
        if self.chain_type in ['K', 'L']:
            return 'L'
        raise NotImplementedError(f'Unknown chain type "{self.chain_type}"')

    def _sort_key(self):
        """Return the tuple used for ordering positions within one scheme

        :raises NotImplementedError: when ordering is not defined for the scheme
        """
        letter_ord = ord(self.letter) if self.letter else 0
        if self.scheme == 'imgt':
            if self.number in [33, 61, 112]:
                # letters at IMGT positions 33, 61 and 112 are sorted in reverse
                # (e.g. 112B comes before 112A, and 112 without letter comes last)
                letter_ord = -letter_ord
        elif self.scheme in ['chothia', 'kabat', 'aho']:
            # all letters are sorted alphabetically for these schemes
            pass
        else:
            raise NotImplementedError(f'Cannot compare positions of scheme: {self.scheme}')
        return self.is_heavy_chain(), self.number, letter_ord

    def get_region(self):
        """Get string name of this position's region

        :return: uppercase string, one of: ``"FR1", "CDR1", "FR2", "CDR2", "FR3", "CDR3", "FR4"``
        """
        if self.cdr_definition in SCHEME_POSITION_TO_REGION:
            regions = SCHEME_POSITION_TO_REGION[self.cdr_definition]
        else:
            # No entry under the plain definition name, use the chain-specific entry instead
            regions = SCHEME_POSITION_TO_REGION[f'{self.cdr_definition}_{self.chain_type}']
        return regions[self.cdr_definition_position]

    def is_in_cdr(self):
        """Check if given position is found in the CDR regions"""
        return self.get_region().lower().startswith('cdr')

    def is_in_vernier(self):
        """Check if given position is found in the Vernier zone

        :raises NotImplementedError: when the CDR definition is not ``kabat``
        """
        if self.cdr_definition != 'kabat':
            raise NotImplementedError('Vernier zone identification is currently supported '
                                      f'only with Kabat CDR definitions, got: {self.cdr_definition}')
        return self.cdr_definition_position in SCHEME_VERNIER.get(f'{self.cdr_definition}_{self.chain_type}', [])

    @classmethod
    def from_string(cls, position, chain_type, scheme):
        """Create Position object from string, e.g. "H5"

        Note that Positions parsed from string do not support separate CDR definitions.

        :param position: position string, matched in upper case using ``POS_REGEX``
        :param chain_type: chain type code, checked using ``_validate_chain_type``
        :param scheme: numbering scheme name
        :raises IndexError: when the string does not match the expected format
                            or when its chain prefix does not agree with ``chain_type``
        """
        match = POS_REGEX.match(position.upper())
        _validate_chain_type(chain_type)
        expected_chain_prefix = 'H' if chain_type == 'H' else 'L'
        if match is None:
            raise IndexError(f'Expected position format chainNumberLetter '
                             f'(e.g. "{expected_chain_prefix}112A" or "112A"), got: "{position}"')
        chain_prefix, number, letter = match.groups()
        number = int(number)
        if chain_prefix and expected_chain_prefix != chain_prefix:
            raise IndexError(f'Use no prefix or "{expected_chain_prefix}" prefix for "{chain_type}" chain. '
                             f'Got: "{chain_prefix}".')
        return cls(chain_type=chain_type, number=number, letter=letter, scheme=scheme)

    def is_heavy_chain(self):
        """Check if this position comes from a heavy chain (chain type ``H``)"""
        return self.chain_type == 'H'

    def is_light_chain(self):
        """Check if this position comes from a light chain (chain type ``K`` or ``L``)"""
        # Tuple membership, ``in 'KL'`` would be a substring test that also accepts '' and 'KL'
        return self.chain_type in ('K', 'L')
def sort_positions(positions: List[str], chain_type: str, scheme: str) -> List[str]:
    """Sort position strings to correct order based on given scheme

    Positions should either all carry a chain prefix (H/L) or all omit it;
    the returned strings carry the prefix only when the input did.

    :param positions: position strings, e.g. ``["H5", "H3"]`` or ``["5", "3"]``
    :param chain_type: chain type passed to ``Position.from_string``
    :param scheme: numbering scheme passed to ``Position.from_string``
    :return: list of formatted position strings in sorted order
    """
    # Position.from_string upper-cases its input before parsing, so detect the prefix
    # the same way, otherwise "h5" would be parsed as H5 but returned without its prefix
    has_prefix = [p.upper().startswith(('H', 'L')) for p in positions]
    assert all(has_prefix) or not any(has_prefix), 'Inconsistent position prefix'
    keep_prefix = all(has_prefix)
    position_objects = [Position.from_string(p, chain_type=chain_type, scheme=scheme) for p in positions]
    return [p.format(chain_type=keep_prefix) for p in sorted(position_objects)]