Source code for stcrpy.tcr_geometry.TCRCoM

import os
import Bio
from typing import Union
import warnings
import numpy as np

from ..tcr_processing.TCRParser import TCRParser
from ..tcr_processing.TCRIO import TCRIO
from ..tcr_processing import abTCR, MHCchain


# Some of this code is adapted and refactored from https://github.com/EsamTolba/TCR-CoM/
# Please see the TCRCoM_LICENSE for the license that applies to those code sections.

[docs] class TCRCoM: def __init__(self): """Abstract class for calculating TCR centre of mass after aligning TCR:pMHC complex to reference MHC structure.""" self.set_reffile() tcr_parser = TCRParser() self.ref_model = list( tcr_parser.get_tcr_structure("reference", self.reffile).get_TCRs() )[0] self.set_reference_residues()
[docs] def set_reffile(self): """Super method for setting MHC reference file. Raises: NotImplementedError """ raise NotImplementedError( "TCRCom cannot be insantiated directly, instantiate its subclass" )
[docs] def set_reference_residues(self): """Set TCR and MHC reference residues.""" self.set_tcr_reference() self.set_mhc_reference()
[docs] def set_tcr_reference(self): """Set TCR variable domain residues in reference model as residues numbered 1 to 121 for VA and 1 to 126 for VB.""" self.reference_VA_residues = [ r for r in self.ref_model.get_VA().get_residues() if r.get_id()[1] >= 1 and r.get_id()[1] <= 121 ] self.reference_VB_residues = [ r for r in self.ref_model.get_VB().get_residues() if r.get_id()[1] >= 1 and r.get_id()[1] <= 126 ]
[docs] def set_mhc_reference(self): """Super method for setting MHC reference residues for superposition. Overwritten by MHC class sepcific methods. Raises: NotImplementedError """ raise NotImplementedError( "TCRCom cannot be insantiated directly, instantiate its subclass" )
[docs] def get_filtered_TCR_residues(self, tcr: "TCR") -> "tuple[list[Bio.PDB.Residue]]": """Get variable domain residues of query TCR and filter out those without a counterpart in the reference TCR. Args: tcr (TCR): TCR structure object Returns: tuple[list[Bio.PDB.Residue]]: VA_residues, VB_residues. """ tcr_A_residues = [ tcr.get_VA()[r.get_id()] for r in self.reference_VA_residues if r.get_id() in tcr.get_VA() ] tcr_B_residues = [ tcr.get_VB()[r.get_id()] for r in self.reference_VB_residues if r.get_id() in tcr.get_VB() ] return tcr_A_residues, tcr_B_residues
[docs] def center_of_mass( self, entity: "Union[Bio.PDB.Entity.Entity, list[Bio.PDB.Atom.Atom]]", geometric: bool = False, ) -> np.array: """Calculate the mass weighted or purely geometric centre of mass of an entity or a list of atoms. Args: entity (Union[Bio.PDB.Entity.Entity, list[Bio.PDB.Atom.Atom]]): Structure entity whose centre of mass will be calculated geometric (bool, optional): Whether to calculate geometric mean. Defaults to False. Raises: ValueError: Checks input type to ensure atoms can be retrieved. ValueError: Unknown atoms. Returns: np.array: centre of mass """ # Structure, Model, Chain, Residue if isinstance(entity, Bio.PDB.Entity.Entity): atom_list = entity.get_atoms() # List of Atoms elif hasattr(entity, "__iter__") and [x for x in entity if x.level == "A"]: atom_list = entity else: raise ValueError( "Center of Mass can only be calculated from the following objects:\n" "Structure, Model, Chain, Residue, list of Atoms." ) masses, positions = zip(*[(atom.mass, atom.coord) for atom in atom_list]) positions = np.asarray(positions) if "ukn" in set(masses) and not geometric: raise ValueError( "Some Atoms don't have an element assigned.\n" "Try adding them manually or calculate the geometrical center of mass instead" ) if geometric: return positions.sum(axis=0) / len(atom_list) else: return np.matmul(np.asarray(masses), positions) / len(atom_list)
[docs] def add_com( self, mhc_com: np.array, tcr_com: np.array, VA_com: np.array, VB_com: np.array, tcr: "TCR", ) -> "TCR": """ Function to add pseudoatoms at MHC-CoM, TCR-CoM, and XYZ axis to the output PDB file Args: mhc_com (np.array): MHC centre of mass tcr_com (np.array): TCR centre of mass VA_com (np.array): Alpha chain centre of mass VB_com (np.array): Beta chain entre of mass tcr (TCR): TCR structure object Returns: TCR: Copy of the original TCR strucutre object with added pseudo-atoms. """ new_structure = tcr.copy() # mhc com mhc_com_chain = "X" new_structure.add(Bio.PDB.Chain.Chain(mhc_com_chain)) res_id = (" ", 1, " ") new_residue = Bio.PDB.Residue.Residue(res_id, "MCM", " ") new_atom = Bio.PDB.Atom.Atom("C", mhc_com, 0, 0.0, " ", "C", 1, "C") new_residue.add(new_atom) new_structure.child_dict[mhc_com_chain].add(new_residue) # tcr com tcr_com_chain = "Y" new_structure.add(Bio.PDB.Chain.Chain(tcr_com_chain)) pseudo_atom_ids = ["TCM", "ACM", "BCM"] tcr_com_list = [tcr_com, VA_com, VB_com] for i in range(3): res_id = (" ", i + 1, " ") new_residue = Bio.PDB.Residue.Residue(res_id, pseudo_atom_ids[i], " ") new_atom = Bio.PDB.Atom.Atom("C", tcr_com_list[i], 0, 0.0, " ", "C", 1, "C") new_residue.add(new_atom) new_structure.child_dict[tcr_com_chain].add(new_residue) # X,Y,Z atoms pos = [[50, 0, 0], [0, 50, 0], [0, 0, 50]] resn = ["X", "Y", "Z"] xyz_chain = "Z" new_structure.add(Bio.PDB.Chain.Chain(xyz_chain)) for i in [0, 1, 2]: res_id = (" ", i + 1, " ") new_residue = Bio.PDB.Residue.Residue(res_id, resn[i], " ") new_atom = Bio.PDB.Atom.Atom("O", pos[i], 0, 0.0, " ", "O", 1, "O") new_residue.add(new_atom) new_structure.child_dict[xyz_chain].add(new_residue) return new_structure
[docs] def calculate_centres_of_mass( self, tcr: abTCR, save_aligned_as: str = None, ) -> tuple[np.array]: """Calculate the TCR and MHC centres of mass of an stcrpy TCR structure object. Args: tcr (abTCR): TCR structure object save_aligned_as (str): Path to same alignment to. If None or False alignment is not saved. Defaults to None. Raises: NotImplementedError: Alpha Beta TCR compatible only, Gamma Delta TCRs not implemented. Returns: tuple[np.array]: tcr_com, mhc_com, tcr_VA_com, tcr_VB_com """ assert len(tcr.get_MHC()) > 0, "No MHC associated with TCR" if not isinstance(tcr, abTCR): raise NotImplementedError( f"TCR MHC geometry only implemented for abTCR types, not {type(tcr)}" ) mhc_atoms = [res["CA"] for res in self.get_filtered_MHC_residues(tcr)] ref_mhc_atoms = [ res["CA"] for res in self.reference_MHC_residues if (res.parent.chain_type, res.get_id()) in [(a.parent.parent.chain_type, a.parent.get_id()) for a in mhc_atoms] ] superimposer = Bio.PDB.Superimposer() superimposer.set_atoms(ref_mhc_atoms, mhc_atoms) self.mhc_alignment_transform = (x.astype("f") for x in superimposer.rotran) superimposer.apply(tcr.parent.get_atoms()) mhc_com = self.center_of_mass(mhc_atoms, geometric=True) tcr_VA_residues, tcr_VB_residues = self.get_filtered_TCR_residues(tcr) tcr_VA_atoms = [res["CA"] for res in tcr_VA_residues] tcr_VA_com = self.center_of_mass(tcr_VA_atoms, geometric=True) tcr_VB_atoms = [res["CA"] for res in tcr_VB_residues] tcr_VB_com = self.center_of_mass(tcr_VB_atoms, geometric=True) tcr_com = self.center_of_mass(tcr_VA_atoms + tcr_VB_atoms, geometric=True) if save_aligned_as: if not isinstance(save_aligned_as, str): save_aligned_as = os.path.join( os.getcwd(), f"{tcr.parent.parent.id}_{tcr.id}.pdb" ) aligned_tcr = self.add_com(mhc_com, tcr_com, tcr_VA_com, tcr_VB_com, tcr) io = TCRIO() io.save(aligned_tcr, save_as=save_aligned_as) # com_distance = tcr_com - mhc_com # r = np.sqrt(np.sum(np.square(com_distance))) # theta = np.degrees(np.arctan2(com_distance[1], com_distance[0])) # phi = np.degrees(np.arccos(com_distance[2] / r)) # return r, theta, phi return tcr_com, mhc_com, tcr_VA_com, tcr_VB_com
[docs] class MHCI_TCRCoM(TCRCoM): def __init__(self): """TCRCoM module for MHC Class I complexes.""" super().__init__()
[docs] def set_reffile(self): """Sets reference file for MHC class I structures""" self.reffile = os.path.join( os.path.dirname(__file__), "reference_data/dock_reference_1_imgt_numbered.pdb", )
[docs] def set_mhc_reference(self): """Set class I reference MHC residues""" mhc = self.ref_model.get_MHC()[0].get_MH1() self.reference_MHC_residues = [ r for r in mhc.get_residues() if r.get_id()[1] >= 1 and r.get_id()[1] <= 179 ]
[docs] def get_filtered_MHC_residues(self, tcr: "TCR") -> list[Bio.PDB.Residue]: """Retrieve MHC residues from query TCR and filter out those whose counterpart is not found in reference. Args: tcr (TCR): TCR structure object associated with MHC Class I. Returns: list[Bio.PDB.Residue]: filtered_MHC_residues """ mhc = tcr.get_MHC()[0] if not isinstance(mhc, MHCchain): # handle single MHC chain case mhc = mhc.get_MH1() filtered_MHC_residues = [ mhc[ref_res.get_id()] for ref_res in self.reference_MHC_residues if ref_res.get_id() in mhc ] return filtered_MHC_residues
[docs] class MHCII_TCRCoM(TCRCoM): def __init__(self): """TCRCoM module for MHC Class II complexes.""" super().__init__()
[docs] def set_reffile(self): """Sets reference file for MHC class II structures""" self.reffile = os.path.join( os.path.dirname(__file__), "reference_data/dock_reference_2_imgt_numbered.pdb", )
[docs] def set_mhc_reference(self): """Set class II reference MHC residues""" mhc = self.ref_model.get_MHC()[0] self.reference_MHC_residues = [ r for r in mhc.get_GA().get_residues() if r.get_id()[1] >= 1 and r.get_id()[1] <= 88 ] self.reference_MHC_residues.extend( [ r for r in mhc.get_GB().get_residues() if r.get_id()[1] >= 1 and r.get_id()[1] <= 87 ] )
[docs] def get_filtered_MHC_residues(self, tcr: "TCR") -> list[Bio.PDB.Residue]: """Retrieve MHC residues from query TCR and filter out those whose counterpart is not found in reference. Args: tcr (TCR): TCR structure object associated with MHC Class II. Returns: list[Bio.PDB.Residue]: filtered_MHC_residues """ mhc = tcr.get_MHC()[0] if hasattr(mhc, "get_GA"): filtered_MHC_residues = [ mhc.get_GA()[ref_res.get_id()] for ref_res in self.reference_MHC_residues if ref_res.parent.chain_type == "GA" and ref_res.get_id() in mhc.get_GA() ] else: warnings.warn(f"No GA chain found for MHC class II: {mhc}") if hasattr(mhc, "get_GA"): filtered_MHC_residues.extend( [ mhc.get_GB()[ref_res.get_id()] for ref_res in self.reference_MHC_residues if ref_res.parent.chain_type == "GB" and ref_res.get_id() in mhc.get_GB() ] ) else: warnings.warn(f"No GB chain found for MHC class II: {mhc}") return filtered_MHC_residues