Source code for stcrpy.tcr_interactions.PLIPParser

import typing
import warnings
import os
import pandas as pd


from . import utils as plip_utils
from .TCRpMHC_PLIP_Model_Parser import TCRpMHC_PLIP_Model_Parser

[docs] class PLIPParser: """This class is used to parse the interactions of a TCR-pMHC complex using PLIP."""
[docs] def parse_complex( self, complex: "plip.structure.preparation.PDBComplex", tcr_pmhc_complex: typing.Union["abTCR", "gdTCR"] = None, renumbering=None, domain_assignment=None, ) -> pd.DataFrame: """ Parses PLIP profiled interactions and maps them back onto a syctpy TCR object. Args: complex (plip.structure.preparation.PDBComplex): tcr_pmhc_complex (typing.Union["abTCR", "gdTCR"], optional): _description_. Defaults to None. renumbering (_type_, optional): _description_. Defaults to None. domain_assignment (_type_, optional): _description_. Defaults to None. Returns: pd.DataFrame: _description_ """ all_interactions = [] for _, interaction_set in complex.interaction_sets.items(): for interaction in interaction_set.all_itypes: try: all_interactions.append(plip_utils.parse_interaction(interaction)) except NotImplementedError as e: print(e) continue interactions_df = self._interactions_to_dataframe(all_interactions) if renumbering is not None and len(interactions_df) > 0: self._renumber_interactions(interactions_df, renumbering) if tcr_pmhc_complex is not None: self._map_amino_acids_to_ligands(interactions_df, tcr_pmhc_complex) if domain_assignment is not None: self._assign_domains_to_chains(interactions_df, domain_assignment) return interactions_df
def _renumber_interactions(self, interactions_df, renumbering): # def imgt_number_mapping(original_idx): # imgt_insertions_to_number_map = { # "A": 1, # "B": 2, # "C": 3, # "D": 4, # "E": 5, # "F": 6, # "G": 7, # "H": 8, # "I": 9, # } # if original_idx[-1] == ' ': # return original_idx[1] # else: # return original_idx[1] + 0.1*imgt_insertions_to_number_map[original_idx] interactions_df["original_numbering"] = interactions_df.apply( lambda x: str(renumbering[x.protein_chain][(" ", x.protein_number, " ")][1]) + renumbering[x.protein_chain][(" ", x.protein_number, " ")][2].strip(), axis=1, ) return interactions_df for chain_id, renumber in renumbering.items(): for plip_idx, original_idx in renumber.items(): mask = (interactions_df.protein_chain == chain_id) & ( interactions_df.protein_number == plip_idx[1] ) if sum(mask) > 0: interactions_df[mask].loc[:, "protein_number"] = ( str(original_idx[1]) + str(original_idx[-1]) ).strip() return interactions_df def _assign_domains_to_chains(self, interactions_df, domains): chain_to_domain_mapping = {v: k for k, v in domains.items()} def assign_domain(chain_id): if chain_id in chain_to_domain_mapping: return chain_to_domain_mapping[chain_id] else: return None interactions_df["domain"] = interactions_df.protein_chain.apply(assign_domain) def _interactions_to_dataframe(self, interaction_list: list) -> pd.DataFrame: columns = [ "type", "protein_atom", "protein_chain", "protein_residue", "protein_number", "ligand_atom", "distance", "angle", "plip_id", ] interactions_as_tuples = [ interaction.to_tuple() for interaction in interaction_list ] interactions = list(zip(*interactions_as_tuples)) if len(interactions) > 0: interactions_as_dict = { columns[i]: interactions[i] for i in range(len(columns)) } return pd.DataFrame(interactions_as_dict) else: return pd.DataFrame(columns=columns) def _map_amino_acids_to_ligands( self, interactions_df: pd.DataFrame, tcr_pmhc_complex: str ): parser = TCRpMHC_PLIP_Model_Parser() _, tcr_mhc_pdb, ligand_pdb, ligand_sdf = parser.parse_tcr_pmhc_complex( tcr_pmhc_complex, delete_tmp_files=False, renumber=False ) coordinate_mapping = parser.map_amino_acids_to_ligands(ligand_pdb, ligand_sdf) if len(interactions_df) > 0: ligand_residues = interactions_df.apply( lambda x: coordinate_mapping[x.ligand_atom[0][0]], axis=1 ) interactions_df["ligand_residue"], interactions_df["ligand_number"] = map( list, zip(*ligand_residues) ) else: # return empty dataframe with appropriate columns extended_columns = list(interactions_df.columns) extended_columns.extend(["ligand_residue", "ligand_number"]) interactions_df = pd.DataFrame(columns=extended_columns) # delete temp files needed for renumbering os.remove(tcr_mhc_pdb) os.remove(ligand_pdb) os.remove(ligand_sdf) return interactions_df