import warnings
import os
import pandas as pd
from ..tcr_processing.TCRParser import TCRParser
from ..tcr_interactions.TCRInteractionProfiler import TCRInteractionProfiler
from ..tcr_geometry.TCRGeom import TCRGeom
from ..tcr_geometry.TCRGeomFiltering import DockingGeometryFilter
from ..tcr_formats.tcr_formats import get_sequences
[docs]
class TCRBatchOperator:
def __init__(self):
self._tcr_parser = TCRParser()
def _load_geometry_calculator(self):
self._geometry_calculator = TCRGeom()
def _load_geometry_filter(self):
self._geometry_filter = DockingGeometryFilter()
[docs]
def tcrs_from_file_list(self, file_list):
for file in file_list:
tcr_id = file.split("/")[-1].split(".")[0]
try:
for tcr in self._tcr_parser.get_tcr_structure(tcr_id, file).get_TCRs():
yield tcr
except Exception as e:
warnings.warn(f"Loading {file} failed with error {str(e)}")
yield None
[docs]
def tcrs_from_file_dict(self, file_dict):
for tcr_id, file in file_dict.items():
try:
for tcr in self._tcr_parser.get_tcr_structure(tcr_id, file).get_TCRs():
yield tcr_id, tcr
except Exception as e:
warnings.warn(f"Loading {tcr_id}: {file} failed with error {str(e)}")
yield None
[docs]
def get_TCR_pMHC_interactions(self, tcr_generator, renumber=True, save_as_csv=None):
interaction_analysis_dict = {}
for tcr in tcr_generator:
if tcr is None: # handles case where file could not be parsed in generator
continue
tcr_id = f"{tcr.parent.parent.id}_{tcr.id}"
if isinstance(
tcr, tuple
): # handle case where tcr is passed as (key, value)
tcr_id, tcr = tcr
try:
interaction_analysis_dict[tcr_id] = tcr.profile_peptide_interactions()
except Exception as e:
warnings.warn(
f"Interactions profile failed for {tcr} with error {str(e)}"
)
interactions_df = pd.concat(
interaction_analysis_dict.values(),
keys=interaction_analysis_dict.keys(),
axis=0,
)
if save_as_csv is not None:
interactions_df.to_csv(save_as_csv)
return interactions_df
[docs]
def get_TCR_geometry(self, tcr_generator, mode="rudolph", save_as_csv=None):
geometries_dict = {}
for tcr in tcr_generator:
if tcr is None: # handles case where file could not be parsed in generator
continue
if isinstance(
tcr, tuple
): # handle case where tcr is passed as (key, value)
tcr_id, tcr = tcr
else:
tcr_id = f"{tcr.parent.parent.id}_{tcr.id}"
try:
geometries_dict[tcr_id] = tcr.calculate_docking_geometry(
mode=mode, as_df=True
)
except Exception as e:
warnings.warn(
f"Geometry calculation failed for {tcr} with error {str(e)}"
)
geometries_df = pd.concat(geometries_dict).droplevel(1)
if save_as_csv is not None:
geometries_df.to_csv(save_as_csv)
return geometries_df
[docs]
def get_germlines_and_alleles(self, tcr_generator, save_as_csv=None):
germlines_and_alleles_dict = {}
for tcr in tcr_generator:
if tcr is None: # handles case where file could not be parsed in generator
continue
tcr_id = f"{tcr.parent.parent.id}_{tcr.id}"
if isinstance(
tcr, tuple
): # handle case where tcr is passed as (key, value)
tcr_id, tcr = tcr
germlines_and_alleles_dict[tcr_id] = tcr.get_germlines_and_alleles()
germlines_and_alleles_df = pd.DataFrame(germlines_and_alleles_dict).T
if save_as_csv is not None:
germlines_and_alleles_df.to_csv(save_as_csv)
return germlines_and_alleles_df
[docs]
def full_analysis(self, tcr_generator, geometry_mode="rudolph", save_dir=None):
from tqdm import tqdm
germlines_and_alleles_dict = {}
geometries_dict = {}
interaction_analysis_dict = {}
for tcr in tqdm(tcr_generator):
if tcr is None: # handles case where file could not be parsed in generator
continue
if isinstance(
tcr, tuple
): # handle case where tcr is passed as (key, value)
tcr_id, tcr = tcr
else:
tcr_id = f"{tcr.parent.parent.id}_{tcr.id}"
try:
germlines_and_alleles_dict[tcr_id] = tcr.get_germlines_and_alleles()
except Exception as e:
warnings.warn(
f"Germline and allele retrieval failed for {tcr} with error {str(e)}"
)
try:
geometries_dict[tcr_id] = tcr.calculate_docking_geometry(
mode=geometry_mode, as_df=True
)
except Exception as e:
warnings.warn(
f"Geometry calculation failed for {tcr} with error {str(e)}"
)
try:
interaction_analysis_dict[tcr_id] = tcr.profile_peptide_interactions()
except Exception as e:
warnings.warn(
f"Interaction profiling failed for {tcr} with error {str(e)}"
)
germlines_and_alleles_df = pd.DataFrame(germlines_and_alleles_dict).T
geometries_df = pd.concat(geometries_dict).droplevel(1)
interactions_df = pd.concat(
interaction_analysis_dict.values(),
keys=interaction_analysis_dict.keys(),
axis=0,
)
if save_dir is not None:
geometries_df.to_csv(os.path.join(save_dir, "geometries.csv"))
germlines_and_alleles_df.to_csv(
os.path.join(save_dir, "germlines_and_alleles.csv")
)
interactions_df.to_csv(os.path.join(save_dir, "interactions.csv"))
return germlines_and_alleles_df, geometries_df, interactions_df
[docs]
def batch_load_TCRs(tcr_files):
if isinstance(tcr_files, dict):
return dict(TCRBatchOperator().tcrs_from_file_dict(tcr_files))
else:
return list(TCRBatchOperator().tcrs_from_file_list(tcr_files))
[docs]
def batch_yield_TCRs(tcr_files):
if isinstance(tcr_files, dict):
return TCRBatchOperator().tcrs_from_file_dict(tcr_files)
else:
return TCRBatchOperator().tcrs_from_file_list(tcr_files)
[docs]
def get_TCR_interactions(tcr_files, renumber=True, save_as_csv=None):
batch_ops = TCRBatchOperator()
if isinstance(tcr_files, list):
tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)
if isinstance(tcr_files, dict):
tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
return batch_ops.get_TCR_pMHC_interactions(
tcr_generator, renumber=renumber, save_as_csv=save_as_csv
)
[docs]
def get_TCR_geometry(tcr_files, mode="rudolph", save_as_csv=None):
batch_ops = TCRBatchOperator()
if isinstance(tcr_files, list):
tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)
if isinstance(tcr_files, dict):
tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
return batch_ops.get_TCR_geometry(tcr_generator, mode=mode, save_as_csv=save_as_csv)
[docs]
def get_germlines_and_alleles(tcr_files, save_as_csv=None):
batch_ops = TCRBatchOperator()
if isinstance(tcr_files, list):
tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)
if isinstance(tcr_files, dict):
tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
return batch_ops.get_germlines_and_alleles(tcr_generator, save_as_csv=save_as_csv)
[docs]
def analyse_tcrs(tcr_files, save_dir=None):
batch_ops = TCRBatchOperator()
if isinstance(tcr_files, list):
tcr_generator = batch_ops.tcrs_from_file_list(tcr_files)
if isinstance(tcr_files, dict):
tcr_generator = batch_ops.tcrs_from_file_dict(tcr_files)
return batch_ops.full_analysis(tcr_generator, save_dir=save_dir)