Source code for stcrpy.tcr_methods.tcr_methods

import warnings
import requests
import os

from ..tcr_processing.TCRParser import TCRParser
from .tcr_batch_operations import batch_load_TCRs, batch_yield_TCRs



[docs]
def load_TCR(tcr_structure_file, tcr_id=None):
    """Parse one structure file and return the TCR(s) found in it.

    Parameters:
        tcr_structure_file: Path to the structure file. ``str`` and
            ``os.PathLike`` objects are accepted; path-like objects are
            converted to ``str`` before use. Any other object is passed to
            the parser unchanged, which only works if ``tcr_id`` is given.
        tcr_id (str, optional): Identifier handed to the parser. When omitted
            it defaults to the file's base name up to its first ``.``
            (``"dir/6eqa.model1.pdb"`` -> ``"6eqa"``).

    Returns:
        - The single TCR object if exactly one is found.
        - Otherwise a list of the TCR objects found (empty if none).
    """
    if isinstance(tcr_structure_file, os.PathLike):
        # Normalise pathlib.Path and friends so the string handling below
        # (and the parser) always sees a plain path string.
        tcr_structure_file = os.fspath(tcr_structure_file)

    if tcr_id is None:
        # os.path.basename understands the platform's separators, unlike a
        # hard-coded split on "/", so Windows-style paths yield a clean ID.
        tcr_id = os.path.basename(tcr_structure_file).split(".")[0]

    tcr_parser = TCRParser()
    parsed_structure = tcr_parser.get_tcr_structure(tcr_id, tcr_structure_file)
    tcrs = list(parsed_structure.get_TCRs())

    # Unwrap the common single-TCR case; callers get a list otherwise.
    if len(tcrs) == 1:
        return tcrs[0]
    return tcrs




[docs]
def load_TCRs(tcr_structure_files, tcr_ids=None):
    """Load TCRs from one structure file or from a collection of files.

    Parameters:
        tcr_structure_files: Either a single path (``str`` or
            ``os.PathLike``) or a sized collection of paths.
        tcr_ids (optional): For a single file, the ID (expected to be a
            ``str``) to assign. For a collection, a sized collection of IDs
            with the same length as ``tcr_structure_files``; it is zipped
            with the files into an ``{id: file}`` dict.

    Returns:
        - Single file: a list of the TCR objects parsed from that file.
        - Collection: whatever ``batch_load_TCRs`` returns, called with the
          ``{id: file}`` dict when matching IDs were supplied and with
          ``tcr_structure_files`` itself otherwise.

    Warns:
        - If a non-``str`` ID is given for a single file (the ID is still used).
        - If more than 10 files are requested at once.
        - If the number of IDs does not match the number of files; the IDs
          are then ignored.
    """
    if isinstance(tcr_structure_files, (str, os.PathLike)):  # loading single file
        structure_path = os.fspath(tcr_structure_files)
        if tcr_ids is None:
            # Default ID: file name without extension. os.path.basename
            # handles the platform's path separators, not only "/".
            tcr_id = os.path.basename(structure_path).split(".")[0]
        else:
            if not isinstance(tcr_ids, str):
                warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
            tcr_id = tcr_ids

        # The parser is only needed on this path, so it is created here
        # rather than unconditionally at the top of the function.
        tcr_structure = TCRParser().get_tcr_structure(tcr_id, structure_path)
        return list(tcr_structure.get_TCRs())

    if len(tcr_structure_files) > 10:
        warnings.warn(
            "Loading more than 10 TCR structure objects into memory. Consider applying generator methods to reduce memory load."
        )

    if tcr_ids is not None:
        if len(tcr_structure_files) == len(tcr_ids):
            return batch_load_TCRs(dict(zip(tcr_ids, tcr_structure_files)))
        # Mismatched lengths: warn and fall through to the ID-less call.
        warnings.warn(
            f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
        )
    return batch_load_TCRs(tcr_structure_files)




[docs]
def yield_TCRs(tcr_structure_files, tcr_ids=None):
    """Load TCRs from one file, or hand a collection to ``batch_yield_TCRs``.

    Parameters:
        tcr_structure_files: Either a single path (``str`` or
            ``os.PathLike``) or a sized collection of paths.
        tcr_ids (optional): For a single file, the ID (expected to be a
            ``str``) to assign. For a collection, a sized collection of IDs
            with the same length as ``tcr_structure_files``; it is zipped
            with the files into an ``{id: file}`` dict.

    Returns:
        - Single file: a list (not a lazy iterator) of the TCR objects
          parsed from that file.
        - Collection: whatever ``batch_yield_TCRs`` returns, called with the
          ``{id: file}`` dict when matching IDs were supplied and with
          ``tcr_structure_files`` itself otherwise.

    Warns:
        - If a non-``str`` ID is given for a single file (the ID is still used).
        - If the number of IDs does not match the number of files; the IDs
          are then ignored.
    """
    if isinstance(tcr_structure_files, (str, os.PathLike)):  # loading single file
        structure_path = os.fspath(tcr_structure_files)
        if tcr_ids is None:
            # Default ID: file name without extension. os.path.basename
            # handles the platform's path separators, not only "/".
            tcr_id = os.path.basename(structure_path).split(".")[0]
        else:
            if not isinstance(tcr_ids, str):
                warnings.warn(f"TCR ID: {tcr_ids} for a single TCR should be type str.")
            tcr_id = tcr_ids

        # The parser is only needed on this path, so it is created here
        # rather than unconditionally at the top of the function.
        tcr_structure = TCRParser().get_tcr_structure(tcr_id, structure_path)
        return list(tcr_structure.get_TCRs())

    if tcr_ids is not None:
        if len(tcr_structure_files) == len(tcr_ids):
            return batch_yield_TCRs(dict(zip(tcr_ids, tcr_structure_files)))
        # Mismatched lengths: warn and fall through to the ID-less call.
        warnings.warn(
            f"Length of TCR IDs {len(tcr_ids)} does not match length of files {len(tcr_structure_files)}. TCR IDs reverted to default."
        )
    return batch_yield_TCRs(tcr_structure_files)




[docs]
def fetch_TCR(pdb_id: str):
    """
    Fetches and parses a T-cell receptor (TCR) structure from the STCRDab or RCSB PDB databases.

    The function first attempts to download a PDB file from the STCRDab database.
    If that fails (non-200 status, empty body, STCRDab's "does not exist"
    message, or a request error), it falls back to downloading a CIF file
    from RCSB PDB. The downloaded file is then parsed using `TCRParser` to
    extract TCR structures.

    Parameters:
        pdb_id (str): The PDB identifier of the structure to be fetched.

    Returns:
        - A single TCR structure if exactly one is found.
        - A list of TCR structures if multiple are found.
        - None if no TCRs are identified (with a `UserWarning` issued).
        - None if the file could not be downloaded from either source
          (after printing an error message).

    Raises:
        - requests.exceptions.RequestException: if the fallback request to
          RCSB itself fails (e.g. times out). Failures of the STCRDab request
          only produce a warning.

    Notes:
        - STCRDab returns an error message if the requested PDB ID does not exist.
        - The download is stored in a private temporary directory that is
          removed after parsing, even if parsing raises, so files in the
          current working directory are never overwritten or deleted.

    Example:
        tcr = fetch_TCR("6eqa")

    """
    import tempfile  # function-scope import keeps this block self-contained

    stcrdab_base_url = "https://opig.stats.ox.ac.uk/webapps/stcrdab-stcrpred/pdb/"
    pdb_base_url = "https://files.rcsb.org/download/"
    # STCRDab returns '$PDB does not exist for downloading' (with HTTP 200)
    # if the PDB code is not found in the database.
    stcrdab_missing_marker = b"does not exist"

    with tempfile.TemporaryDirectory() as download_dir:
        filename = os.path.join(download_dir, f"{pdb_id.upper()}.pdb")
        url = stcrdab_base_url + pdb_id.lower()
        tcr_found = False

        try:
            tcr_found = _download_structure_file(
                url, filename, missing_marker=stcrdab_missing_marker
            )
        except requests.exceptions.Timeout:
            warnings.warn(f"Request to STCRDab ({url}) timed out. Trying RCSB.")
        except requests.exceptions.RequestException as error:
            # Any other transport failure should also trigger the fallback
            # instead of aborting the whole fetch.
            warnings.warn(f"Request to STCRDab ({url}) failed ({error}). Trying RCSB.")

        if not tcr_found:
            # Request from RCSB data base
            cif_name = f"{pdb_id.upper()}.cif"
            filename = os.path.join(download_dir, cif_name)
            url = pdb_base_url + cif_name
            if not _download_structure_file(url, filename):
                print("Failed to download file")
                # Nothing to parse; parsing a missing file would only raise.
                return None

        tcr_parser = TCRParser()
        tcr = list(tcr_parser.get_tcr_structure(pdb_id, filename).get_TCRs())

    if not tcr:
        warnings.warn(f"No TCRs identified in {pdb_id}")
        return None
    if len(tcr) == 1:
        return tcr[0]
    return tcr


def _download_structure_file(url, destination, missing_marker=None, timeout=10):
    """Stream ``url`` into ``destination``; return True if a usable file was written.

    Returns False when the response status is not 200, when the body is
    empty, or when ``missing_marker`` (bytes) occurs in the last chunk of
    the body. Request exceptions are propagated to the caller.
    """
    last_chunk = b""
    # The context manager releases the streamed connection when done.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            return False
        with open(destination, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    file.write(chunk)
                    last_chunk = chunk

    if not last_chunk:
        # HTTP 200 with an empty body: nothing worth parsing.
        return False
    return missing_marker is None or missing_marker not in last_chunk