Source code for stcrpy.tcr_processing.Entity

"""
Created on 9 May 2017
@author: leem

A modified Entity class based on SAbDab's ABDB.AbPDB and Bio.PDB's entity

"""

import sys
import io
from copy import copy
from textwrap import wrap

import Bio
import Bio.PDB
from Bio.Data.IUPACData import atom_weights
from .Select import select_all

_ATOM_FORMAT_STRING = (
    "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s%2s\n"
)


class Entity(Bio.PDB.Entity.Entity):
    """
    A modified entity object that allows for direct writing of coordinates.
    """

    def _get_atom_line(self, atom, atom_number=None, charge=" "):
        """
        Format a single atom as one coordinate line.
        Modified from TCRDB.Bio.PDB.PDBIO

        Args:
            atom: The atom to format. Its parent supplies the residue id, residue
                name and segment id, and its grandparent supplies the chain id.
            atom_number: Serial number to write. If None the atom's own
                serial_number is used.
            charge: Charge field appended after the element symbol.

        Returns:
            The formatted line, terminated with a newline.

        Raises:
            ValueError: If the atom's element is not a key of atom_weights.
        """
        residue = atom.parent
        hetfield, resseq, icode = residue.get_id()
        resname = residue.get_resname()
        segid = residue.get_segid()
        if atom_number is None:
            atom_number = atom.serial_number
        chain_id = residue.parent.get_id()

        # The record name is immediately followed by the 5-wide serial number, so
        # both record names must have the same width as "HETATM" (six characters)
        # for the remaining columns to line up.
        record_type = "HETATM" if hetfield != " " else "ATOM  "

        if atom.element:
            element = atom.element.strip().upper()
            if element.capitalize() not in atom_weights:
                raise ValueError("Unrecognised element %r" % atom.element)
            element = element.rjust(2)
        else:
            element = " "

        x, y, z = atom.get_coord()
        args = (
            record_type,
            atom_number,
            atom.get_fullname(),
            atom.get_altloc(),
            resname,
            chain_id,
            resseq,
            icode,
            x,
            y,
            z,
            atom.get_occupancy(),
            atom.get_bfactor(),
            segid,
            element,
            charge,
        )
        return _ATOM_FORMAT_STRING % args

    def _get_output_string(self, selection, n):
        """
        Method to get the atom lines of the entity's children.

        Args:
            selection: Selector object from TcrPDB.Select or inherited class.
                Children for which selection.accept(child) is false are skipped.
            n: An integer value to number the current atom with. If this is
                falsy (None or False) the original numbering is used from the
                pdb file.

        Returns:
            A tuple (output_string, n): the concatenated atom lines and the
            number the next atom would receive (n is returned unchanged when it
            is falsy).
        """
        # Collect the pieces and join once instead of repeatedly extending a string.
        pieces = []
        for child in self.get_list():
            if not selection.accept(child):
                continue
            if child.level == "A":
                # A falsy n means "keep the original serial number"; forwarding
                # False itself would be formatted as serial number 0.
                pieces.append(
                    self._get_atom_line(child, atom_number=n if n else None)
                )
                if n:
                    n += 1
            else:
                child_string, n = child._get_output_string(selection, n)
                pieces.append(child_string)
        return "".join(pieces), n

    @staticmethod
    def _ag_chain_and_type(ags):
        """
        Summarise antigens as two ';'-joined strings: chain ids and types.

        Args:
            ags: Iterable of antigen entities, or a falsy value if there are none.

        Returns:
            A tuple (chains, types), or (None, None) if ags is falsy.
        """
        if not ags:
            return None, None

        chains, types = [], []
        for ag in ags:
            if ag.level == "C":  # peptide/protein/nucleic acid.
                chain, ag_type = ag.id, ag.type
            elif ag.level == "R":  # hapten / single carb
                chain, ag_type = ag.parent.id, ag.type
            elif ag.level == "F":  # carbs
                try:
                    chain, ag_type = ag.child_list[0].parent.id, ag.type
                except IndexError:  # fragment without children
                    chain, ag_type = "UNKNOWN", "UNKNOWN"
            else:
                chain, ag_type = "UNKNOWN", "UNKNOWN"
            chains.append(chain)
            types.append(ag_type)
        return ";".join(chains), ";".join(types)

    def _get_remark_string(self):
        """
        Build the REMARK header describing the TCRs of the enclosing structure.

        Walks up the parent chain (at most six steps: residue, chain, holder,
        model, structure) looking for the first object that has a `warnings`
        attribute, which is treated as the top structure.

        Returns:
            The remark lines. If the search runs out of parents the result is
            an empty string; if six steps pass without finding the top
            structure only the fixed header lines are returned.
        """
        # NOTE(review): the remark prefixes are kept exactly as found in this
        # file; confirm their spacing against whatever parses these remarks.
        parts = [
            "REMARK 5 IMGT RENUMBERED STRUCTURE %s GENERATED BY STCRDAB\n"
            % str(self.id).upper(),
            "REMARK 5 TCR CHAINS ARE RENUMBERED IN THE VARIABLE REGIONS ONLY\n",
            "REMARK 5 MHC CHAINS ARE RENUMBERED IN THE G DOMAINS OR FOR B2M-GLOBULIN\n",
            "REMARK 5 NON-TCR and NON-MHC CHAINS ARE LEFT WITH RESIDUE IDS AS IN PDB\n",
        ]

        p = self
        for _ in range(6):  # only try and go up residue, chain, holder, model, structure
            if hasattr(p, "warnings"):  # the entity is the top structure
                for TCR in p.get_TCRs():
                    antigen_chain, antigen_type = self._ag_chain_and_type(TCR.antigen)
                    mh_chains = TCR.MHC[0].get_id() if TCR.MHC else ""
                    ch0, ch1 = [(c.chain_type, c.id) for c in TCR.get_chains()]
                    parts.append(
                        "REMARK 5 PAIRED_%s %sCHAIN=%s %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                        % (
                            TCR.get_TCR_type(),
                            ch0[0],
                            ch0[1],
                            ch1[0],
                            ch1[1],
                            mh_chains,
                            antigen_chain,
                            antigen_type,
                        )
                    )

                for TR_chain in p.get_unpaired_TCRchains():
                    antigen_chain, antigen_type = self._ag_chain_and_type(
                        TR_chain.antigen
                    )
                    mhc = TR_chain.get_MHC()
                    mh_chains = mhc[0].get_id() if mhc else ""
                    parts.append(
                        "REMARK 5 SINGLE %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                        % (
                            TR_chain.chain_type,
                            TR_chain.id,
                            mh_chains,
                            antigen_chain,
                            antigen_type,
                        )
                    )

                for warning in str(p.warnings).split("\n"):
                    if warning:
                        # Long warnings are wrapped so each remark line stays short.
                        parts.append(
                            "\n".join(
                                "REMARK 5 " + w.upper()
                                for w in wrap(warning, width=60)
                            )
                            + "\n"
                        )
                return "".join(parts)

            # Compare against None explicitly: the truth value of an entity
            # follows its length, so an entity without children would otherwise
            # be mistaken for "no parent".
            if p is None:
                return ""
            p = p.get_parent()

        return "".join(parts)

    @staticmethod
    def _write_string(output, text):
        """
        Write text to a path (str or path-like) or to an object with write().

        Raises:
            TypeError: If output is neither a path nor a writable object.
        """
        if isinstance(output, str) or hasattr(output, "__fspath__"):
            with open(output, "w") as out:
                out.write(text)
        elif hasattr(output, "write"):
            output.write(text)
        else:
            raise TypeError(
                "output must be a file path or an object with a write() method, "
                "got %r" % type(output).__name__
            )

    # TODO
    def save(self, output=sys.stdout, renumber=True, selection=False, remarks=True):
        """
        Save the coordinates of the entity.

        Example:
            entity.save("path/to/file/filename.pdb")
            residue.save("residue1.pdb")

        Args:
            output: Where to write coordinates to. Should be an open file (any
                object with a write() method), a file path, or sys.stdout.
                By default the output is written to stdout.
            renumber: Flag whether to renumber the atom serial numbers.
                Default is to renumber the atoms so that the first is 1 etc.
                Use renumber=False to retain the original atom numbering from
                the pdb file.
            selection: Provide a selector object to select which children of the
                entity should be outputted. Selection should be a selector
                object from TcrPDB.Select. Some basic selector classes are
                provided in the module. More complex classes can be created by
                inheriting from these. If selection=False (default) all atoms in
                the entity are output.
            remarks: Flag to print out remarks generated by TcrPDB.
                Default True.

        Raises:
            TypeError: If output is neither a path nor a writable object.
        """
        if not selection:
            selection = select_all()

        if self.level != "TS" or len(self.child_list) == 1:
            output_string, _ = self._get_output_string(
                selection, 1 if renumber else None
            )
        else:
            # output method for NMR structures: one MODEL/ENDMDL section per model
            # sort models by model id (this reorders self.child_list in place)
            self.child_list = sorted(self.child_list, key=lambda z: z.id)
            sections = []
            n_atoms = 0
            for mod in self.child_list:
                number = ("%d" % mod.id).rjust(9)  # space by 9 for models
                sections.append("MODEL%s\n" % number)
                if renumber:
                    # Numbering continues across models, leaving one unused
                    # serial number between consecutive models.
                    string, n_atoms = mod._get_output_string(selection, n_atoms + 1)
                else:
                    # Keep each atom's original serial number.
                    string, _ = mod._get_output_string(selection, None)
                sections.append(string)
                sections.append("ENDMDL\n")
            output_string = "".join(sections)

        remark_string = self._get_remark_string() if remarks else ""
        # The whole remark header is upper-cased before being written.
        output_string = remark_string.upper() + output_string

        self._write_string(output, output_string)

    def transform(self, rot, tran):
        """
        Apply rotation and translation to the atomic coordinates.

        The call is forwarded to every child of this entity.

        Example:
            >>> rotation=rotmat(pi, Vector(1,0,0))
            >>> translation=array((0,0,1), 'f')
            >>> entity.transform(rotation, translation)

        Args:
            rot: A right multiplying rotation matrix (3x3 Numeric array)
            tran: the translation vector (size 3 Numeric array)
        """
        for o in self.get_list():
            o.transform(rot, tran)

    def copy(self):
        """
        Return a copy of this entity with recursively copied children.

        Copy has been played with a bit. For my purposes the version in 1.61 did
        not work as explicit copying of the child list meant that the child
        objects became referenced to both self and shallow. This may be due to
        overriding the residue and chain classes so may not be a bug in
        biopython.

        When copying the child_list in the loop, I use the list to iterate over
        instead of the dictionary. This preserves the ordering of the children.

        Returns:
            The copy, detached from any parent.
        """
        shallow = copy(self)
        # Start from empty containers rather than copies of the originals, so
        # the children are only ever attached to one parent.
        shallow.child_list = []
        shallow.child_dict = {}
        shallow.xtra = copy(self.xtra)
        shallow.detach_parent()
        # Order preserving copy
        for child in self.get_list():
            assert (
                child.get_id() in self.child_dict
            ), "child id not in the child_dict keys: this is unexpected and bad :("
            shallow.add(child.copy())
        return shallow