"""
Created on 9 May 2017
@author: leem
A modified Entity class based on SAbDab's ABDB.AbPDB and Bio.PDB's entity
"""
import sys
import io
from copy import copy
from textwrap import wrap
import Bio
import Bio.PDB
from Bio.Data.IUPACData import atom_weights
from .Select import select_all
# Fixed-column layout of a PDB ATOM/HETATM record (wwPDB format v3.3):
#   1-6 record name | 7-11 serial | 13-16 atom name | 17 altLoc | 18-20 resName
#   22 chainID | 23-26 resSeq | 27 iCode | 31-38 x | 39-46 y | 47-54 z
#   55-60 occupancy | 61-66 B-factor | 73-76 segID | 77-78 element | 79-80 charge
# The literal blanks below (1, 1, 3 and 6 spaces) fill columns 12, 21, 28-30
# and 67-72; they must not be collapsed or every later field shifts.
_ATOM_FORMAT_STRING = (
    "%s%5i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s%2s\n"
)


class Entity(Bio.PDB.Entity.Entity):
    """
    A modified entity object that allows for direct writing of coordinates.
    """

    def _get_atom_line(self, atom, atom_number=None, charge=" "):
        """
        Format a single atom as a PDB ATOM/HETATM record.

        Modified from TCRDB.Bio.PDB.PDBIO

        Args:
            atom: Atom whose parent is a residue and grandparent is a chain.
            atom_number: Serial number to write. If None, the atom's own
                ``serial_number`` is used.
            charge: Charge field (right-justified into two columns).

        Returns:
            The formatted record, terminated by a newline.

        Raises:
            ValueError: If the atom's element is not a key of ``atom_weights``.
        """
        hetfield, resseq, icode = atom.parent.get_id()
        resname = atom.parent.get_resname()
        segid = atom.parent.get_segid()
        if atom_number is None:
            atom_number = atom.serial_number
        chain_id = atom.parent.parent.get_id()

        # The record name occupies exactly six columns, hence the padding
        # on "ATOM  " to match the width of "HETATM".
        record_type = "HETATM" if hetfield != " " else "ATOM  "

        if atom.element:
            element = atom.element.strip().upper()
            if element.capitalize() not in atom_weights:
                raise ValueError("Unrecognised element %r" % atom.element)
            element = element.rjust(2)
        else:
            element = "  "

        x, y, z = atom.get_coord()
        args = (
            record_type,
            atom_number,
            atom.get_fullname(),
            atom.get_altloc(),
            resname,
            chain_id,
            resseq,
            icode,
            x,
            y,
            z,
            atom.get_occupancy(),
            atom.get_bfactor(),
            segid,
            element,
            charge,
        )
        return _ATOM_FORMAT_STRING % args

    def _get_output_string(self, selection, n):
        """
        Method to get the atom lines of the entity's children.

        Args:
            selection: Selector object from TcrPDB.Select or inherited class.
            n: An integer value to number the current atom with.
                If this is None or False the original numbering from the
                pdb file is used.

        Returns:
            A ``(text, n)`` tuple: the concatenated atom records of every
            accepted descendant, and the number the next atom would receive
            (``n`` is returned unchanged when original numbering is kept).
        """
        # None/False both mean "keep the serial stored on the atom"; they
        # must never reach the %5i field, where False would be written as 0.
        keep_original = n is None or n is False
        pieces = []
        for child in self.get_list():
            if not selection.accept(child):
                continue
            if child.level == "A":
                number = None if keep_original else n
                pieces.append(self._get_atom_line(child, atom_number=number))
                if not keep_original:
                    n += 1
            else:
                child_string, n = child._get_output_string(selection, n)
                pieces.append(child_string)
        return "".join(pieces), n

    @staticmethod
    def _antigen_chains_and_types(ags):
        """Return ``(chains, types)`` as ';'-joined strings, or (None, None) if there are no antigens."""
        if not ags:
            return None, None
        chains, types = [], []
        for ag in ags:
            if ag.level == "C":  # peptide/protein/nucleic acid.
                chains.append(ag.id)
                types.append(ag.type)
            elif ag.level == "R":  # hapten / single carb
                chains.append(ag.parent.id)
                types.append(ag.type)
            elif ag.level == "F":  # carbs
                try:
                    chains.append(ag.child_list[0].parent.id)
                    types.append(ag.type)
                except IndexError:
                    # Fragment without children: its chain cannot be resolved.
                    chains.append("UNKNOWN")
                    types.append("UNKNOWN")
            else:
                chains.append("UNKNOWN")
                types.append("UNKNOWN")
        return ";".join(chains), ";".join(types)

    def _get_structure_remarks(self, structure):
        """Return REMARK 5 lines describing the TCRs, unpaired chains and warnings of ``structure``."""
        pieces = []
        for tcr in structure.get_TCRs():
            antigen_chain, antigen_type = self._antigen_chains_and_types(tcr.antigen)
            mh_chains = tcr.MHC[0].get_id() if tcr.MHC else ""
            # A paired TCR is expected to hold exactly two chains.
            ch0, ch1 = [(c.chain_type, c.id) for c in tcr.get_chains()]
            pieces.append(
                "REMARK   5 PAIRED_%s %sCHAIN=%s %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                % (
                    tcr.get_TCR_type(),
                    ch0[0],
                    ch0[1],
                    ch1[0],
                    ch1[1],
                    mh_chains,
                    antigen_chain,
                    antigen_type,
                )
            )
        for tr_chain in structure.get_unpaired_TCRchains():
            antigen_chain, antigen_type = self._antigen_chains_and_types(
                tr_chain.antigen
            )
            mhc = tr_chain.get_MHC()
            mh_chains = mhc[0].get_id() if mhc else ""
            pieces.append(
                "REMARK   5 SINGLE %sCHAIN=%s MHCCHAINS=%s AGCHAIN=%s AGTYPE=%s\n"
                % (
                    tr_chain.chain_type,
                    tr_chain.id,
                    mh_chains,
                    antigen_chain,
                    antigen_type,
                )
            )
        for warning in str(structure.warnings).split("\n"):
            if warning:
                # Long warnings are wrapped so each REMARK line stays short.
                pieces.append(
                    "\n".join(
                        "REMARK   5 " + w.upper() for w in wrap(warning, width=60)
                    )
                    + "\n"
                )
        return "".join(pieces)

    def _get_remark_string(self):
        """
        Build the REMARK 5 header for this entity.

        Walks up at most six levels of parents looking for the object that
        carries a ``warnings`` attribute (the top structure). If the top of
        the hierarchy is reached without finding one, no remarks are written.
        """
        header = (
            "REMARK   5 IMGT RENUMBERED STRUCTURE %s GENERATED BY STCRDAB\n"
            % str(self.id).upper()
        )
        header += "REMARK   5 TCR CHAINS ARE RENUMBERED IN THE VARIABLE REGIONS ONLY\n"
        header += "REMARK   5 MHC CHAINS ARE RENUMBERED IN THE G DOMAINS OR FOR B2M-GLOBULIN\n"
        header += "REMARK   5 NON-TCR and NON-MHC CHAINS ARE LEFT WITH RESIDUE IDS AS IN PDB\n"

        entity = self
        # only try and go up residue, chain, holder, model, structure
        for _ in range(6):
            # Compare against None explicitly: an entity without children may
            # be falsy (via __len__) yet still has a parent worth visiting.
            if entity is None:
                return ""
            if hasattr(entity, "warnings"):
                # the entity is the top structure
                return header + self._get_structure_remarks(entity)
            entity = entity.get_parent()
        return header

    @staticmethod
    def _write_output(output, text):
        """Write ``text`` to a path (str / os.PathLike) or to any object with a ``write`` method."""
        import os  # local import: only needed to recognise path-like objects

        if isinstance(output, (str, os.PathLike)):
            with open(output, "w") as handle:
                handle.write(text)
        elif hasattr(output, "write"):
            output.write(text)
        else:
            # Previously such values were dropped silently and nothing was saved.
            raise TypeError(
                "output must be a path or a writable file object, not %r"
                % type(output).__name__
            )

    def save(self, output=sys.stdout, renumber=True, selection=False, remarks=True):
        """
        Save the coordinates of the entity.

        Example:
            entity.save("path/to/file/filename.pdb")
            residue.save("residue1.pdb")

        Args:
            output: Where to write coordinates to. Should be a path (string or
                path-like object), an open file or sys.stdout.
                By default the output is written to stdout.
            renumber: Flag whether to renumber the atom serial numbers.
                Default is to renumber the atoms so that the first is 1 etc.
                Use renumber = False to retain the original atom numbering
                from the pdb file.
            selection: Provide a selector object to select which children of
                the entity should be outputted.
                Selection should be a selector object from TcrPDB.Select.
                Some basic selector classes are provided in the module. More
                complex classes can be created by inheriting from these.
                If selection = False (default) all atoms in the entity are output.
            remarks: Flag to print out remarks generated by TcrPDB. Default True.

        Raises:
            TypeError: If ``output`` is neither a path nor a writable object.
        """
        if not selection:
            selection = select_all()
        next_number = 1 if renumber else None

        if self.level != "TS" or len(self.child_list) == 1:
            output_string, _ = self._get_output_string(selection, next_number)
        else:
            # output method for NMR structures: one MODEL/ENDMDL pair per
            # model, with atom numbering running on across models.
            pieces = []
            # sort models by model id
            self.child_list = sorted(self.child_list, key=lambda model: model.id)
            for model in self.child_list:
                number = ("%d" % model.id).rjust(9)  # space by 9 for models
                pieces.append("MODEL%s\n" % number)
                model_string, next_number = model._get_output_string(
                    selection, next_number
                )
                pieces.append(model_string)
                pieces.append("ENDMDL\n")
            output_string = "".join(pieces)

        if remarks:
            output_string = self._get_remark_string().upper() + output_string

        self._write_output(output, output_string)

    def copy(self):
        """
        Return a copy of this entity with recursively copied children.

        Copy has been played with a bit. For my purposes the version in
        Biopython 1.61 did not work, as explicit copying of the child list
        meant that the child objects became referenced to both self and
        shallow. This may be due to overriding the residue and chain classes
        so may not be a bug in biopython.

        The children are copied by iterating over the child list rather than
        the child dictionary, which preserves their ordering.
        """
        shallow = copy(self)
        # Start from empty containers; the children are re-added below so
        # that each copy is parented to ``shallow`` only.
        shallow.child_list = []
        shallow.child_dict = {}
        shallow.xtra = copy(self.xtra)
        shallow.detach_parent()
        # Order preserving copy
        for child in self.get_list():
            assert (
                child.get_id() in self.child_dict
            ), "child id not in the child_dict keys: this is unexpected and bad :("
            shallow.add(child.copy())
        return shallow