from Bio.PDB import PDBParser
from Bio.PDB.Structure import Structure
from Bio.PDB.Atom import Atom, DisorderedAtom
from Bio.PDB.StructureBuilder import StructureBuilder
from Bio.PDB.Polypeptide import standard_aa_names
import numpy as np
# Datatypes for numpy structured arrays after parsing
structdtype = {
'PCS':np.dtype([
('mdl', int ),
('atm', object),
('exp', float ),
('cal', float ),
('err', float ),
('idx', int )]),
'RDC':np.dtype([
('mdl', int ),
('atm', object),
('atx', object),
('exp', float ),
('cal', float ),
('err', float ),
('idx', int )]),
'PRE':np.dtype([
('mdl', int ),
('atm', object),
('exp', float ),
('cal', float ),
('err', float ),
('idx', int )]),
'CCR':np.dtype([
('mdl', int ),
('atm', object),
('atx', object),
('exp', float ),
('cal', float ),
('err', float ),
('idx', int )])}
[docs]def rotation_matrix(axis, theta):
"""Return the rotation matrix associated with counterclockwise
rotation about the given axis by theta radians.
Parameters
----------
axis : array of floats
the [x,y,z] axis for rotation.
Returns
-------
matrix : numpy 3x3 matrix object
the rotation matrix
"""
axis = np.array(axis)
axis /= np.linalg.norm(axis)
a = np.cos(theta/2.0)
b, c, d = -axis*np.sin(theta/2.0)
aa, bb, cc, dd = a*a, b*b, c*c, d*d
bc, ad, ac, ab, bd, cd = b*c, a*d, a*c, a*b, b*d, c*d
return np.array([[aa+bb-cc-dd, 2*(bc+ad), 2*(bd-ac)],
[2*(bc-ad), aa+cc-bb-dd, 2*(cd+ab)],
[2*(bd+ac), 2*(cd-ab), aa+dd-bb-cc]])
def unique_pairing(a, b):
"""
Bijectively map two integers to a single integer.
The mapped space is minimum size.
The input is symmetric.
see `Bijective mapping f:ZxZ->N <https://stackoverflow.com/questions/919612/mapping-two-integers-to-one-in-a-unique-and-deterministic-way/>`_.
Parameters
----------
a : int
b : int
Returns
-------
c : int
bijective symmetric mapping (a, b) | (b, a) -> c
"""
c = a * b + (abs(a - b) - 1)**2 // 4
return c
def cantor_pairing(a, b):
"""
Map two integers to a single integer.
The mapped space is minimum size.
Ordering matters in this case.
see `Bijective mapping f:ZxZ->N <https://stackoverflow.com/questions/919612/mapping-two-integers-to-one-in-a-unique-and-deterministic-way/>`_.
Parameters
----------
a : int
b : int
Returns
-------
c : int
bijective mapping (a, b) -> c
"""
c = (a + b)*(a + b + 1)//2 + b
return c
def clean_indices(indices):
"""
Uniquely map a list of integers to their smallest size.
For example: [7,4,7,9,9,10,1] -> [2,1,2,3,3,4,0]
Parameters
----------
indices : array-like integers
a list of integers
Returns
-------
new_indices : array-like integers
the mapped integers with smallest size
"""
_, new_indices = np.unique(indices, return_inverse=True)
return new_indices
[docs]class CustomAtom(Atom):
MU0 = 4*np.pi*1E-7
HBAR = 1.0546E-34
gyro_lib = {
'H': 2*np.pi*42.576E6,
'N': 2*np.pi*-4.316E6,
'C': 2*np.pi*10.705E6} # rad/s/T
csa_lib = {
'H': (np.array([-5.8 , 0.0 ,5.8 ])*1E-6, 8. *(np.pi/180.)),
'N': (np.array([-62.8,-45.7 ,108.5])*1E-6, 19.*(np.pi/180.)),
'C': (np.array([-86.5 ,11.8, 74.7])*1E-6, 38.*(np.pi/180.))}
"""docstring for CustomAtom"""
[docs] def __init__(self, *arg, **kwargs):
super().__init__(*arg, **kwargs)
self.coord = np.asarray(self.coord, dtype=np.float64)
self.gamma = self.gyro_lib.get(self.element, 0.0)
self._csa = None
def __repr__(self):
return "<Atom {0:3d}-{1:}>".format(self.parent.id[1], self.name)
[docs] def top(self):
return self.parent.parent.parent.parent
@property
def position(self):
return self.coord*1E-10
@position.setter
def position(self, value):
self.coord = value*1E10
@property
def csa(self):
"""
Get the CSA tensor at the nuclear position
This uses the geometry of neighbouring atoms
and a standard library from Bax J. Am. Chem. Soc. 2000
Returns
-------
matrix : 3x3 array
the CSA tensor in the PDB frame
if appropriate nuclear positions are not
available <None> is returned.
"""
if self._csa is not None:
return self._csa
def norm(x):
return x/np.linalg.norm(x)
res = self.parent
resid = res.id
respid = resid[0], resid[1]-1, resid[2]
resnid = resid[0], resid[1]+1, resid[2]
resp = res.parent.child_dict.get(respid)
resn = res.parent.child_dict.get(resnid)
pas, beta = self.csa_lib.get(self.name, (None,None))
if resp:
Hcond = self.element=='H', 'N' in res ,'C' in resp, beta
Ncond = self.element=='N', 'H' in res ,'C' in resp, beta
else:
Hcond = (None,)
Ncond = (None,)
if resn:
Ccond = self.element=='C', 'H' in resn,'N' in resn, beta
else:
Ccond = (None,)
if all(Hcond):
NC_vec = resp['C'].coord - res['N'].coord
NH_vec = res['H'].coord - res['N'].coord
z = norm(np.cross(NC_vec, NH_vec))
R = rotation_matrix(-z,beta)
x = norm(R.dot(NH_vec))
y = norm(np.cross(z, x))
elif all(Ncond):
NC_vec = resp['C'].coord - res['N'].coord
NH_vec = res['H'].coord - res['N'].coord
y = norm(np.cross(NC_vec, NH_vec))
R = rotation_matrix(-y,beta)
z = norm(R.dot(NH_vec))
x = norm(np.cross(y, z))
elif all(Ccond):
CN_vec = resn['N'].coord - res['C'].coord
NH_vec = resn['H'].coord - resn['N'].coord
x = norm(np.cross(NH_vec, CN_vec))
R = rotation_matrix(x,beta)
z = norm(R.dot(CN_vec))
y = norm(np.cross(z, x))
else:
return np.zeros(9).reshape(3,3)
transform = np.vstack([x, y, z]).T
tensor = transform.dot(np.diag(pas)).dot(transform.T)
return tensor
@csa.setter
def csa(self, newTensor):
if newTensor is None:
self._csa = None
return
try:
assert newTensor.shape == (3,3)
except (AttributeError, AssertionError):
print("The specified CSA tensor does not have the correct format")
raise
self._csa = newTensor
[docs] def dipole_shift_tensor(self, position):
"""
Calculate the magnetic field shielding tensor at the given postition
due to the nuclear dipole
Assumes nuclear spin 1/2
Parameters
----------
position : array floats
the position (x, y, z) in meters
Returns
-------
dipole_shielding_tensor : 3x3 array
the tensor describing magnetic shielding at the given position
"""
pos = np.array(position, dtype=float) - self.position
distance = np.linalg.norm(pos)
preFactor = (self.MU0 * self.gamma * self.HBAR * 0.5) / (4.*np.pi)
p1 = (1./distance**5)*np.kron(pos,pos).reshape(3,3)
p2 = (1./distance**3)*np.identity(3)
return (preFactor * (3.*p1 - p2))
[docs]class CustomStructure(Structure):
"""This is an overload hack of the BioPython Structure object"""
[docs] def __init__(self, *arg, **kwargs):
super().__init__(*arg, **kwargs)
[docs] def parse(self, dataValues, models=None):
"""
Associate experimental data with atoms of the PDB file
This method takes a DataContainer instance from the
dataparse module
Parameters
----------
dataValues : DataContainer instance
a dictionary containing the experimental values
Returns
-------
dataArray : numpy structured array
the returned array has a row for each relevant atom
in the PDB file. The columns contain model,
experimental/calculated data, errors and indexes.
"""
if type(models)==int:
mods = [self[models]]
elif type(models) in (list, tuple):
mods = (self[m] for m in models)
else:
mods = self.get_models()
data = []
used = set([])
if dataValues.dtype in ('PCS', 'PRE'):
for m in mods:
for chain in m:
for key in dataValues:
seq, name = key
if seq in chain:
resi = chain[seq]
if name in resi:
a = resi[name]
exp, err = dataValues[key]
idx = a.serial_number
tmp = (m.id, a, exp, np.nan, err, idx)
data.append(tmp)
used.add(key)
if dataValues.dtype in ('RDC', 'CCR'):
for m in mods:
for chain in m:
for key in dataValues:
(seq1, name1), (seq2, name2) = key
if seq1 in chain and seq2 in chain:
resi1 = chain[seq1]
resi2 = chain[seq2]
if name1 in resi1 and name2 in resi2:
a1 = resi1[name1]
a2 = resi2[name2]
exp, err = dataValues[key]
idx1 = a1.serial_number
idx2 = a2.serial_number
if dataValues.dtype=='RDC':
idx = unique_pairing(idx1, idx2)
elif dataValues.dtype=='CCR':
idx = cantor_pairing(idx1, idx2)
tmp = (m.id, a1, a2, exp, np.nan, err, idx)
data.append(tmp)
used.add(key)
unused = set(dataValues) - used
if unused:
message = "WARNING: Some values were not parsed to {}:"
print(message.format(self.id))
print(list(unused))
arr = np.array(data, dtype=structdtype.get(dataValues.dtype))
arr['idx'] = clean_indices(arr['idx'])
return arr
[docs]class CustomStructureBuilder(StructureBuilder):
"""This is an overload hack of BioPython's CustomStructureBuilder"""
[docs] def __init__(self, *arg, **kwargs):
super().__init__(*arg, **kwargs)
[docs] def init_structure(self, structure_id):
self.structure = CustomStructure(structure_id)
[docs] def init_atom(self, name, coord, b_factor, occupancy, altloc, fullname,
serial_number=None, element=None):
"""Create a new Atom object.
Arguments:
- name - string, atom name, e.g. CA, spaces should be stripped
- coord - Numeric array (Float0, size 3), atomic coordinates
- b_factor - float, B factor
- occupancy - float
- altloc - string, alternative location specifier
- fullname - string, atom name including spaces, e.g. " CA "
- element - string, upper case, e.g. "HG" for mercury
"""
residue = self.residue
# if residue is None, an exception was generated during
# the construction of the residue
if residue is None:
return
# First check if this atom is already present in the residue.
# If it is, it might be due to the fact that the two atoms have atom
# names that differ only in spaces (e.g. "CA.." and ".CA.",
# where the dots are spaces). If that is so, use all spaces
# in the atom name of the current atom.
if residue.has_id(name):
duplicate_atom = residue[name]
# atom name with spaces of duplicate atom
duplicate_fullname = duplicate_atom.get_fullname()
if duplicate_fullname != fullname:
# name of current atom now includes spaces
name = fullname
warnings.warn("Atom names %r and %r differ "
"only in spaces at line %i."
% (duplicate_fullname, fullname,
self.line_counter),
PDBConstructionWarning)
self.atom = CustomAtom(name, coord, b_factor, occupancy, altloc,
fullname, serial_number, element)
if altloc != " ":
# The atom is disordered
if residue.has_id(name):
# Residue already contains this atom
duplicate_atom = residue[name]
if duplicate_atom.is_disordered() == 2:
duplicate_atom.disordered_add(self.atom)
else:
# This is an error in the PDB file:
# a disordered atom is found with a blank altloc
# Detach the duplicate atom, and put it in a
# DisorderedAtom object together with the current
# atom.
residue.detach_child(name)
disordered_atom = DisorderedAtom(name)
residue.add(disordered_atom)
disordered_atom.disordered_add(self.atom)
disordered_atom.disordered_add(duplicate_atom)
residue.flag_disordered()
warnings.warn("WARNING: disordered atom found "
"with blank altloc before line %i.\n"
% self.line_counter,
PDBConstructionWarning)
else:
# The residue does not contain this disordered atom
# so we create a new one.
disordered_atom = DisorderedAtom(name)
residue.add(disordered_atom)
# Add the real atom to the disordered atom, and the
# disordered atom to the residue
disordered_atom.disordered_add(self.atom)
residue.flag_disordered()
else:
# The atom is not disordered
residue.add(self.atom)
[docs]def load_pdb(fileName, ident=None):
"""
Read PDB from file into biopython structure object
Parameters
----------
fileName : str
the path to the file
ident : str (optional)
the desired identity of the structure object
Returns
-------
values : :class:`paramagpy.protein.CustomStructure`
a structure object containing the atomic coordinates
"""
if not ident:
ident = fileName
parser = PDBParser(structure_builder=CustomStructureBuilder())
return parser.get_structure(ident, fileName)