LOOS  v2.3.2
pdb.cpp
1 /*
2  This file is part of LOOS.
3 
4  LOOS (Lightweight Object-Oriented Structure library)
5  Copyright (c) 2008, Tod D. Romo, Alan Grossfield
6  Department of Biochemistry and Biophysics
7  School of Medicine & Dentistry, University of Rochester
8 
9  This package (LOOS) is free software: you can redistribute it and/or modify
10  it under the terms of the GNU General Public License as published by
11  the Free Software Foundation under version 3 of the License.
12 
13  This package is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program. If not, see <http://www.gnu.org/licenses/>.
20 */
21 
22 
23 #include <pdb.hpp>
24 #include <utils.hpp>
25 #include <Fmt.hpp>
26 
27 #include <iomanip>
28 #include <boost/unordered_set.hpp>
29 #include <boost/format.hpp>
30 #include <boost/algorithm/string.hpp>
31 
32 
33 namespace loos {
34 
35  // Assume we're only going to find spaces in a PDB file...
36  bool PDB::emptyString(const std::string& s) {
37  std::string::const_iterator i;
38 
39  for (i = s.begin(); i != s.end(); ++i)
40  if (*i != ' ')
41  return(false);
42 
43  return(true);
44  }
45 
46 
47  // Special handling for REMARKs to ignore the line code, if
48  // present...
49 
50  void PDB::parseRemark(const std::string& s) {
51  std::string t;
52 
53  if (s[6] == ' ' && isdigit(s[7]))
54  t = s.substr(11, 58);
55  else
56  t = s.substr(7, 62);
57 
58  _remarks.add(t);
59  }
60 
61 
62  // Parse an ATOM or HETATM record...
63  // Note: ParseErrors can come from parseStringAs
64 
65  void PDB::parseAtomRecord(const std::string& s) {
66  greal r;
67  gint i;
68  std::string t;
69  GCoord c;
70  pAtom pa(new Atom);
71 
72  pa->index(_max_index++);
73 
74  t = parseStringAs<std::string>(s, 0, 6);
75  pa->recordName(t);
76 
77  i = parseStringAsHybrid36(s, 6, 5);
78  pa->id(i);
79 
80  t = parseStringAs<std::string>(s, 12, 4);
81  pa->name(t);
82 
83  t = parseStringAs<std::string>(s, 16, 1);
84  pa->altLoc(t);
85 
86  t = parseStringAs<std::string>(s, 17, 4);
87  pa->resname(t);
88 
89  t = parseStringAs<std::string>(s, 21, 1);
90  pa->chainId(t);
91 
92  i = parseStringAsHybrid36(s, 22, 4);
93  pa->resid(i);
94 
95  t = parseStringAs<std::string>(s, 26, 1);
96 
97  // Special handling of resid field since it may be frame-shifted by
98  // 1 col in some cases...
99  if (strictness_policy) {
100  char c = t[0];
101 
102  if (c != ' ' && !isalpha(c))
103  throw(ParseError("Non-alpha character in iCode column of PDB"));
104  } else {
105  char c = t[0];
106 
107  // Assume that if we see this variant, then we're not using hybrid-36
108  if (c != ' ' && isdigit(c)) {
109  i = parseStringAs<int>(s, 22, 5);
110  pa->resid(i);
111  t = " ";
112  }
113 
114  }
115  pa->iCode(t);
116 
117  c[0] = parseStringAs<float>(s, 30, 8);
118  c[1] = parseStringAs<float>(s, 38, 8);
119  c[2] = parseStringAs<float>(s, 46, 8);
120  pa->coords(c);
121 
122  if (s.size() > 54) {
123  r = parseStringAs<float>(s, 54, 6);
124  pa->occupancy(r);
125 
126  if (s.size() > 60) {
127  r = parseStringAs<float>(s, 60, 6);
128  pa->bfactor(r);
129 
130  if (s.size() > 72) {
131  t = parseStringAs<std::string>(s, 72, 4);
132  pa->segid(t);
133 
134  if (s.size() > 76) {
135  t = parseStringAs<std::string>(s, 76, 2);
136  pa->PDBelement(t);
137 
138  // Charge is not currently handled...
139  // t = parseStringAs<std::string>(s, 78, 2);
140  }
141  } else { // segid
142  _missing_segid = true;
143  }
144  } else { // b-factor
145  _missing_b = _missing_segid = true;
146  }
147  } else { // occupancies
148  _missing_q = _missing_b = _missing_segid = true;
149  }
150  append(pa);
151 
152  // Record which pAtom belongs to this atomid.
153  // NOTE: duplicate atomid's are NOT checked for
154  _atomid_to_patom[pa->id()] = pa;
155  }
156 
157 
158 
159  // Convert an Atom to a string with a PDB format...
160 
161  std::string PDB::atomAsString(const pAtom p) const {
162  std::ostringstream s;
163 
164  // Float formatter for coords
165  Fmt crdfmt(3);
166  crdfmt.width(8);
167  crdfmt.right();
168  crdfmt.trailingZeros(true);
169  crdfmt.fixed();
170 
171  // Float formatter for B's and Q's...
172  Fmt bqfmt(2);
173  bqfmt.width(6);
174  bqfmt.right();
175  bqfmt.trailingZeros(true);
176  bqfmt.fixed();
177 
178  // We don't worry about strings exceeding field-widths (yet),
179  // but do check for numeric overflows...
180  s << std::setw(6) << std::left << p->recordName();
181  s << hybrid36AsString(p->id(), 5) << " ";
182  s << std::setw(4) << std::left << p->name();
183 
184  s << std::setw(1) << p->altLoc();
185  s << std::setw(4) << std::left << p->resname();
186 
187  s << std::setw(1) << std::right << p->chainId();
188  s << hybrid36AsString(p->resid(), 4);
189  s << std::setw(2) << p->iCode();
190  s << " ";
191 
192  s << crdfmt(p->coords().x());
193  s << crdfmt(p->coords().y());
194  s << crdfmt(p->coords().z());
195  s << bqfmt(p->occupancy());
196  s << bqfmt(p->bfactor());
197  s << " ";
198  s << std::setw(4) << std::left << p->segid();
199  s << std::setw(2) << std::right << p->PDBelement();
200  if (_show_charge)
201  s << std::setw(2) << p->charge();
202  else
203  s << " ";
204 
205  return(s.str());
206  }
207 
208 
209  // Private function to search the map of atomid's -> pAtoms
210  // Throws an error if the atom is not found
211  pAtom PDB::findAtom(const int id) {
212  std::map<int, pAtom>::iterator i = _atomid_to_patom.find(id);
213  if (i == _atomid_to_patom.end()) {
214  std::ostringstream oss;
215  oss << "Cannot find atom corresponding to atomid " << id << " for making a bond.";
216  throw(LOOSError(oss.str()));
217  }
218  return(i->second);
219  }
220 
221 
222 
223 
224  // If the PDB already had symmetrical CONECT records, then it will
225  // have duplicates thanks to how parseConectRecord works. We sort
226  // and filter out repeated bonds to fix this.
227  void PDB::uniqueBonds() {
228 
229  for (iterator atom = begin(); atom != end(); ++atom) {
230  std::vector<int> bonds = (*atom)->getBonds();
231 
232  if (!bonds.empty()) {
233  std::sort(bonds.begin(), bonds.end());
234  std::vector<int> unique_bonds;
235 
236  unique_bonds.push_back(bonds[0]);
237  for (std::vector<int>::const_iterator i = bonds.begin()+1; i != bonds.end(); ++i)
238  if (*i != *(i-1))
239  unique_bonds.push_back(*i);
240 
241  (*atom)->setBonds(unique_bonds);
242  }
243  }
244  }
245 
246 
247  // Parse CONECT records, updating the referenced atoms...
248  // Couple of issues:
249  //
250  // Will accept up to 8 bound atoms and considers them all equal,
251  // but the PDB standard says some are h-bonded and some are
252  // salt-bridged...
253  //
254  // No check is made for overflow of fields...
255  //
256  // Adding a bond requires finding the bound atoms, which can
257  // be expensive, so we build-up a map of atomid to pAtoms as the
258  // ATOM records are being parsed. This is then searched to find
259  // the pAtom that matches the CONECT atomid (rather than using
260  // findById()).
261 
262  void PDB::parseConectRecord(const std::string& s) {
263  int bound_id = parseStringAs<int>(s, 6, 5);
264 
265 
266  // Rely on findAtom to throw if bound_id not found...
267  pAtom bound = findAtom(bound_id);
268 
269  // This currently includes fields designated as H-bond indices...
270  // Should we do this? or separate them out? Hmmm...
271  for (int i=0; i<8; ++i) {
272  int j = i * 5 + 11;
273  if (static_cast<uint>(j) >= s.length())
274  break;
275  std::string t = s.substr(j, 5);
276  if (emptyString(t))
277  break;
278  int id = parseStringAsHybrid36(t);
279 
280  // findAtom will throw if id is not found
281  pAtom boundee = findAtom(id);
282  bound->addBond(boundee);
283  boundee->addBond(bound);
284  }
285  }
286 
287 
288  void PDB::parseCryst1Record(const std::string& s) {
289  greal r;
290  gint i;
291  std::string t;
292 
293  UnitCell newcell;
294 
295  r = parseStringAs<float>(s, 6, 9);
296  newcell.a(r);
297  r = parseStringAs<float>(s, 15, 9);
298  newcell.b(r);
299  r = parseStringAs<float>(s, 24, 9);
300  newcell.c(r);
301 
302  r = parseStringAs<float>(s, 33, 7);
303  newcell.alpha(r);
304  r = parseStringAs<float>(s, 40, 7);
305  newcell.beta(r);
306  r = parseStringAs<float>(s, 47, 7);
307  newcell.gamma(r);
308 
309  // Special handling in case of mangled CRYST1 record...
310  if (s.length() < 66) {
311  t = s.substr(55);
312 
313  newcell.spaceGroup(t);
314  newcell.z(-1); // ???
315  } else {
316  t = parseStringAs<std::string>(s, 55, 11);
317  newcell.spaceGroup(t);
318  i = parseStringAs<int>(s, 66, 4);
319  newcell.z(i);
320  }
321 
322  cell = newcell;
323  _has_cryst = true;
324  }
325 
326 
329  /*
330  * Will transform any caught exceptions into a FileReadError
331  */
332  void PDB::read(std::istream& is) {
333  std::string input;
334  bool has_cryst = false;
335  bool has_bonds = false;
336  boost::unordered_set<std::string> seen;
337 
338  while (getline(is, input)) {
339  try {
340  if (input.substr(0, 4) == "ATOM" || input.substr(0,6) == "HETATM")
341  parseAtomRecord(input);
342  else if (input.substr(0, 6) == "REMARK")
343  parseRemark(input);
344  else if (input.substr(0,6) == "CONECT") {
345  has_bonds = true;
346  parseConectRecord(input);
347  } else if (input.substr(0, 6) == "CRYST1") {
348  parseCryst1Record(input);
349  has_cryst = true;
350  } else if (input.substr(0,3) == "TER")
351  ;
352  else if (input.substr(0,3) == "END")
353  break;
354  else {
355  int space = input.find_first_of(' ');
356  std::string record = input.substr(0, space);
357  if (seen.find(record) == seen.end()) {
358  std::cerr << "Warning - unknown PDB record '" << record << "'" << std::endl;
359  seen.insert(record);
360  }
361  }
362  }
363  catch(LOOSError& e) {
364  throw(FileReadError(_fname, e.what()));
365  }
366  catch(...) {
367  throw(FileReadError(_fname, "Unknown exception"));
368  }
369  }
370  if (isMissingFields())
371  std::cerr << "Warning- PDB is missing fields. Default values will be used.\n";
372 
373  // Clean-up temporary storage...
374  _atomid_to_patom.clear();
375 
376  // Do some post-extraction...
377  if (loos::remarksHasBox(_remarks)) {
378  GCoord c;
379  try {
380  c = loos::boxFromRemarks(_remarks);
381  }
382  catch(ParseError& e) {
383  throw(FileReadError(_fname, e.what()));
384  }
385  periodicBox(c);
386  } else if (has_cryst) {
387  GCoord c(cell.a(), cell.b(), cell.c());
388  periodicBox(c);
389  }
390 
391  // Force atom id's to be monotonic if there was an overflow event...
392  if (atoms.size() >= 100000)
393  renumber();
394 
395  // Set bonds state...
396  if (has_bonds) {
397  setGroupConnectivity();
398  uniqueBonds();
399  }
400 
401  }
402 
403 
404  std::ostream& FormattedUnitCell(std::ostream& os, const UnitCell& u) {
405  os << "CRYST1";
406  Fmt dists(3);
407  dists.width(9).right().trailingZeros(true).fixed();
408  Fmt angles(2);
409  angles.width(7).right().trailingZeros(true).fixed();
410 
411  os << dists(u.a()) << dists(u.b()) << dists(u.c());
412  os << angles(u.alpha()) << angles(u.beta()) << angles(u.gamma());
413  os << " " << std::setw(11) << std::left << u.spaceGroup() << std::setw(4) << u.z();
414 
415  return(os);
416  }
417 
418  std::ostream& XTALLine(std::ostream& os, const GCoord& box) {
419  os << "REMARK XTAL "
420  << box.x() << " "
421  << box.y() << " "
422  << box.z();
423  return(os);
424  }
425 
426 
427  std::ostream& FormatConectRecords(std::ostream& os, const PDB& p) {
428  AtomicGroup::iterator ci;
429 
430  // Copy and sort
431  PDB sorted = p;
432  sorted.sort();
433 
434  for (ci = sorted.begin(); ci != sorted.end(); ++ci) {
435  if ((*ci)->checkProperty(Atom::bondsbit)) {
436  int id = (*ci)->id();
437 
438  std::vector<int> bonds = (*ci)->getBonds();
439  if (bonds.empty())
440  continue;
441 
442 
443  // Filter bonds... Any bond to an atomid smaller than the current atom
444  // will have already been written, so omit.
445  std::sort(bonds.begin(), bonds.end());
446  std::vector<int> filtered_bonds;
447  for (std::vector<int>::const_iterator i = bonds.begin(); i != bonds.end(); ++i)
448  if (*i >= id)
449  filtered_bonds.push_back(*i);
450 
451  if (filtered_bonds.empty())
452  continue;
453 
454  os << "CONECT" << hybrid36AsString(id, 5);
455  int i = 0;
456 
457  std::vector<int>::const_iterator cj;
458  for (cj = filtered_bonds.begin(); cj != filtered_bonds.end(); ++cj) {
459  if (++i > 4) {
460  i = 1;
461  os << "\nCONECT" << hybrid36AsString(id, 5);
462  }
463  int bound_id = *cj;
464  pAtom pa = sorted.findById(bound_id);
465  if (pa != 0)
466  os << hybrid36AsString(bound_id, 5);
467  }
468  os << std::endl;
469  }
470  }
471 
472  return(os);
473  }
474 
475 
477 
485  std::ostream& operator<<(std::ostream& os, const PDB& p) {
486  AtomicGroup::const_iterator i;
487 
488  os << p._remarks;
489  if (p.isPeriodic())
490  XTALLine(os, p.periodicBox()) << std::endl;
491  if (p._has_cryst)
492  FormattedUnitCell(os, p.cell) << std::endl;
493  for (i = p.atoms.begin(); i != p.atoms.end(); ++i)
494  os << p.atomAsString(*i) << std::endl;
495 
496  if (p.hasBonds()) {
497  int maxid = 0;
498  for (i = p.atoms.begin(); i != p.atoms.end(); ++i)
499  if ((*i)->id() > maxid)
500  maxid = (*i)->id();
501 
502  FormatConectRecords(os, p);
503  }
504 
505  if (p._auto_ter)
506  os << "TER \n";
507 
508  return(os);
509  }
510 
511  PDB PDB::copy(void) const {
512  AtomicGroup grp = this->AtomicGroup::copy();
513  PDB p(grp);
514 
515  p._show_charge = _show_charge;
516  p._auto_ter = _auto_ter;
517  p._has_cryst = _has_cryst;
518  p._remarks = _remarks;
519  p.cell = cell;
520 
521  return(p);
522  }
523 
524  PDB* PDB::clone(void) const {
525  return(new PDB(*this));
526  }
527 
529  PDB p(g);
530 
531  if (p.isPeriodic())
532  p.unitCell(UnitCell(p.periodicBox()));
533 
534  return(p);
535  }
536 
537 
538  bool PDB::showCharge(void) const { return(_show_charge); }
539  void PDB::showCharge(bool b) { _show_charge = b; }
540  bool PDB::strict(void) const { return(strictness_policy); }
541  void PDB::strict(const bool b) { strictness_policy = b; }
542 
543  bool PDB::autoTerminate(void) const { return(_auto_ter); }
544 
545  void PDB::autoTerminate(bool b) { _auto_ter = b; }
546 
547  Remarks& PDB::remarks(void) { return(_remarks); }
548  void PDB::remarks(const Remarks& r) { _remarks = r; }
549 
550  const UnitCell& PDB::unitCell(void) { return(cell); }
551  void PDB::unitCell(const UnitCell& c) { _has_cryst = true; cell = c; }
552 
553 
554 
555 }
Remarks & remarks(void)
Accessor for the remarks object...
Definition: pdb.cpp:547
Errors that occur while reading a file.
Definition: exceptions.hpp:180
void add(const std::string s)
Add a remark.
Definition: pdb_remarks.cpp:52
AtomicGroup & append(pAtom pa)
Append the atom onto the group.
PDB copy(void) const
Creates a deep copy (see AtomicGroup::copy() for more info)
Definition: pdb.cpp:511
bool hasBonds(void) const
Does any atom in the group have bond information???
bool isPeriodic(void) const
Test whether or not periodic boundary conditions are set.
void renumber(const int start=1, const int stride=1)
Renumber the atomid's of the contained atoms...
virtual PDB * clone(void) const
Clones an object for polymorphism (see AtomicGroup::clone() for more info)
Definition: pdb.cpp:524
std::string hybrid36AsString(int d, uint n)
Convert an int into a hybrid-36 encoded string.
Definition: utils.cpp:322
GCoord boxFromRemarks(const Remarks &r)
Extract the Alan-style box-size from a PDB Remarks block.
PDB reading/writing class.
Definition: pdb.hpp:69
Generic LOOS exception.
Definition: exceptions.hpp:40
This class encapsulates crystallographic unit cell data.
Definition: cryst.hpp:35
static PDB fromAtomicGroup(const AtomicGroup &)
Class method for creating a PDB from an AtomicGroup.
Definition: pdb.cpp:528
AtomicGroup copy(void) const
Creates a deep copy of this group.
Definition: AtomicGroup.cpp:56
Output formatter class, adapted from Stroustrup's book.
Definition: Fmt.hpp:25
void read(std::istream &is)
Read in PDB from an ifstream.
Definition: pdb.cpp:332
Exception when parsing input data.
Definition: exceptions.hpp:64
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
Class for handling PDB Remarks.
Definition: pdb_remarks.hpp:41
Namespace for most things not already encapsulated within a class.
bool remarksHasBox(const Remarks &r)
Checks to see if a Remarks block has an Alan-style box size in it.
int parseStringAsHybrid36(const std::string &source, const uint pos, const uint nelem)
Convert a hybrid-36 encoded string into an int.
Definition: utils.cpp:266
GCoord periodicBox(void) const
Fetch the periodic boundary conditions.