LOOS  v2.3.2
hcontacts.cpp
1 /*
2  hcontacts.cpp
3 
4  Constructs a matrix representing time series for multiple for inter and/or intra-molecular hbonds
5 */
6 
7 /*
8 
9  This file is part of LOOS.
10 
11  LOOS (Lightweight Object-Oriented Structure library)
12  Copyright (c) 2010, Tod D. Romo
13  Department of Biochemistry and Biophysics
14  School of Medicine & Dentistry, University of Rochester
15 
16  This package (LOOS) is free software: you can redistribute it and/or modify
17  it under the terms of the GNU General Public License as published by
18  the Free Software Foundation under version 3 of the License.
19 
20  This package is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23  GNU General Public License for more details.
24 
25  You should have received a copy of the GNU General Public License
26  along with this program. If not, see <http://www.gnu.org/licenses/>.
27 */
28 
29 
30 #include <loos.hpp>
31 #include <boost/format.hpp>
32 #include <boost/program_options.hpp>
33 
34 #include "hcore.hpp"
35 
36 using namespace std;
37 using namespace loos;
38 using namespace HBonds;
39 namespace po = boost::program_options;
40 namespace opts = loos::OptionsFramework;
41 
42 
43 typedef vector<AtomicGroup> vGroup;
44 
45 typedef pair<SimpleAtom, SimpleAtom> Bond;
46 typedef vector<Bond> vBond;
47 
48 
49 bool inter_bonds, intra_bonds;
50 double putative_threshold;
51 
52 double length_low, length_high, max_angle;
53 bool use_periodicity;
54 string donor_selection, acceptor_selection;
55 string model_name, traj_name;
56 
57 // @cond TOOLS_INTERNAL
58 
59 
60 
61 
62 string fullHelpMessage(void) {
63  string msg =
64  "\n"
65  "SYNOPSIS\n"
66  "\tHydrogen bond contacts for a trajectory as a matrix\n"
67  "\n"
68  "DESCRIPTION\n"
69  "\n"
70  "\tThis tool creates a matrix that represents a time series of the state of all\n"
71  "possible hydrogen bonds for a given set of donor/acceptor selections. Each element\n"
72  "of the matrix is either a 1 (hydrogen bond present) or a 0 (hydrogen bond absent).\n"
73  "Each column of the matrix is a possible hydrogen bond and each row of the matrix\n"
74  "is a time-step (frame) from the trajectory.\n"
75  "\tThe donors and acceptors are determined by the selections given. The donor selection\n"
76  "must select only hydrogen atoms (names beginning with an 'H'). A search for all possible\n"
77  "hydrogen bond pairs is conducted at the start of the program, where any acceptor atom\n"
78  "within a cutoff distance of any donor hydrogen is a pair that is tracked. This search\n"
79  "is conducted on a per-molecule basis, as determined by the --inter and --intra flags.\n"
80  "If --inter=1, then intermolecular contacts are searched (i.e. any donor in one molecule\n"
81  "vs all possible acceptors in all other molecules). If --intra=1, then intramolecular\n"
82  "contacts are searched (i.e. any donor/acceptor atom in the same molecule). This search\n"
83  "requires both connectivity and coordinates to be present. If the model does not provide\n"
84  "coordinates (e.g. a PSF file), then the coordinates will be taken from the first frame\n"
85  "of the trajectory.\n"
86  "\tThe metadata at the top of the ASCII matrix output lists all of the possible hydrogen-\n"
87  "bond pairs that are tracked. The first number is the column (0-based index) in the\n"
88  "matrix representing that bond. To plot the column in Octave/gnuplot, add 1 to the column\n"
89  "index. The first column of the matrix is the frame number from the trajectory for the\n"
90  "corresponding row of the matrix.\n"
91  "\n"
92  "EXAMPLES\n"
93  "\n"
94  "\thcontacts model.pdb sim.dcd 'resname == \"ARG\" && name =~ \"^HH\" && segid =~ \"PE\\d+\"'\\\n"
95  "\t 'segid =~ \"PE\\d+\" && name =~ \"^O\"' >bonds.asc\n"
96  "This example searches for all contacts between any ARG atom beginnig with 'HH' in\n"
97  "any segment that is PE and a number (i.e. PE0, PE1, PE11, ...) and any atom in the\n"
98  "same set of segments that begins with an O. By default, only intermolecular hydrogen-\n"
99  "bonds are considered. The default bond constraints of angle <= 30 and 1.5 <= d <= 3.0 are\n"
100  "used. The initial search distance cutoff is 10.0 Angstroms.\n"
101  "\n"
102  "\thcontacts --search=30 model.pdb sim.dcd \\\n"
103  "\t 'resname == \"ARG\" && name =~ \"^HH\" && segid =~ \"PE\\d+\"'\\\n"
104  "\t 'segid =~ \"PE\\d+\" && name =~ \"^O\"' >bonds.asc\n"
105  "This example is the same as above, but the initial search for possible bonds uses a\n"
106  "cutoff of 30 Angstroms.\n"
107  "\n"
108  "SEE ALSO\n"
109  "\thbonds, hmatrix, hcorrelation\n";
110 
111  return(msg);
112 }
113 
114 
115 class ToolOptions : public opts::OptionsPackage {
116 public:
117  void addGeneric(po::options_description& o) {
118  o.add_options()
119  ("search", po::value<double>(&putative_threshold)->default_value(10.0), "Threshold for initial bond search")
120  ("blow", po::value<double>(&length_low)->default_value(1.5), "Low cutoff for bond length")
121  ("bhi", po::value<double>(&length_high)->default_value(3.0), "High cutoff for bond length")
122  ("angle", po::value<double>(&max_angle)->default_value(30.0), "Max bond angle deviation from linear")
123  ("periodic", po::value<bool>(&use_periodicity)->default_value(false), "Use periodic boundary")
124  ("inter", po::value<bool>(&inter_bonds)->default_value(true), "Inter-molecular bonds")
125  ("intra", po::value<bool>(&intra_bonds)->default_value(false), "Intra-molecular bonds");
126  }
127 
128  void addHidden(po::options_description& o) {
129  o.add_options()
130  ("donor", po::value<string>(&donor_selection), "donor selection")
131  ("acceptor", po::value<string>(&acceptor_selection), "acceptor selection");
132 
133  }
134 
135  void addPositional(po::positional_options_description& opts) {
136  opts.add("donor", 1);
137  opts.add("acceptor", 1);
138  }
139 
140 
141  bool check(po::variables_map& map) {
142  if (!(inter_bonds || intra_bonds)) {
143  cerr << "Error- must specify at least some kind of bond (inter/intra) to calculate.\n";
144  return(true);
145  }
146  return(false);
147  }
148 
149 
150  string help() const {
151  return("donor-selection acceptor-selection");
152  }
153 
154  string print() const {
155  ostringstream oss;
156  oss << boost::format("search=%f,inter=%d,intra=%d,blow=%f,bhi=%f,angle=%f,periodic=%d,acceptor=\"%s\",donor=\"%s\"")
157  % putative_threshold
158  % inter_bonds
159  % intra_bonds
160  % length_low
161  % length_high
162  % max_angle
163  % use_periodicity
164  % acceptor_selection
165  % donor_selection;
166 
167  return(oss.str());
168  }
169 
170 };
171 
172 
173 // @endcond
174 
175 
176 
177 
178 // Given a vector of molecules, apply selection to each return a vector of subsets
179 
180 vGroup splitSelection(const vGroup& molecules, const string& selection) {
181  vGroup results;
182 
183  for (vGroup::const_iterator i = molecules.begin(); i != molecules.end(); ++i) {
184  AtomicGroup subset;
185  try {
186  subset = selectAtoms(*i, selection);
187  }
188  catch (...) { // Ignore exceptions
189  ;
190  }
191  if (!subset.empty())
192  results.push_back(subset);
193  }
194 
195 
196  if (results.empty()) {
197  cerr << "Error- The selection '" << selection << "' resulted in nothing being selected.\n";
198  exit(-1);
199  }
200 
201  return(results);
202 }
203 
204 
205 
206 
207 
208 // Build up a vector of Bonds by looking for any donor/acceptor pair that's within a threshold
209 // distance
210 
211 vBond findPotentialBonds(const AtomicGroup& donors, const AtomicGroup& acceptors, const AtomicGroup& system) {
212  vBond bonds;
213 
214  for (AtomicGroup::const_iterator j = donors.begin(); j != donors.end(); ++j) {
215  GCoord u = (*j)->coords();
216  for (AtomicGroup::const_iterator i = acceptors.begin(); i != acceptors.end(); ++i) {
217  if (u.distance((*i)->coords()) <= putative_threshold) {
218 
219  // Manually build simple atoms
220  SimpleAtom new_donor(*j, system.sharedPeriodicBox(), use_periodicity);
221  string name = (*j)->name();
222  if (name[0] != 'H') {
223  cerr << boost::format("Error- atom %s was given as a donor, but donors can only be hydrogens.\n") % name;
224  exit(-10);
225  }
226 
227  vector<int> bond_list = (*j)->getBonds();
228  if (bond_list.size() != 1) {
229  cerr << "Error- The following hydrogen atom has more than one bond to it...woops...\n";
230  cerr << *j;
231  exit(-10);
232  }
233 
234  pAtom pa = system.findById(bond_list[0]);
235  if (pa == 0) {
236  cerr << boost::format("Error- cannot find atomid %d in system.\n") % bond_list[0];
237  exit(-10);
238  }
239  new_donor.attach(pa);
240 
241 
242  SimpleAtom new_acceptor(*i, system.sharedPeriodicBox(), use_periodicity);
243 
244  Bond new_bond(new_donor, new_acceptor);
245  bonds.push_back(new_bond);
246  }
247  }
248  }
249 
250  return(bonds);
251 }
252 
253 
254 
255 
256 ostringstream& formatBond(ostringstream& oss, const uint i, const Bond& bond) {
257  pAtom a = bond.first.rawAtom();
258  pAtom b = bond.second.rawAtom();
259 
260  oss << boost::format("# %d : %d-%s-%s-%d-%s => %d-%s-%s-%d-%s")
261  % i
262  % a->id() % a->name() % a->resname() % a->resid() % a->segid()
263  % b->id() % b->name() % b->resname() % b->resid() % b->segid();
264 
265  return(oss);
266 }
267 
268 
269 
270 int main(int argc, char *argv[]) {
271  string hdr = invocationHeader(argc, argv);
272 
273 
274  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
276  ToolOptions* topts = new ToolOptions;
277 
278  opts::AggregateOptions options;
279  options.add(bopts).add(tropts).add(topts);
280  if (! options.parse(argc, argv))
281  exit(-1);
282 
283 
284  AtomicGroup model = tropts->model;
285  pTraj traj = tropts->trajectory;
286 
287  if (use_periodicity && !traj->hasPeriodicBox()) {
288  cerr << "Error- trajectory has no periodic box information\n";
289  exit(-1);
290  }
291 
292 
293  // Get coords if required...
294  if (!model.hasCoords()) {
295  traj->readFrame();
296  traj->updateGroupCoords(model);
297  if (tropts->skip > 0)
298  traj->readFrame(tropts->skip - 1);
299  else
300  traj->rewind();
301  }
302 
303 
304  vGroup mols = model.splitByMolecule();
305 
306  // First, build list of pairs we will search for...
307  vGroup raw_donors = splitSelection(mols, donor_selection);
308  vGroup raw_acceptors = splitSelection(mols, acceptor_selection);
309 
310  vBond bond_list;
311 
312  for (uint j=0; j<raw_donors.size(); ++j) {
313 
314  if (intra_bonds) {
315  vBond bonds = findPotentialBonds(raw_donors[j], raw_acceptors[j], model);
316  copy(bonds.begin(), bonds.end(), back_inserter(bond_list));
317  }
318 
319  if (inter_bonds) {
320  for (uint i=0; i<raw_acceptors.size(); ++i) {
321  if (j == i)
322  continue;
323  vBond bonds = findPotentialBonds(raw_donors[j], raw_acceptors[i], model);
324  copy(bonds.begin(), bonds.end(), back_inserter(bond_list));
325  }
326  }
327 
328  }
329 
330 
331  // Generate the metadata for the output...
332  ostringstream oss;
333  oss << hdr << endl;
334  for (uint i=0; i<bond_list.size(); ++i) {
335  formatBond(oss, i+1, bond_list[i]);
336  if (i < bond_list.size()-1)
337  oss << endl;
338  }
339 
340 
341  loos::Math::Matrix<uint> M(traj->nframes() - tropts->skip, bond_list.size()+1);
342 
343  uint j = 0;
344  while (traj->readFrame()) {
345  traj->updateGroupCoords(model);
346 
347  M(j, 0) = j + tropts->skip;
348  for (uint i=0; i<bond_list.size(); ++i)
349  M(j, i+1) = bond_list[i].first.hydrogenBond(bond_list[i].second);
350 
351  ++j;
352  }
353 
354  writeAsciiMatrix(cout, M, oss.str());
355 }
pTraj trajectory
The trajectory, primed by the –skip value (if specified)
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
pAtom findById(const int id) const
Find a contained atom by its atomid.
STL namespace.
AtomicGroup model
Model that describes the trajectory.
loos::SharedPeriodicBox sharedPeriodicBox() const
Provide access to the underlying shared periodic box...
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
std::vector< AtomicGroup > splitByMolecule(void) const
Returns a vector of AtomicGroups split based on bond connectivity.
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
std::ostream & writeAsciiMatrix(std::ostream &os, const Math::Matrix< T, P, S > &M, const std::string &meta, const Math::Range &start, const Math::Range &end, const bool trans=false, F fmt=F())
Write a submatrix to a stream.
Basic trajectory with a –skip option.
double distance(const Coord< T > &o) const
Distance between two coordinates.
Definition: Coord.hpp:443
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
Namespace for most things not already encapsulated within a class.
bool hasCoords(void) const
Do all the atoms in the group have coordinates?
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.