LOOS  v2.3.2
hcorrelation.cpp
1 /*
2  hcorrelation.cpp
3 
4  Computes time-correlation for hydrogen bonds...
5 */
6 
7 
8 /*
9 
10  This file is part of LOOS.
11 
12  LOOS (Lightweight Object-Oriented Structure library)
13  Copyright (c) 2010, Tod D. Romo
14  Department of Biochemistry and Biophysics
15  School of Medicine & Dentistry, University of Rochester
16 
17  This package (LOOS) is free software: you can redistribute it and/or modify
18  it under the terms of the GNU General Public License as published by
19  the Free Software Foundation under version 3 of the License.
20 
21  This package is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  GNU General Public License for more details.
25 
26  You should have received a copy of the GNU General Public License
27  along with this program. If not, see <http://www.gnu.org/licenses/>.
28 */
29 
30 
31 
32 #include <loos.hpp>
33 #include <boost/format.hpp>
34 #include <boost/program_options.hpp>
35 
36 #include "hcore.hpp"
37 
38 using namespace std;
39 using namespace loos;
40 using namespace HBonds;
41 
42 namespace po = boost::program_options;
43 namespace opts = loos::OptionsFramework;
44 
45 
46 
47 typedef vector<double> vecDouble;
48 typedef vector<vecDouble> vecvecDouble;
49 typedef vector<string> vString;
50 
51 // --------------- GLOBALS
52 
53 double length_low, length_high;
54 double max_angle;
55 bool use_periodicity;
56 bool use_stderr;
57 string donor_selection, acceptor_selection;
58 string model_name;
59 vString traj_names;
60 uint maxtime;
61 uint skip;
62 bool any_hydrogen;
63 
64 // ---------------
65 
66 
67 
68 
69 // ---------------
70 // @cond TOOLS_INTERNAL
71 
72 
73 
74 string fullHelpMessage(void) {
75  string msg =
76  "\n"
77  "SYNOPSIS\n"
78  "\tHydrogen bond correlation times\n"
79  "\n"
80  "DESCRIPTION\n"
81  "\n"
82  "\tThis tool generates the auto-correlation of putative hydrogen bonds for a trajectory.\n"
83  "Given a donor (hydrogen atom) and a set of acceptors, a matrix is constructed for\n"
84  "the trajectory where each row corresponds to a frame in the trajectory and each column\n"
85  "corresponds to a potential acceptor. If there is a hydrogen bond present, subject to\n"
86  "distance range and angle cutoff, then a 1 is placed in the matrix, otherwise a 0.\n"
87  "An auto-correlation is then calculated for each column. Only columns where there is\n"
88  "at least one hydrogen-bond present are included. If the any-hydrogen flag is set (--any=1),\n"
89  "then the state of the hydrogen bond at each time point is the union of all possible\n"
90  "acceptors. This is useful for asking what the correlation is between a donor and -any-\n"
91  "possible acceptor.\n"
92  "\tThis process is repeated for all possible donors and over all trajectories. The\n"
93  "correlation time-series is then averaged together, so what is written out is the average\n"
94  "correlation at a given time, over all donors and all trajectories. The maximum correlation\n"
95  "time is set automatically based on the shortest trajectory. However, it may be explicitly\n"
96  "set with the --maxtime T option.\n"
97  "\n"
98  "EXAMPLES\n"
99  "\n"
100  "\thcorrelation 'segid == \"PE1\" && resid == 4 && name == \"HE1\"' \\\n"
101  "\t 'name == \"O1\" && (resname == \"PALM\"' model.psf sim.dcd\n"
102  "This example uses the HE1 hydrogen of residue 4 in segment PE1 as the donor, and the O1\n"
103  "palmitoyl carnonyl oxygen as the acceptor. The average correlation over all carbonyl\n"
104  "oxygens is written out.\n"
105  "\n"
106  "\thcorrelation --any=1 'segid == \"PE1\" && resid == 4 && name == \"HE1\"'\\\n"
107  "\t 'name == \"O1\" && (resname == \"PALM\"' model.psf sim.dcd\n"
108  "This example is the same as above, however the correlation is for the peptide hydrogen (HE1)\n"
109  "hydrogen bonding to -any- palmitoyl carbonyl.\n"
110  "\n"
111  "\thcorrelation --blow=2.0 --bhi=4.0 --angle=25.0 --any=1 \\\n"
112  "\t 'segid == \"PE1\" && resid == 4 && name == \"HE1\"' \\\n"
113  "\t 'name == \"O1\" && (resname == \"PALM\"' model.psf sim.dcd\n"
114  "This example is the same as above, but with the hydrogen-bond criteria changed to be\n"
115  "2.0 <= distance <= 4.0 and the angle <= 25.0 degrees.\n"
116  "\n"
117  "SEE ALSO\n"
118  "\thmatrix, hbonds\n";
119 
120  return(msg);
121 }
122 
123 
124 class ToolOptions : public opts::OptionsPackage {
125 public:
126  void addGeneric(po::options_description& o) {
127  o.add_options()
128  ("blow", po::value<double>(&length_low)->default_value(1.5), "Low cutoff for bond length")
129  ("bhi", po::value<double>(&length_high)->default_value(3.0), "High cutoff for bond length")
130  ("angle", po::value<double>(&max_angle)->default_value(30.0), "Max bond angle deviation from linear")
131  ("periodic", po::value<bool>(&use_periodicity)->default_value(false), "Use periodic boundary")
132  ("maxtime", po::value<uint>(&maxtime)->default_value(0), "Max time for correlation (0 = auto-size)")
133  ("any", po::value<bool>(&any_hydrogen)->default_value(false), "Correlation for ANY hydrogen bound")
134  ("stderr", po::value<bool>(&use_stderr)->default_value(0), "Report standard error rather than standard deviation");
135 
136  }
137 
138  void addHidden(po::options_description& o) {
139  o.add_options()
140  ("donor", po::value<string>(&donor_selection), "donor selection")
141  ("acceptor", po::value<string>(&acceptor_selection), "acceptor selection")
142  ("model", po::value<string>(&model_name), "model")
143  ("trajs", po::value< vector<string> >(&traj_names), "Trajectories");
144  }
145 
146  void addPositional(po::positional_options_description& opts) {
147  opts.add("donor", 1);
148  opts.add("acceptor", 1);
149  opts.add("model", 1);
150  opts.add("trajs", -1);
151  }
152 
153  bool check(po::variables_map& vm)
154  {
155  return(
156  donor_selection.empty()
157  || acceptor_selection.empty()
158  || model_name.empty()
159  || traj_names.empty()
160  );
161  }
162 
163 
164  string help() const {
165  return("donor-selection acceptor-selection model traj [traj ...]");
166  }
167 
168  string print() const {
169  ostringstream oss;
170  oss << boost::format("skip=%d,stderr=%d,blow=%f,bhi=%f,angle=%f,periodic=%d,maxtime=%d,any=%d,acceptor=\"%s\",donor=\"%s\",model=\"%s\",trajs=\"%s\"")
171  % skip
172  % use_stderr
173  % length_low
174  % length_high
175  % max_angle
176  % use_periodicity
177  % maxtime
178  % any_hydrogen
179  % acceptor_selection
180  % donor_selection
181  % model_name
182  % vectorAsStringWithCommas(traj_names);
183 
184  return(oss.str());
185  }
186 
187 };
188 
189 
190 
191 
192 // @endcond
193 
194 
195 
196 
197 vecDouble average(const vecvecDouble& A) {
198  uint n = A.size();
199  uint m = A[0].size();
200 
201  vecDouble avg(m,0.0);
202  for (uint j=0; j<m; ++j)
203  for (uint i=0; i<n; ++i)
204  avg[j] += A[i][j];
205 
206  for (uint j=0; j<m; ++j)
207  avg[j] /= n;
208 
209  return(avg);
210 }
211 
212 
213 vecDouble stddev(const vecvecDouble& A, const vecDouble& avg) {
214  uint n = A.size();
215  uint m = A[0].size();
216 
217  vecDouble std(m, 0.0);
218  if (n <= 3)
219  return(std);
220 
221  for (uint j=0; j<m; ++j)
222  for (uint i=0; i<n; ++i) {
223  double d = A[i][j] - avg[j];
224  std[j] += d*d;
225  }
226 
227  for (uint j=0; j<m; ++j)
228  std[j] = sqrt(std[j]/(n-1));
229 
230  return(std);
231 }
232 
233 
234 
235 uint findMinSize(const AtomicGroup& model, const vString& names) {
236  uint n = numeric_limits<uint>::max();
237 
238  for (vString::const_iterator i = names.begin(); i != names.end(); ++i) {
239  pTraj traj = createTrajectory(*i, model);
240  if (traj->nframes() < n)
241  n = traj->nframes();
242  }
243 
244  return(n);
245 }
246 
247 
248 
249 
250 int main(int argc, char *argv[]) {
251  string hdr = invocationHeader(argc, argv);
252 
253 
254  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
255  ToolOptions* topts = new ToolOptions;
256 
258  opts.add(bopts).add(topts);
259  if (! opts.parse(argc, argv))
260  exit(-1);
261 
262 
263  AtomicGroup model = createSystem(model_name);
264 
265  SimpleAtom::innerRadius(length_low);
266  SimpleAtom::outerRadius(length_high);
267  SimpleAtom::maxDeviation(max_angle);
268 
269 
270  SAGroup donors = SimpleAtom::processSelection(donor_selection, model, use_periodicity);
271  SAGroup acceptors = SimpleAtom::processSelection(acceptor_selection, model, use_periodicity);
272 
273 
274  vecvecDouble correlations;
275  back_insert_iterator<vecvecDouble> corr_appender(correlations);
276 
277 
278  if (maxtime == 0)
279  maxtime = findMinSize(model, traj_names) / 2;
280 
281  cerr << boost::format("Using %d as max time for correlation.\n") % maxtime;
282 
283 
284  for (vString::const_iterator ci = traj_names.begin(); ci != traj_names.end(); ++ci) {
285  cerr << "Processing " << *ci << endl;
286  pTraj traj = createTrajectory(*ci, model);
287 
288  for (SAGroup::iterator j = donors.begin(); j != donors.end(); ++j) {
289  if (skip > 0)
290  traj->readFrame(skip-1);
291 
292  BondMatrix bonds = j->findHydrogenBondsMatrix(acceptors, traj, model);
293  if (any_hydrogen) {
295  for (uint j=0; j<bonds.rows(); ++j) {
296  double val = 0.0;
297  for (uint i=0; i<bonds.cols(); ++i)
298  if (bonds(j, i) != 0) {
299  val = 1.0;
300  break;
301  }
302  ts.push_back(val);
303  }
304  TimeSeries<double> tcorr = ts.correl(maxtime);
305  vecDouble vtmp;
306  copy(tcorr.begin(), tcorr.end(), back_inserter(vtmp));
307  correlations.push_back(vtmp);
308 
309  } else {
310  for (uint i=0; i<bonds.cols(); ++i) {
311  bool found = false;
312  for (uint j=0; j<bonds.rows(); ++j)
313  if (bonds(j, i) != 0) {
314  found = true;
315  break;
316  }
317 
318  if (found) {
320  for (uint j=0; j<bonds.rows(); ++j)
321  ts.push_back(bonds(j, i));
322  TimeSeries<double> tcorr = ts.correl(maxtime);
323  vecDouble vtmp;
324  copy(tcorr.begin(), tcorr.end(), back_inserter(vtmp));
325  correlations.push_back(vtmp);
326  }
327 
328  }
329 
330  }
331 
332  }
333 
334  }
335 
336 
337 
338  cerr << boost::format("Found %d time-correlations.\n") % correlations.size();
339 
340  vecDouble avg = average(correlations);
341  vecDouble std = stddev(correlations, avg);
342  double scaling = 1.0;
343  if (use_stderr)
344  scaling = sqrt(correlations.size());
345 
346  cout << "# " << hdr << endl;
347  cout << "# Found " << correlations.size() << " time-correlations." << endl;
348  cout << "# Using " << ( use_stderr ? "stderr" : "stddev") << endl;
349 
350  for (uint j=0; j<avg.size(); ++j)
351  cout << j << "\t" << avg[j] << "\t" << (std[j] / scaling) << endl;
352 
353 }
354 
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
STL namespace.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
AtomicGroup createSystem(const std::string &filename)
Factory function for reading in structure files.
Definition: sfactories.cpp:115
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Time Series Class.
Definition: TimeSeries.hpp:51
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
Namespace for most things not already encapsulated within a class.
std::string vectorAsStringWithCommas(const std::vector< std::string > &v)
Specialization for strings that sanitizes the contained strings.
Definition: utils.cpp:415
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.