LOOS  v2.3.2
decorr_time.cpp
1 /*
2  Structural Histogram IID analysis a la Lyman & Zuckerman J Phys Chem
3  B (2007) 111:12876-12882
4 */
5 
6 
7 /*
8 
9  This file is part of LOOS.
10 
11  LOOS (Lightweight Object-Oriented Structure library)
12  Copyright (c) 2010, Tod D. Romo
13  Department of Biochemistry and Biophysics
14  School of Medicine & Dentistry, University of Rochester
15 
16  This package (LOOS) is free software: you can redistribute it and/or modify
17  it under the terms of the GNU General Public License as published by
18  the Free Software Foundation under version 3 of the License.
19 
20  This package is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23  GNU General Public License for more details.
24 
25  You should have received a copy of the GNU General Public License
26  along with this program. If not, see <http://www.gnu.org/licenses/>.
27 */
28 
29 
30 
31 #include <loos.hpp>
32 
33 #include "ConvergenceOptions.hpp"
34 #include "fid-lib.hpp"
35 
36 using namespace std;
37 using namespace loos;
38 
39 
40 namespace opts = loos::OptionsFramework;
41 namespace po = loos::OptionsFramework::po;
42 
43 // * GLOBALS *
44 
45 
46 // Turning on generates some internal debugging information
47 const bool debugging = false;
48 uint verbosity;
49 
50 uint nreps = 5;
51 double frac;
52 
53 vecUint trange;
54 vecUint nrange;
55 vecUint indices;
56 
57 // @cond TOOLS_INTERNAL
58 
59 string fullHelpMessage(void) {
60  string msg =
61  "\n"
62  "SYNOPSIS\n"
63  "\tCompute decorrelation times for a simulation\n"
64  "\n"
65  "DESCRIPTION\n"
66  "\n"
67  "\tThis tool implements the decorrelation time method described in Lyman and Zuckerman,\n"
68  "J Phys Chem B (2007) 111:12876-12882. Bins for the structural histogram used are selected\n"
69  "using a uniform probability, set with the --frac option (the default is 0.05 for 20 bins).\n"
70  "The range of sample sizes (n, in figure 2) is given by the --nrange option, which takes\n"
71  "either a comma-separated list of sizes or a matlab/octave-style range. Finally, the\n"
72  "required t-range is also a matlab/octave-style range (or comma-separated list) that\n"
73  "gives the sample step-size (t in figure 2). This is not to be confused with a range in\n"
74  "frames of the trajectory. However, the notion of \"time\" is dictated by the sampling\n"
75  "rate of your trajectory, and is specified in terms of frames. For example, if your\n"
76  "trajectory has 1 frame/ns, then the t-range is specified in ns. If your trajectory\n"
77  "has one frame every 100 ps, then the t-range is specified in 100 ps units (i.e. frames).\n"
78  "This whole procedure is repeated multiple times for each sample size, n. The number of\n"
79  "repeats is given by the --reps option (default of 5).\n"
80  "\tThe output is an ASCII matrix where the first column is the step-size t. Each subsequent\n"
81  "set of two-columns corresponds to a different sample size or n-value. The first column\n"
82  "in each set is the scaled variance (eq 3), averaged over each replica. The second column\n"
83  "is the standard error in the scaled variance. This data can be plotted, e.g. figure 3.\n"
84  "\n"
85  "EXAMPLES\n"
86  "\n"
87  "\tdecorr_time --selection '!hydrogen' model.pdb simulation.dcd 5:5:100 >decorr.asc\n"
88  "A decorrelation time calculation using the default sample sizes of 2, 4, and 10 and\n"
89  "the default bin-size of 20 (probability 0.05). The calculation is repeated the default\n"
90  "of 5 times for each sample-size. Only non-hydrogen atoms are used. And the range in t\n"
91  "is 5 to 100 at every 5 frames (assuming a 1 frame/ns trajectory, then 5 to 100 ns every 5 ns).\n"
92  "\n"
93  "\tdecorr_time --selection 'name == \"CA\"' --nrange 2,3,4 --frac 0.1 \\\n"
94  "\t model.pdb simulation.dcd 10:10:250 >decorr.asc\n"
95  "Here, only alpha-carbons are used. Sample sizes are 2, 3, and 4 and there are 10 bins. The\n"
96  "t-range here is 10 to 250 at every 10 frames.\n"
97  "\n"
98  "NOTES\n"
99  "\tSimulations that are not well-converge may have difficulty with the default sample-size\n"
100  "range. Try a smaller range (i.e. 2,3,4) as well as large bins.\n"
101  "SEE ALSO\n"
102  "\tufidpick, effsize.pl, neff, assign_frames, hierarchy\n";
103 
104  return(msg);
105 }
106 
107 
108 
109 
110 
111 class ToolOptions : public opts::OptionsPackage {
112 public:
113 
114  void addGeneric(po::options_description& o) {
115  o.add_options()
116  ("nrange", po::value<string>(&nrange_spec)->default_value("2,4,10"), "Range of N to use")
117  ("frac", po::value<double>(&frac)->default_value(0.05), "Bin fraction")
118  ("reps", po::value<uint>(&nreps)->default_value(5), "# of repetitions to use for each N");
119  }
120 
121  bool postConditions(po::variables_map& vm) {
122  nrange = parseRangeList<uint>(nrange_spec);
123  return(true);
124  }
125 
126  string print() const {
127  ostringstream oss;
128  oss << boost::format("nrange='%s', frac=%f, reps=%f")
129  % nrange_spec
130  % frac
131  % nreps;
132  return(oss.str());
133  }
134 
135  string nrange_spec;
136 };
137 
138 
139 // @endcond
140 
141 
142 
143 
144 
145 // Despite the name, this histograms the assignments (i.e. frequency
146 // of the various fiducials in the trajectory)
147 
148 vecDouble rebinFrames(const vecUint& assignments, const uint S, const vecUint& ensemble) {
149  vecUint hist(S, 0);
150 
151  for (vecUint::const_iterator i = ensemble.begin(); i != ensemble.end(); ++i) {
152  if (assignments[*i] >= S)
153  throw(runtime_error("Bin index exceeds number of fiducials in rebinFrames()"));
154 
155  ++hist[assignments[*i]];
156  }
157 
158  vecDouble pops(S);
159  for (uint i=0; i<S; ++i)
160  pops[i] = static_cast<double>(hist[i])/ensemble.size();
161 
162  return(pops);
163 }
164 
165 
166 
167 
168 vecDouble binVariances(const vecUint& assignments, const uint S, const uint n, const uint t) {
169  vector<vecDouble> fik;
170 
171  uint curframe = 0;
172  if (debugging)
173  cerr << boost::format("Debug> S=%d, n=%d, t=%d\n") % S % n % t;
174  while (curframe < assignments.size()) {
175  vecUint ensemble;
176  for (uint i=0; i<n && curframe < assignments.size(); ++i, curframe += t)
177  ensemble.push_back(curframe);
178 
179  // Incomplete sample, so ignore...
180  if (ensemble.size() < n)
181  break;
182 
183  vecDouble hist = rebinFrames(assignments, S, ensemble);
184  fik.push_back(hist);
185  }
186 
187 
188  if (debugging) {
189  cerr << "Debug> populations=\n";
190  for (uint i=0;i<fik.size(); ++i) {
191  cerr << "\t" << i << " : ";
192  copy(fik[i].begin(), fik[i].end(), ostream_iterator<double>(cerr, ","));
193  cerr << "\n";
194  }
195  cerr << endl;
196  }
197 
198  vecDouble means(S, 0.0);
199  for (vector<vecDouble>::iterator j = fik.begin(); j != fik.end(); ++j)
200  for (uint i=0; i<S; ++i)
201  means[i] += (*j)[i];
202  for (uint i=0; i<S; ++i)
203  means[i] /= fik.size();
204 
205  if (debugging) {
206  cerr << "Debug> means=";
207  copy(means.begin(), means.end(), ostream_iterator<double>(cerr, ","));
208  cerr << endl;
209  }
210 
211  vecDouble vars(S, 0.0);
212 
213  for (vector<vecDouble>::iterator j = fik.begin(); j != fik.end(); ++j)
214  for (uint i=0; i<S; ++i) {
215  double d = (*j)[i] - means[i];
216  vars[i] += d*d;
217  }
218 
219  for (vecDouble::iterator i = vars.begin(); i != vars.end(); ++i)
220  *i /= fik.size();
221 
222  if (debugging) {
223  cerr << boost::format("Debug> chunks = %d\n") % fik.size();
224  cerr << "Debug> vars=";
225  copy(vars.begin(), vars.end(), ostream_iterator<double>(cerr, ","));
226  cerr << endl;
227  }
228 
229  return(vars);
230 }
231 
232 
233 double avgVariance(const vecDouble& vars) {
234  double mean = 0.0;
235 
236  for (vecDouble::const_iterator i = vars.begin(); i != vars.end(); ++i)
237  mean += *i;
238  mean /= vars.size();
239 
240  return(mean);
241 }
242 
243 
244 double sigma(const vecUint& assignments, const uint S, const uint n, const uint t) {
245 
246  vecDouble vars = binVariances(assignments, S, n, t);
247  double mean_vars = avgVariance(vars);
248 
249  double f = 1.0 / S;
250  double N = static_cast<double>(assignments.size()) / t;
251  double expected = (f*(1.0-f) / n) * (N-n)/(N-1.0);
252 
253  double val = mean_vars / expected;
254 
255  if (debugging)
256  cerr << boost::format("Debug> f=%f, N=%f, expected=%f, val=%f\n\n") % f % N % expected % val;
257  return(val);
258 }
259 
260 
261 DoubleMatrix statistics(const vector<DoubleMatrix>& V) {
262  uint m = V[0].rows();
263  uint n = V[0].cols();
264 
265  DoubleMatrix M(m, n);
266  for (uint k=0; k<V.size(); ++k)
267  for (ulong i=0; i<m*n; ++i)
268  M[i] += V[k][i];
269  for (long i=0; i<m*n; ++i)
270  M[i] /= V.size();
271 
272  DoubleMatrix S(m, n);
273  if (V.size() > 2) {
274  for (uint k=0; k<V.size(); ++k)
275  for (ulong i=0; i<m*n; ++i) {
276  double d = V[k][i] - M[i];
277  S[i] = d*d;
278  }
279  for (ulong i=0; i<m*n; ++i)
280  S[i] = sqrt(S[i] / (V.size() - 1));
281  }
282 
283  uint nn = (n-1) * 2 + 1;
284  DoubleMatrix R(m, nn);
285  for (uint j=0; j<m; ++j)
286  for (uint i=0; i<n; ++i)
287  if (i == 0)
288  R(j, i) = M(j, i);
289  else {
290  R(j, 2*(i-1)+1) = M(j, i);
291  R(j, 2*(i-1)+2) = S(j, i);
292  }
293 
294  return(R);
295 }
296 
297 
298 
299 int main(int argc, char *argv[]) {
300 
301  string hdr = invocationHeader(argc, argv);
302  opts::BasicOptions *bopts = new opts::BasicOptions(fullHelpMessage());
306  ToolOptions* topts = new ToolOptions;
307  opts::RequiredArguments* ropts = new opts::RequiredArguments("trange", "T-range");
308 
309  opts::AggregateOptions options;
310  options.add(bopts).add(sopts).add(tropts).add(copts).add(topts).add(ropts);
311 
312  if (!options.parse(argc, argv))
313  exit(-1);
314 
315  verbosity = bopts->verbosity;
316 
317  cout << "# " << hdr << endl;
318  cout << "# " << vectorAsStringWithCommas<string>(options.print()) << endl;
319 
320 
321  AtomicGroup model = tropts->model;
322  pTraj traj = tropts->trajectory;
323  AtomicGroup subset = selectAtoms(model, sopts->selection);
324 
325  string trange_spec = ropts->value("trange");
326  trange = parseRangeList<uint>(trange_spec);
327 
328  indices = assignTrajectoryFrames(traj, tropts->frame_index_spec, tropts->skip);
329 
330  vector<DoubleMatrix> results;
331  for (uint k = 0; k<nreps; ++k) {
332  if (verbosity > 0)
333  cerr << "Replica #" << k << endl;
334 
335  boost::tuple<vecGroup, vecUint> fids = pickFiducials(subset, traj, indices, frac);
336  vecGroup fiducials = boost::get<0>(fids);
337  vecUint assignments = assignStructures(subset, traj, indices, fiducials);
338  uint S = fiducials.size();
339 
340  DoubleMatrix M(trange.size(), nrange.size() + 1);
341  for (uint i=0; i<nrange.size(); ++i)
342  for (uint j=0; j<trange.size(); ++j)
343  M(j, i+1) = sigma(assignments, S, nrange[i], trange[j]);
344 
345  for (uint j=0; j<trange.size(); ++j)
346  M(j, 0) = trange[j];
347 
348  results.push_back(M);
349  }
350 
351  DoubleMatrix M = statistics(results);
352  writeAsciiMatrix(cout, M, "");
353 }
354 
Trajectory with either a –range or –skip.
Provides simple way to add command-line arguments (required options)
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
Provides a single LOOS selection (–selection)
STL namespace.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
AtomicGroup model
Model that describes the trajectory.
Options common to all tools (including –fullhelp)
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
std::string value(const std::string &s) const
Retrieve the value for an argument.
std::ostream & writeAsciiMatrix(std::ostream &os, const Math::Matrix< T, P, S > &M, const std::string &meta, const Math::Range &start, const Math::Range &end, const bool trans=false, F fmt=F())
Write a submatrix to a stream.
std::vector< std::string > print() const
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
Namespace for most things not already encapsulated within a class.
std::vector< uint > assignTrajectoryFrames(const pTraj &traj, const std::string &frame_index_spec, uint skip, uint stride)
Builds a list of trajectory indices (frame_index_spec supercedes skip)
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.