LOOS  v2.3.2
boot_bcom.cpp
1 
2 /*
3  Perform a bootstrap analysis of a trajectory
4 */
5 
6 
7 /*
8 
9  This file is part of LOOS.
10 
11  LOOS (Lightweight Object-Oriented Structure library)
12  Copyright (c) 2009, Tod D. Romo
13  Department of Biochemistry and Biophysics
14  School of Medicine & Dentistry, University of Rochester
15 
16  This package (LOOS) is free software: you can redistribute it and/or modify
17  it under the terms of the GNU General Public License as published by
18  the Free Software Foundation under version 3 of the License.
19 
20  This package is distributed in the hope that it will be useful,
21  but WITHOUT ANY WARRANTY; without even the implied warranty of
22  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23  GNU General Public License for more details.
24 
25  You should have received a copy of the GNU General Public License
26  along with this program. If not, see <http://www.gnu.org/licenses/>.
27 */
28 
29 
30 
31 #include <loos.hpp>
32 #include "ConvergenceOptions.hpp"
33 #include "bcomlib.hpp"
34 
35 using namespace std;
36 using namespace loos;
37 using namespace Convergence;
38 
39 namespace opts = loos::OptionsFramework;
40 namespace po = boost::program_options;
41 
42 
43 const bool debug = false;
44 
45 
46 typedef vector<AtomicGroup> vGroup;
47 typedef boost::tuple<RealMatrix, RealMatrix, RealMatrix> SVDResult;
48 
49 
50 // Configuration
51 
52 const bool length_normalize = true;
53 uint nsteps = 25;
54 
55 
56 // Global options
57 vector<uint> blocksizes;
58 bool local_average;
59 uint nreps;
60 string gold_standard_trajectory_name;
61 
62 
63 string fullHelpMessage() {
64 
65  string s =
66  "\n"
67  "SYNOPSIS\n"
68  "\n"
69  "Perform a bootstrapped block-overlap comparison to a full PCA\n"
70  "\n"
71  "DESCRIPTION\n"
72  "\n"
73  "This tool reports on how well a small \"block\" of a trajectory samples\n"
74  "the subspace explored by the full simulation using principal component\n"
75  "analysis. Similar to bcom it does this by computing the covariance\n"
76  "overlap between a full simulation PCA and the PCA of increasingly \n"
77  "longer \"blocks\". The DIFFERENCE is that the blocks in this version\n"
78  "are not contiguous, but rather pulled randomly from the trajectory.\n"
79  "\n"
80  "Where bcom shows how well a short subset of a trajectory samples the\n"
81  "conformational subspace present in the full simulation; boot_bcom shows\n"
82  "how well a given number of random frames sample the full subspace \n"
83  "explored in the simulation. This bootstrap analysis can then be\n"
84  "compared to the bcom result.\n"
85  "\n"
86  "See: Romo and Grossfield, J. Chem. Theor. Comput., 2011, 7, 2464-2472\n"
87  "\t Specifically Figs 4, 6, and 9 for comparison to bcom results.\n"
88  "\n"
89  "\n"
90  "The output is a tab separated stream:\n"
91  "n\tCoverlap\tVariance\tN_blocks\n"
92  "\n"
93  "\t n - current block size (nanoseconds)\n"
94  "\tCoverlap - covariance overlap between block and full PCA\n"
95  "\tVariance - variance in coverlap across all (N_blocks)\n"
96  "\tN_blocks - number of blocks of a given length\n"
97  "\t Note that this number is constant unlike\n"
98  "\t the output of bcom\n"
99  "\n"
100  "USAGE NOTES\n"
101  "The --skip command is NOT used by this tool.\n"
102  "\n"
103  //
104  "EXAMPLES\n"
105  "\n"
106  "boot_bcom -s 'name==\"CA\"' --blocks 25:25:500 model.pdb traj.dcd\n"
107  "\tCalculate the bootstrapped bcom of traj.dcd using a PCA of CA\n"
108  "\tatoms. This is done for \"blocks\" in a range of 25 ns to 500 ns,\n"
109  "\twith 25 ns intervals. However, the blocks are NOT contiguous, but\n"
110  "\tare pulled randomly from the trajectory.\n"
111  "\n"
112  "boot_bcom -s 'name==\"CA\"' --gold 'combined.dcd' model.pdb traj.dcd\n"
113  "\tHere we make two changes. First don't specify block sizes\n"
114  "\tThis tells bcom to figure it out on its own. In this case\n"
115  "\tthe tool will run through block sizes from ... to the full\n"
116  "\tlength trajectory. Next, we compare our block-averaged PCA\n"
117  "\tresults to a separate trajectory called combined.dcd instead\n"
118  "\tof the PCA of the full traj.dcd. As the name implies, \n"
119  "\tcombined.dcd may be a concatonation of several trajectories.\n"
120  "\t\tTo make such a concatoned trajectory see the tools\n"
121  "\t\tmerge-traj and subsetter.\n"
122  "\n"
123  "boot_bcom -s 'name==\"CA\"' --reps=10 --steps=10 model.pdb traj.dcd\n"
124  "\tHere, we specify the number of reps and number of steps.\n"
125  "\tThe number of reps is how many trails or each block size\n"
126  "\twill be used in the averaging. The number of steps says\n"
127  "\tthat a maximum of 10 different block lengths will be used\n"
128  "\tin the calculation.\n"
129  "SEE ALSO\n"
130  "\n"
131  " Packages/Convergence/bcom - \n"
132  "\tThis tool performs a similar analysis, but pulls random frames\n"
133  "\tfrom the trajectory rather than contiguous ones.\n"
134  "\t\n"
135  "* Visualization Notes *\n"
136  "\tThe output should be plotted in the format X:Y:SQRT(Y-error)\n"
137  "\twhere the colons separate the 1st 3 columns of the output.\n"
138  "\tThis puts stdev error bars on the result\n"
139  "\tIn GNUplot this would look like the following:\n"
140  "\t plot 'bcom_output' using 1:2:(sqrt(\\$3)) with errorlines\n"
141  "\n"
142  "\n";
143 
144  return(s);
145 }
146 
147 
148 // @cond TOOLS_INTERAL
149 class ToolOptions : public opts::OptionsPackage {
150 public:
151 
152  void addGeneric(po::options_description& o) {
153  o.add_options()
154  ("blocks", po::value<string>(&blocks_spec), "Block sizes (MATLAB style range)")
155  ("steps", po::value<uint>(&nsteps)->default_value(25), "Max number of blocks for auto-ranging")
156  ("reps", po::value<uint>(&nreps)->default_value(20), "Number of replicates for bootstrap")
157  ("local", po::value<bool>(&local_average)->default_value(true), "Use local avg in block PCA rather than global")
158  ("gold", po::value<string>(&gold_standard_trajectory_name)->default_value(""), "Use this trajectory for the gold-standard instead");
159 
160 
161  }
162 
163  bool postConditions(po::variables_map& vm) {
164  if (!blocks_spec.empty())
165  blocksizes = parseRangeList<uint>(blocks_spec);
166 
167  return(true);
168  }
169 
170  string print() const {
171  ostringstream oss;
172  oss << boost::format("blocks='%s', local=%d, reps=%d, gold='%s'")
173  % blocks_spec
174  % local_average
175  % nreps
176  % gold_standard_trajectory_name;
177  return(oss.str());
178  }
179 
180  string blocks_spec;
181 };
182 
183 // Convenience structure for aggregating results
184 struct Datum {
185  Datum(const double avg, const double var, const uint nblks) : avg_coverlap(avg),
186  var_coverlap(var),
187  nblocks(nblks) { }
188 
189 
190  double avg_coverlap;
191  double var_coverlap;
192  uint nblocks;
193 };
194 // @endcond
195 
196 
197 // Randomly pick frames
198 vector<uint> pickFrames(const uint nframes, const uint blocksize) {
199 
200  boost::uniform_int<uint> imap(0,nframes-1);
201  boost::variate_generator< base_generator_type&, boost::uniform_int<uint> > rng(rng_singleton(), imap);
202  vector<uint> picks;
203 
204  for (uint i=0; i<blocksize; ++i)
205  picks.push_back(rng());
206 
207  return(picks);
208 }
209 
210 
211 void dumpPicks(const vector<uint>& picks) {
212  cerr << "Picks:\n";
213  for (vector<uint>::const_iterator ci = picks.begin(); ci != picks.end(); ++ci)
214  cerr << "\t" << *ci << endl;
215 }
216 
217 
218 // Extract a subgroup of the vector<AtomicGroup> given the indices in picks...
219 vGroup subgroup(const vGroup& A, const vector<uint>& picks) {
220  vGroup B;
221 
222  for (vector<uint>::const_iterator ci = picks.begin(); ci != picks.end(); ++ci)
223  B.push_back(A[*ci]);
224 
225  return(B);
226 }
227 
228 
229 
230 // Breaks the ensemble up into blocks and computes the PCA for each
231 // block and the statistics for the covariance overlaps...
232 
233 template<class ExtractPolicy>
234 Datum blocker(const RealMatrix& Ua, const RealMatrix sa, const vGroup& ensemble, const uint blocksize, uint repeats, ExtractPolicy& policy) {
235 
236 
237 
238  TimeSeries<double> coverlaps;
239 
240  for (uint i=0; i<repeats; ++i) {
241  vector<uint> picks = pickFrames(ensemble.size(), blocksize);
242 
243  if (debug) {
244  cerr << "***Block " << blocksize << ", replica " << i << ", picks " << picks.size() << endl;
245  dumpPicks(picks);
246  }
247 
248  vGroup subset = subgroup(ensemble, picks);
249  boost::tuple<RealMatrix, RealMatrix> pca_result = pca(subset, policy);
250  RealMatrix s = boost::get<0>(pca_result);
251  RealMatrix U = boost::get<1>(pca_result);
252 
253  if (length_normalize)
254  for (uint j=0; j<s.rows(); ++j)
255  s[j] /= blocksize;
256 
257  coverlaps.push_back(covarianceOverlap(sa, Ua, s, U));
258  }
259 
260  return( Datum(coverlaps.average(), coverlaps.variance(), coverlaps.size()) );
261 
262 }
263 
264 
265 
266 
267 int main(int argc, char *argv[]) {
268  string hdr = invocationHeader(argc, argv);
269 
270  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
274  ToolOptions* topts = new ToolOptions;
275 
276  opts::AggregateOptions options;
277  options.add(bopts).add(sopts).add(tropts).add(copts).add(topts);
278  if (!options.parse(argc, argv))
279  exit(-1);
280 
281  cout << "# " << hdr << endl;
282  cout << "# " << vectorAsStringWithCommas<string>(options.print()) << endl;
283 
284  AtomicGroup model = tropts->model;
285  pTraj traj = tropts->trajectory;
286  if (tropts->skip)
287  cerr << "Warning: --skip option ignored\n";
288 
289 
290  if (blocksizes.empty()) {
291  uint n = traj->nframes();
292  uint half = n / 2;
293  uint step = half / nsteps;
294  if (step < 1)
295  step = 1;
296  cout << "# Auto block-sizes - " << step << ":" << step << ":" << half << endl;
297 
298  for (uint i = step; i <= half; i += step)
299  blocksizes.push_back(i);
300  }
301 
302 
303  AtomicGroup subset = selectAtoms(model, sopts->selection);
304 
305 
306  vector<AtomicGroup> ensemble;
307  readTrajectory(ensemble, subset, traj);
308 
309  // First, align the input trajectory...
310  boost::tuple<std::vector<XForm>, greal, int> ares = iterativeAlignment(ensemble);
311  cout << "# Alignment converged to " << boost::get<1>(ares) << " in " << boost::get<2>(ares) << " iterations\n";
312  cout << "# n\tCoverlap\tVariance\tN_blocks\n";
313 
314  // Handle the gold-standard, either using the whole input traj, or the alternate traj...
315  NoAlignPolicy policy;
316  RealMatrix Us;
317  RealMatrix UA;
318 
319  if (gold_standard_trajectory_name.empty()) {
320  AtomicGroup avg = averageStructure(ensemble);
321  policy = NoAlignPolicy(avg, local_average);
322  boost::tuple<RealMatrix, RealMatrix> res = pca(ensemble, policy);
323 
324  Us = boost::get<0>(res);
325  UA = boost::get<1>(res);
326 
327  if (length_normalize)
328  for (uint i=0; i<Us.rows(); ++i)
329  Us[i] /= traj->nframes();
330  } else {
331  // Must read in another trajectory, process it, and get the PCA
332  pTraj gold = createTrajectory(gold_standard_trajectory_name, model);
333  vector<AtomicGroup> gold_ensemble;
334  readTrajectory(gold_ensemble, subset, gold);
335  boost::tuple<vector<XForm>, greal, int> bres = iterativeAlignment(gold_ensemble);
336  cout << "# Gold Alignment converged to " << boost::get<1>(bres) << " in " << boost::get<2>(bres) << " iterations\n";
337 
338  AtomicGroup avg = averageStructure(gold_ensemble);
339  policy = NoAlignPolicy(avg, local_average);
340  boost::tuple<RealMatrix, RealMatrix> res = pca(gold_ensemble, policy);
341 
342  Us = boost::get<0>(res);
343  UA = boost::get<1>(res);
344 
345 
346  if (length_normalize)
347  for (uint i=0; i<Us.rows(); ++i)
348  Us[i] /= gold->nframes();
349  }
350 
351  // Now iterate over all requested block sizes...
352 
353  PercentProgress watcher;
355  slayer.attach(&watcher);
356  slayer.start();
357 
358 
359  for (vector<uint>::iterator i = blocksizes.begin(); i != blocksizes.end(); ++i) {
360  Datum result = blocker(UA, Us, ensemble, *i, nreps, policy);
361  cout << *i << "\t" << result.avg_coverlap << "\t" << result.var_coverlap << "\t" << result.nblocks << endl;
362  slayer.update();
363  }
364 
365  slayer.finish();
366 }
pTraj trajectory
The trajectory, primed by the –skip value (if specified)
The progress counter front-end.
def averageStructure(traj)
Python version of averageStructure (using loos.pyloos.Trajectory-like objects) The subset defined in ...
Definition: ensembles.py:14
Provide feedback by percent-complete.
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
def covarianceOverlap(ls, lU, rs, rU, nmodes=0)
Definition: subspace.py:36
Provides a single LOOS selection (–selection)
def iterativeAlignment(ensemble, threshold=1e-8, maxiter=1000)
Iteratively align a loos.pyloos.Trajectory object (or a list of AtomicGroups)
Definition: alignment.py:12
STL namespace.
AtomicGroup model
Model that describes the trajectory.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Basic trajectory with a –skip option.
std::vector< std::string > print() const
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
base_generator_type & rng_singleton(void)
Suite-wide random number generator singleton.
Time Series Class.
Definition: TimeSeries.hpp:51
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
T average(void) const
Return average of time series.
Definition: TimeSeries.hpp:363
Namespace for most things not already encapsulated within a class.
A progress counter that can estimate how much time is left.
Trigger whenever at least frac percent more iterations have happened.
T variance(void) const
Return variance of time series.
Definition: TimeSeries.hpp:373
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.