LOOS  v2.3.2
bcom.cpp
1 /*
2 
3  Perform a block-overlap in comparison to a full PCA
4 
5 */
6 
7 
8 
9 /*
10 
11  This file is part of LOOS.
12 
13  LOOS (Lightweight Object-Oriented Structure library)
14  Copyright (c) 2009, Tod D. Romo
15  Department of Biochemistry and Biophysics
16  School of Medicine & Dentistry, University of Rochester
17 
18  This package (LOOS) is free software: you can redistribute it and/or modify
19  it under the terms of the GNU General Public License as published by
20  the Free Software Foundation under version 3 of the License.
21 
22  This package is distributed in the hope that it will be useful,
23  but WITHOUT ANY WARRANTY; without even the implied warranty of
24  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25  GNU General Public License for more details.
26 
27  You should have received a copy of the GNU General Public License
28  along with this program. If not, see <http://www.gnu.org/licenses/>.
29 */
30 
31 
32 
33 #include <loos.hpp>
34 
35 #include "ConvergenceOptions.hpp"
36 #include "bcomlib.hpp"
37 
38 
39 using namespace std;
40 using namespace loos;
41 using namespace Convergence;
42 
43 namespace opts = loos::OptionsFramework;
44 namespace po = loos::OptionsFramework::po;
45 
46 
47 typedef vector<AtomicGroup> vGroup;
48 typedef boost::tuple<RealMatrix, RealMatrix, RealMatrix> SVDResult;
49 
50 
51 
52 
53 
54 // Configuration
55 
56 const bool length_normalize = true;
57 uint nsteps = 25;
58 
59 
60 
61 // Global options
62 bool local_average;
63 bool use_zscore;
64 uint ntries;
65 vector<uint> blocksizes;
66 uint seed;
67 string gold_standard_trajectory_name;
68 
69 string fullHelpMessage() {
70 
71  string s =
72  "\n"
73  "SYNOPSIS\n"
74  "\n"
75  "Perform a block-overlap in comparison to a full PCA\n"
76  "\n"
77  "DESCRIPTION\n"
78  "\n"
79  "This tool reports on how well a small \"block\" of a trajectory samples\n"
80  "the subspace explored by the full simulation using principal component\n"
81  "analysis. It does this by computing the covariance overlap between a\n"
82  "full simulation PCA and the PCA of increasingly longer contiguous \n"
83  "blocks of that trajectory.\n "
84  "\n"
85  "See: Romo and Grossfield, J. Chem. Theor. Comput., 2011, 7, 2464-2472\n"
86  "\n"
87  "The output is a tab separated stream:\n"
88  "n\tCoverlap\tVariance\tN_blocks\n"
89  "\n"
90  "\t n - current block size (nanoseconds)\n"
91  "\tCoverlap - covariance overlap between block and full PCA\n"
92  "\tVariance - variance in coverlap across all (N_blocks)\n"
93  "\tN_blocks - number of blocks of a given length\n"
94  "\n"
95  "USAGE NOTES\n"
96  "The --skip command is NOT used by this tool.\n"
97  "\n"
98  //
99  "EXAMPLES\n"
100  "bcom -s 'name==\"CA\"' --blocks 25:25:500 model.pdb traj.dcd > bcom_output\n"
101  "\tCalculate the bcom of traj.dcd using a PCA of CA atoms. This\n"
102  "\tis done for blocks in a range of 25 ns to 500 ns, with 25 ns\n"
103  "\tintervals. The result is written to the file bcom_output\n"
104  "\n"
105  "bcom -Z1 -s 'name==\"CA\"' --blocks 25:25:500 model.pdb traj.dcd > bcom_output\n"
106  "\tSame as the example above, but outputs the block-averaged \n"
107  "\tZ-score in the place of the block-averaged coverlap.\n"
108  "\n"
109  "bcom -s 'name==\"CA\"' --gold 'combined.dcd' model.pdb traj.dcd > bcom_output\n"
110  "\tHere we make two changes. First don't specify block sizes\n"
111  "\tThis tells bcom to figure it out on its own. In this case\n"
112  "\tthe tool will run a max block size equal to half the trajectory.\n"
113  "\tNext, we compare our block-averaged PCA results to a separate\n"
114  "\ttrajectory called combined.dcd instead of the PCA of the full\n"
115  "\ttraj.dcd. As the name implies, combined.dcd may be a concatonation\n"
116  "\tof several trajectories. \n"
117  "\t\tTo make such a concatoned trajectory see the tools\n"
118  "\t\tmerge-traj and subsetter.\n"
119  "\n"
120  "SEE ALSO\n"
121  "\n"
122  " Packages/Convergence/boot_bcom - \n"
123  "\tThis tool performs a similar analysis, but pulls random frames\n"
124  "\tfrom the trajectory rather than contiguous ones.\n"
125  "\t\n"
126  "* Visualization Notes *\n"
127  "\tThe output should be plotted in the format X:Y:SQRT(Y-error)\n"
128  "\twhere the colons separate the 1st 3 columns of the output.\n"
129  "\tThis puts stdev error bars on the result\n"
130  "\tIn GNUplot this would look like the following:\n"
131  "\t plot 'bcom_output' using 1:2:(sqrt(\\$3)) with errorlines\n"
132  "\n"
133  "\n";
134 
135  return(s);
136 }
137 
138 
139 // @cond TOOLS_INTERAL
140 class ToolOptions : public opts::OptionsPackage {
141 public:
142 
143  void addGeneric(po::options_description& o) {
144  o.add_options()
145  ("blocks", po::value<string>(&blocks_spec), "Block sizes (MATLAB style range)")
146  ("steps", po::value<uint>(&nsteps)->default_value(25), "Max number of blocks for auto-ranging")
147  ("zscore,Z", po::value<bool>(&use_zscore)->default_value(false), "Use Z-score rather than covariance overlap")
148  ("ntries,N", po::value<uint>(&ntries)->default_value(20), "Number of tries for Z-score")
149  ("local", po::value<bool>(&local_average)->default_value(true), "Use local avg in block PCA rather than global")
150  ("gold", po::value<string>(&gold_standard_trajectory_name)->default_value(""), "Use this trajectory for the gold-standard instead");
151 
152  }
153 
154  bool postConditions(po::variables_map& vm) {
155  if (!blocks_spec.empty())
156  blocksizes = parseRangeList<uint>(blocks_spec);
157 
158  return(true);
159  }
160 
161 
162  string print() const {
163  ostringstream oss;
164  oss << boost::format("blocks='%s', zscore=%d, ntries=%d, local=%d, gold='%s'")
165  % blocks_spec
166  % use_zscore
167  % ntries
168  % local_average
169  % gold_standard_trajectory_name;
170  return(oss.str());
171  }
172 
173  string blocks_spec;
174 };
175 
176 // Convenience structure for aggregating results
177 struct Datum {
178  Datum(const double avg, const double var, const uint nblks) : avg_coverlap(avg),
179  var_coverlap(var),
180  nblocks(nblks) { }
181 
182 
183  double avg_coverlap;
184  double var_coverlap;
185  uint nblocks;
186 };
187 // @endcond
188 
189 
190 
191 
192 
193 
194 vGroup subgroup(const vGroup& A, const uint a, const uint b) {
195  vGroup B;
196 
197  for (uint i=a; i<b; ++i)
198  B.push_back(A[i]);
199 
200  return(B);
201 }
202 
203 
204 // Breaks the ensemble up into blocks and computes the PCA for each
205 // block and the statistics for the covariance overlaps...
206 
207 template<class ExtractPolicy>
208 Datum blocker(const RealMatrix& Ua, const RealMatrix sa, vGroup& ensemble, const uint blocksize, ExtractPolicy& policy) {
209 
210 
211  TimeSeries<double> coverlaps;
212 
213  for (uint i=0; i<ensemble.size() - blocksize; i += blocksize) {
214  vGroup subset = subgroup(ensemble, i, i+blocksize);
215  boost::tuple<RealMatrix, RealMatrix> pca_result = pca(subset, policy);
216  RealMatrix s = boost::get<0>(pca_result);
217  RealMatrix U = boost::get<1>(pca_result);
218 
219  // Scale the singular values by block-size
220  if (length_normalize)
221  for (uint j=0; j<s.rows(); ++j)
222  s[j] /= blocksize;
223 
224  double val;
225  if (use_zscore) {
226  boost::tuple<double, double, double> result = zCovarianceOverlap(sa, Ua, s, U, ntries);
227  val = boost::get<0>(result);
228  } else
229  val = covarianceOverlap(sa, Ua, s, U);
230 
231  coverlaps.push_back(val);
232  }
233 
234  return( Datum(coverlaps.average(), coverlaps.variance(), coverlaps.size()) );
235 }
236 
237 
238 
239 
240 int main(int argc, char *argv[]) {
241  string hdr = invocationHeader(argc, argv);
242 
243  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
247  ToolOptions* topts = new ToolOptions;
248 
249  opts::AggregateOptions options;
250  options.add(bopts).add(sopts).add(tropts).add(copts).add(topts);
251  if (!options.parse(argc, argv))
252  exit(-1);
253 
254  cout << "# " << hdr << endl;
255  cout << "# " << vectorAsStringWithCommas<string>(options.print()) << endl;
256 
257  AtomicGroup model = tropts->model;
258  pTraj traj = tropts->trajectory;
259  if (tropts->skip)
260  cerr << "Warning: --skip option ignored\n";
261 
262 
263  if (blocksizes.empty()) {
264  uint n = traj->nframes();
265  uint half = n / 2;
266  uint step = half / nsteps;
267  if (step < 1)
268  step = 1;
269  cout << "# Auto block-sizes - " << step << ":" << step << ":" << half << endl;
270 
271  for (uint i = step; i <= half; i += step)
272  blocksizes.push_back(i);
273  }
274 
275 
276  AtomicGroup subset = selectAtoms(model, sopts->selection);
277 
278  vector<AtomicGroup> ensemble;
279  readTrajectory(ensemble, subset, traj);
280 
281  // First, align the input trajectory...
282  boost::tuple<std::vector<XForm>, greal, int> ares = iterativeAlignment(ensemble);
283  cout << "# Alignment converged to " << boost::get<1>(ares) << " in " << boost::get<2>(ares) << " iterations\n";
284  cout << "# n\t" << (use_zscore ? "Z-score" : "Coverlap") << "\tVariance\tN_blocks\n";
285 
286  // Handle the gold-standard, either using the whole input traj, or the alternate traj...
287  NoAlignPolicy policy;
288  RealMatrix Us;
289  RealMatrix UA;
290 
291  if (gold_standard_trajectory_name.empty()) {
292  AtomicGroup avg = averageStructure(ensemble);
293  policy = NoAlignPolicy(avg, local_average);
294  boost::tuple<RealMatrix, RealMatrix> res = pca(ensemble, policy);
295 
296  Us = boost::get<0>(res);
297  UA = boost::get<1>(res);
298 
299  if (length_normalize)
300  for (uint i=0; i<Us.rows(); ++i)
301  Us[i] /= traj->nframes();
302  } else {
303  // Must read in another trajectory, process it, and get the PCA
304  pTraj gold = createTrajectory(gold_standard_trajectory_name, model);
305  vector<AtomicGroup> gold_ensemble;
306  readTrajectory(gold_ensemble, subset, gold);
307  boost::tuple<vector<XForm>, greal, int> bres = iterativeAlignment(gold_ensemble);
308  cout << "# Gold Alignment converged to " << boost::get<1>(bres) << " in " << boost::get<2>(bres) << " iterations\n";
309 
310  AtomicGroup avg = averageStructure(gold_ensemble);
311  policy = NoAlignPolicy(avg, local_average);
312  boost::tuple<RealMatrix, RealMatrix> res = pca(gold_ensemble, policy);
313 
314  Us = boost::get<0>(res);
315  UA = boost::get<1>(res);
316 
317 
318  if (length_normalize)
319  for (uint i=0; i<Us.rows(); ++i)
320  Us[i] /= gold->nframes();
321  }
322 
323 
324 
325  // Now iterate over all requested block sizes
326 
327  // Provide user-feedback since this can be a slow computation
328  PercentProgress watcher;
330  slayer.attach(&watcher);
331  slayer.start();
332 
333  for (vector<uint>::iterator i = blocksizes.begin(); i != blocksizes.end(); ++i) {
334  Datum result = blocker(UA, Us, ensemble, *i, policy);
335  cout << *i << "\t" << result.avg_coverlap << "\t" << result.var_coverlap << "\t" << result.nblocks << endl;
336  slayer.update();
337  }
338 
339  slayer.finish();
340 
341 }
pTraj trajectory
The trajectory, primed by the –skip value (if specified)
The progress counter front-end.
def averageStructure(traj)
Python version of averageStructure (using loos.pyloos.Trajectory-like objects) The subset defined in ...
Definition: ensembles.py:14
Provide feedback by percent-complete.
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
def covarianceOverlap(ls, lU, rs, rU, nmodes=0)
Definition: subspace.py:36
Provides a single LOOS selection (–selection)
def iterativeAlignment(ensemble, threshold=1e-8, maxiter=1000)
Iteratively align a loos.pyloos.Trajectory object (or a list of AtomicGroups)
Definition: alignment.py:12
STL namespace.
AtomicGroup model
Model that describes the trajectory.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Basic trajectory with a –skip option.
std::vector< std::string > print() const
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
Time Series Class.
Definition: TimeSeries.hpp:51
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
T average(void) const
Return average of time series.
Definition: TimeSeries.hpp:363
Namespace for most things not already encapsulated within a class.
A progress counter that can estimate how much time is left.
Trigger whenever at least frac percent more iterations have happened.
T variance(void) const
Return variance of time series.
Definition: TimeSeries.hpp:373
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.