LOOS  v2.3.2
coscon.cpp
1 /*
2 
3  Cosine content for varying windows of a trajectory,
4  based on:
5  Hess, B. "Convergence of sampling in protein simulations."
6  Phys Rev E (2002) 65(3):031910
7 
8 */
9 
10 
11 
12 /*
13 
14  This file is part of LOOS.
15 
16  LOOS (Lightweight Object-Oriented Structure library)
17  Copyright (c) 2011, Tod D. Romo
18  Department of Biochemistry and Biophysics
19  School of Medicine & Dentistry, University of Rochester
20 
21  This package (LOOS) is free software: you can redistribute it and/or modify
22  it under the terms of the GNU General Public License as published by
23  the Free Software Foundation under version 3 of the License.
24 
25  This package is distributed in the hope that it will be useful,
26  but WITHOUT ANY WARRANTY; without even the implied warranty of
27  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  GNU General Public License for more details.
29 
30  You should have received a copy of the GNU General Public License
31  along with this program. If not, see <http://www.gnu.org/licenses/>.
32 */
33 
34 
35 
36 #include <loos.hpp>
37 
38 
39 #include "bcomlib.hpp"
40 
41 using namespace std;
42 using namespace loos;
43 using namespace Convergence;
44 
45 namespace opts = loos::OptionsFramework;
46 namespace po = loos::OptionsFramework::po;
47 
48 // @cond TOOLS_INTERNAL
49 
50 typedef vector<AtomicGroup> vGroup;
51 
52 
53 
54 // Convenience structure for aggregating results
55 struct Datum {
56  Datum(const double avg, const double var, const uint nblks) : avg_cosine(avg),
57  var_cosine(var),
58  nblocks(nblks) { }
59 
60 
61  double avg_cosine;
62  double var_cosine;
63  uint nblocks;
64 };
65 
66 
67 
68 string fullHelpMessage() {
69 
70  string s =
71  "\n"
72  "SYNOPSIS\n"
73  "\n"
74  "Calculate the cosine constent of a simulation\n"
75  "\n"
76  "DESCRIPTION\n"
77  "\n"
78  "\n"
79  "Calculate the cosine content in the PCA of a simulation.\n"
80  "This is done in a block averaged manner. Smaller contiguous\n"
81  "blocks of the trajectory are used for the calcualation. This\n"
82  "tool assesses convergence of a trajectory by calulating how\n"
83  "\"cosine-like\" the principal components appear. The technique\n"
84  "was described by Hess who noted that random diffusion showed a \n"
85  "close resemblance to a cosine in:\n"
86  "\n"
87  "Hess, B. \"Convergence of sampling in protein \n"
88  " simulations.\" Phys Rev E (2002) 65(3):031910\n"
89  "\n"
90  "As the blocks increase in size more of the trajectory is used\n"
91  "in this calulation. The output is formatted in 4 columns:\n"
92  "\n"
93  "\t n - current block size (nanoseconds)\n"
94  "\tCos cont - avg cosine content in a block\n"
95  "\tVariance - variance across all (N_blocks)\n"
96  "\tN_blocks - number of blocks of a given length\n"
97  "\n"
98  "\n"
99  //
100  "EXAMPLES\n"
101  "\n"
102  "coscon -s 'name==\"CA\"' model.pdb traj.dcd\n"
103  "\tCalculate the block averaged cosine content in the\n"
104  "\ttrajectory using only CA's in the calculation.\n"
105  "\n"
106  "coscon --pc=1 -s 'name==\"CA\"' model.pdb traj.dcd\n"
107  "\tSame as above, but only compute the cosine content\n"
108  "\tof the first principal component.\n"
109  "\n"
110  "coscon --block 100:100:500 -s 'name==\"CA\"' model.pdb traj.dcd\n"
111  "\tHere we specify the range over which the calculation\n"
112  "\tis performed. The smallest block size used is 100 frames.\n"
113  "\tThe block size will then increase by 100 frames upto a max\n"
114  "\tblock size of 500 frames (ie...100,200,300,400,500).\n"
115  "\n"
116  "\n"
117  "SEE ALSO\n"
118  "\n"
119  "Packages/Convergence/qcoscon - \n"
120  "\tPerform a quick cos content analysis on a simulation.\n"
121  "\tSimilar to coscon, but only performs the analysis on\n"
122  "\tthe full length simulation.\n"
123  "\n"
124  "Packages/Convergence/rsv-coscon - \n"
125  "\tCalculate the cos content of the RSVs from a simulation\n"
126  "\tPCA.\n"
127  "\t\n"
128  "\n"
129  "Tools/svd - \n"
130  "\tCompute the principal components via the SVD.\n"
131  "\tThis results in several matrix files including\n"
132  "\tthe RSVs used as input to the current tool. \n"
133  "\tThe file [prefix]_V.asc contains the RSV matrix.\n"
134  "\n"
135  "\n";
136 
137 
138  return(s);
139 
140 }
141 
142 // @endcond
143 
144 // Configuration
145 
146 const uint nsteps = 50;
147 
148 
149 
150 // Global options
151 bool local_average;
152 vector<uint> blocksizes;
153 string model_name, traj_name, selection;
154 uint principal_component;
155 
156 
157 // @cond TOOLS_INTERAL
158 class ToolOptions : public opts::OptionsPackage {
159 public:
160 
161  void addGeneric(po::options_description& o) {
162  o.add_options()
163  ("pc", po::value<uint>(&principal_component)->default_value(0), "Which principal component to use")
164  ("blocks", po::value<string>(&blocks_spec), "Block sizes (MATLAB style range)")
165  ("local", po::value<bool>(&local_average)->default_value(true), "Use local avg in block PCA rather than global");
166 
167  }
168 
169  bool postConditions(po::variables_map& vm) {
170  if (!blocks_spec.empty())
171  blocksizes = parseRangeList<uint>(blocks_spec);
172  for (vector<uint>::iterator i = blocksizes.begin(); i != blocksizes.end(); ++i)
173  if (*i == 0) {
174  cerr << "Error- a block-size must be > 0\n";
175  return(false);
176  }
177 
178  return(true);
179  }
180 
181  string print() const {
182  ostringstream oss;
183  oss << boost::format("blocks='%s', local=%d, pc=%d")
184  % blocks_spec
185  % local_average
186  % principal_component;
187  return(oss.str());
188  }
189 
190  string blocks_spec;
191 };
192 // @endcond
193 
194 
195 
196 
197 
198 
199 
200 vGroup subgroup(const vGroup& A, const uint a, const uint b) {
201  vGroup B;
202 
203  for (uint i=a; i<b; ++i)
204  B.push_back(A[i]);
205 
206  return(B);
207 }
208 
209 
210 
211 // Breaks the ensemble up into blocks and computes the RSV for each
212 // block and the statistics for the cosine content...
213 
214 template<class ExtractPolicy>
215 Datum blocker(const uint pc, vGroup& ensemble, const uint blocksize, ExtractPolicy& policy) {
216 
217 
218  TimeSeries<double> cosines;
219 
220  for (uint i=0; i<ensemble.size() - blocksize; i += blocksize) {
221  vGroup subset = subgroup(ensemble, i, i+blocksize);
222  RealMatrix V = rsv(subset, policy);
223 
224  double val = cosineContent(V, pc);
225  cosines.push_back(val);
226  }
227 
228  return( Datum(cosines.average(), cosines.variance(), cosines.size()) );
229 }
230 
231 
232 
233 
234 int main(int argc, char *argv[]) {
235  string hdr = invocationHeader(argc, argv);
236 
237  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
240  ToolOptions* topts = new ToolOptions;
241 
242  opts::AggregateOptions options;
243  options.add(bopts).add(sopts).add(tropts).add(topts);
244  if (!options.parse(argc, argv))
245  exit(-1);
246 
247  cout << "# " << hdr << endl;
248  cout << "# " << vectorAsStringWithCommas<string>(options.print()) << endl;
249 
250  AtomicGroup model = tropts->model;
251  pTraj traj = tropts->trajectory;
252 
253 
254  if (blocksizes.empty()) {
255  uint n = traj->nframes();
256  uint step = n / nsteps;
257  if (step < 1)
258  step = 1;
259  cout << "# Auto block-sizes - " << step << ":" << step << ":" << n-1 << endl;
260 
261  for (uint i = step; i < n; i += step)
262  blocksizes.push_back(i);
263  }
264 
265 
266  AtomicGroup subset = selectAtoms(model, sopts->selection);
267 
268 
269  vector<AtomicGroup> ensemble;
270  readTrajectory(ensemble, subset, traj);
271 
272  // First, read in and align trajectory
273  boost::tuple<std::vector<XForm>, greal, int> ares = iterativeAlignment(ensemble);
274  AtomicGroup avg = averageStructure(ensemble);
275  NoAlignPolicy policy(avg, local_average);
276 
277 
278  // Now iterate over all requested block sizes
279 
280  // Provide user-feedback since this can be a slow computation
281  PercentProgress watcher;
283  slayer.attach(&watcher);
284  slayer.start();
285 
286  for (vector<uint>::iterator i = blocksizes.begin(); i != blocksizes.end(); ++i) {
287  Datum result = blocker(principal_component, ensemble, *i, policy);
288  cout << *i << "\t" << result.avg_cosine << "\t" << result.var_cosine << "\t" << result.nblocks << endl;
289  slayer.update();
290  }
291 
292  slayer.finish();
293 
294 }
pTraj trajectory
The trajectory, primed by the –skip value (if specified)
The progress counter front-end.
def averageStructure(traj)
Python version of averageStructure (using loos.pyloos.Trajectory-like objects) The subset defined in ...
Definition: ensembles.py:14
Provide feedback by percent-complete.
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
Provides a single LOOS selection (–selection)
def iterativeAlignment(ensemble, threshold=1e-8, maxiter=1000)
Iteratively align a loos.pyloos.Trajectory object (or a list of AtomicGroups)
Definition: alignment.py:12
STL namespace.
AtomicGroup model
Model that describes the trajectory.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Basic trajectory with a –skip option.
std::vector< std::string > print() const
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
Time Series Class.
Definition: TimeSeries.hpp:51
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
T average(void) const
Return average of time series.
Definition: TimeSeries.hpp:363
Namespace for most things not already encapsulated within a class.
A progress counter that can estimate how much time is left.
Trigger whenever at least frac percent more iterations have happened.
T variance(void) const
Return variance of time series.
Definition: TimeSeries.hpp:373
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.