LOOS  v2.3.2
block_avgconv.cpp
1 /*
2  block_avgconv.cpp
3 
4 
5  Convergence of average via block averaging
6 
7  Usage- block_avgconv model traj selection range
8 */
9 
10 
11 
12 /*
13 
14  This file is part of LOOS.
15 
16  LOOS (Lightweight Object-Oriented Structure library)
17  Copyright (c) 2010, Tod D. Romo
18  Department of Biochemistry and Biophysics
19  School of Medicine & Dentistry, University of Rochester
20 
21  This package (LOOS) is free software: you can redistribute it and/or modify
22  it under the terms of the GNU General Public License as published by
23  the Free Software Foundation under version 3 of the License.
24 
25  This package is distributed in the hope that it will be useful,
26  but WITHOUT ANY WARRANTY; without even the implied warranty of
27  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28  GNU General Public License for more details.
29 
30  You should have received a copy of the GNU General Public License
31  along with this program. If not, see <http://www.gnu.org/licenses/>.
32 */
33 
34 
35 
36 
37 
38 #include <loos.hpp>
39 #include <boost/format.hpp>
40 
41 using namespace loos;
42 using namespace std;
43 
44 
45 const uint default_starting_number_of_blocks = 500;
46 const double default_fraction_of_trajectory = 0.25;
47 
48 
49 AtomicGroup averageSelectedSubset(const vector<AtomicGroup>& ensemble, const vector<uint>& indices) {
50  AtomicGroup avg = ensemble[0].copy();
51  for (AtomicGroup::iterator i = avg.begin(); i != avg.end(); ++i)
52  (*i)->coords() = GCoord(0,0,0);
53 
54  uint n = avg.size();
55  for (vector<uint>::const_iterator j = indices.begin(); j != indices.end(); ++j)
56  for (uint i=0; i<n; ++i)
57  avg[i]->coords() += ensemble[*j][i]->coords();
58 
59  for (AtomicGroup::iterator i = avg.begin(); i != avg.end(); ++i)
60  (*i)->coords() /= indices.size();
61 
62  return(avg);
63 }
64 
65 
66 
67 string fullHelpMessage(void) {
68  string msg =
69  "\n"
70  "SYNOPSIS\n"
71  "\tBlock-average approach to average structure convergence\n"
72  "\n"
73  "DESCRIPTION\n"
74  "\n"
75  "\tThe trajectory is divided into n-blocks. The average structure for each\n"
76  "block is computed. The RMSD between all pairs of blocks is calculated and the\n"
77  "average, variance, and standard error are written out. The block size is then\n"
78  "increased and the process repeated.\n"
79  "\tThe trajectory is first optimally aligned using an iterative method described in\n"
80  "in Grossfield, et al. Proteins 67, 31–40 (2007) unless the 'do not align' flag is given.\n"
81  "\n"
82  "EXAMPLES\n"
83  "\n"
84  "\tblock_avgconv sim.psf traj.dcd '!hydrogen' >blocks.asc\n"
85  "This example uses all non-hydrogen atoms with automatically determined block sizes.\n"
86  "The trajectory is optimally aligned.\n"
87  "\n"
88  "\tblock_avgconv sim.psf traj.dcd 'name == \"CA\" 10:10:1000 >blocks.asc\n"
89  "This example uses all alpha-carbons and block sizes 10, 20, 30, ..., 1000.\n"
90  "\n"
91  "\tblock_avgconv sim.psf traj.dcd 'segid == \"PE1\"' 25:25:500 1 >blocks.asc\n"
92  "This example does NOT optimally align the trajectory first. All atoms from segment\n"
93  "'PE1' are used. Block sizes are 25, 50, 75, ..., 500\n"
94  "\n"
95  "SEE ALSO\n"
96  "\tavgconv, block_average\n";
97 
98  return(msg);
99 }
100 
101 
102 int main(int argc, char *argv[]) {
103 
104  if (argc < 4 || argc > 6) {
105  cerr << "Usage- block_avgconv model traj sel [range [1 = do not align trajectory]]\n";
106  cerr << fullHelpMessage();
107  exit(-1);
108  }
109 
110  string hdr = invocationHeader(argc, argv);
111 
112  int k = 1;
113  AtomicGroup model = createSystem(argv[k++]);
114  pTraj traj = createTrajectory(argv[k++], model);
115  AtomicGroup subset = selectAtoms(model, argv[k++]);
116 
117  vector<uint> sizes;
118  bool do_align = true;
119 
120  if (argc == k) {
121  uint step = traj->nframes() / default_starting_number_of_blocks;
122  for (uint i=step; i<traj->nframes() * default_fraction_of_trajectory; i += step)
123  sizes.push_back(i);
124  } else {
125  sizes = parseRangeList<uint>(argv[k++]);
126  if (argc == k+1)
127  do_align = (argv[k][0] != '1');
128  }
129 
130 
131  cout << "# " << hdr << endl;
132  cout << "# n\tavg\tvar\tblocks\tstderr\n";
133 
134  vector<AtomicGroup> ensemble;
135  cerr << "Reading trajectory...\n";
136  readTrajectory(ensemble, subset, traj);
137 
138  if (do_align) {
139  cerr << "Aligning trajectory...\n";
140  boost::tuple<vector<XForm>, greal, int> result = iterativeAlignment(ensemble);
141  } else
142  cerr << "Trajectory is already aligned!\n";
143 
144  cerr << "Processing- ";
145  for (uint block = 0; block < sizes.size(); ++block) {
146  if (block % 50)
147  cerr << ".";
148 
149  uint blocksize = sizes[block];
150 
151  vector<AtomicGroup> averages;
152  for (uint i=0; i<ensemble.size() - blocksize; i += blocksize) {
153 
154  vector<uint> indices(blocksize);
155  for (uint j=0; j<blocksize; ++j)
156  indices[j] = i+j;
157 
158  averages.push_back(averageSelectedSubset(ensemble, indices));
159  }
160 
161  TimeSeries<double> rmsds;
162  for (uint j=0; j<averages.size() - 1; ++j)
163  for (uint i=j+1; i<averages.size(); ++i) {
164  AtomicGroup left = averages[j];
165  AtomicGroup right = averages[i];
166  left.alignOnto(right);
167  rmsds.push_back(left.rmsd(right));
168  }
169 
170 
171  double v = rmsds.variance();
172  uint n = averages.size();
173  cout << boost::format("%d\t%f\t%f\t%d\t%f\n") % blocksize % rmsds.average() % v % n % sqrt(v/n);
174  }
175 
176  cerr << "\nDone!\n";
177 }
def iterativeAlignment(ensemble, threshold=1e-8, maxiter=1000)
Iteratively align a loos.pyloos.Trajectory object (or a list of AtomicGroups)
Definition: alignment.py:12
STL namespace.
greal rmsd(const AtomicGroup &)
Compute the RMSD between two groups.
AtomicGroup createSystem(const std::string &filename)
Factory function for reading in structure files.
Definition: sfactories.cpp:115
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
GMatrix alignOnto(const AtomicGroup &)
Superimposes the current group onto the passed group.
Definition: AG_linalg.cpp:196
Time Series Class.
Definition: TimeSeries.hpp:51
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
T average(void) const
Return average of time series.
Definition: TimeSeries.hpp:363
Namespace for most things not already encapsulated within a class.
T variance(void) const
Return variance of time series.
Definition: TimeSeries.hpp:373