LOOS  v2.3.2
neff.cpp
1 /*
2  neff
3 
4  Based on Zhang, Bhatt, and Zuckerman; JCTC, DOI: 10.1021/ct1002384
5  and code provided by the Zuckerman Lab
6  (http://www.ccbb.pitt.edu/Faculty/zuckerman/software.html)
7 
8 
9 
10  Computes effective sample size given an assignment file and a state file
11 */
12 
13 
14 
15 /*
16 
17  This file is part of LOOS.
18 
19  LOOS (Lightweight Object-Oriented Structure library)
20  Copyright (c) 2010, Tod D. Romo
21  Department of Biochemistry and Biophysics
22  School of Medicine & Dentistry, University of Rochester
23 
24  This package (LOOS) is free software: you can redistribute it and/or modify
25  it under the terms of the GNU General Public License as published by
26  the Free Software Foundation under version 3 of the License.
27 
28  This package is distributed in the hope that it will be useful,
29  but WITHOUT ANY WARRANTY; without even the implied warranty of
30  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31  GNU General Public License for more details.
32 
33  You should have received a copy of the GNU General Public License
34  along with this program. If not, see <http://www.gnu.org/licenses/>.
35 */
36 
37 
38 
39 #include <loos.hpp>
40 #include <boost/format.hpp>
41 #include <limits>
42 
43 using namespace loos;
44 using namespace std;
45 
46 
47 typedef vector<uint> vUint;
48 typedef vector<vUint> vvUint;
49 
50 
51 const double stddev_tol = 1e-6;
52 
53 
54 vvUint readStates(const string& fname) {
55 
56  ifstream ifs(fname.c_str());
57  if (ifs.fail()) {
58  cerr << "Error- cannot open " << fname << endl;
59  exit(-1);
60  }
61 
62  vvUint states;
63 
64  int n;
65  string dummy;
66  getline(ifs, dummy);
67  ifs >> n;
68  if (n <= 0) {
69  cerr << boost::format("Error- bad number of states (%d).\n") % n;
70  exit(-10);
71  }
72 
73  while (n-- > 0) {
74  int m;
75  ifs >> m;
76  if (m <= 0) {
77  cerr << boost::format("Error- bad number of bins (%d).\n") % m;
78  exit(-10);
79  }
80  vUint list;
81  while (m-- > 0) {
82  uint s;
83  ifs >> s;
84  list.push_back(s);
85  }
86  states.push_back(list);
87  }
88 
89  return(states);
90 }
91 
92 
93 vector<uint> readAssignments(const string& fname) {
94  ifstream ifs(fname.c_str());
95  if (ifs.fail()) {
96  cerr << "Error- cannot open " << fname << endl;
97  exit(-1);
98  }
99 
100  return(readVector<uint>(ifs));
101 }
102 
103 
104 
105 vUint mapStates(const vvUint& states) {
106  uint maxbin = 0;
107  for (vvUint::const_iterator j = states.begin(); j != states.end(); ++j)
108  for (vUint::const_iterator i = j->begin(); i != j->end(); ++i)
109  if (*i > maxbin)
110  maxbin = *i;
111 
112  vUint binmap(maxbin, 0);
113  for (uint j=0; j<states.size(); ++j)
114  for (vUint::const_iterator i = states[j].begin(); i != states[j].end(); ++i)
115  binmap[*i] = j;
116 
117  return(binmap);
118 }
119 
120 
121 vector<double> rowAvg(const DoubleMatrix& M) {
122  vector<double> avg(M.rows(), 0.0);
123 
124  for (uint j=0; j<M.rows(); ++j) {
125  for (uint i=0; i<M.cols(); ++i)
126  avg[j] += M(j, i);
127  avg[j] /= M.cols();
128  }
129 
130  return(avg);
131 }
132 
133 
134 vector<double> rowStd(const DoubleMatrix& M, const vector<double>& means) {
135  vector<double> dev(M.rows(), 0.0);
136 
137  for (uint j=0; j<M.rows(); ++j) {
138  for (uint i=0; i<M.cols(); ++i) {
139  double d = M(j, i) - means[j];
140  dev[j] += d*d;
141  }
142  dev[j] = sqrt(dev[j] / (M.cols() - 1));
143  }
144 
145  return(dev);
146 }
147 
148 
149 string fullHelpMessage(void) {
150  string msg =
151  "\n"
152  "SYNOPSIS\n"
153  "\tDetermine the effective sample size of a simulation\n"
154  "\n"
155  "DESCRIPTION\n"
156  "\n"
157  "\tThis tool determines the effective sample size (Neff) as described in\n"
158  "Zhang, Batt, and Zuckerman, JCTC (2010) 6:3048-57.\n"
159  "\n"
160  "EXAMPLES\n"
161  "\n"
162  "\tneff assignments.asc zuckerman.states 0.1\n"
163  "This example determines the Neff given the structural histogram assigments\n"
164  "in assignments.asc, the clustering in zuckerman.states, and a bin-probability of 0.1\n"
165  "\n"
166  "NOTES\n"
167  "\n"
168  "\tThe partition_size should match the bin-probability used in\n"
169  "generating the structural histogram (i.e. ufidpick)\n"
170  "\n"
171  "SEE ALSO\n"
172  "\tufidpick, assign_frames, hierarchy, effsize.pl\n";
173 
174  return(msg);
175 }
176 
177 
178 
179 int main(int argc, char *argv[]) {
180 
181  if (argc != 4) {
182  cerr << "Usage- " << argv[0] << " assignments states partition_size\n";
183  cerr << fullHelpMessage();
184  exit(-1);
185  }
186 
187  string hdr = invocationHeader(argc, argv);
188 
189  int k = 1;
190  vector<uint> assignments = readAssignments(argv[k++]);
191 
192  vvUint states = readStates(argv[k++]);
193  uint N = states.size();
194 
195  uint partition_size = atoi(argv[k++]);
196 
197  uint nparts = floor(assignments.size() / partition_size);
198  DoubleMatrix M(N, nparts);
199  vUint binmap = mapStates(states);
200 
201  for (uint i=0; i<nparts; ++i) {
202  for (uint k=0; k<partition_size; ++k) {
203  uint col = i*partition_size + k;
204  uint bin = binmap[assignments[col]];
205  if (bin > N) {
206  cerr << "Error- internal error, bin=" << bin << ", N=" << N << endl;
207  exit(-20);
208  }
209  M(bin, i) += 1;
210  }
211  for (uint j=0; j<N; ++j)
212  M(j, i) /= partition_size;
213  }
214 
215  vector<double> means = rowAvg(M);
216  vector<double> devs = rowStd(M, means);
217 
218  double min_neff = numeric_limits<double>::max();
219  for (uint j=0; j<N; ++j) {
220  double neff = (1.0 - means[j]) * means[j] / (devs[j] * devs[j]);
221  cout << boost::format("Estimated effective sample size from state %d = %f\n") % j % neff;
222  if (neff < min_neff)
223  min_neff = neff;
224  }
225 
226  double total = min_neff * nparts;
227 
228  cout << boost::format("Segment effective sample size = %f\n") % min_neff;
229  cout << boost::format("Trajectory effective sample size = %f\n") % total;
230 }
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
STL namespace.
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Namespace for most things not already encapsulated within a class.