LOOS  v2.3.2
chist.cpp
1 // Histogram of a time series using an increasingly larger window
2 
3 
4 /*
5  This file is part of LOOS.
6 
7  LOOS (Lightweight Object-Oriented Structure library)
8  Copyright (c) 2014, Tod D. Romo and Alan Grossfield
9  Department of Biochemistry and Biophysics
10  School of Medicine & Dentistry, University of Rochester
11 
12  This package (LOOS) is free software: you can redistribute it and/or modify
13  it under the terms of the GNU General Public License as published by
14  the Free Software Foundation under version 3 of the License.
15 
16  This package is distributed in the hope that it will be useful,
17  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  GNU General Public License for more details.
20 
21  You should have received a copy of the GNU General Public License
22  along with this program. If not, see <http://www.gnu.org/licenses/>.
23 
24 */
25 
26 
27 #include <loos.hpp>
28 
29 using namespace std;
30 using namespace loos;
31 
32 
33 namespace opts = loos::OptionsFramework;
34 namespace po = loos::OptionsFramework::po;
35 
36 // @cond TOOLS_INTERNAL
37 
38 
39 string fullHelpMessage(void) {
40  string msg =
41  "\n"
42  "SYNOPSIS\n"
43  "\tCumulative or windowed histogram\n"
44  "\n"
45  "DESCRIPTION\n"
46  "\n"
47  "\tThis tool can calculate either a cumulative or a windowed histogram. The former is\n"
48  "made by calculating the histogram of the input data up to time t. This is written out\n"
49  "as a row of data suitable for plotting with gnuplot using the splot command. Each row\n"
50  "then corresponds to calculating the histogram with more points. The alternative, is\n"
51  "to only calculate the histogram over a window that is slid along the data.\n"
52  "\n"
53  "EXAMPLES\n"
54  "\n"
55  "\tchist torsion_data >torsion_hist.asc\n"
56  "This example uses the defaults, which assumes the column to histogram is column 1\n"
57  "(i.e. the second column, since colunm indices are 0-based), wich 20 bins, a stride\n"
58  "through the data of 10 (every 10th datapoint is used in the histogram), the range\n"
59  "of the histogram is automatically determined from the data, and the histogram type\n"
60  "is cumulative.\n"
61  "\n"
62  "\tchist --min -180 --max 180 --nbins 50 --stride 2 torsion_data >torsion_hist.asc\n"
63  "This example is similar to the previous, except that the histogram range is explicitly\n"
64  "set to -180 to 180, 50 bins are used, and every other datapoint is taken.\n"
65  "\n"
66  "\tchist --mode window --window 250 torsion_data.asc >torsion_hist.asc\n"
67  "This example calculates a windowed histogram using 250 datapoints per histogram,\n"
68  "each window is slid 10 points down (the default for --stride)\n\n";
69 
70  return(msg);
71 }
72 
73 
74 struct ToolOptions : public opts::OptionsPackage {
75 public:
76  enum ToolMode { CUMULATIVE, WINDOW };
77  static const unsigned char SETMIN_F = 0x01;
78  static const unsigned char SETMAX_F = 0x02;
79 
80  ToolOptions() : col(1),
81  nbins(20),
82  window(100),
83  stride(10),
84  mode_string("cume"),
85  minval(0),
86  maxval(0),
87  range_flags(0),
88  mode(CUMULATIVE)
89  {}
90 
91 
92 
93 
94  void addGeneric(po::options_description& o) {
95  o.add_options()
96  ("column,C", po::value<uint>(&col)->default_value(col), "Data column to use")
97  ("nbins,N", po::value<uint>(&nbins)->default_value(nbins), "Number of bins in histogram")
98  ("window", po::value<uint>(&window)->default_value(window), "Histogram window size")
99  ("stride", po::value<uint>(&stride)->default_value(stride), "Stride through trajectory for cumulative histogram mode, or how far to slide the window")
100  ("mode", po::value<string>(&mode_string)->default_value(mode_string), "Histogram mode: cume or window")
101  ("min", po::value<double>(), "Set min value for histogram range")
102  ("max", po::value<double>(), "Set max value for histogram range");
103  }
104 
105  bool postConditions(po::variables_map& map) {
106  if (mode_string == "cume")
107  mode = CUMULATIVE;
108  else if (mode_string == "window")
109  mode = WINDOW;
110  else {
111  cerr << "ERROR- '" << mode_string << "' is an unknown mode. Must be either 'cume' or 'window'\n";
112  return(false);
113  }
114 
115  if (map.count("min")) {
116  minval = map["min"].as<double>();
117  range_flags |= SETMIN_F;
118  }
119  if (map.count("max")) {
120  maxval = map["max"].as<double>();
121  range_flags |= SETMAX_F;
122  }
123  return(true);
124  }
125 
126 
127  string print() const {
128  ostringstream oss;
129 
130  oss << boost::format("col=%d,nbins=%d,window=%d,stride=%d,mode='%s'")
131  % col
132  % nbins
133  % window
134  % stride
135  % mode_string;
136  return(oss.str());
137  }
138 
139 
140 
141  uint col;
142  uint nbins;
143  uint window;
144  uint stride;
145  string mode_string;
146  double minval, maxval;
147  unsigned char range_flags;
148  ToolMode mode;
149 };
150 
151 // @endcond
152 
153 
154 vector<double> histogram(const vector<double>& data,
155  const uint start,
156  const uint end,
157  const uint nbins,
158  const double minval,
159  const double maxval) {
160 
161  vector<uint> hist(nbins, 0);
162  double delta = nbins / (maxval - minval);
163 
164  for (uint i=start; i<end; ++i) {
165  uint bin = (data[i] - minval) * delta;
166  if (bin < nbins)
167  hist[bin] += 1;
168  }
169 
170  vector<double> h(nbins);
171  uint nelems = end - start;
172  for (uint i=0; i<nbins; ++i)
173  h[i] = static_cast<double>(hist[i]) / nelems;
174 
175  return(h);
176 }
177 
178 
179 pair<double, double> findMinMax(const vector<double>& data) {
180  double max = numeric_limits<double>::min();
181  double min = numeric_limits<double>::max();
182 
183  for (vector<double>::const_iterator ci = data.begin(); ci != data.end(); ++ci) {
184  if (*ci < min)
185  min = *ci;
186  if (*ci > max)
187  max = *ci;
188  }
189 
190  return(pair<double, double>(min, max));
191 }
192 
193 
194 vector<double> readData(const string& fname, const uint col)
195 {
196  vector< vector<double> > table = readTable<double>(fname);
197  vector<double> d(table.size());
198 
199  for (uint i=0; i<table.size(); ++i)
200  d[i] = table[i][col];
201 
202  return(d);
203 }
204 
205 
206 
207 int main(int argc, char *argv[]) {
208 
209  string hdr = invocationHeader(argc, argv);
210 
211  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
212  ToolOptions* topts = new ToolOptions;
213  opts::RequiredArguments* ropts = new opts::RequiredArguments("datafile", "Name of file to histogram");
214 
215  opts::AggregateOptions options;
216  options.add(bopts).add(topts).add(ropts);
217  if (!options.parse(argc, argv))
218  exit(-1);
219 
220 
221  vector<double> data = readData(ropts->value("datafile"), topts->col);
222 
223  double minval, maxval;
224  pair<double,double> r = findMinMax(data);
225  minval = r.first;
226  maxval = r.second;
227 
228  if (topts->range_flags & ToolOptions::SETMIN_F)
229  minval = topts->minval;
230  if (topts->range_flags & ToolOptions::SETMAX_F)
231  maxval = topts->maxval;
232 
233  cout << "# " << hdr << endl;
234  cout << "# min = " << minval << endl << "# max = " << maxval << endl;
235 
236  double factor = (maxval - minval) / topts->nbins;
237 
238  if (topts->mode == ToolOptions::CUMULATIVE) {
239 
240  for (uint y = topts->stride; y<data.size(); y += topts->stride) {
241  vector<double> h = histogram(data, 0, y, topts->nbins, minval, maxval);
242  for (uint n=0; n<topts->nbins; ++n) {
243  double x = (n + 0.5) * factor + minval;
244  cout << x << '\t' << y << '\t' << h[n] << endl;
245  }
246  cout << endl;
247  }
248 
249  } else {
250 
251  for (uint y = 0; y<data.size() + topts->window; y += topts->stride) {
252  vector<double> h = histogram(data, y, y+topts->window, topts->nbins, minval, maxval);
253  for (uint n=0; n<topts->nbins; ++n) {
254  double x = (n + 0.5) * factor + minval;
255  cout << x << '\t' << y << '\t' << h[n] << endl;
256  }
257  cout << endl;
258 
259  }
260 
261  }
262 
263 }
Provides simple way to add command-line arguments (required options)
Namespace for encapsulating options processing.
STL namespace.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
std::string value(const std::string &s) const
Retrieve the value for an argument.
Namespace for most things not already encapsulated within a class.
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.