LOOS  v2.3.2
water-hist.cpp
1 /*
2  water-hist.cpp
3 
4 */
5 
6 /*
7  This file is part of LOOS.
8 
9  LOOS (Lightweight Object-Oriented Structure library)
10  Copyright (c) 2009, Tod D. Romo, Alan Grossfield
11  Department of Biochemistry and Biophysics
12  School of Medicine & Dentistry, University of Rochester
13 
14  This package (LOOS) is free software: you can redistribute it and/or modify
15  it under the terms of the GNU General Public License as published by
16  the Free Software Foundation under version 3 of the License.
17 
18  This package is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  GNU General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with this program. If not, see <http://www.gnu.org/licenses/>.
25 */
26 
27 
28 
29 #include <boost/format.hpp>
30 #include <boost/program_options.hpp>
31 
32 #include <loos.hpp>
33 #include <DensityGrid.hpp>
34 #include <water-hist-lib.hpp>
35 #include <DensityTools.hpp>
36 #include <DensityOptions.hpp>
37 
38 using namespace std;
39 using namespace loos;
40 using namespace loos::DensityTools;
41 
42 
43 
44 // @cond TOOLS_INTERAL
45 
46 string fullHelpMessage(void) {
47  string msg =
48  "\n"
49  "SYNOPSIS\n"
50  "Generate a 3D histogram of internal waters for a trajectory\n"
51  "\n"
52  "DESCRIPTION\n"
53  "\n"
54  "\twater-hist generates a 3-dimensional histogram for a given selection\n"
55  "over the coarse of a trajectory. This tool was originally designed\n"
56  "for tracking water internal to a membrane protein, however any group\n"
57  "of atoms can be substituted for \"water\" (e.g. ligand) and \"protein\".\n"
58  "\n"
59  "The tool first requires that you specify what atoms will be integrated.\n"
60  "This is the \"water\" selection. Next, you need to define what is considered\n"
61  "\"internal\" to the protein, filtering which waters will be considered.\n"
62  "This is typically done by defining a \"protein\" selection and a mode for\n"
63  "filtering: axis, box, radius, or grid. The axis mode takes the first\n"
64  "principal component for the protein and picks all waters that are within\n"
65  "a given radius of that axis. The box mode uses the bounding box for the\n"
66  "protein selection (i.e. any water that is within this box). The radius\n"
67  "mode picks waters that are within a given radius of any protein atom.\n"
68  "Finally, the grid mode takes a grid mask and picks any waters that are\n"
69  "within the masked gridpoints.\n"
70  "\n"
71  "The resultant density histogram can be scaled by an estimate of the\n"
72  "bulk solvent density by using the --scale option along with either\n"
73  "--bulk or --brange. The former uses the average density for any Z-plane\n"
74  "that is sufficiently far from 0 (i.e. |Z| >= k) whereas the latter explicitly\n"
75  "takes a Z-range to average over. Note that you must explicitly rescale\n"
76  "the density by using the --scale=1 option, otherwise the estimated bulk\n"
77  "solvent density will be printed only.\n"
78  "\n"
79  "For visualization purposes, it you are using a membrane-protein system\n"
80  "and the axis mode for filtering out waters, you may end up with a plug\n"
81  "of bulk water at the protein/solvent interface. To make it more clear\n"
82  "that there is a layer of bulk solvent, use the --bulked option. This\n"
83  "adds water back into the histogram based on the Z-coordinate and the\n"
84  "bounding box of the protein (with an optional padding)\n"
85  "\n"
86  "Water-hist treats each atom as a single grid point (based on nearest)\n"
87  "grid-coordinate. This means that even though a water should cover\n"
88  "multiple grid-points based on the grid resolution and water radius,\n"
89  "only one grid point will be used. For visualization then, the\n"
90  "grid should be smoothed out. This can be done via the \"gridgauss\"\n"
91  "tool which convolves the grid with a gaussian kernel. Finally,\n"
92  "the grid needs to be converted to an X-Plor electron density format\n"
93  "using \"grid2xplor\". This can then be read into PyMol, VMD, or other\n"
94  "visualization tools.\n"
95  "\n"
96  "These tools can be chained together via Unix pipes,\n"
97  " water-hist model.pdb model.dcd | gridgauss 10 3 1 1 | grid2xplor >water.xplor\n"
98  "\n"
99  "For more details about available options, see the help information for the\n"
100  "respective tool.\n"
101  "\n"
102  "\n"
103  "EXAMPLES\n"
104  "\n"
105  "\twater-hist --radius=15 --bulk=25 --scale=1 b2ar.pdb b2ar.dcd | gridgauss 10 3 1 1 |\\\n"
106  "\t grid2xplor >b2ar_water.xplor\n"
107  "Internal water for a GPCR with a bulk estimate, converted to Xplor EDM.\n"
108  "\n"
109  "\twater-hist --bulk=25 --scale=1 --bulked=20,-25:30 b2ar.pdb b2ar.dcd |\\\n"
110  "\t gridgauss 4 2 | grid2xplor >b2ar_water.xplor\n"
111  "Internal water for a GPCR with the bulk solvent layer added back, converted to Xplor EDM.\n"
112  "The bulk water is for any water with Z < -25 or Z > 30 and within the bounding box\n"
113  "of the protein with a 20 angstrom pad.\n"
114  "\n"
115  "\twater-hist --radius=20 --prot='resid > 10 && resid < 25' --mode=radius |\\\n"
116  "\t gridgauss 4 2 | grid2xplor >binding.xplor\n"
117  "All water within a given radius of a binding pocket, converted to Xplor EDM.\n"
118  "\n"
119  "\twater-hist --gridres=0.5 b2ar.pdb b2ar.dcd | gridgauss 10 3 1 1 |\\\n"
120  "\t grid2xplor >b2ar_water.xplor\n"
121  "Higher resolution grid, converted to Xplor EDM.\n"
122  "\n"
123  "\twater-hist --prot='resname == \"PEGL\"' --water='resname === \"PEGL\"'\\\n"
124  "\t --mode=box membrane.pdb membrane.dcd >membrane.grid\n"
125  "All lipid head-group density, written as LOOS grid.\n"
126  "\n"
127  "\twater-hist --water='resname == \"CAU\"' --mode=box b2ar.pdb b2ar.dcd >b2ar.grid\n"
128  "Ligand (carazolol) Density, written as LOOS grid.\n"
129  "\n"
130  "\twater-hist b2ar.pdb b2ar.dcd | gridgauss 10 3 1 1 | gridautoscale | grid2xplor >b2ar.xplor\n"
131  "Smoothed grid, automatically scaled so that a density of 1.0 is bulk water.\n"
132  "This is specific to membrane systems where the membrane normal is parallel to\n"
133  "the Z-axis.\n"
134  "\n"
135  "NOTES\n"
136  "\n"
137  "When using the --bulked option, the extents of the grid are adjusted to be\n"
138  "the bounding box of the protein plus the bulked pad PLUS the global pad.\n"
139  "Be careful not to make the volume too large.\n"
140  "\n"
141  "SEE ALSO\n"
142  "\tgridgauss, grid2xplor, gridstat, gridslice, blobid, pick_blob, gridautoscale\n";
143 
144  return(msg);
145 }
146 
147 class WaterHistogramOptions : public opts::OptionsPackage {
148 public:
149  WaterHistogramOptions() :
150  grid_resolution(1.0),
151  count_empty_voxels(false),
152  rescale_density(false),
153  bulk_zclip(0.0),
154  bulk_zmin(0.0), bulk_zmax(0.0)
155  { }
156 
157  void addGeneric(po::options_description& opts) {
158  opts.add_options()
159  ("gridres", po::value<double>(&grid_resolution)->default_value(grid_resolution), "Grid resolution")
160  ("empty", po::value<bool>(&count_empty_voxels)->default_value(count_empty_voxels), "Count empty voxels in bulk density estimate")
161  ("bulk", po::value<double>(&bulk_zclip)->default_value(bulk_zclip), "Bulk water is defined as |Z| >= k")
162  ("brange", po::value<string>(), "Bulk water (--brange a,b) is defined as a <= z < b")
163  ("scale", po::value<bool>(&rescale_density)->default_value(rescale_density), "Scale density by bulk estimate")
164  ("clamp", po::value<string>(), "Clamp the bounding box [(x,y,z),(x,y,z)]");
165  }
166 
167 
168  bool postConditions(po::variables_map& map) {
169  if (map.count("clamp")) {
170  GCoord clamp_min, clamp_max;
171  string s = map["clamp"].as<string>();
172  stringstream ss(s);
173  if (!(ss >> clamp_min)) {
174  cerr << "Error- cannot parse lower bounds for box-clamp\n";
175  return(false);
176  }
177  clamped_box.push_back(clamp_min);
178 
179  int c = ss.get();
180  if (c != ',') {
181  cerr << "Error- cannot parse box-clamp\n";
182  return(false);
183  }
184  if (!(ss >> clamp_max)) {
185  cerr << "Error- cannot parse upper bounds for box-clamp\n";
186  return(false);
187  }
188  clamped_box.push_back(clamp_max);
189  cerr << "Warning- clamping grid to " << clamp_min << " -> " << clamp_max << endl;
190  }
191 
192  if (map.count("brange")) {
193  string s = map["brange"].as<string>();
194  istringstream iss(s);
195  if (!(iss >> bulk_zmin)) {
196  cerr << "Error- brange format is low,high\n";
197  return(false);
198  }
199  char c;
200  iss >> c;
201  if (c != ',') {
202  cerr << "Error- brange format is low,high\n";
203  return(false);
204  }
205  if (!(iss >> bulk_zmax)) {
206  cerr << "Error- brange format is low,high\n";
207  return(false);
208  }
209 
210  }
211  return(true);
212  }
213 
214  string print() const {
215  ostringstream oss;
216  oss << boost::format("gridres=%f, empty=%d, bulk_zclip=%d, scale=%d, bulk_zmin=%d, bulk_zmax=%d")
217  % grid_resolution
218  % count_empty_voxels
219  % bulk_zclip
220  % rescale_density
221  % bulk_zmin
222  % bulk_zmax;
223 
224  if (!clamped_box.empty())
225  oss << boost::format(", clamp=[%s,%s]")
226  % clamped_box[0]
227  % clamped_box[1];
228 
229  return(oss.str());
230  }
231 
232 public:
233  double grid_resolution;
234  bool count_empty_voxels;
235  bool rescale_density;
236  double bulk_zclip;
237  double bulk_zmin, bulk_zmax;
238  vector<GCoord> clamped_box;
239 };
240 
241 // @endcond
242 
243 
244 
245 
246 
247 
248 
249 int main(int argc, char *argv[]) {
250  string hdr = invocationHeader(argc, argv);
251 
252  // Build up possible options
253  opts::BasicOptions* basopts = new opts::BasicOptions(fullHelpMessage());
255  opts::BasicWater* watopts = new opts::BasicWater;
256  WaterHistogramOptions *xopts = new WaterHistogramOptions;
257 
258  opts::AggregateOptions options;
259  options.add(basopts).add(tropts).add(watopts).add(xopts);
260  if (!options.parse(argc, argv))
261  exit(-1);
262 
263 
264 
265  AtomicGroup model = tropts->model;
266  pTraj traj = tropts->trajectory;
267  vector<uint> indices = tropts->frameList();
268 
269  AtomicGroup protein = selectAtoms(model, watopts->prot_string);
270  AtomicGroup water = selectAtoms(model, watopts->water_string);
271 
272  if (basopts->verbosity >= 1)
273  cerr << "Filter(s): " << watopts->filter_func->name() << endl;
274 
275  // Handle rescaling density by using a bulk-water density estimator
276  BulkEstimator* est;
277  if (xopts->rescale_density) {
278  // Double-check the clip
279  traj->readFrame(indices[0]);
280  traj->updateGroupCoords(protein);
281  vector<GCoord> bdd = protein.boundingBox();
282 
283  if (xopts->bulk_zclip != 0.0) {
284  if (xopts->bulk_zclip <= bdd[1].z())
285  cerr << "***WARNING: the z-clip for bulk solvent overlaps the protein***\n";
286 
287  ZClipEstimator* myest = new ZClipEstimator(water, traj, indices, xopts->bulk_zclip, xopts->grid_resolution);
288  myest->countZero(xopts->count_empty_voxels);
289  est = myest;
290  } else if (xopts->bulk_zmin != 0.0 || xopts->bulk_zmax != 0.0) {
291  ZSliceEstimator* myest = new ZSliceEstimator(water, traj, indices, xopts->bulk_zmin, xopts->bulk_zmax, xopts->grid_resolution);
292  est = myest;
293  } else
294  est = new NullEstimator();
295  } else
296  est = new NullEstimator();
297 
298  cerr << *est << endl;
299 
300  WaterHistogrammer wh(protein, water, est, watopts->filter_func);
301  if (!xopts->clamped_box.empty()) {
302  wh.setGrid(xopts->clamped_box[0]-watopts->pad, xopts->clamped_box[1]+watopts->pad, xopts->grid_resolution);
303  } else
304  wh.setGrid(traj, indices, xopts->grid_resolution, watopts->pad);
305 
306  wh.accumulate(traj, indices);
307 
308  long ob = wh.outOfBounds();
309  if (ob)
310  cerr << "***WARNING*** There were " << ob << " out of bounds waters\n";
311 
312  DensityGrid<double> grid = wh.grid();
313  cerr << boost::format("Grid = %s x %s @ %s\n") % grid.minCoord() % grid.maxCoord() % grid.gridDims();
314 
315  if (xopts->rescale_density) {
316  double d = est->bulkDensity();
317  double s = est->stdDev(d);
318  cerr << boost::format("Bulk density estimate = %f, std = %f\n") % d % s;
319  if (xopts->rescale_density) {
320  cerr << "Rescaling grid by bulk estimate...\n";
321  grid.scale(1.0 / d);
322  stringstream ss;
323  ss << boost::format("water-hist: bulk density estimate = %f, std = %f") % d % s;
324  grid.addMetadata(ss.str());
325  }
326  }
327 
328  grid.addMetadata(hdr);
329  grid.addMetadata(vectorAsStringWithCommas(options.print()));
330  cout << grid;
331 }
332 
333 
334 
Trajectory with either a –range or –skip.
Options specific to tools that work with water/internal-water.
Namespace for encapsulating options processing.
std::vector< GCoord > boundingBox(void) const
Bounding box for the group...
STL namespace.
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
AtomicGroup model
Model that describes the trajectory.
std::string water_string
User-specified strings.
Options common to all tools (including –fullhelp)
double pad
Extra padding for water.
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Namespace for Density package.
Definition: DensityGrid.hpp:48
std::vector< std::string > print() const
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
DensityTools::WaterFilterBase * filter_func
Filter for determining internal waters.
std::vector< uint > frameList() const
Returns the list of frames the user requested.
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
Namespace for most things not already encapsulated within a class.
std::string vectorAsStringWithCommas(const std::vector< std::string > &v)
Specialization for strings that sanitizes the contained strings.
Definition: utils.cpp:415
virtual std::string name(void) const =0
Just states the name of the filter/picker.
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.