LOOS  v2.3.2
pick_blob.cpp
1 /*
2  pick_blob.cpp
3 
4  Given an grid-mask, a PDB, and a selection, finds the blob closest
5  to ANY atom in the selection...
6 */
7 
8 
9 /*
10  This file is part of LOOS.
11 
12  LOOS (Lightweight Object-Oriented Structure library)
13  Copyright (c) 2008, Tod D. Romo, Alan Grossfield
14  Department of Biochemistry and Biophysics
15  School of Medicine & Dentistry, University of Rochester
16 
17  This package (LOOS) is free software: you can redistribute it and/or modify
18  it under the terms of the GNU General Public License as published by
19  the Free Software Foundation under version 3 of the License.
20 
21  This package is distributed in the hope that it will be useful,
22  but WITHOUT ANY WARRANTY; without even the implied warranty of
23  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  GNU General Public License for more details.
25 
26  You should have received a copy of the GNU General Public License
27  along with this program. If not, see <http://www.gnu.org/licenses/>.
28 */
29 
30 
31 #include <loos.hpp>
32 #include <algorithm>
33 #include <limits>
34 
35 #include <DensityGrid.hpp>
36 
37 using namespace std;
38 using namespace loos;
39 using namespace loos::DensityTools;
40 
41 namespace opts = loos::OptionsFramework;
42 namespace po = loos::OptionsFramework::po;
43 
44 // @cond TOOL_INTERNAL
45 struct Blob {
46  Blob() : closest_point(0,0,0), grid_dist(numeric_limits<double>::max()),
47  real_dist(numeric_limits<double>::max()) { }
48 
49  int id;
50  DensityGridpoint closest_point;
51  double grid_dist;
52  double real_dist;
53 };
54 // @endcond
55 
56 int debug = 0;
57 
58 
59 string model_name, selection;
60 GCoord spot;
61 int picked_id;
62 bool use_spot = false;
63 bool largest = false;
64 double range = 0.0;
65 bool query = false;
66 
67 // @cond TOOLS_INTERNAL
68 
69 
70 string fullHelpMessage(void) {
71  string msg =
72  "SYNOPSIS\n"
73  "\n"
74  "\tIdentify the blob closest to a user-specified criterion\n"
75  "\n"
76  "DESCRIPTION\n"
77  "\n"
78  "\tpick_blobs finds the blob closest to a user input. Several methods\n"
79  "of input are supported. For instance, given a pdb and selection string\n"
80  "the blob closest to the selection will be returned. Additionally, a blob\n"
81  "may be selected using its ID (see blobid). A point within the grid may\n"
82  "also be used. A range of distances and the largest blob within the range\n"
83  "are alternate criteria.\n"
84  "\n"
85  "The input is an integer grid (from blobid), and the output is another integer-grid\n"
86  "that can be used to mask a density grid.\n"
87  "\n"
88  "EXAMPLES\n"
89  "\tblobid --threshold 1 <foo.grid >foo_id.grid\n"
90  "\tpick_blob --model foo.pdb --selection 'resid==65' < foo_id.grid > foo_picked.grid\n"
91  "This example first segments the density at 1.0, and then picks the blob closest to\n"
92  "any atom in residue 65 in the model.\n"
93  "\n"
94  "\tpick_blob --point '(13,7,3)' <foo.grid >foo_picked.grid\n"
95  "This example picks the blob nearest coordinates (13,7,3) in real-space (i.e.\n"
96  "Angstroms).\n"
97  "\n"
98  "\tpick_blob --model foo.pdb --selection 'resid==65' --range 15 <foo_id.grid >foo_picked.grid\n"
99  "This example finds ALL blobs that are within 15 Angstroms of any atom in residue 65.\n"
100  "\n"
101  "\tpick_blob --model foo.pdb --selection 'resid==64' --range 15 --largest 1 <foo_id.grid >foo_picked.grid\n"
102  "This example is as above, except that only the largest blob within 15 Angstroms is picked,\n"
103  "rather than ALL blobs within 15 Angstroms.\n"
104  "\n"
105  "SEE ALSO\n"
106  "\tblobid, gridmask\n";
107 
108  return(msg);
109 }
110 
111 
112 class ToolOptions : public opts::OptionsPackage {
113 public:
114 
115  void addGeneric(po::options_description& o) {
116  o.add_options()
117  ("query", po::value<bool>(&query)->default_value(false), "Query nearby blobs, do NOT write out a grid")
118  ("model", po::value<string>(&model_name)->default_value(""), "Select using this model (must have coords)")
119  ("selection", po::value<string>(&selection)->default_value(""), "Select atoms within the PDB to find nearest blob")
120  ("id", po::value<int>(&picked_id)->default_value(-1), "Select blob with this ID")
121  ("point", po::value<string>(&point_spec), "Select blob closest to this point")
122  ("range", po::value<double>(&range), "Select blobs that are closer than this distance")
123  ("largest", po::value<bool>(&largest)->default_value(false), "Select only the largest blob that fits the distance criterion");
124  }
125 
126  bool postConditions(po::variables_map& vm) {
127  if (!model_name.empty()) {
128  if (selection.empty()) {
129  cerr << "Error: must provide a selection when using a model to select blobs\n";
130  return(false);
131  }
132  } else if (!point_spec.empty()) {
133  use_spot = true;
134  istringstream iss(point_spec);
135  if (!(iss >> spot)) {
136  cerr << "Error: cannot parse coordinate " << point_spec << endl;
137  return(false);
138  }
139  } else if (picked_id < 0) {
140  cerr << "Error: must specify either a PDB with selection, a point, or a blob-ID to pick\n";
141  return(false);
142  }
143 
144  return(true);
145  }
146 
147  string spot_spec;
148  string point_spec;
149 };
150 
151 
152 void zapGrid(DensityGrid<int>& grid, const vector<int>& vals) {
153  DensityGridpoint dims = grid.gridDims();
154 
155  for (int k=0; k<dims[2]; k++)
156  for (int j=0; j<dims[1]; j++)
157  for (int i=0; i<dims[0]; i++) {
158  DensityGridpoint point(i,j,k);
159  int val = grid(point);
160  vector<int>::const_iterator ci = find(vals.begin(), vals.end(), val);
161  if (ci == vals.end())
162  grid(point) = 0;
163  }
164 }
165 
166 
167 int maxBlobId(const DensityGrid<int>& grid) {
168  DensityGridpoint dims = grid.gridDims();
169  long k = dims[0] * dims[1] * dims[2];
170  int maxid = 0;
171 
172  for (long i=0; i<k; i++)
173  if (grid(i) > maxid)
174  maxid = grid(i);
175 
176  return(maxid);
177 }
178 
179 
180 vector<Blob> pickBlob(const DensityGrid<int>& grid, const vector<GCoord>& points) {
181  vector<DensityGridpoint> gridded;
182  vector<GCoord>::const_iterator ci;
183 
184  for (ci = points.begin(); ci != points.end(); ++ci)
185  gridded.push_back(grid.gridpoint(*ci));
186 
187  int maxid = maxBlobId(grid);
188 
189  if (debug >= 1)
190  cerr << boost::format("Found %d total blobs in grid.\n") % maxid;
191 
192  vector<Blob> blobs(maxid+1, Blob());
193 
194  DensityGridpoint dims = grid.gridDims();
195  for (int k=0; k<dims[2]; k++)
196  for (int j=0; j<dims[1]; j++)
197  for (int i=0; i<dims[0]; i++) {
198  DensityGridpoint point(i,j,k);
199  int id = grid(point);
200  if (!id)
201  continue;
202 
203  vector<DensityGridpoint>::iterator cj;
204  for (cj = gridded.begin(); cj != gridded.end(); ++cj) {
205  double d = point.distance2(*cj);
206  if (d < blobs[id].grid_dist) {
207  blobs[id].id = id;
208  blobs[id].grid_dist = d;
209  blobs[id].closest_point = point;
210  GCoord a = grid.gridToWorld(point);
211  GCoord b = grid.gridToWorld(*cj);
212  blobs[id].real_dist = a.distance2(b);
213  }
214  }
215  }
216 
217  if (debug > 1) {
218  cerr << "* DEBUG: Blob list dump *\n";
219  for (int i=0; i<=maxid; i++)
220  cerr << boost::format("\tid=%d, grid_dist=%12.8g, real_dist=%12.8g\n")
221  % blobs[i].id
222  % blobs[i].grid_dist
223  % blobs[i].real_dist;
224  }
225 
226  vector<Blob> res;
227  if (range == 0.0) {
228  double min = numeric_limits<double>::max();
229  int id = -1;
230  for (int i=1; i<=maxid; i++)
231  if (blobs[i].grid_dist < min) {
232  min = blobs[i].grid_dist;
233  id = i;
234  }
235 
236  if (id > 0)
237  res.push_back(blobs[id]);
238 
239  } else {
240  for (int i=1; i<=maxid; i++)
241  if (blobs[i].real_dist <= range)
242  res.push_back(blobs[i]);
243  }
244 
245  return(res);
246 }
247 
248 // @endcond
249 
250 
251 int main(int argc, char *argv[]) {
252  string header = invocationHeader(argc, argv);
253 
254  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
255  ToolOptions* topts = new ToolOptions;
256 
257  opts::AggregateOptions options;
258  options.add(bopts).add(topts);
259  if (!options.parse(argc, argv))
260  exit(-1);
261 
262  vector<GCoord> points;
263  if (picked_id < 0) {
264  if (use_spot)
265  points.push_back(spot);
266  else {
267  AtomicGroup model = createSystem(model_name);
268  AtomicGroup subset = selectAtoms(model, selection);
269 
270  for (AtomicGroup::iterator i = subset.begin(); i != subset.end(); ++i)
271  points.push_back((*i)->coords());
272  }
273  }
274 
275  DensityGrid<int> grid;
276  cin >> grid;
277 
278  cerr << "Read in grid with dimensions " << grid.gridDims() << endl;
279 
280  GCoord delta = grid.gridDelta();
281  double voxel_volume = 1.0 / delta[0];
282  for (int i=1; i<3; i++)
283  voxel_volume *= (1.0 / delta[i]);
284 
285  if (picked_id < 0) {
286 
287  vector<Blob> picks = pickBlob(grid, points);
288 
289  if (picks.empty()) {
290  cerr << "Warning - no blobs picked\n";
291  exit(0);
292  }
293 
294  cerr << "Picked " << picks.size() << " blobs:\n";
295  vector<Blob>::const_iterator ci;
296  vector<int> ids;
297  for (ci = picks.begin(); ci != picks.end(); ++ci) {
298  cerr << boost::format("\tid=%d, dist=%12.8g\n") % ci->id % ci->real_dist;
299  ids.push_back(ci->id);
300  }
301 
302  if (!query) {
303  zapGrid(grid, ids);
304  cout << grid;
305  }
306 
307  } else if (!query) {
308  vector<int> picks;
309  picks.push_back(picked_id);
310  zapGrid(grid, picks);
311  cout << grid;
312  }
313 
314  exit(0);
315 }
DensityGridpoint gridpoint(const loos::GCoord &x) const
Converts a real-space coordinate into grid coords.
Namespace for encapsulating options processing.
STL namespace.
double distance2(const Coord< T > &o) const
Distance squared between two coordinates.
Definition: Coord.hpp:429
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
AtomicGroup createSystem(const std::string &filename)
Factory function for reading in structure files.
Definition: sfactories.cpp:115
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
Namespace for Density package.
Definition: DensityGrid.hpp:48
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
loos::GCoord gridToWorld(const DensityGridpoint &v) const
Converts grid coords to real-space (world) coords.
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
Namespace for most things not already encapsulated within a class.
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.