LOOS  v2.3.2
enmovie.cpp
1 /*
2  enmovie
3 
4  Elastic Network MOde VIsualizEr
5 
6 
7  Usage:
8  enmovie [options] model-name eigenvector-matrix output-name
9 
10  Notes:
11  use the "--help" option for more information about how to run...
12 
13 */
14 
15 
16 /*
17  This file is part of LOOS.
18 
19  LOOS (Lightweight Object-Oriented Structure library)
20  Copyright (c) 2008 Tod D. Romo
21  Department of Biochemistry and Biophysics
22  School of Medicine & Dentistry, University of Rochester
23 
24  This package (LOOS) is free software: you can redistribute it and/or modify
25  it under the terms of the GNU General Public License as published by
26  the Free Software Foundation under version 3 of the License.
27 
28  This package is distributed in the hope that it will be useful,
29  but WITHOUT ANY WARRANTY; without even the implied warranty of
30  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31  GNU General Public License for more details.
32 
33  You should have received a copy of the GNU General Public License
34  along with this program. If not, see <http://www.gnu.org/licenses/>.
35 
36 */
37 
38 
39 
40 #include <loos.hpp>
41 #include <iterator>
42 #include <boost/format.hpp>
43 #include <boost/program_options.hpp>
44 
45 #include <cassert>
46 
47 using namespace std;
48 using namespace loos;
49 
50 namespace opts = loos::OptionsFramework;
51 namespace po = loos::OptionsFramework::po;
52 
53 
55 
56 
57 
58 
59 // --------------------------------------------------------------------------------
60 
61 string vec_name;
62 vector<int> cols;
63 vector<double> scales;
64 double global_scale = 1.0;
65 bool uniform;
66 bool invert = false;
67 string model_name;
68 string map_name;
69 string selection;
70 string avgname;
71 bool autoscale, square;
72 double autolength;
73 string svals_file;
74 int verbosity = 0;
75 int offset = 0;
76 int nsteps = 100;
77 string supersel;
78 bool tag = false;
79 bool positive = false;
80 bool negative = false;
81 
82 
83 
84 // @cond TOOLS_INTERNAL
85 class ToolOptions : public opts::OptionsPackage {
86 public:
87 
88  void addGeneric(po::options_description& o) {
89  o.add_options()
90  ("mode,M", po::value< vector<string> >(&strings), "Modes to use")
91  ("autoscale,A", po::value<bool>(&autoscale)->default_value(true), "Automatically scale vectors")
92  ("autolength,L", po::value<double>(&autolength)->default_value(2.0), "Length of average vector in Angstroms")
93  ("svals,S", po::value<string>(&svals_file), "Scale columns by singular values from file")
94  ("pca", "Vectors are from PCA (sets square=1, invert=0, offset=0)")
95  ("enm", "Vectors are from ENM (sets square=0, invert=1, offset=6)")
96  ("superset,U", po::value<string>(&supersel)->default_value("all"), "Superset to use for frames in the output")
97  ("tag,T", po::value<bool>(&tag)->default_value(false), "Tag ENM atoms with 'E' alt-loc")
98  ("avg", po::value<string>(&avgname), "Use this model for average structure coordinates")
99  ("square", po::value<bool>(&square)->default_value(true), "square the singular values")
100  ("invert", po::value<bool>(&invert)->default_value(false), "Invert singular values (ENM)")
101  ("scale", po::value< vector<double> >(&scales), "Scale the requested columns")
102  ("global", po::value<double>(&global_scale)->default_value(1.0), "Global scaling")
103  ("uniform", po::value<bool>(&uniform)->default_value(false), "Scale all elements uniformly")
104  ("map", po::value<string>(&map_name), "Use a map file to map LSV/eigenvectors to atomids")
105  ("offset", po::value<int>(&offset), "Added to mode indices to select columns in eigenvector matrix")
106  ("positive", po::value<bool>(&positive)->default_value(false), "Only move along positive eigenvector direction")
107  ("negative", po::value<bool>(&negative)->default_value(false), "Only move along negative eigenvector direction");
108 
109  }
110 
111  bool postConditions(po::variables_map& vm) {
112 
113  if (vm.count("enm")) {
114  square = false;
115  invert = true;
116  offset = 6;
117  } else if (vm.count("pca")) {
118  square = true;
119  invert = false;
120  offset = 0;
121  }
122 
123  if (strings.empty())
124  cols.push_back(0);
125  else
126  cols = parseRangeList<int>(strings);
127 
128  for (uint i=0; i<cols.size(); ++i)
129  cols[i] += offset;
130 
131  if (!scales.empty()) {
132  if (scales.size() != cols.size()) {
133  cerr << "ERROR - You must have the same number of scalings as columns or rely on the global scaling\n";
134  return(false);
135  }
136  } else {
137  for (uint i=0; i<cols.size(); ++i)
138  scales.push_back(1.0);
139  }
140 
141  // Turn off since that's the default behavior
142  if (negative && positive)
143  negative = positive = false;
144 
145 
146  return(true);
147  }
148 
149  string print() const {
150  ostringstream oss;
151  oss << boost::format("columns='%s', global=%f, uniform=%d, map='%s', autoscale=%d, autolength=%f, svals='%s', square=%d, invert=%d, offset=%d, tag=%d, superset='%s', avg='%s', positive=%d, negative=%d")
152  % vectorAsStringWithCommas<string>(strings)
153  % global_scale
154  % uniform
155  % map_name
156  % autoscale
157  % autolength
158  % svals_file
159  % square
160  % invert
161  % offset
162  % tag
163  % supersel
164  % avgname
165  % positive
166  % negative;
167 
168  oss << "scale='";
169  copy(scales.begin(), scales.end(), ostream_iterator<double>(oss, ","));
170  oss << "'";
171  return(oss.str());
172  }
173 
174  vector<string> strings;
175 };
176 // @endcond
177 
178 
179 string fullHelpMessage(void){
180  string msg =
181  "\n"
182  "SYNOPSIS\n"
183  "\n"
184  "\tCreate a representation of motion along the mode(s) of an ENM\n"
185  "\n"
186  "DESCRIPTION\n"
187  "\n"
188  "It is often informative to visualize the modes of motion predicted\n"
189  "by an ENM in addition to plotting eigenvectors. enmovie creates a dcd\n"
190  "and an accompanying pdb for this purpose. A 100 frame trajectory is \n"
191  "made and the beads follow a given eigenvector(s).\n"
192  "\n"
193  "* PCA vs ENM *\n"
194  "Enmovie should use different options depending on whether the eigenvectors come\n"
195  "from a PCA or an ENM. The --enm and --pca flags configure porcupine to expect\n"
196  "the appropriate input. If neither flag is given, then PCA is assumed.\n"
197  "For PCA results, the first mode is in the first column. LOOS\n"
198  "calculates a PCA using the singular value decomposition, so the 'eigenvalues' are\n"
199  "actually singular values and need to be squared. For typical ENMs, the first 6\n"
200  "eigenvectors correspond to rigid-body motion and are zero, and hence skipped.\n"
201  "In addition, the magnitude of the fluctuations are the inverse of the eigenvalues.\n"
202  "\n"
203  "* Scaling and Autoscaling *\n"
204  "There are several different ways the individual vectors can be scaled. The default\n"
205  "is to automatically determine a scaling such that the largest average displacement\n"
206  "is 2 Angstroms. If multiple modes are being used, then the corresponding eigenvector\n"
207  "can be used so the relative lengths are correct. When used with autoscaling, the\n"
208  "the relative lengths are maintained. In addition, an explicit scaling can be used\n"
209  "for each mode. If autoscaling or eigenvectors are used, then this is applied -after-\n"
210  "both of those. Finally, a global scaling can be applied. To see the scaling used\n"
211  "turn on verbose output (-v1). For more details about exactly what scaling is used,\n"
212  "set verbosity greater than 1 (-v2).\n"
213  "\n"
214  "In general, the default options should be fine for visualization. If you are using\n"
215  "more than one mode, then include the eigenvectors to preserve the relative scalings\n"
216  "between the modes.\n"
217  "\n"
218  "* Supersets *\n"
219  "Some visualization programs require more atoms than what the PCA/ENM used in order\n"
220  "to get the structure correct (such as ribbons representations). Including all atoms\n"
221  "can solve this problem. Alternatively, sometimes extra atoms are required to provide\n"
222  "context to the region of interest, such as the extracellular loops in GPCRs. You can\n"
223  "control what atoms are written to the trajectory with the superset selection. This\n"
224  "lets you add back in atoms that were excluded by the PCA/ENM. The catch is that they\n"
225  "will not move in the trajectory, resulting in distorted bonds/connections. The default\n"
226  "is to include all atoms in the output. If you want only the PCA/ENM region, then use\n"
227  "the same selection for the superset as the vector selection.\n"
228  "\n"
229  "EXAMPLES\n"
230  "\n"
231  "\tenmovie model.pdb pca_U.asc\n"
232  "This example uses the first mode, assumes a PCA result,\n"
233  "and autoscales the vectors. Creates output.pdb and output.dcd and\n"
234  "the trajectory has 100 frames.\n"
235  "\n"
236  "\tenmovie --pca -S pca_s.asc -M 0:3 -p modes model.pdb pca_U.asc\n"
237  "This example again uses the first three modes, autoscales, and also\n"
238  "scales each mode by the corresponding singular value. It explicitly uses\n"
239  "a PCA result. It creates modes.pdb and modes.dcd with 100 frames.\n"
240  "\n"
241  "\tenmovie --enm -S enm_s.asc -M 0:3 -p modes model.pdb enm_U.asc\n"
242  "This example is the same as above, but expects an ENM result (inverting the\n"
243  "eigenvalues, and skipping the first 6 eigenpairs.\n"
244  "\n"
245  "\tenmovie -S pca_s.asc -M 0,3,7 -L 3 -p modes model.pdb pca_U.asc\n"
246  "A PCA result is assumed, the first, fourth, and eighth mode are used, autoscaling\n"
247  "is turned on with a length of 3 Angstroms. The singular values are also included.\n"
248  "The output prefix is modes."
249  "\n"
250  "\tenmovie --enm -S enm_s.asc -M 0,1 -A 0 -p modes --global 50 model.pdb enm_U.asc\n"
251  "An ENM result is expected and the first two modes are used. Autoscaling is disabled.\n"
252  "Each mode is scaled by the corresponding eigenvalue (inverted, since this is an ENM).\n"
253  "A global scaling of 50 is applied to all modes.\n"
254  "\n"
255  "\tenmovie --pca -S pca_s.asc -M 0:3 -p modes -U 'name == \"CA\"' model.pdb pca_U.asc\n"
256  "This example again uses the first three modes, autoscales, and also\n"
257  "scales each mode by the corresponding singular value. It explicitly uses\n"
258  "a PCA result. It creates modes.pdb and modes.dcd with 100 frames.\n"
259  "The default selection is to use CAs for the eigenvectors, and the -U option\n"
260  "causes the output trajectory to only include CAs.\n"
261  "\n"
262  "\tenmovie -p pca_mode1 -t xtc model.pdb pca_U.asc\n"
263  "This example uses the first mode, assumes a PCA result,\n"
264  "and autoscales the vectors. Creates pca_mode1.pdb and pca_mode1.xtc and\n"
265  "the trajectory has 100 frames.\n"
266  "\n"
267  "SEE ALSO\n"
268  "\n"
269  "\tsvd, big-svd, svdcolmap, anm, gnm, vsa, porcupine\n"
270  ;
271 
272  return(msg);
273 }
274 
275 
276 string generateSegid(const uint n) {
277  stringstream s;
278 
279  s << boost::format("P%03d") % n;
280  return(s.str());
281 }
282 
283 
284 
285 // Accept a map file mapping the vectors (3-tuples in the rows) back
286 // onto the appropriate atoms
287 
288 vector<int> readMap(const string& name) {
289  ifstream ifs(name.c_str());
290  if (!ifs) {
291  cerr << "Error- cannot open " << name << endl;
292  exit(-1);
293  }
294 
295 
296  string line;
297  uint lineno = 0;
298  vector<int> atomids;
299 
300  while (!getline(ifs, line).eof()) {
301  stringstream ss(line);
302  int a, b;
303  if (!(ss >> a >> b)) {
304  cerr << "ERROR - cannot parse map at line " << lineno << " of file " << name << endl;
305  exit(-10);
306  }
307  atomids.push_back(b);
308  }
309 
310  return(atomids);
311 }
312 
313 
314 // Fake the mapping, i.e. each vector corresponds to each atom...
315 
316 vector<int> fakeMap(const AtomicGroup& g) {
317  AtomicGroup::const_iterator ci;
318  vector<int> atomids;
319 
320  for (ci = g.begin(); ci != g.end(); ++ci)
321  atomids.push_back((*ci)->id());
322 
323  return(atomids);
324 }
325 
326 
327 // Record the atomid's for each atom in the selected subset. This
328 // allows use to map vectors back onto the correct atoms when they
329 // were computed from a subset...
330 
331 vector<int> inferMap(const AtomicGroup& g, const string& sel) {
332  AtomicGroup subset = selectAtoms(g, sel);
333  vector<int> atomids;
334 
335  AtomicGroup::iterator ci;
336  for (ci = subset.begin(); ci != subset.end(); ++ci)
337  atomids.push_back((*ci)->id());
338 
339 
340  return(atomids);
341 }
342 
343 
344 double subvectorSize(const Matrix& U, const vector<double>& scaling, const uint j) {
345 
346  GCoord c;
347  for (uint i=0; i<cols.size(); ++i) {
348  GCoord v(U(j, i), U(j+1, i), U(j+2, i));
349  c += scaling[i] * v;
350  }
351 
352  return(c.length());
353 }
354 
355 
356 vector<double> determineScaling(const Matrix& U) {
357 
358  uint n = cols.size();
359  vector<double> scaling(n, 1.0);
360  vector<double> svals(n, 1.0);
361 
362  // First, handle singular values, if given
363  if (!svals_file.empty()) {
364  Matrix S;
365  readAsciiMatrix(svals_file, S);
366  if (verbosity > 1)
367  cerr << "Read singular values from file " << svals_file << endl;
368  if (S.cols() != 1) {
369  cerr << boost::format("Error- singular value file is %d x %d, but it should be a %d x 1\n") % S.rows() % S.cols() % U.rows();
370  exit(-2);
371  }
372  for (uint i = 0; i<n; ++i) {
373  scaling[i] = S[cols[i]];
374  if (square)
375  scaling[i] *= scaling[i];
376  if (invert && scaling[i] != 0.0)
377  scaling[i] = 1.0 / scaling[i];
378  svals[i] = scaling[i];
379  }
380  }
381 
382  double avg = 0.0;
383  if (autoscale) {
384  for (uint i=0; i<U.rows(); i += 3)
385  avg += subvectorSize(U, scaling, i);
386  avg /= U.rows()/3.0;
387 
388  for (uint i=0; i<n; ++i)
389  scaling[i] *= autolength / avg;
390  }
391 
392  // Incorporate additional scaling...
393  if (verbosity > 1) {
394  cerr << "Average subvector size was " << avg << endl;
395  cerr << boost::format("%4s %4s %15s %15s\n") % "col" % "mode" % "sval" % "scale";
396  cerr << boost::format("%4s %4s %15s %15s\n") % "----" % "----" % "---------------" % "---------------";
397  }
398  for (uint i=0; i<n; ++i) {
399  scaling[i] *= scales[i] * global_scale;
400  if (verbosity > 1)
401  cerr << boost::format("%4d %4d %15.5f %15.5f\n") % cols[i] % (cols[i]-offset) % svals[i] % scaling[i];
402  else if (verbosity > 0)
403  cerr << boost::format("Scaling column %d by %f\n") % cols[i] % scaling[i];
404  }
405 
406  return(scaling);
407 }
408 
409 
410 
411 
412 void copyCoordinates(AtomicGroup& target, const AtomicGroup& source) {
413  for (uint i=0; i<source.size(); ++i) {
414  pAtom atom = target.findById(source[i]->id());
415  if (atom == 0) {
416  cerr << "Error- could not find atomid " << source[i]->id() << " in the superset.\n";
417  cerr << source[i] << endl;
418  exit(-10);
419  }
420  atom->coords(source[i]->coords());
421  }
422 }
423 
424 
425 
426 AtomicGroup renumberAndMapBonds(const AtomicGroup& model, const AtomicGroup& subset) {
427 
428  AtomicGroup renumbered = subset.copy();
429 
430  if (!renumbered.hasBonds()) {
431  renumbered.renumber();
432  return(renumbered);
433  }
434 
435  AtomicGroup sorted = model.copy();
436  sorted.sort();
437 
438  vector<int> idmap(model.size());
439  for (uint i=0; i<renumbered.size(); ++i)
440  idmap[renumbered[i]->index()] = i;
441 
442  renumbered.pruneBonds();
443  for (uint i=0; i<renumbered.size(); ++i) {
444  if (! renumbered[i]->hasBonds())
445  continue;
446  vector<int> bondlist = renumbered[i]->getBonds();
447  vector<int> newbonds(bondlist.size());
448  for (uint j=0; j<bondlist.size(); ++j) {
449  pAtom a = sorted.findById(bondlist[j]);
450  if (a == 0) {
451  cerr << "Error- could not find atom id " << bondlist[j] << " in model\n";
452  exit(-2);
453  }
454  newbonds[j] = idmap[a->index()]+1;
455  }
456  renumbered[i]->setBonds(newbonds);
457  }
458  renumbered.renumber();
459  return(renumbered);
460 }
461 
462 
463 int main(int argc, char *argv[]) {
464  string hdr = invocationHeader(argc, argv);
465 
466  opts::BasicOptions* bopts = new opts::BasicOptions(fullHelpMessage());
467  opts::OutputPrefix* popts = new opts::OutputPrefix("output");
469  opts::BasicSelection* sopts = new opts::BasicSelection("name == 'CA'");
471  ToolOptions* topts = new ToolOptions;
473  ropts->addArgument("lsv", "left-singular-vector-file");
474 
475  opts::AggregateOptions options;
476  options.add(bopts).add(popts).add(otopts).add(sopts).add(mopts).add(topts).add(ropts);
477  if (!options.parse(argc, argv)) {
478  cerr << "***WARNING***\n";
479  cerr << "The interface to enmovie has changed significantly\n";
480  cerr << "and is not compatible with previous versions. See the\n";
481  cerr << "help info above, or the --fullhelp guide.\n";
482  exit(-1);
483  }
484 
485  verbosity = bopts->verbosity;
486 
487  // First, read in the LSVs
488  Matrix U;
489  readAsciiMatrix(ropts->value("lsv"), U);
490  uint m = U.rows();
491 
492  vector<double> scalings = determineScaling(U);
493 
494  // Read in the average structure...
495  AtomicGroup avg = mopts->model;
496  if (! avgname.empty()) {
497  AtomicGroup avgcoords = createSystem(avgname);
498  copyCoordinates(avg, avgcoords);
499  }
500 
501 
502 
503  AtomicGroup superset = selectAtoms(mopts->model, supersel);
504 
505  vector<int> atomids;
506  if (map_name.empty()) {
507  if (sopts->selection.empty())
508  atomids = fakeMap(avg);
509  else
510  atomids = inferMap(avg, sopts->selection);
511 
512  } else
513  atomids = readMap(map_name);
514 
515  // Double check size of atomid map
516  if (atomids.size() * 3 != m) {
517  cerr << boost::format("Error - The vector-to-atom map (provided or inferred) has %d atoms, but expected %d.\n") %
518  atomids.size() % (m / 3);
519  exit(-1);
520  }
521 
522 
523 
524  pTrajectoryWriter traj = otopts->createTrajectory(popts->prefix);
525 
526  // We'll step along the Eigenvectors using a sin wave as a final scaling...
527  bool doublesided = !(negative || positive);
528  double delta = (doublesided ? 2.0 : 1.0)*PI/nsteps;
529  double phase = negative ? PI : 0.0;
530 
531  // Loop over requested number of frames...
532  for (int frameno=0; frameno<nsteps; frameno++) {
533  double k = sin(delta * frameno + phase);
534 
535  // Have to make a copy of the atoms since we're computing a
536  // displacement from the model structure...
537  AtomicGroup frame = superset.copy();
538  AtomicGroup frame_subset = selectAtoms(frame, sopts->selection);
539 
540  // Loop over all requested modes...
541  for (uint j = 0; j<cols.size(); ++j) {
542  // Loop over all atoms...
543  for (uint i=0; i<frame_subset.size(); i++) {
544  GCoord c = frame_subset[i]->coords();
545 
546  // This gets the displacement vector for the ith atom of the
547  // ci'th mode...
548  GCoord d( U(i*3, cols[j]), U(i*3+1, cols[j]), U(i*3+2, cols[j]) );
549  if (uniform)
550  d /= d.length();
551 
552  c += k * scalings[j] * d;
553 
554  // Stuff those coords back into the Atom object...
555  frame_subset[i]->coords(c);
556  if (tag)
557  frame_subset[i]->chainId("E");
558  }
559  }
560 
561  if (frameno == 0) {
562  // Write out the selection, converting it to a PDB
563  string outpdb(popts->prefix + ".pdb");
564  ofstream ofs(outpdb.c_str());
565  PDB pdb;
566  AtomicGroup structure = renumberAndMapBonds(avg, frame);
567 
568  pdb = PDB::fromAtomicGroup(structure);
569  pdb.remarks().add(hdr);
570  ofs << pdb;
571  }
572 
573 
574  // Now that we've displaced the frame, add it to the growing trajectory...
575  traj->writeFrame(frame);
576  }
577 }
Gets a string as prefix for output files (–prefix)
Provides simple way to add command-line arguments (required options)
Simple matrix template class using policy classes to determine behavior.
Definition: MatrixImpl.hpp:53
Namespace for encapsulating options processing.
Provides a single LOOS selection (–selection)
bool hasBonds(void) const
Does any atom in the group have bond information???
pAtom findById(const int id) const
Find a contained atom by its atomid.
double length(void) const
Length of the coordinate (as a vector)
Definition: Coord.hpp:423
void addArgument(const std::string &name, const std::string &description)
Add a required argument given a name (tag) and a description (currently unused)
STL namespace.
void renumber(const int start=1, const int stride=1)
Renumber the atomid's of the contained atoms...
bool parse(int argc, char *argv[])
Parses a command line, returning true if parsing was ok.
Options common to all tools (including –fullhelp)
AtomicGroup createSystem(const std::string &filename)
Factory function for reading in structure files.
Definition: sfactories.cpp:115
std::string invocationHeader(int argc, char *argv[])
Create an invocation header.
Definition: utils.cpp:124
std::string value(const std::string &s) const
Retrieve the value for an argument.
PDB reading/writing class.
Definition: pdb.hpp:69
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
void sort(void)
Sort based on atomid.
AtomicGroup copy(void) const
Creates a deep copy of this group.
Definition: AtomicGroup.cpp:56
Request a model with coordinates.
Math::Matrix< T, P, S > readAsciiMatrix(std::istream &is)
Read in a matrix from a stream returning a newly created matrix.
Definition: MatrixRead.hpp:72
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
void pruneBonds()
Attempt to prune connectivity (only retain bonds to atoms within this AtomicGroup) ...
Namespace for most things not already encapsulated within a class.
Combines a set of OptionsPackages.
AggregateOptions & add(OptionsPackage *pack)
Add a pointer to an OptionsPackage that will be used for options.