LOOS  v2.3.2
cluster-structures.py
1 #!/usr/bin/env python
2 """
3 Cluster structures based from a simulation
4 """
5 """
6 
7  This file is part of LOOS.
8 
9  LOOS (Lightweight Object-Oriented Structure library)
10  Copyright (c) 2013 Tod Romo, Grossfield Lab
11  Department of Biochemistry and Biophysics
12  School of Medicine & Dentistry, University of Rochester
13 
14  This package (LOOS) is free software: you can redistribute it and/or modify
15  it under the terms of the GNU General Public License as published by
16  the Free Software Foundation under version 3 of the License.
17 
18  This package is distributed in the hope that it will be useful,
19  but WITHOUT ANY WARRANTY; without even the implied warranty of
20  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  GNU General Public License for more details.
22 
23  You should have received a copy of the GNU General Public License
24  along with this program. If not, see <http://www.gnu.org/licenses/>.
25 """
26 import sys
27 import loos
28 import loos.pyloos
29 import numpy
30 import argparse
31 from scipy.cluster.vq import kmeans,vq
32 
33 
34 parser = argparse.ArgumentParser()
35 parser.add_argument('model', help='Structure to use')
36 parser.add_argument('selection', help='LOOS selection for subset of structure to use for clustering')
37 parser.add_argument('num_means', help='# of clusters to make', type = int)
38 parser.add_argument('prefix', help='Prefix output files with this')
39 parser.add_argument('traj', help='Trajectory to use', nargs='+')
40 parser.add_argument('--align', help='Align trajectory using this LOOS selection')
41 parser.add_argument('--allskip', help='Skip this amount from the start of each trajectory', type = int, default = 0)
42 parser.add_argument('--allstride', help='Step through each trajectory by this many frames', type = int, default = 1)
43 parser.add_argument('--skip', help='Skip this amount from the start of the combined trajectory', type = int, default = 0)
44 parser.add_argument('--stride', help='Step through the combined trajectory by this many frames', type = int, default = 1)
45 
46 args = parser.parse_args()
47 cmd_string = sys.argv[0]
48 for i in range(1, len(sys.argv)):
49  arg = sys.argv[i].replace('\n', '\\n')
50  cmd_string += " '" + arg + "'"
51 print '# ', cmd_string
52 
53 
54 # Create the model & read in the trajectories
55 model = loos.createSystem(args.model)
56 allTrajs = loos.pyloos.VirtualTrajectory(skip=args.skip, stride=args.stride)
57 if args.align:
58  allTrajs = loos.pyloos.AlignedVirtualTrajectory(alignwith = args.align, skip=args.skip, stride=args.stride)
59 for trajname in args.traj:
60  allTrajs.append(loos.pyloos.Trajectory(trajname, model, subset=args.selection, skip=args.allskip, stride=args.allstride))
61 
62 
63 # Set up lists to hold the coordinates...
64 
65 n = len(allTrajs.frame()) * 3
66 data = numpy.zeros( (len(allTrajs), n) )
67 for frame in allTrajs:
68  coords = frame.getCoords()
69  obs = numpy.reshape(coords, (1, n))
70  data[allTrajs.index()] = obs
71 
72 
73 if args.align:
74  print '# Iteratively aligned with %d iterations and final RMSD %g.' % (allTrajs._iters, allTrajs._rmsd)
75 
76 
77 # Do the clustering...
78 # Computing K-Means with K = num_means clusters
79 centroids,distortion = kmeans(data, args.num_means)
80 # centroids - the codebook of centroids
81 # distortion - total distortion
82 
83 # Assign each sample to a cluster
84 idx,dists = vq(data, centroids)
85 # idx - code (which cluster a point belongs to)
86 # dists - distance of each point from cluster
87 # used for the distortion calculation
88 # May want to output for determining
89 # number of clusters in system
90 
91 
92 # Write out the meta-data file
93 print "# Means\tDistortion: "
94 print '# ', args.num_means," \t",distortion
95 print "# -------------------------------"
96 print "# Trajectory list:"
97 for i in range(len(args.traj)):
98  print '# %5d = "%s"' % (i, args.traj[i])
99 
100 print '#\n# %8s %16s %8s %8s' % ('Index', 'Trajectory', 'Frame', 'Cluster')
101 print '# %8s %16s %8s %8s' % ('--------', '----------------', '--------', '--------')
102 
103 for i in range(len(idx)):
104  loc = allTrajs.frameLocation(i)
105  print '%10d %16d %8d %8d' % (i, loc[1], loc[3], idx[i])
106 
107 
108 # Output centroids
109 cen_list = centroids.tolist()
110 subset = allTrajs.frame()
111 for j in range(len(cen_list)):
112  troid = cen_list[j]
113  centroid_structure = subset.copy()
114  for i in range(0, len(troid), 3):
115  centroid_structure[i/3].coords(loos.GCoord(troid[i], troid[i+1], troid[i+2]))
116  pdb = loos.PDB.fromAtomicGroup(centroid_structure)
117  pdb.remarks().add(cmd_string)
118  pdb.remarks().add(">>> Means = %s, Distortion = %f" % (args.num_means, distortion))
119 
120  filename = "%s-centroid-%d.pdb" % (args.prefix, j)
121  file = open(filename, 'w')
122  file.write(str(pdb))
123  file.close()
Python-based wrapper for LOOS Trajectories This class turns a loos Trajectory into something more pyt...
Definition: trajectories.py:55
Virtual trajectory composed of multiple Trajectory objects This class can combine multiple loos...
A virtual trajectory that supports iterative alignment.
static PDB fromAtomicGroup(const AtomicGroup &)
Class method for creating a PDB from an AtomicGroup.
Definition: pdb.cpp:528
AtomicGroup createSystem(const std::string &filename, const std::string &filetype)
Definition: sfactories.cpp:119