LOOS  v2.3.2
inside_helices.py
1 #!/usr/bin/env python
2 """
3 Program to detect lipid chains that have made their way inside a membrane
4 protein
5 """
6 
7 import sys
8 import loos
9 import ConvexHull
10 import argparse
11 import os
12 
13 fullhelp= """
14 
15 Example command line:
16 inside_helices.py rhodopsin.psf sim.dcd 'segid == "RHOD" && backbone' -17 17 10 'segid =~ "[TB]PE" && name =~ "C2\d+"' 35:64 71:100 107:139 151:173 200:233 246:277 286:308 -d dha_helices
17 
18 What's actually going on:
19 The purpose of this program is to detect cases where lipid chains or
20 headgroups move "inside" a helical protein. The user specifies what the
21 protein is and the residue ranges of the helices, and tells us how to
22 identify the lipids (in the given example, we're looking at the C2 lipid
23 chain from PE lipids).
24 
25 At each time point, the code breaks the system into slices along z, and
26 within each slice computes the centroid for each helix; this lets the
27 program correctly capture helix tilts and kinks, which can alter where the
28 "surface" of the protein is. Note: if you set zblocks too big, you won't
29 have enough atoms for each helix in that block to compute a sane centroid.
30 It's up to you whether you want to include side chains.
31 
32 The centroids of the helices are used to compute a convex hull for each
33 z-slice. Then, for each lipid chain (or headgroup, etc) we check to see if
34 there are any atoms that are inside the hull that's in its z range. If the
35 number of atoms is greater than the threshold (default value is 7), then that
36 chain is considered to be "inside" the protein at that time. Note: you must
37 specify at least 3 helices for this program to make any sense, since you can't
38 have a convex hull with 2 points.
39 
40 The output of the program is a set of time series: each lipid that is ever
41 identified as being inside the protein gets its own file (named as
42 SEGID:resid.dat). The first column is the frame number, and the second is
43 1 if the lipid was inside on that frame and 0 otherwise.
44 
45 If you just want the distribution of occupancies (e.g. what fraction of
46 time the lipid was present), you can say something like
47 
48 grep Occ *.dat | awk '{print $4}' > vals.dat
49 
50 and then histogram vals.dat.
51  """
52 
53 
54 parser = argparse.ArgumentParser(prog=sys.argv[0], epilog=fullhelp, formatter_class=argparse.RawDescriptionHelpFormatter)
55 parser.add_argument("system_file",
56  help="File describing system contents, e.g. PDB or PSF")
57 parser.add_argument("traj_file", help="Trajectory file")
58 parser.add_argument("protein_string", help="Selection for the protein")
59 parser.add_argument("zmin",
60  type=float,
61  help="Minimum z value to consider for target atoms")
62 parser.add_argument("zmax",
63  type=float,
64  help="Maxmimum z value to consider for target atoms")
65 parser.add_argument("zblocks",
66  type=int,
67  help="Number of blocks to slice the system into along the z-axis")
68 parser.add_argument("target_string",
69  help="Selection string for the stuff that may penetrate")
70 parser.add_argument("helix_ranges",
71  nargs="+",
72  help="list of colon-delimited residue number ranges specifying where the helices are")
73 parser.add_argument("-d", "--directory",
74  help="Directory to create output files",
75  default=".")
76 parser.add_argument("-t", "--threshold",
77  default=7,
78  type=int,
79  help="Number of atoms inside for the chain to be considered inside")
80 
81 args = parser.parse_args()
82 
83 
84 
85 system = loos.createSystem(args.system_file)
86 traj = loos.createTrajectory(args.traj_file, system)
87 
88 protein = loos.selectAtoms(system, args.protein_string)
89 
90 output_directory = args.directory
91 if not os.path.exists(output_directory):
92  try:
93  os.mkdir(output_directory)
94  except OSError as inst:
95  print 'Error creating output directory %s : ' % output_directory
96  print inst
97  sys.exit(1)
98 if not os.access(output_directory, os.W_OK):
99  print "Error: no permission to write to output directory ", output_directory
100  sys.exit(1)
101 
102 
103 helices = []
104 helix_centroids = loos.AtomicGroup()
105 for h in args.helix_ranges:
106  first, last = h.split(":")
107  helix_string ='(resid >= ' + first + ') ' + '&& (resid <= ' + last + ')'
108  helix = loos.selectAtoms(protein, helix_string)
109  helices.append(helix)
110  helix_centroids.append(loos.Atom())
111 
112 
113 target = loos.selectAtoms(system, args.target_string)
114 target = loos.selectAtoms(target, "!hydrogen")
115 
116 chains = target.splitByResidue()
117 
118 if (args.zmin > args.zmax):
119  tmp = args.zmax
120  args.zmax = args.zmin
121  args.zmin = tmp
122 
123 slicers = []
124 block_size = (args.zmax - args.zmin) / args.zblocks
125 for i in range(args.zblocks):
126  z1 = args.zmin + i*block_size
127  z2 = args.zmin + (i+1)*block_size
128 
129  ch = ConvexHull.ZSliceSelector(z1, z2)
130  slicers.append(ch)
131 
132 
133 frame = 0
134 
135 bound_lipids = {}
136 
137 while (traj.readFrame()):
138  traj.updateGroupCoords(system)
139 
140  # set up the convex hulls for this slice
141  hulls = []
142  for i in range(args.zblocks):
143  current_list = []
144  for h in range(len(helices)):
145  helix = slicers[i](helices[h])
146  if len(helix) > 0:
147  centroid = helix.centroid()
148  helix_centroids[h].coords(centroid)
149  current_list.append(helix_centroids[h])
150 
151  if len(current_list) < 3:
152  print "Warning: only %d helices represented in frame %d and slice %d" % (len(current_list), frame, i)
153  hulls.append(None)
154  else:
155  hull = ConvexHull.ConvexHull(helix_centroids)
156  hull.generate_hull()
157  hull.generate_vertices()
158  hulls.append(hull)
159 
160  for chain in chains:
161  atoms_inside = 0
162  for atom in chain:
163  z = atom.coords().z()
164 
165  # skip atoms outside the z range
166  if not (args.zmin < z < args.zmax):
167  continue
168 
169  index = int((z-args.zmin)/block_size)
170 
171  if hulls[index] and hulls[index].is_inside(atom.coords()):
172  atoms_inside += 1
173  if atoms_inside >= args.threshold:
174  key = atom.segid() + ":" + str(atom.resid())
175  if not bound_lipids.has_key(key):
176  bound_lipids[key] = []
177  bound_lipids[key].append(frame)
178  break
179  frame += 1
180  if frame % 20 == 0: print frame
181 
182 for lipid in bound_lipids.keys():
183  frames = bound_lipids[lipid]
184  occ = float(len(frames)) / frame
185 
186  file = open(output_directory + "/" + lipid + ".dat", "w")
187  file.write("# Occupancy = %f\n" % (occ))
188  file.write("#Frame\tBound\n")
189  for i in range(frame):
190  if i in frames:
191  file.write("%d\t%d\n" %(i, 1))
192  else:
193  file.write("%d\t%d\n" %(i, 0))
194  file.close()
pTraj createTrajectory(const std::string &filename, const AtomicGroup &g)
Factory function for reading in a trajectory file.
Definition: sfactories.cpp:184
Basic Atom class for handling atom properties.
Definition: Atom.hpp:50
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
AtomicGroup createSystem(const std::string &filename, const std::string &filetype)
Definition: sfactories.cpp:119