LOOS  v2.3.2
lipid_lifetime.py
1 #!/usr/bin/env python
2 """
3 Use Voronoi polygons to compute the lifetime of lipids near the surface of
4 a protein
5 
6 
7 Alan Grossfield, University of Rochester Medical Center
8 Copyright 2015
9 """
10 
11 import sys
12 import loos
13 import loos.pyloos
14 import numpy
15 
16 from Voronoi import *
17 
18 def autocorrel(timeseries, expected=None):
19  """
20  Computes the autocorrelation function for a set of timeseries.
21 
22  Destroys the timeseries matrix.
23  Does not trap for timeseries with zero std dev (e.g. all constant)
24  """
25  ave = numpy.mean(timeseries, axis=1, keepdims=True)
26  dev = numpy.std(timeseries, axis=1, keepdims=True)
27  timeseries -= ave
28  timeseries /= dev
29 
30  length = timeseries.shape[1]
31  if expected is None:
32  expected = length
33 
34  corr = numpy.zeros([expected, len(timeseries)], float)
35  for i in range(expected):
36  num_vals = length - i
37  num = timeseries[:,i:] * timeseries[:,:num_vals]
38  corr[i] = numpy.average(num, axis = 1)
39 
40  ave_corr = numpy.average(corr, axis=1)
41  dev_corr = numpy.std(corr, axis=1)
42 
43  return ave_corr, dev_corr
44 
45 
46 
47 def Usage():
48  return """
49 Usage: lipid_lifetime.py system trajectory zmin zmax protein-selection all-lipid-selection target-lipids
50 
51 This program uses 2D Voronoi decomposition to identify lipids in contact
52 with the protein surface, and returns the time-correlation function
53 for this contact (averaged over all lipids considered). This can be fit
54 to a sum of exponentials to extract a "lifetime".
55 
56 zmin and zmax define the "slice" of the membrane and protein used in the
57 Voronoi decomposition. The details of the choice shouldn't matter too much,
58 as long as you pick out one leaflet or the other; beyond that, your choice
59 should be guided by the shape of the protein and the variation of whatever atom
60 you use to define the lipid. At the moment, a lipid that leaves the z-slice
61 is treated the same as one that leaves the protein, so I'd say handling of
62 flip-flop is not very robust.
63 
64 protein-selection is the set of atoms used to define the "protein", or the
65 thing that you're calculating the lifetime around. It is assumed to be 1 thing
66 (e.g. all the atoms are treated together, and we actually just use the
67 centroid), so you couldn't put a selection for something like a bunch of
68 cholesterol molecules there and expect to get the lifetime for lipids around
69 cholesterol. A fairly sparse selection is probably best, e.g. just alpha
70 carbons.
71 
72 all-lipids is a selection that described all of the lipids that are found in
73 the leaflet you're examining. The code is written assuming that you'll pick
74 one atom per lipid, e.g. the phosphorus. The important thing is that all
75 lipids (and cholesterols, etc) in the leaflet in question must be represented,
76 or else the resulting "gaps" may cause some lipids to be incorrectly be
77 marked as neigboring the protein.
78 
79 target-lipids is a selection of those lipids for which you want to compute the
80 lifetime. This group is by definition a subset of the all-lipids selection;
81 since we actually apply this selection to the group created by the all-lipids
82 selection, you can use 'all' as this selection if you want to compute the
83 correlation function for every lipid. Alternatively, if you have a mixture
84 of POPE and POPC lipids, but you want just the lifetime for just the POPE
85 lipids, you might use 'resname =~ "POP[CE]" and name == "P"' for all-lipids
86 and 'resname == "POPE"' for target-lipids.
87 
88 
89 Note: The third column in the output is the standard deviation in the
90 correlation function, where you've averaged all lipids. If you believe
91 that the lipids can be treated as independent measurements, you would divide
92 this value by sqrt(N_lipids_considered) to get the standard error in the
93 correlation function, which is related to the statistical uncertainty. I
94 didn't do this because it's not clear to me that that is a good assumption.
95  """
96 
97 if __name__ == '__main__':
98 
99  if len(sys.argv) <= 1 or sys.argv[1] == '--fullhelp':
100  print Usage()
101  sys.exit()
102 
103  print "#", " ".join(sys.argv)
104 
105  system_file = sys.argv[1]
106  traj_file = sys.argv[2]
107  zmin = float(sys.argv[3])
108  zmax = float(sys.argv[4])
109  protein_selection = sys.argv[5]
110  all_lipid_selection = sys.argv[6]
111  target_lipid_selection = sys.argv[7]
112 
113  system = loos.createSystem(system_file)
114  # TODO: go back and support skip/stride later
115  traj = loos.pyloos.Trajectory(traj_file, system)
116 
117  protein = loos.selectAtoms(system, protein_selection)
118  all_lipids = loos.selectAtoms(system, all_lipid_selection)
119  # NOTE: target_lipids must be a subset of all_lipids
120  target_lipids = loos.selectAtoms(all_lipids, target_lipid_selection)
121 
122  slicer = ZSliceSelector(zmin, zmax)
123 
124  # TODO: this should probably be a command line option
125  # Note: normally, this small a padding might be a problem for Voronoi
126  # decomposition with 1 atom/lipid. However, we're not using the areas
127  # (which is what gets screwed up by lack of padding), and 15 ought
128  # to be big enough to make sure we've got 1 layer of lipid around
129  # the protein.
130  padding = 15.
131 
132  protein_centroid = loos.AtomicGroup()
133  protein_centroid.append(loos.Atom())
134 
135  # set up space to hold the neigbor time series
136  neighbor_timeseries = numpy.zeros([len(target_lipids), len(traj)], numpy.float)
137 
138  for frame in traj:
139  # Use the centroid of the protein slice to represent the protein
140  protein_slice = slicer(protein)
141  centroid = protein_slice.centroid()
142  protein_centroid[0].coords(centroid)
143 
144 
145 
146  # We assume you're using 1 atom/lipid.
147  # Applying slice operation here allows you to be sloppy
148  # with your selections.
149  all_lipids_slice = slicer(all_lipids)
150  target_lipid_slice = slicer(target_lipids)
151 
152  combined = protein_centroid + all_lipids_slice
153 
154  # translate so that the protein is in the center, then reimage by
155  # atom
156  combined.translate(-protein_centroid[0].coords())
157  combined.periodicBox(frame.periodicBox())
158  combined.reimageByAtom()
159 
160  # run voronoi
161  v = VoronoiWrapper(combined, padding)
162  v.generate_voronoi()
163 
164  protein_region = v.get_region_from_atomid(protein_centroid[0].id())
165  # loop over all target lipids
166  for i in range(len(target_lipid_slice)):
167  region = v.get_region_from_atomid(target_lipid_slice[i].id())
168  if protein_region.is_neighbor(region):
169  neighbor_timeseries[i][traj.index()] = 1
170 
171  # remove entries that are all-zero.
172  ave = numpy.mean(neighbor_timeseries, axis=1)
173  is_nonzero = numpy.select([abs(ave)>1e-6], [ave])
174  neighbor_timeseries = numpy.compress(is_nonzero,
175  neighbor_timeseries, axis=0)
176 
177  ave_corr, dev_corr = autocorrel(neighbor_timeseries, neighbor_timeseries.shape[1]/2)
178 
179  print "#Frame\tCorrel\tDev"
180  for i in range(len(ave_corr)):
181  print i, ave_corr[i], dev_corr[i]
Python-based wrapper for LOOS Trajectories This class turns a loos Trajectory into something more pyt...
Definition: trajectories.py:55
Basic Atom class for handling atom properties.
Definition: Atom.hpp:50
AtomicGroup selectAtoms(const AtomicGroup &source, const std::string selection)
Applies a string-based selection to an atomic group...
Definition: utils.cpp:195
Class for handling groups of Atoms (pAtoms, actually)
Definition: AtomicGroup.hpp:87
AtomicGroup createSystem(const std::string &filename, const std::string &filetype)
Definition: sfactories.cpp:119