Ticket #1045: new-responsecache.darcs.patch

File new-responsecache.darcs.patch, 19.2 KB (added by davidsarah, at 2010-10-27T05:00:30Z)

make ResponseCache smarter to avoid memory leaks: don't record timestamps, use DataSpans to merge entries, and clear the cache when we see a new seqnum. refs #1045, #1229. (This supersedes only-cache-last-seen-seqnum.darcs.patch.)

Line 
11 patch for repository davidsarah@dev.allmydata.org:/home/darcs/tahoe/trunk:
2
3Wed Oct 27 05:33:02 GMT Daylight Time 2010  david-sarah@jacaranda.org
4  * make ResponseCache smarter to avoid memory leaks: don't record timestamps, use DataSpans to merge entries, and clear the cache when we see a new seqnum. refs #1045, #1229
5
6New patches:
7
8[make ResponseCache smarter to avoid memory leaks: don't record timestamps, use DataSpans to merge entries, and clear the cache when we see a new seqnum. refs #1045, #1229
9david-sarah@jacaranda.org**20101027043302
10 Ignore-this: 88fd6fba7f35a2f8af1693b92718f5f3
11] {
12hunk ./src/allmydata/mutable/common.py 3
13 
14 from allmydata.util import idlib
15-from allmydata.util.dictutil import DictOfSets
16+from allmydata.util.spans import DataSpans
17 
18 MODE_CHECK = "MODE_CHECK" # query all peers
19 MODE_ANYTHING = "MODE_ANYTHING" # one recoverable version
20hunk ./src/allmydata/mutable/common.py 62
21 class ResponseCache:
22     """I cache share data, to reduce the number of round trips used during
23     mutable file operations. All of the data in my cache is for a single
24-    storage index, but I will keep information on multiple shares (and
25-    multiple versions) for that storage index.
26+    storage index, but I will keep information on multiple shares for
27+    that storage index.
28+
29+    I maintain a highest-seen sequence number, and will flush all entries
30+    each time this number increases (this doesn't necessarily imply that
31+    all entries have the same sequence number).
32 
33     My cache is indexed by a (verinfo, shnum) tuple.
34 
35hunk ./src/allmydata/mutable/common.py 71
36-    My cache entries contain a set of non-overlapping byteranges: (start,
37-    data, timestamp) tuples.
38+    My cache entries are DataSpans instances, each representing a set of
39+    non-overlapping byteranges.
40     """
41 
42     def __init__(self):
43hunk ./src/allmydata/mutable/common.py 76
44-        self.cache = DictOfSets()
45+        self.cache = {}
46+        self.seqnum = None
47 
48     def _clear(self):
49hunk ./src/allmydata/mutable/common.py 80
50-        # used by unit tests
51-        self.cache = DictOfSets()
52-
53-    def _does_overlap(self, x_start, x_length, y_start, y_length):
54-        if x_start < y_start:
55-            x_start, y_start = y_start, x_start
56-            x_length, y_length = y_length, x_length
57-        x_end = x_start + x_length
58-        y_end = y_start + y_length
59-        # this just returns a boolean. Eventually we'll want a form that
60-        # returns a range.
61-        if not x_length:
62-            return False
63-        if not y_length:
64-            return False
65-        if x_start >= y_end:
66-            return False
67-        if y_start >= x_end:
68-            return False
69-        return True
70-
71+        # also used by unit tests
72+        self.cache = {}
73 
74hunk ./src/allmydata/mutable/common.py 83
75-    def _inside(self, x_start, x_length, y_start, y_length):
76-        x_end = x_start + x_length
77-        y_end = y_start + y_length
78-        if x_start < y_start:
79-            return False
80-        if x_start >= y_end:
81-            return False
82-        if x_end < y_start:
83-            return False
84-        if x_end > y_end:
85-            return False
86-        return True
87+    def add(self, verinfo, shnum, offset, data):
88+        seqnum = verinfo[0]
89+        if seqnum > self.seqnum:
90+            self._clear()
91+            self.seqnum = seqnum
92 
93hunk ./src/allmydata/mutable/common.py 89
94-    def add(self, verinfo, shnum, offset, data, timestamp):
95         index = (verinfo, shnum)
96hunk ./src/allmydata/mutable/common.py 90
97-        self.cache.add(index, (offset, data, timestamp) )
98+        if index in self.cache:
99+            self.cache[index].add(offset, data)
100+        else:
101+            spans = DataSpans()
102+            spans.add(offset, data)
103+            self.cache[index] = spans
104 
105     def read(self, verinfo, shnum, offset, length):
106         """Try to satisfy a read request from cache.
107hunk ./src/allmydata/mutable/common.py 99
108-        Returns (data, timestamp), or (None, None) if the cache did not hold
109-        the requested data.
110+        Returns data, or None if the cache did not hold the entire requested span.
111         """
112 
113hunk ./src/allmydata/mutable/common.py 102
114-        # TODO: join multiple fragments, instead of only returning a hit if
115-        # we have a fragment that contains the whole request
116+        # TODO: perhaps return a DataSpans object representing the fragments
117+        # that we have, instead of only returning a hit if we can satisfy the
118+        # whole request from cache.
119 
120         index = (verinfo, shnum)
121hunk ./src/allmydata/mutable/common.py 107
122-        for entry in self.cache.get(index, set()):
123-            (e_start, e_data, e_timestamp) = entry
124-            if self._inside(offset, length, e_start, len(e_data)):
125-                want_start = offset - e_start
126-                want_end = offset+length - e_start
127-                return (e_data[want_start:want_end], e_timestamp)
128-        return None, None
129-
130-
131+        if index in self.cache:
132+            return self.cache[index].get(offset, length)
133+        else:
134+            return None
135hunk ./src/allmydata/mutable/filenode.py 152
136         self._privkey = privkey
137     def _populate_encprivkey(self, encprivkey):
138         self._encprivkey = encprivkey
139-    def _add_to_cache(self, verinfo, shnum, offset, data, timestamp):
140-        self._cache.add(verinfo, shnum, offset, data, timestamp)
141+    def _add_to_cache(self, verinfo, shnum, offset, data):
142+        self._cache.add(verinfo, shnum, offset, data)
143     def _read_from_cache(self, verinfo, shnum, offset, length):
144         return self._cache.read(verinfo, shnum, offset, length)
145 
146hunk ./src/allmydata/mutable/publish.py 10
147 from twisted.python import failure
148 from allmydata.interfaces import IPublishStatus
149 from allmydata.util import base32, hashutil, mathutil, idlib, log
150+from allmydata.util.dictutil import DictOfSets
151 from allmydata import hashtree, codec
152 from allmydata.storage.server import si_b2a
153 from pycryptopp.cipher.aes import AES
154hunk ./src/allmydata/mutable/publish.py 16
155 from foolscap.api import eventually, fireEventually
156 
157-from allmydata.mutable.common import MODE_WRITE, MODE_CHECK, DictOfSets, \
158+from allmydata.mutable.common import MODE_WRITE, MODE_CHECK, \
159      UncoordinatedWriteError, NotEnoughServersError
160 from allmydata.mutable.servermap import ServerMap
161 from allmydata.mutable.layout import pack_prefix, pack_share, unpack_header, pack_checkstring, \
162hunk ./src/allmydata/mutable/retrieve.py 10
163 from foolscap.api import DeadReferenceError, eventually, fireEventually
164 from allmydata.interfaces import IRetrieveStatus, NotEnoughSharesError
165 from allmydata.util import hashutil, idlib, log
166+from allmydata.util.dictutil import DictOfSets
167 from allmydata import hashtree, codec
168 from allmydata.storage.server import si_b2a
169 from pycryptopp.cipher.aes import AES
170hunk ./src/allmydata/mutable/retrieve.py 16
171 from pycryptopp.publickey import rsa
172 
173-from allmydata.mutable.common import DictOfSets, CorruptShareError, UncoordinatedWriteError
174+from allmydata.mutable.common import CorruptShareError, UncoordinatedWriteError
175 from allmydata.mutable.layout import SIGNED_PREFIX, unpack_share_data
176 
177 class RetrieveStatus:
178hunk ./src/allmydata/mutable/retrieve.py 202
179         got_from_cache = False
180         datavs = []
181         for (offset, length) in readv:
182-            (data, timestamp) = self._node._read_from_cache(self.verinfo, shnum,
183-                                                            offset, length)
184+            data = self._node._read_from_cache(self.verinfo, shnum, offset, length)
185             if data is not None:
186                 datavs.append(data)
187         if len(datavs) == len(readv):
188hunk ./src/allmydata/mutable/servermap.py 9
189 from twisted.python import failure
190 from foolscap.api import DeadReferenceError, RemoteException, eventually
191 from allmydata.util import base32, hashutil, idlib, log
192+from allmydata.util.dictutil import DictOfSets
193 from allmydata.storage.server import si_b2a
194 from allmydata.interfaces import IServermapUpdaterStatus
195 from pycryptopp.publickey import rsa
196hunk ./src/allmydata/mutable/servermap.py 15
197 
198 from allmydata.mutable.common import MODE_CHECK, MODE_ANYTHING, MODE_WRITE, MODE_READ, \
199-     DictOfSets, CorruptShareError, NeedMoreDataError
200+     CorruptShareError, NeedMoreDataError
201 from allmydata.mutable.layout import unpack_prefix_and_signature, unpack_header, unpack_share, \
202      SIGNED_PREFIX_LENGTH
203 
204hunk ./src/allmydata/mutable/servermap.py 585
205                 verinfo = self._got_results_one_share(shnum, data, peerid, lp)
206                 last_verinfo = verinfo
207                 last_shnum = shnum
208-                self._node._add_to_cache(verinfo, shnum, 0, data, now)
209+                self._node._add_to_cache(verinfo, shnum, 0, data)
210             except CorruptShareError, e:
211                 # log it and give the other shares a chance to be processed
212                 f = failure.Failure()
213hunk ./src/allmydata/test/test_mutable.py 304
214             d.addCallback(lambda res: self.failUnlessEqual(res, "contents"))
215             d.addCallback(lambda ign: self.failUnless(isinstance(n._cache, ResponseCache)))
216 
217-            def _check_cache_size(expected):
218-                # The total size of cache entries should not increase on the second download.
219+            def _check_cache(expected):
220+                # The total size of cache entries should not increase on the second download;
221+                # in fact the cache contents should be identical.
222                 d2 = n.download_best_version()
223hunk ./src/allmydata/test/test_mutable.py 308
224-                d2.addCallback(lambda ign: self.failUnlessEqual(len(repr(n._cache.cache)), expected))
225+                d2.addCallback(lambda rep: self.failUnlessEqual(repr(n._cache.cache), expected))
226                 return d2
227hunk ./src/allmydata/test/test_mutable.py 310
228-            d.addCallback(lambda ign: _check_cache_size(len(repr(n._cache.cache))))
229+            d.addCallback(lambda ign: _check_cache(repr(n._cache.cache)))
230             return d
231         d.addCallback(_created)
232         return d
233hunk ./src/allmydata/test/test_mutable.py 314
234-    test_response_cache_memory_leak.todo = "This isn't fixed (see #1045)."
235 
236     def test_create_with_initial_contents_function(self):
237         data = "initial contents"
238hunk ./src/allmydata/test/test_mutable.py 1720
239 
240 
241 class Utils(unittest.TestCase):
242-    def _do_inside(self, c, x_start, x_length, y_start, y_length):
243-        # we compare this against sets of integers
244-        x = set(range(x_start, x_start+x_length))
245-        y = set(range(y_start, y_start+y_length))
246-        should_be_inside = x.issubset(y)
247-        self.failUnlessEqual(should_be_inside, c._inside(x_start, x_length,
248-                                                         y_start, y_length),
249-                             str((x_start, x_length, y_start, y_length)))
250-
251-    def test_cache_inside(self):
252-        c = ResponseCache()
253-        x_start = 10
254-        x_length = 5
255-        for y_start in range(8, 17):
256-            for y_length in range(8):
257-                self._do_inside(c, x_start, x_length, y_start, y_length)
258-
259-    def _do_overlap(self, c, x_start, x_length, y_start, y_length):
260-        # we compare this against sets of integers
261-        x = set(range(x_start, x_start+x_length))
262-        y = set(range(y_start, y_start+y_length))
263-        overlap = bool(x.intersection(y))
264-        self.failUnlessEqual(overlap, c._does_overlap(x_start, x_length,
265-                                                      y_start, y_length),
266-                             str((x_start, x_length, y_start, y_length)))
267-
268-    def test_cache_overlap(self):
269-        c = ResponseCache()
270-        x_start = 10
271-        x_length = 5
272-        for y_start in range(8, 17):
273-            for y_length in range(8):
274-                self._do_overlap(c, x_start, x_length, y_start, y_length)
275-
276     def test_cache(self):
277         c = ResponseCache()
278         # xdata = base62.b2a(os.urandom(100))[:100]
279hunk ./src/allmydata/test/test_mutable.py 1725
280         xdata = "1Ex4mdMaDyOl9YnGBM3I4xaBF97j8OQAg1K3RBR01F2PwTP4HohB3XpACuku8Xj4aTQjqJIR1f36mEj3BCNjXaJmPBEZnnHL0U9l"
281         ydata = "4DCUQXvkEPnnr9Lufikq5t21JsnzZKhzxKBhLhrBB6iIcBOWRuT4UweDhjuKJUre8A4wOObJnl3Kiqmlj4vjSLSqUGAkUD87Y3vs"
282-        nope = (None, None)
283-        c.add("v1", 1, 0, xdata, "time0")
284-        c.add("v1", 1, 2000, ydata, "time1")
285-        self.failUnlessEqual(c.read("v2", 1, 10, 11), nope)
286-        self.failUnlessEqual(c.read("v1", 2, 10, 11), nope)
287-        self.failUnlessEqual(c.read("v1", 1, 0, 10), (xdata[:10], "time0"))
288-        self.failUnlessEqual(c.read("v1", 1, 90, 10), (xdata[90:], "time0"))
289-        self.failUnlessEqual(c.read("v1", 1, 300, 10), nope)
290-        self.failUnlessEqual(c.read("v1", 1, 2050, 5), (ydata[50:55], "time1"))
291-        self.failUnlessEqual(c.read("v1", 1, 0, 101), nope)
292-        self.failUnlessEqual(c.read("v1", 1, 99, 1), (xdata[99:100], "time0"))
293-        self.failUnlessEqual(c.read("v1", 1, 100, 1), nope)
294-        self.failUnlessEqual(c.read("v1", 1, 1990, 9), nope)
295-        self.failUnlessEqual(c.read("v1", 1, 1990, 10), nope)
296-        self.failUnlessEqual(c.read("v1", 1, 1990, 11), nope)
297-        self.failUnlessEqual(c.read("v1", 1, 1990, 15), nope)
298-        self.failUnlessEqual(c.read("v1", 1, 1990, 19), nope)
299-        self.failUnlessEqual(c.read("v1", 1, 1990, 20), nope)
300-        self.failUnlessEqual(c.read("v1", 1, 1990, 21), nope)
301-        self.failUnlessEqual(c.read("v1", 1, 1990, 25), nope)
302-        self.failUnlessEqual(c.read("v1", 1, 1999, 25), nope)
303+        c.add("v1", 1, 0, xdata)
304+        c.add("v1", 1, 2000, ydata)
305+        self.failUnlessEqual(c.read("v2", 1, 10, 11), None)
306+        self.failUnlessEqual(c.read("v1", 2, 10, 11), None)
307+        self.failUnlessEqual(c.read("v1", 1, 0, 10), xdata[:10])
308+        self.failUnlessEqual(c.read("v1", 1, 90, 10), xdata[90:])
309+        self.failUnlessEqual(c.read("v1", 1, 300, 10), None)
310+        self.failUnlessEqual(c.read("v1", 1, 2050, 5), ydata[50:55])
311+        self.failUnlessEqual(c.read("v1", 1, 0, 101), None)
312+        self.failUnlessEqual(c.read("v1", 1, 99, 1), xdata[99:100])
313+        self.failUnlessEqual(c.read("v1", 1, 100, 1), None)
314+        self.failUnlessEqual(c.read("v1", 1, 1990, 9), None)
315+        self.failUnlessEqual(c.read("v1", 1, 1990, 10), None)
316+        self.failUnlessEqual(c.read("v1", 1, 1990, 11), None)
317+        self.failUnlessEqual(c.read("v1", 1, 1990, 15), None)
318+        self.failUnlessEqual(c.read("v1", 1, 1990, 19), None)
319+        self.failUnlessEqual(c.read("v1", 1, 1990, 20), None)
320+        self.failUnlessEqual(c.read("v1", 1, 1990, 21), None)
321+        self.failUnlessEqual(c.read("v1", 1, 1990, 25), None)
322+        self.failUnlessEqual(c.read("v1", 1, 1999, 25), None)
323 
324hunk ./src/allmydata/test/test_mutable.py 1746
325-        # optional: join fragments
326+        # test joining fragments
327         c = ResponseCache()
328hunk ./src/allmydata/test/test_mutable.py 1748
329-        c.add("v1", 1, 0, xdata[:10], "time0")
330-        c.add("v1", 1, 10, xdata[10:20], "time1")
331-        #self.failUnlessEqual(c.read("v1", 1, 0, 20), (xdata[:20], "time0"))
332+        c.add("v1", 1, 0, xdata[:10])
333+        c.add("v1", 1, 10, xdata[10:20])
334+        self.failUnlessEqual(c.read("v1", 1, 0, 20), xdata[:20])
335 
336 class Exceptions(unittest.TestCase):
337     def test_repr(self):
338}
339
340Context:
341
342[docs/known_issues.rst: Add section on traffic analysis. Fix URL for current version of file.
343david-sarah@jacaranda.org**20101024234259
344 Ignore-this: f3416e79d3bb833f5118da23e85723ad
345]
346[test_mutable.py: add test for ResponseCache memory leak. refs #1045, #1129
347david-sarah@jacaranda.org**20101024193409
348 Ignore-this: 3aee7f0677956cc6deaccb4d5b8e415f
349]
350[docs/running.html: fix missing end-quote, and change frontends/ doc references to .rst.
351david-sarah@jacaranda.org**20101024171500
352 Ignore-this: 47c645a6595e1790b1d1adfa71af0e1d
353]
354[docs/running.html: 'tahoe create-client' now creates a node with storage disabled. Also change configuration.txt references to configuration.rst.
355david-sarah@jacaranda.org**20101024170431
356 Ignore-this: e5b048055494ba3505bb8a506610681c
357]
358[test_encodingutil.py: test_argv_to_unicode modified the wrong encoding variable. fixes #1214
359david-sarah@jacaranda.org**20101023035810
360 Ignore-this: e5f1f849931b96939facc53d93ff61c5
361]
362[doc: add explanation of the motivation for the surprising and awkward API to erasure coding
363zooko@zooko.com**20101015060202
364 Ignore-this: 428913ff6e1bf5b393deffb1f20b949b
365]
366[setup: catch and log ValueError from locale.getdefaultlocale() in show-tool-versions.py
367zooko@zooko.com**20101015054440
368 Ignore-this: 827d91490562c32ed7cf6526dfded773
369 I got a bug report from Mathias Baert showing that locale.getdefaultlocale() raises an exception on his Mac OS X system. Heh.
370]
371[docs: update how-to-make-a-release doc with a few tweaks from the 1.8.0 process
372zooko@zooko.com**20101015054413
373 Ignore-this: ca5e9478531a3393792ae283239549dd
374]
375[docs: update NEWS ref: #1216
376zooko@zooko.com**20101015053719
377 Ignore-this: 2e0b92e4145d667cdf075e64b7965530
378]
379[docs: fix tab-vs-spaces, make some CLI examples <tt>/"literal", wrap some to
380Brian Warner <warner@lothar.com>**20101015060606
381 Ignore-this: eae08bdf0afb19a2fbf41c31e70a8122
382 80-cols, remove spurious whitespace. Add rst2html.py rule to Makefile.
383]
384[docs: add Peter Secor, Shawn Willden, and Terrell Russell as signatories to docs/backdoors.rst
385zooko@zooko.com**20101015053242
386 Ignore-this: c77adf819d664f673e17c4aaeb353f33
387]
388[docs: convert all .txt docs to .rst thanks to Ravi Pinjala
389zooko@zooko.com**20101015052913
390 Ignore-this: 178a5122423189ecfc45b142314a78ec
391 fixes #1225
392]
393[docs: add statement on our refusal to insert backdoors
394zooko@zooko.com**20101006051147
395 Ignore-this: 644d308319a7b80c4434bdff9760404a
396]
397[setup: add --multi-version to the "setup.py develop" command-line
398zooko@zooko.com**20101005182350
399 Ignore-this: 709155cc21caff29826b8d41a8c8d63d
400 fixes #530. I earlier tried this twice (see #530 for history) and then twice rolled it back due to some problems that arose. However, I didn't write down what the problems were in enough detail on the ticket that I can tell today whether those problems are still issues, so here goes the third attempt. (I did write down on the ticket that it would not create site.py or .pth files in the target directory with --multi-version mode, but I didn't explain why *that* was a problem.)
401]
402[setup: use execfile to access _auto_deps.py in its proper location of src/allmydata/ instead of copying it into place when setup.py is executed
403zooko@zooko.com**20100906055714
404 Ignore-this: c179b42672d775580afad40121f86812
405]
406[trivial: M-x whitespace-cleanup
407zooko@zooko.com**20100903144712
408 Ignore-this: 1bb764d11ac69b4a35ea091cfb13158a
409]
410[minor: remove unused interface declaration, change allmydata.org to tahoe-lafs.org in email address, fix wording in relnotes.txt
411zooko@zooko.com**20100930153708
412 Ignore-this: a452969228afed2774de375e29fa3048
413]
414[immutable/repairer.py: don't use the default happiness setting when repairing
415Kevan Carstensen <kevan@isnotajoke.com>**20100927200102
416 Ignore-this: bd704d9744b970849da8d46a16b8089a
417]
418[NEWS: note dependency updates to pycryptopp and pycrypto.
419david-sarah@jacaranda.org**20100924191207
420 Ignore-this: eeaf5c9c9104f24c450c2ec4482ac1ee
421]
422[TAG allmydata-tahoe-1.8.0
423zooko@zooko.com**20100924021631
424 Ignore-this: 494ca0a885c5e20c883845fc53e7ab5d
425]
426Patch bundle hash:
42762cba8391bf4fcbaf0f842a45be08e30f83ac5f4