# lrucache.py -- a simple LRU (Least-Recently-Used) cache class

# Copyright 2004 Evan Prodromou <evan@bad.dynu.ca>
# Licensed under the Academic Free License 2.1

# Licensed for ftputil under the revised BSD license
# with permission by the author, Evan Prodromou. Many
# thanks, Evan! :-)
#
# The original file is available at
# http://pypi.python.org/pypi/lrucache/0.2 .

# arch-tag: LRU cache main module

015 | """a simple LRU (Least-Recently-Used) cache module |
016 | |
017 | This module provides very simple LRU (Least-Recently-Used) cache |
018 | functionality. |
019 | |
020 | An *in-memory cache* is useful for storing the results of an |
021 | 'expe\nsive' process (one that takes a lot of time or resources) for |
022 | later re-use. Typical examples are accessing data from the filesystem, |
023 | a database, or a network location. If you know you'll need to re-read |
024 | the data again, it can help to keep it in a cache. |
025 | |
026 | You *can* use a Python dictionary as a cache for some purposes. |
027 | However, if the results you're caching are large, or you have a lot of |
028 | possible results, this can be impractical memory-wise. |
029 | |
030 | An *LRU cache*, on the other hand, only keeps _some_ of the results in |
031 | memory, which keeps you from overusing resources. The cache is bounded |
032 | by a maximum size; if you try to add more values to the cache, it will |
033 | automatically discard the values that you haven't read or written to |
034 | in the longest time. In other words, the least-recently-used items are |
035 | discarded. [1]_ |
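
For example, with a cache that holds only two entries (the keys and
values below are just for illustration), writing a third entry pushes
out whichever entry hasn't been read or written for the longest time::

    cache = LRUCache(2)
    cache['a'] = 1
    cache['b'] = 2
    cache['c'] = 3    # 'a' is discarded; 'b' and 'c' remain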

.. [1] 'Discarded' here means 'removed from the cache'.

"""

from __future__ import generators
import time
from heapq import heappush, heappop, heapify

# the suffix after the hyphen denotes modifications by the
# ftputil project with respect to the original version
__version__ = "0.2-1"
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE']
__docformat__ = 'reStructuredText en'

DEFAULT_SIZE = 16
"""Default size of a new LRUCache object, if no 'size' argument is given."""

class CacheKeyError(KeyError):
    """Error raised when cache requests fail

    When a cache record is accessed which no longer exists (or never did),
    this error is raised. To avoid it, you may want to check for the existence
    of a cache record before reading or deleting it."""
    pass

class LRUCache(object):
    """Least-Recently-Used (LRU) cache.

    Instances of this class provide a least-recently-used (LRU) cache. They
    emulate a Python mapping type. You can use an LRU cache more or less like
    a Python dictionary, with the exception that objects you put into the
    cache may be discarded before you take them out.

    Some example usage::

        cache = LRUCache(32)  # new cache
        cache['foo'] = get_file_contents('foo')  # or whatever

        if 'foo' in cache:  # if it's still in cache...
            # use cached version
            contents = cache['foo']
        else:
            # recalculate
            contents = get_file_contents('foo')
            # store in cache for next time
            cache['foo'] = contents

        print cache.size  # Maximum size

        print len(cache)  # 0 <= len(cache) <= cache.size

        cache.size = 10  # Auto-shrink on size assignment

        for i in range(50):  # note: larger than cache size
            cache[i] = i

        if 0 not in cache: print 'Zero was discarded.'

        if 42 in cache:
            del cache[42]  # Manual deletion

        for j in cache:  # iterate (in LRU order)
            print j, cache[j]  # iterator produces keys, not values
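
    The last modification time of a record is available via 'mtime', for
    example to decide whether a cached value might already be stale (the
    one-minute threshold below is merely illustrative)::

        if 'foo' in cache and time.time() - cache.mtime('foo') > 60:
            del cache['foo']  # too old, drop it and re-read later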
100 | """ |
101 | |
102 | class __Node( object ): |
103 | """Record of a cached value. Not for public consumption.""" |
104 | |
105 | def __init__( self , key, obj, timestamp, sort_key): |
106 | object .__init__( self ) |
107 | self .key = key |
108 | self .obj = obj |
109 | self .atime = timestamp |
110 | self .mtime = self .atime |
111 | self ._sort_key = sort_key |
112 | |
113 | def __cmp__( self , other): |
114 | return cmp ( self ._sort_key, other._sort_key) |
115 | |
116 | def __repr__( self ): |
117 | return "<%s %s => %s (%s)>" % \ |
118 | ( self .__class__, self .key, self .obj, \ |
119 | time.asctime(time.localtime( self .atime))) |
120 | |
    def __init__(self, size=DEFAULT_SIZE):
        # Check arguments
        if size <= 0:
            raise ValueError, size
        elif type(size) is not type(0):
            raise TypeError, size
        object.__init__(self)
        # heap of cache nodes; the heap invariant keeps the
        # least-recently-used node at index 0
        self.__heap = []
        # maps cache keys to their nodes
        self.__dict = {}
        # Maximum size of the cache. If more than 'size' elements are
        # added to the cache, the least-recently-used ones will be
        # discarded.
        self.size = size
        self.__counter = 0

    def _sort_key(self):
        """Return a new integer value upon every call.

        Cache nodes need a monotonically increasing time indicator.
        time.time() and time.clock() don't guarantee this in a
        platform-independent way.
        """
        self.__counter += 1
        return self.__counter

    def __len__(self):
        return len(self.__heap)

    def __contains__(self, key):
        return self.__dict.has_key(key)

    def __setitem__(self, key, obj):
        if self.__dict.has_key(key):
            node = self.__dict[key]
            # update node object in-place
            node.obj = obj
            node.atime = time.time()
            node.mtime = node.atime
            node._sort_key = self._sort_key()
            heapify(self.__heap)
        else:
            # size may have been reset, so we loop
            while len(self.__heap) >= self.size:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]
            node = self.__Node(key, obj, time.time(), self._sort_key())
            self.__dict[key] = node
            heappush(self.__heap, node)

    def __getitem__(self, key):
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            # update node object in-place
            node.atime = time.time()
            node._sort_key = self._sort_key()
            heapify(self.__heap)
            return node.obj

    def __delitem__(self, key):
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            del self.__dict[key]
            self.__heap.remove(node)
            heapify(self.__heap)
            return node.obj

    def __iter__(self):
        copy = self.__heap[:]
        while len(copy) > 0:
            node = heappop(copy)
            yield node.key
        raise StopIteration

    def __setattr__(self, name, value):
        object.__setattr__(self, name, value)
        # automagically shrink heap on resize
        if name == 'size':
            while len(self.__heap) > value:
                lru = heappop(self.__heap)
                del self.__dict[lru.key]

    def __repr__(self):
        return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap))

    def mtime(self, key):
        """Return the last modification time for the cache record with key.

        May be useful for cache instances where the stored values can get
        'stale', such as caching file or network resource contents."""
        if not self.__dict.has_key(key):
            raise CacheKeyError(key)
        else:
            node = self.__dict[key]
            return node.mtime

if __name__ == "__main__":
    cache = LRUCache(25)
    print cache
    for i in range(50):
        cache[i] = str(i)
    print cache
    if 46 in cache:
        print "46 in cache"
        del cache[46]
    print cache
    cache.size = 10
    print cache
    cache[46] = '46'
    print cache
    print len(cache)
    for c in cache:
        print c
    print cache
    print cache.mtime(46)
    for c in cache:
        print c