-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcache.py
More file actions
131 lines (114 loc) · 4.14 KB
/
cache.py
File metadata and controls
131 lines (114 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
urllib2 caching handler
Modified from http://code.activestate.com/recipes/491261/
"""
import os
import time
import httplib
import urllib2
import StringIO
from hashlib import md5
def calculate_cache_path(cache_location, url):
"""Checks if cache_location/hash_of_url.headers an .body exist
"""
thumb = md5(url).hexdigest()
header = os.path.join(cache_location, thumb + ".headers")
body = os.path.join(cache_location, thumb + ".body")
return header, body
def check_cache_time(path, max_age):
"""Checks if a file has been created/modified in the last max_age seconds.
False means the file is too old (or doesn't exist), True means it is
upto-date and valid"""
if not os.path.isfile(path):
return False
cache_modified_time = os.stat(path).st_mtime
time_now = time.time()
if cache_modified_time < time_now - max_age:
# Cache is old
return False
else:
return True
def exists_in_cache(cache_location, url, max_age):
"""Returns if header AND body cache file exist"""
hpath, bpath = calculate_cache_path(cache_location, url)
if os.path.exists(hpath) and os.path.exists(bpath):
return check_cache_time(hpath, max_age) and check_cache_time(bpath, max_age)
else:
# File does not exist
return False
def store_in_cache(cache_location, url, response):
"""Tries to store response in cache"""
hpath, bpath = calculate_cache_path(cache_location, url)
try:
outf = open(hpath, "w")
headers = str(response.info())
outf.write(headers)
outf.close()
outf = open(bpath, "w")
outf.write(response.read())
outf.close()
except IOError:
pass
class CacheHandler(urllib2.BaseHandler):
"""Stores responses in a persistant on-disk cache.
If a subsequent GET request is made for the same URL, the stored
response is returned, saving time, resources and bandwith
"""
def __init__(self, cache_location, max_age = 21600):
"""The location of the cache directory"""
self.max_age = max_age
self.cache_location = cache_location
if not os.path.exists(self.cache_location):
os.mkdir(self.cache_location)
def default_open(self, request):
if request.get_method() is not "GET":
return None # let the next handler try to handle the request
if exists_in_cache(
self.cache_location, request.get_full_url(), self.max_age
):
return CachedResponse(
self.cache_location,
request.get_full_url(),
set_cache_header=True
)
else:
return None
def http_response(self, request, response):
if request.get_method() == "GET":
if 'x-cache' not in response.info():
store_in_cache(
self.cache_location,
request.get_full_url(),
response
)
set_cache_header = False
else:
set_cache_header = True
#end if x-cahce in response
return CachedResponse(
self.cache_location,
request.get_full_url(),
set_cache_header=set_cache_header
)
else:
return response
class CachedResponse(StringIO.StringIO):
"""An urllib2.response-like object for cached responses.
To determine wheter a response is cached or coming directly from
the network, check the x-cache header rather than the object type.
"""
def __init__(self, cache_location, url, set_cache_header=True):
self.cache_location = cache_location
hpath, bpath = calculate_cache_path(cache_location, url)
StringIO.StringIO.__init__(self, file(bpath).read())
self.url = url
self.code = 200
self.msg = "OK"
headerbuf = file(hpath).read()
if set_cache_header:
headerbuf += "x-cache: %s\r\n" % (bpath)
self.headers = httplib.HTTPMessage(StringIO.StringIO(headerbuf))
def info(self):
return self.headers
def geturl(self):
return self.url