diff options
author | Lars Hjemli | 2008-05-03 10:10:07 +0200 |
---|---|---|
committer | Lars Hjemli | 2008-05-03 10:10:07 +0200 |
commit | e19683bedebc74593cb4c4518e47a334a5478e1e (patch) | |
tree | 8b4f231327d27b9451a6d9ab4b2af47558c61352 /cache.c | |
parent | 112b2080626c62fff27cf8aaa9ac2fb07eb50b74 (diff) | |
parent | 9000bbf865cb3578ba5ed3810dc44253cb46ec7f (diff) | |
download | cgit-e19683bedebc74593cb4c4518e47a334a5478e1e.tar.gz cgit-e19683bedebc74593cb4c4518e47a334a5478e1e.tar.bz2 cgit-e19683bedebc74593cb4c4518e47a334a5478e1e.zip |
Merge branch 'lh/cache'
* lh/cache:
Add page 'ls_cache'
Redesign the caching layer
Diffstat (limited to 'cache.c')
-rw-r--r-- | cache.c | 427 |
1 files changed, 359 insertions, 68 deletions
@@ -4,117 +4,408 @@ | |||
4 | * | 4 | * |
5 | * Licensed under GNU General Public License v2 | 5 | * Licensed under GNU General Public License v2 |
6 | * (see COPYING for full license text) | 6 | * (see COPYING for full license text) |
7 | * | ||
8 | * | ||
9 | * The cache is just a directory structure where each file is a cache slot, | ||
10 | * and each filename is based on the hash of some key (e.g. the cgit url). | ||
11 | * Each file contains the full key followed by the cached content for that | ||
12 | * key. | ||
13 | * | ||
7 | */ | 14 | */ |
8 | 15 | ||
9 | #include "cgit.h" | 16 | #include "cgit.h" |
10 | #include "cache.h" | 17 | #include "cache.h" |
11 | 18 | ||
12 | const int NOLOCK = -1; | 19 | #define CACHE_BUFSIZE (1024 * 4) |
13 | 20 | ||
14 | char *cache_safe_filename(const char *unsafe) | 21 | struct cache_slot { |
22 | const char *key; | ||
23 | int keylen; | ||
24 | int ttl; | ||
25 | cache_fill_fn fn; | ||
26 | void *cbdata; | ||
27 | int cache_fd; | ||
28 | int lock_fd; | ||
29 | const char *cache_name; | ||
30 | const char *lock_name; | ||
31 | int match; | ||
32 | struct stat cache_st; | ||
33 | struct stat lock_st; | ||
34 | int bufsize; | ||
35 | char buf[CACHE_BUFSIZE]; | ||
36 | }; | ||
37 | |||
38 | /* Open an existing cache slot and fill the cache buffer with | ||
39 | * (part of) the content of the cache file. Return 0 on success | ||
40 | * and errno otherwise. | ||
41 | */ | ||
42 | static int open_slot(struct cache_slot *slot) | ||
15 | { | 43 | { |
16 | static char buf[4][PATH_MAX]; | 44 | char *bufz; |
17 | static int bufidx; | 45 | int bufkeylen = -1; |
18 | char *s; | 46 | |
19 | char c; | 47 | slot->cache_fd = open(slot->cache_name, O_RDONLY); |
20 | 48 | if (slot->cache_fd == -1) | |
21 | bufidx++; | 49 | return errno; |
22 | bufidx &= 3; | 50 | |
23 | s = buf[bufidx]; | 51 | if (fstat(slot->cache_fd, &slot->cache_st)) |
24 | 52 | return errno; | |
25 | while(unsafe && (c = *unsafe++) != 0) { | 53 | |
26 | if (c == '/' || c == ' ' || c == '&' || c == '|' || | 54 | slot->bufsize = read(slot->cache_fd, slot->buf, sizeof(slot->buf)); |
27 | c == '>' || c == '<' || c == '.') | 55 | if (slot->bufsize == 0) |
28 | c = '_'; | 56 | return errno; |
29 | *s++ = c; | 57 | |
30 | } | 58 | bufz = memchr(slot->buf, 0, slot->bufsize); |
31 | *s = '\0'; | 59 | if (bufz) |
32 | return buf[bufidx]; | 60 | bufkeylen = bufz - slot->buf; |
61 | |||
62 | slot->match = bufkeylen == slot->keylen && | ||
63 | !memcmp(slot->key, slot->buf, bufkeylen + 1); | ||
64 | |||
65 | return 0; | ||
33 | } | 66 | } |
34 | 67 | ||
35 | int cache_exist(struct cacheitem *item) | 68 | /* Close the active cache slot */ |
69 | static void close_slot(struct cache_slot *slot) | ||
36 | { | 70 | { |
37 | if (stat(item->name, &item->st)) { | 71 | if (slot->cache_fd > 0) { |
38 | item->st.st_mtime = 0; | 72 | close(slot->cache_fd); |
39 | return 0; | 73 | slot->cache_fd = -1; |
40 | } | 74 | } |
41 | return 1; | ||
42 | } | 75 | } |
43 | 76 | ||
44 | int cache_create_dirs() | 77 | /* Print the content of the active cache slot (but skip the key). */ |
78 | static int print_slot(struct cache_slot *slot) | ||
45 | { | 79 | { |
46 | char *path; | 80 | ssize_t i, j = 0; |
81 | |||
82 | i = lseek(slot->cache_fd, slot->keylen + 1, SEEK_SET); | ||
83 | if (i != slot->keylen + 1) | ||
84 | return errno; | ||
85 | |||
86 | while((i=read(slot->cache_fd, slot->buf, sizeof(slot->buf))) > 0) | ||
87 | j = write(STDOUT_FILENO, slot->buf, i); | ||
47 | 88 | ||
48 | path = fmt("%s", ctx.cfg.cache_root); | 89 | if (j < 0) |
49 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 90 | return errno; |
91 | else | ||
50 | return 0; | 92 | return 0; |
93 | } | ||
51 | 94 | ||
52 | if (!ctx.repo) | 95 | /* Check if the slot has expired */ |
96 | static int is_expired(struct cache_slot *slot) | ||
97 | { | ||
98 | if (slot->ttl < 0) | ||
53 | return 0; | 99 | return 0; |
100 | else | ||
101 | return slot->cache_st.st_mtime + slot->ttl*60 < time(NULL); | ||
102 | } | ||
54 | 103 | ||
55 | path = fmt("%s/%s", ctx.cfg.cache_root, | 104 | /* Check if the slot has been modified since we opened it. |
56 | cache_safe_filename(ctx.repo->url)); | 105 | * NB: If stat() fails, we pretend the file is modified. |
106 | */ | ||
107 | static int is_modified(struct cache_slot *slot) | ||
108 | { | ||
109 | struct stat st; | ||
57 | 110 | ||
58 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 111 | if (stat(slot->cache_name, &st)) |
59 | return 0; | 112 | return 1; |
113 | return (st.st_ino != slot->cache_st.st_ino || | ||
114 | st.st_mtime != slot->cache_st.st_mtime || | ||
115 | st.st_size != slot->cache_st.st_size); | ||
116 | } | ||
60 | 117 | ||
61 | if (ctx.qry.page) { | 118 | /* Close an open lockfile */ |
62 | path = fmt("%s/%s/%s", ctx.cfg.cache_root, | 119 | static void close_lock(struct cache_slot *slot) |
63 | cache_safe_filename(ctx.repo->url), | 120 | { |
64 | ctx.qry.page); | 121 | if (slot->lock_fd > 0) { |
65 | if (mkdir(path, S_IRWXU) && errno!=EEXIST) | 122 | close(slot->lock_fd); |
66 | return 0; | 123 | slot->lock_fd = -1; |
67 | } | 124 | } |
68 | return 1; | ||
69 | } | 125 | } |
70 | 126 | ||
71 | int cache_refill_overdue(const char *lockfile) | 127 | /* Create a lockfile used to store the generated content for a cache |
128 | * slot, and write the slot key + \0 into it. | ||
129 | * Returns 0 on success and errno otherwise. | ||
130 | */ | ||
131 | static int lock_slot(struct cache_slot *slot) | ||
72 | { | 132 | { |
73 | struct stat st; | 133 | slot->lock_fd = open(slot->lock_name, O_RDWR|O_CREAT|O_EXCL, |
134 | S_IRUSR|S_IWUSR); | ||
135 | if (slot->lock_fd == -1) | ||
136 | return errno; | ||
137 | write(slot->lock_fd, slot->key, slot->keylen + 1); | ||
138 | return 0; | ||
139 | } | ||
74 | 140 | ||
75 | if (stat(lockfile, &st)) | 141 | /* Release the current lockfile. If `replace_old_slot` is set the |
76 | return 0; | 142 | * lockfile replaces the old cache slot, otherwise the lockfile is |
143 | * just deleted. | ||
144 | */ | ||
145 | static int unlock_slot(struct cache_slot *slot, int replace_old_slot) | ||
146 | { | ||
147 | int err; | ||
148 | |||
149 | if (replace_old_slot) | ||
150 | err = rename(slot->lock_name, slot->cache_name); | ||
77 | else | 151 | else |
78 | return (time(NULL) - st.st_mtime > ctx.cfg.cache_max_create_time); | 152 | err = unlink(slot->lock_name); |
153 | return err; | ||
79 | } | 154 | } |
80 | 155 | ||
81 | int cache_lock(struct cacheitem *item) | 156 | /* Generate the content for the current cache slot by redirecting |
157 | * stdout to the lock-fd and invoking the callback function | ||
158 | */ | ||
159 | static int fill_slot(struct cache_slot *slot) | ||
82 | { | 160 | { |
83 | int i = 0; | 161 | int tmp; |
84 | char *lockfile = xstrdup(fmt("%s.lock", item->name)); | ||
85 | 162 | ||
86 | top: | 163 | /* Preserve stdout */ |
87 | if (++i > ctx.cfg.max_lock_attempts) | 164 | tmp = dup(STDOUT_FILENO); |
88 | die("cache_lock: unable to lock %s: %s", | 165 | if (tmp == -1) |
89 | item->name, strerror(errno)); | 166 | return errno; |
90 | 167 | ||
91 | item->fd = open(lockfile, O_WRONLY|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR); | 168 | /* Redirect stdout to lockfile */ |
169 | if (dup2(slot->lock_fd, STDOUT_FILENO) == -1) | ||
170 | return errno; | ||
92 | 171 | ||
93 | if (item->fd == NOLOCK && errno == ENOENT && cache_create_dirs()) | 172 | /* Generate cache content */ |
94 | goto top; | 173 | slot->fn(slot->cbdata); |
95 | 174 | ||
96 | if (item->fd == NOLOCK && errno == EEXIST && | 175 | /* Restore stdout */ |
97 | cache_refill_overdue(lockfile) && !unlink(lockfile)) | 176 | if (dup2(tmp, STDOUT_FILENO) == -1) |
98 | goto top; | 177 | return errno; |
99 | 178 | ||
100 | free(lockfile); | 179 | /* Close the temporary filedescriptor */ |
101 | return (item->fd > 0); | 180 | close(tmp); |
181 | return 0; | ||
102 | } | 182 | } |
103 | 183 | ||
104 | int cache_unlock(struct cacheitem *item) | 184 | /* Crude implementation of 32-bit FNV-1 hash algorithm, |
185 | * see http://www.isthe.com/chongo/tech/comp/fnv/ for details | ||
186 | * about the magic numbers. | ||
187 | */ | ||
188 | #define FNV_OFFSET 0x811c9dc5 | ||
189 | #define FNV_PRIME 0x01000193 | ||
190 | |||
191 | unsigned long hash_str(const char *str) | ||
105 | { | 192 | { |
106 | close(item->fd); | 193 | unsigned long h = FNV_OFFSET; |
107 | return (rename(fmt("%s.lock", item->name), item->name) == 0); | 194 | unsigned char *s = (unsigned char *)str; |
195 | |||
196 | if (!s) | ||
197 | return h; | ||
198 | |||
199 | while(*s) { | ||
200 | h *= FNV_PRIME; | ||
201 | h ^= *s++; | ||
202 | } | ||
203 | return h; | ||
108 | } | 204 | } |
109 | 205 | ||
110 | int cache_cancel_lock(struct cacheitem *item) | 206 | static int process_slot(struct cache_slot *slot) |
111 | { | 207 | { |
112 | return (unlink(fmt("%s.lock", item->name)) == 0); | 208 | int err; |
209 | |||
210 | err = open_slot(slot); | ||
211 | if (!err && slot->match) { | ||
212 | if (is_expired(slot)) { | ||
213 | if (!lock_slot(slot)) { | ||
214 | /* If the cachefile has been replaced between | ||
215 | * `open_slot` and `lock_slot`, we'll just | ||
216 | * serve the stale content from the original | ||
217 | * cachefile. This way we avoid pruning the | ||
218 | * newly generated slot. The same code-path | ||
219 | * is chosen if fill_slot() fails for some | ||
220 | * reason. | ||
221 | * | ||
222 | * TODO? check if the new slot contains the | ||
223 | * same key as the old one, since we would | ||
224 | * prefer to serve the newest content. | ||
225 | * This will require us to open yet another | ||
226 | * file-descriptor and read and compare the | ||
227 | * key from the new file, so for now we're | ||
228 | * lazy and just ignore the new file. | ||
229 | */ | ||
230 | if (is_modified(slot) || fill_slot(slot)) { | ||
231 | unlock_slot(slot, 0); | ||
232 | close_lock(slot); | ||
233 | } else { | ||
234 | close_slot(slot); | ||
235 | unlock_slot(slot, 1); | ||
236 | slot->cache_fd = slot->lock_fd; | ||
237 | } | ||
238 | } | ||
239 | } | ||
240 | print_slot(slot); | ||
241 | close_slot(slot); | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | /* If the cache slot does not exist (or its key doesn't match the | ||
246 | * current key), lets try to create a new cache slot for this | ||
247 | * request. If this fails (for whatever reason), lets just generate | ||
248 | * the content without caching it and fool the caller to belive | ||
249 | * everything worked out (but print a warning on stdout). | ||
250 | */ | ||
251 | |||
252 | close_slot(slot); | ||
253 | if ((err = lock_slot(slot)) != 0) { | ||
254 | cache_log("[cgit] Unable to lock slot %s: %s (%d)\n", | ||
255 | slot->lock_name, strerror(err), err); | ||
256 | slot->fn(slot->cbdata); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | if ((err = fill_slot(slot)) != 0) { | ||
261 | cache_log("[cgit] Unable to fill slot %s: %s (%d)\n", | ||
262 | slot->lock_name, strerror(err), err); | ||
263 | unlock_slot(slot, 0); | ||
264 | close_lock(slot); | ||
265 | slot->fn(slot->cbdata); | ||
266 | return 0; | ||
267 | } | ||
268 | // We've got a valid cache slot in the lock file, which | ||
269 | // is about to replace the old cache slot. But if we | ||
270 | // release the lockfile and then try to open the new cache | ||
271 | // slot, we might get a race condition with a concurrent | ||
272 | // writer for the same cache slot (with a different key). | ||
273 | // Lets avoid such a race by just printing the content of | ||
274 | // the lock file. | ||
275 | slot->cache_fd = slot->lock_fd; | ||
276 | unlock_slot(slot, 1); | ||
277 | err = print_slot(slot); | ||
278 | close_slot(slot); | ||
279 | return err; | ||
113 | } | 280 | } |
114 | 281 | ||
115 | int cache_expired(struct cacheitem *item) | 282 | /* Print cached content to stdout, generate the content if necessary. */ |
283 | int cache_process(int size, const char *path, const char *key, int ttl, | ||
284 | cache_fill_fn fn, void *cbdata) | ||
116 | { | 285 | { |
117 | if (item->ttl < 0) | 286 | unsigned long hash; |
287 | int len, i; | ||
288 | char filename[1024]; | ||
289 | char lockname[1024 + 5]; /* 5 = ".lock" */ | ||
290 | struct cache_slot slot; | ||
291 | |||
292 | /* If the cache is disabled, just generate the content */ | ||
293 | if (size <= 0) { | ||
294 | fn(cbdata); | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | /* Verify input, calculate filenames */ | ||
299 | if (!path) { | ||
300 | cache_log("[cgit] Cache path not specified, caching is disabled\n"); | ||
301 | fn(cbdata); | ||
118 | return 0; | 302 | return 0; |
119 | return item->st.st_mtime + item->ttl * 60 < time(NULL); | 303 | } |
304 | len = strlen(path); | ||
305 | if (len > sizeof(filename) - 10) { /* 10 = "/01234567\0" */ | ||
306 | cache_log("[cgit] Cache path too long, caching is disabled: %s\n", | ||
307 | path); | ||
308 | fn(cbdata); | ||
309 | return 0; | ||
310 | } | ||
311 | if (!key) | ||
312 | key = ""; | ||
313 | hash = hash_str(key) % size; | ||
314 | strcpy(filename, path); | ||
315 | if (filename[len - 1] != '/') | ||
316 | filename[len++] = '/'; | ||
317 | for(i = 0; i < 8; i++) { | ||
318 | sprintf(filename + len++, "%x", | ||
319 | (unsigned char)(hash & 0xf)); | ||
320 | hash >>= 4; | ||
321 | } | ||
322 | filename[len] = '\0'; | ||
323 | strcpy(lockname, filename); | ||
324 | strcpy(lockname + len, ".lock"); | ||
325 | slot.fn = fn; | ||
326 | slot.cbdata = cbdata; | ||
327 | slot.ttl = ttl; | ||
328 | slot.cache_name = filename; | ||
329 | slot.lock_name = lockname; | ||
330 | slot.key = key; | ||
331 | slot.keylen = strlen(key); | ||
332 | return process_slot(&slot); | ||
120 | } | 333 | } |
334 | |||
335 | /* Return a strftime formatted date/time | ||
336 | * NB: the result from this function is to shared memory | ||
337 | */ | ||
338 | char *sprintftime(const char *format, time_t time) | ||
339 | { | ||
340 | static char buf[64]; | ||
341 | struct tm *tm; | ||
342 | |||
343 | if (!time) | ||
344 | return NULL; | ||
345 | tm = gmtime(&time); | ||
346 | strftime(buf, sizeof(buf)-1, format, tm); | ||
347 | return buf; | ||
348 | } | ||
349 | |||
350 | int cache_ls(const char *path) | ||
351 | { | ||
352 | DIR *dir; | ||
353 | struct dirent *ent; | ||
354 | int err = 0; | ||
355 | struct cache_slot slot; | ||
356 | char fullname[1024]; | ||
357 | char *name; | ||
358 | |||
359 | if (!path) { | ||
360 | cache_log("[cgit] cache path not specified\n"); | ||
361 | return -1; | ||
362 | } | ||
363 | if (strlen(path) > 1024 - 10) { | ||
364 | cache_log("[cgit] cache path too long: %s\n", | ||
365 | path); | ||
366 | return -1; | ||
367 | } | ||
368 | dir = opendir(path); | ||
369 | if (!dir) { | ||
370 | err = errno; | ||
371 | cache_log("[cgit] unable to open path %s: %s (%d)\n", | ||
372 | path, strerror(err), err); | ||
373 | return err; | ||
374 | } | ||
375 | strcpy(fullname, path); | ||
376 | name = fullname + strlen(path); | ||
377 | if (*(name - 1) != '/') { | ||
378 | *name++ = '/'; | ||
379 | *name = '\0'; | ||
380 | } | ||
381 | slot.cache_name = fullname; | ||
382 | while((ent = readdir(dir)) != NULL) { | ||
383 | if (strlen(ent->d_name) != 8) | ||
384 | continue; | ||
385 | strcpy(name, ent->d_name); | ||
386 | if ((err = open_slot(&slot)) != 0) { | ||
387 | cache_log("[cgit] unable to open path %s: %s (%d)\n", | ||
388 | fullname, strerror(err), err); | ||
389 | continue; | ||
390 | } | ||
391 | printf("%s %s %10lld %s\n", | ||
392 | name, | ||
393 | sprintftime("%Y-%m-%d %H:%M:%S", | ||
394 | slot.cache_st.st_mtime), | ||
395 | slot.cache_st.st_size, | ||
396 | slot.buf); | ||
397 | close_slot(&slot); | ||
398 | } | ||
399 | closedir(dir); | ||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | /* Print a message to stdout */ | ||
404 | void cache_log(const char *format, ...) | ||
405 | { | ||
406 | va_list args; | ||
407 | va_start(args, format); | ||
408 | vfprintf(stderr, format, args); | ||
409 | va_end(args); | ||
410 | } | ||
411 | |||