2 * Simple and Quick Shared Memory Cache
4 * (c) 2005 Martin Mares <mj@ucw.cz>
10 #include "lib/fastbuf.h"
11 #include "lib/qache.h"
20 * The cache lives in a mmapped file of the following format:
22 * qache_entry[max_entries] table of entries and their keys
23 * u32 qache_hash[hash_size] hash table pointing to keys
24 * u32 block_next[num_blocks] next block pointers
25 * padding to a multiple of block size
26 * blocks[] data blocks
30 u32 magic; /* QCACHE_MAGIC */
31 u32 block_size; /* Parameters as in qache_params */
32 u32 block_shift; /* block_size = 1 << block_shift */
35 u32 entry_table_start; /* Array of qache_entry's */
37 u32 hash_table_start; /* Hash table containing all keys */
39 u32 next_table_start; /* Array of next pointers */
43 #define QACHE_MAGIC 0xb79f6d12
46 u32 lru_prev, lru_next; /* Entry #0: head of the cyclic LRU list */
47 u32 data_len; /* Entry #0: number of free blocks, Free entries: ~0U */
48 u32 first_data_block; /* Entry #0: first free block */
50 u32 hash_next; /* Entry #0: first free entry, Free entries: next free */
54 struct qache_header *hdr;
55 struct qache_entry *entry_table;
65 #define first_free_entry entry_table[0].hash_next
66 #define first_free_block entry_table[0].first_data_block
67 #define num_free_blocks entry_table[0].data_len
/*
 * Render a cache key as hexadecimal text (two lowercase hex digits per key
 * byte) into a function-local static buffer.  Because the buffer is static,
 * every call overwrites the text produced by the previous call.
 */
70 format_key(qache_key_t *key)
/* Room for two digits per key byte plus the terminating NUL written by sprintf(). */
72 static byte keybuf[2*sizeof(qache_key_t)+1];
73 for (uns i=0; i<sizeof(qache_key_t); i++)
74 sprintf(keybuf+2*i, "%02x", (*key)[i]);
/*
 * Ask the kernel to synchronise the byte range [start, start+len) of the
 * cache mapping.  The range is widened to whole pages before msync() is
 * called; a failing msync() is only logged, not treated as fatal.
 *
 * NOTE(review): the parameters are tagged UNUSED although the lines below use
 * them; presumably the body is compiled conditionally (see the Linux remark)
 * -- confirm against the full file.
 */
79 qache_msync(struct qache *q UNUSED, uns start UNUSED, uns len UNUSED)
82 /* We don't need msyncing on Linux, since the mappings are guaranteed to be coherent */
/* Extend the length by the distance back to the preceding page boundary ... */
83 len += (start % PAGE_SIZE);
/* ... move the start down to that boundary ... */
84 start -= start % PAGE_SIZE;
/* ... and round the length up to a whole number of pages. */
85 len = ALIGN(len, PAGE_SIZE);
86 if (msync(q->mmap_data + start, len, MS_ASYNC | MS_INVALIDATE) < 0)
87 log(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
/*
 * Synchronise a single data block.  The block number is turned into a byte
 * offset by shifting it left by block_shift; the length is one block_size.
 */
92 qache_msync_block(struct qache *q, uns blk)
94 DBG("\tSyncing block %d", blk);
95 qache_msync(q, blk << q->hdr->block_shift, q->hdr->block_size);
/*
 * Acquire the cache lock: an exclusive (F_WRLCK) fcntl() record lock covering
 * the first sizeof(struct qache_header) bytes of the cache file.  F_SETLKW
 * blocks until the lock is granted; any failure terminates via die().
 */
99 qache_lock(struct qache *q)
101 /* We cannot use flock() since it happily permits locking a shared fd (e.g., after fork()) multiple times */
103 struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
104 if (fcntl(q->fd, F_SETLKW, &fl) < 0)
105 die("fcntl lock on %s: %m", q->file_name);
107 DBG("Locked cache %s", q->file_name);
/*
 * Release the cache lock taken by qache_lock().
 *
 * dirty: non-zero when the caller modified the cache; in that case everything
 *        from the start of the file up to the first data block is msync'ed
 *        before the lock is dropped.
 *
 * A failing fcntl() terminates via die().
 */
111 qache_unlock(struct qache *q, uns dirty)
114 if (dirty) /* Sync header, entry table and hash table */
/* The synced length is the byte offset of the first data block. */
115 qache_msync(q, 0, q->hdr->first_data_block << q->hdr->block_shift);
/* Same byte range as in qache_lock(), but with F_UNLCK to drop the lock. */
116 struct flock fl = { .l_type = F_UNLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
117 if (fcntl(q->fd, F_SETLKW, &fl) < 0)
118 die("fcntl unlock on %s: %m", q->file_name);
120 DBG("Unlocked cache %s (dirty=%d)", q->file_name, dirty);
123 enum entry_audit_flags {
/*
 * Consistency check of the entry table.
 *
 * entrymap: one byte per entry; the ET_* flags recorded here tell on which
 *           lists (free list, hash chain, LRU list) each entry was seen.
 *
 * On the first inconsistency a constant string describing it is returned.
 * NOTE(review): the value returned when everything is consistent is not
 * visible in this excerpt -- presumably NULL; confirm.
 */
130 audit_entries(struct qache *q, byte *entrymap)
134 DBG("Auditing entries");
136 /* Check the free list */
137 i = q->first_free_entry;
/*
 * A free-list member must be a valid index, must not have been visited
 * already (that would mean a cycle) and must carry the free marker
 * data_len == ~0U.
 */
140 if (i >= q->hdr->max_entries || (entrymap[i] & ET_FREE_LIST) || q->entry_table[i].data_len != ~0U)
141 return "inconsistent free entry list";
142 entrymap[i] |= ET_FREE_LIST;
/* Free entries are chained through hash_next. */
143 i = q->entry_table[i].hash_next;
146 /* Check the hash table */
147 for (i=0; i<q->hdr->hash_size; i++)
149 j = q->hash_table[i];
/* An entry may appear in at most one hash chain and never while it is free. */
152 if (j >= q->hdr->max_entries || (entrymap[j] & (ET_HASH | ET_FREE_LIST)))
153 return "inconsistent hash chains";
154 entrymap[j] |= ET_HASH;
155 j = q->entry_table[j].hash_next;
/* LRU list: the successor's lru_prev has to point back to the current entry. */
163 j = q->entry_table[i].lru_next;
164 if ((entrymap[i] & (ET_LRU | ET_FREE_LIST)) || j >= q->hdr->max_entries || q->entry_table[j].lru_prev != i)
165 return "inconsistent LRU list";
166 entrymap[i] |= ET_LRU;
171 /* Check if all non-free items are in all lists */
/* Entry #0 is skipped; every other entry is either free only, or on both the LRU list and a hash chain. */
172 for (i=1; i<q->hdr->max_entries; i++)
174 if (entrymap[i] != ((q->entry_table[i].data_len == ~0U) ? ET_FREE_LIST : (ET_LRU | ET_HASH)))
175 return "inconsistent lists";
180 enum block_audit_flags {
/*
 * Consistency check of the block chains.
 *
 * entrymap: flags filled in by audit_entries(); used to skip free entries.
 * blockmap: one byte per block; BT_* flags record whether a block was found
 *           on the free list or in an entry's allocation chain.
 *
 * On the first inconsistency a constant string describing it is returned.
 * NOTE(review): the success return value is not visible in this excerpt.
 */
186 audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
190 DBG("Auditing blocks");
192 /* Check the free list */
/* Block number 0 terminates a chain. */
193 for (i=q->first_free_block; i; i=q->next_table[i])
/* Free blocks must be data blocks within range and must not repeat (no cycles). */
195 if (i < q->hdr->first_data_block || i >= q->hdr->num_blocks || (blockmap[i] & BT_FREE_LIST))
196 return "inconsistent free block list";
197 blockmap[i] |= BT_FREE_LIST;
200 /* Check allocation lists of entries */
201 for (i=1; i<q->hdr->max_entries; i++)
202 if (!(entrymap[i] & ET_FREE_LIST))
205 for (j=q->entry_table[i].first_data_block; j; j=q->next_table[j])
208 return "inconsistent entry block list";
209 blockmap[j] |= BT_ALLOC;
/* The chain length must equal data_len rounded up to whole blocks. */
212 if (((q->entry_table[i].data_len + q->hdr->block_size - 1) >> q->hdr->block_shift) != blocks)
213 return "inconsistent entry data length";
216 /* Check if all blocks belong somewhere */
217 for (i=q->hdr->first_data_block; i < q->hdr->num_blocks; i++)
220 DBG("Block %d unreferenced", i);
221 return "unreferenced blocks found";
/*
 * Run the complete audit: allocate zero-filled scratch maps (one byte per
 * entry and one byte per block), check the entry table and then the blocks.
 *
 * NOTE(review): the condition guarding the second check, the release of the
 * two maps and the return statement are not visible in this excerpt.
 */
228 do_audit(struct qache *q)
230 byte *entry_map = xmalloc_zero(q->hdr->max_entries);
231 byte *block_map = xmalloc_zero(q->hdr->num_blocks);
232 byte *err = audit_entries(q, entry_map);
/* The block audit consumes the entry flags gathered by audit_entries(). */
234 err = audit_blocks(q, entry_map, block_map);
/*
 * Derive the in-memory table pointers from the mapped file: the header sits
 * at the very start of the mapping and stores the byte offsets of the entry
 * table, the hash table and the next-pointer table.
 */
241 qache_setup_pointers(struct qache *q)
243 q->hdr = (struct qache_header *) q->mmap_data;
244 q->entry_table = (struct qache_entry *) (q->mmap_data + q->hdr->entry_table_start);
245 q->hash_table = (u32 *) (q->mmap_data + q->hdr->hash_table_start);
246 q->next_table = (u32 *) (q->mmap_data + q->hdr->next_table_start);
/*
 * Try to attach to an already existing cache file and accept it only if it
 * matches the requested parameters and passes the audit.
 *
 * Before each check `err' is set to the reason that will be reported if that
 * check fails; the failure path logs it in the "ignoring old contents"
 * message.
 *
 * NOTE(review): return values and the jump targets other than
 * unlock_and_fail are not visible in this excerpt.
 */
250 qache_open_existing(struct qache *q, struct qache_params *par)
252 if ((q->fd = open(q->file_name, O_RDWR, 0)) < 0)
256 byte *err = "stat failed";
257 if (fstat(q->fd, &st) < 0)
260 err = "invalid file size";
/* The file must hold at least a header and be a whole number of blocks long. */
261 if (st.st_size < (int)sizeof(struct qache_header) || (st.st_size % par->block_size))
263 q->file_size = st.st_size;
265 err = "requested size change";
266 if (q->file_size != par->cache_size)
270 if ((q->mmap_data = mmap(NULL, q->file_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
272 struct qache_header *h = (struct qache_header *) q->mmap_data;
/*
 * NOTE(review): the table pointers are computed from header offsets before
 * the magic number is checked, and hash_size / the table offsets are not
 * compared against the file size in the lines visible here before
 * do_audit() walks the tables -- confirm that a corrupted header cannot
 * lead outside the mapping.
 */
274 qache_setup_pointers(q);
277 err = "incompatible format";
278 if (h->magic != QACHE_MAGIC ||
279 h->block_size != par->block_size ||
280 h->max_entries != par->max_entries ||
281 h->format_id != par->format_id)
282 goto unlock_and_fail;
284 err = "incomplete file";
/* The block count recorded in the header must account for the whole file. */
285 if (h->num_blocks*h->block_size != q->file_size)
286 goto unlock_and_fail;
/* do_audit() yields a description of the first inconsistency it finds. */
288 if (err = do_audit(q))
289 goto unlock_and_fail;
292 log(L_INFO, "Cache %s: using existing data", q->file_name);
297 munmap(q->mmap_data, q->file_size);
299 log(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
/*
 * Create a fresh cache file: the file is (re)created and truncated, its
 * complete initial image (header, entry table, hash table, next-pointer
 * table, padding, data blocks) is written sequentially through a fastbuf,
 * and finally the file is mapped and the table pointers are set up.
 *
 * The ASSERTs on btell() verify that every section starts exactly at the
 * offset recorded for it in the header.
 */
305 qache_create(struct qache *q, struct qache_params *par)
307 q->fd = open(q->file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
309 die("Cache %s: unable to create (%m)", q->file_name);
310 struct fastbuf *fb = bfdopen_shared(q->fd, 16384);
312 struct qache_header h;
313 bzero(&h, sizeof(h));
314 h.magic = QACHE_MAGIC;
315 h.block_size = par->block_size;
316 h.block_shift = fls(h.block_size);
317 h.num_blocks = par->cache_size >> h.block_shift;
318 h.format_id = par->format_id;
/* The entry table follows the header immediately. */
319 h.entry_table_start = sizeof(h);
320 h.max_entries = par->max_entries;
321 h.hash_table_start = h.entry_table_start + h.max_entries * sizeof(struct qache_entry);
/* NOTE(review): the initial value and growth step of hash_size are not visible here; the loop stops once hash_size >= max_entries. */
323 while (h.hash_size < h.max_entries)
/* Hash buckets and next pointers are 4 bytes each. */
325 h.next_table_start = h.hash_table_start + h.hash_size * 4;
/* Size of all maintenance structures, rounded up to whole blocks. */
326 h.first_data_block = (h.next_table_start + 4*h.num_blocks + h.block_size - 1) >> h.block_shift;
327 if (h.first_data_block >= h.num_blocks)
328 die("Cache %s: Requested size is too small even to hold the maintenance structures", q->file_name);
329 bwrite(fb, &h, sizeof(h));
331 /* Entry #0: heads of all lists */
332 ASSERT(btell(fb) == h.entry_table_start);
333 struct qache_entry ent;
334 bzero(&ent, sizeof(ent));
/* In entry #0 these two fields hold the first free block and the number of free blocks. */
335 ent.first_data_block = h.first_data_block;
336 ent.data_len = h.num_blocks - h.first_data_block;
338 bwrite(fb, &ent, sizeof(ent));
341 bzero(&ent, sizeof(ent));
/* The remaining entries are chained through hash_next; the last one ends the chain with 0. */
343 for (uns i=1; i<h.max_entries; i++)
345 ent.hash_next = (i == h.max_entries-1 ? 0 : i+1);
346 bwrite(fb, &ent, sizeof(ent));
350 ASSERT(btell(fb) == h.hash_table_start);
351 for (uns i=0; i<h.hash_size; i++)
354 /* The next pointers */
355 ASSERT(btell(fb) == h.next_table_start);
/* Blocks below first_data_block and the very last block get 0; every other data block points to its successor. */
356 for (uns i=0; i<h.num_blocks; i++)
357 bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1);
/* Pad up to the byte offset of the first data block. */
360 ASSERT(btell(fb) <= h.first_data_block << h.block_shift);
361 while (btell(fb) < h.first_data_block << h.block_shift)
/* Write out the data blocks four bytes at a time. */
365 for (uns i=h.first_data_block; i<h.num_blocks; i++)
366 for (uns j=0; j<h.block_size; j+=4)
/* The finished image must be exactly cache_size bytes long. */
369 ASSERT(btell(fb) == par->cache_size);
371 log(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
373 if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
374 die("Cache %s: mmap failed (%m)", par->file_name);
375 q->file_size = par->cache_size;
376 qache_setup_pointers(q);
/*
 * Open a cache described by `par'.
 *
 * The block size must be a power of two and at least 8; cache_size in `par'
 * is rounded up to a multiple of the block size (note that this writes back
 * into the caller's structure).
 *
 * force_reset selects the behaviour:
 *   <= 0  try to reuse an existing file first,
 *   <  0  additionally treat a failure to reuse as fatal,
 *   otherwise (or when reuse failed with force_reset == 0) create a new cache.
 */
380 qache_open(struct qache_params *par)
382 struct qache *q = xmalloc_zero(sizeof(*q));
/* Keep a private copy of the file name. */
383 q->file_name = xstrdup(par->file_name);
/* x & (x-1) is zero only for powers of two. */
385 ASSERT(par->block_size >= 8 && !(par->block_size & (par->block_size-1)));
386 par->cache_size = ALIGN(par->cache_size, par->block_size);
388 if (par->force_reset <= 0 && qache_open_existing(q, par))
390 else if (par->force_reset < 0)
391 die("Cache %s: read-only access requested, but no data available", q->file_name);
393 qache_create(q, par);
/*
 * Detach from the cache: unmap the file and, unless retain_data is non-zero,
 * remove the cache file from the file system.  A failing unlink() is only
 * logged.
 *
 * NOTE(review): closing of the descriptor and freeing of `q' are not visible
 * in this excerpt.
 */
398 qache_close(struct qache *q, uns retain_data)
400 munmap(q->mmap_data, q->file_size);
402 if (!retain_data && unlink(q->file_name) < 0)
403 log(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
/*
 * Map a key to a hash bucket: the first four key bytes are combined into a
 * 32-bit value (byte 0 most significant) which is then reduced modulo the
 * number of buckets.
 */
409 qache_hash(struct qache *q, qache_key_t *key)
/*
 * Convert every byte to unsigned before shifting.  Without the conversion
 * the byte is promoted to a signed int, and shifting a value >= 0x80 left by
 * 24 bits moves a one into the sign bit, which is undefined behaviour.
 */
411 uns h = ((uns)(*key)[0] << 24) | ((uns)(*key)[1] << 16) | ((uns)(*key)[2] << 8) | (uns)(*key)[3];
412 return h % q->hdr->hash_size;
/*
 * Locate the entry holding `key'.
 *
 * pos_hint: an entry index the caller believes to hold the key (0 = no hint).
 *           The hint is accepted only if it is a valid index, the entry is
 *           not free (data_len != ~0U) and its key really matches.
 *
 * Without a usable hint the hash chain of the key's bucket is searched.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
416 qache_hash_find(struct qache *q, qache_key_t *key, uns pos_hint)
420 if (pos_hint && pos_hint < q->hdr->max_entries && q->entry_table[pos_hint].data_len != ~0U && !memcmp(q->entry_table[pos_hint].key, key, sizeof(*key)))
423 uns h = qache_hash(q, key);
/* Entry index 0 terminates the chain. */
424 for (uns e = q->hash_table[h]; e; e=q->entry_table[e].hash_next)
425 if (!memcmp(q->entry_table[e].key, key, sizeof(*key)))
/*
 * Link entry `e' into the hash table: it becomes the new head of the chain
 * of the bucket selected by its key.
 */
431 qache_hash_insert(struct qache *q, uns e)
433 uns h = qache_hash(q, &q->entry_table[e].key);
434 q->entry_table[e].hash_next = q->hash_table[h];
435 q->hash_table[h] = e;
/*
 * Unlink entry `e' from its hash chain.  The chain is walked through a
 * pointer to the link being examined (first the bucket head, then each
 * hash_next field), so the matching element can be bypassed by a single
 * store.  The element is recognised by comparing keys.
 */
439 qache_hash_remove(struct qache *q, uns e)
441 struct qache_entry *entry = &q->entry_table[e];
443 for (hh=&q->hash_table[qache_hash(q, &entry->key)]; f=*hh; hh=&(q->entry_table[f].hash_next))
444 if (!memcmp(q->entry_table[f].key, entry->key, sizeof(qache_key_t)))
/* Make the predecessor's link skip the removed entry. */
446 *hh = entry->hash_next;
/*
 * Take the first entry off the free-entry list.  The cache must be locked
 * and the free list must be non-empty; the entry taken must carry the free
 * marker (data_len == ~0U).
 */
453 qache_alloc_entry(struct qache *q)
455 uns e = q->first_free_entry;
456 ASSERT(q->locked && e);
457 struct qache_entry *entry = &q->entry_table[e];
458 ASSERT(entry->data_len == ~0U);
/* Free entries are chained through hash_next. */
459 q->first_free_entry = entry->hash_next;
/*
 * Return entry `e' to the free-entry list: mark it free (data_len = ~0U) and
 * push it onto the head of the list.  The cache must be locked and the entry
 * must not be free already.
 */
465 qache_free_entry(struct qache *q, uns e)
467 struct qache_entry *entry = &q->entry_table[e];
468 ASSERT(q->locked && entry->data_len != ~0U);
469 entry->data_len = ~0U;
470 entry->hash_next = q->first_free_entry;
471 q->first_free_entry = e;
/*
 * Address of the first byte of block `block' inside the mapping.  The block
 * number must be non-zero and smaller than num_blocks.
 */
475 get_block_start(struct qache *q, uns block)
477 ASSERT(block && block < q->hdr->num_blocks);
478 return q->mmap_data + (block << q->hdr->block_shift);
/*
 * Take the first block off the free-block list and decrease the free-block
 * counter.  The cache must be locked and at least one block must be free.
 */
482 qache_alloc_block(struct qache *q)
484 ASSERT(q->locked && q->num_free_blocks);
485 uns blk = q->first_free_block;
/* Free blocks are chained through the next-pointer table. */
486 q->first_free_block = q->next_table[blk];
487 q->num_free_blocks--;
488 DBG("\tAllocated block %d", blk);
/*
 * Push block `blk' onto the head of the free-block list and increase the
 * free-block counter.
 */
493 qache_free_block(struct qache *q, uns blk)
496 q->next_table[blk] = q->first_free_block;
497 q->first_free_block = blk;
498 q->num_free_blocks++;
499 DBG("\tFreed block %d", blk);
/*
 * Insert entry `e' into the cyclic LRU list whose head is entry #0.  The
 * cache must be locked and the entry must not be linked yet (both LRU links
 * zero).  The entry's successor becomes the element that used to follow the
 * head.
 *
 * NOTE(review): the updates of the remaining three links are not visible in
 * this excerpt.
 */
503 qache_lru_insert(struct qache *q, uns e)
505 struct qache_entry *head = &q->entry_table[0];
506 struct qache_entry *entry = &q->entry_table[e];
507 ASSERT(q->locked && !entry->lru_prev && !entry->lru_next);
508 uns succe = head->lru_next;
509 struct qache_entry *succ = &q->entry_table[succe];
512 entry->lru_next = succe;
/*
 * Unlink entry `e' from the LRU list: its neighbours are connected to each
 * other and both of its own links are reset to 0.
 */
517 qache_lru_remove(struct qache *q, uns e)
520 struct qache_entry *entry = &q->entry_table[e];
521 q->entry_table[entry->lru_prev].lru_next = entry->lru_next;
522 q->entry_table[entry->lru_next].lru_prev = entry->lru_prev;
523 entry->lru_prev = entry->lru_next = 0;
/*
 * Index of the entry preceding the list head (entry #0) in the cyclic LRU
 * list.  qache_insert() uses it to pick the entry to evict.
 */
527 qache_lru_get(struct qache *q)
529 return q->entry_table[0].lru_prev;
/*
 * Low-level deletion of entry `e': all of its data blocks are returned to
 * the free-block list, then the entry is removed from the LRU list and from
 * its hash chain and is put back on the free-entry list.
 */
533 qache_ll_delete(struct qache *q, uns e)
535 struct qache_entry *entry = &q->entry_table[e];
536 uns blk = entry->first_data_block;
/* data_len is counted down block by block and doubles as the loop condition. */
537 while (entry->data_len)
/* Read the successor first: qache_free_block() overwrites next_table[blk]. */
539 uns next = q->next_table[blk];
540 qache_free_block(q, blk);
542 if (entry->data_len >= q->hdr->block_size)
543 entry->data_len -= q->hdr->block_size;
547 qache_lru_remove(q, e);
548 qache_hash_remove(q, e);
549 qache_free_entry(q, e);
/*
 * Store `size' bytes of `data' under `key'.
 *
 * An existing entry with the same key (located with the help of pos_hint) is
 * deleted first.  If the data cannot fit even into an empty cache the request
 * is refused; otherwise entries are evicted via qache_lru_get() until enough
 * free blocks and a free entry slot are available.
 *
 * NOTE(review): locking, the loop around the block copy and the return value
 * are not visible in this excerpt.  `size + block_size - 1' is computed in
 * unsigned arithmetic and can wrap for sizes close to the maximum -- confirm
 * callers never pass such sizes.
 */
553 qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size)
557 uns e = qache_hash_find(q, key, pos_hint);
560 qache_ll_delete(q ,e);
561 DBG("Insert <%s>: deleting old entry %d", format_key(key), e);
/* Number of blocks needed: size rounded up to whole blocks. */
564 uns blocks = (size + q->hdr->block_size - 1) >> q->hdr->block_shift;
/* More blocks than the cache has data blocks in total. */
565 if (blocks > q->hdr->num_blocks - q->hdr->first_data_block)
570 while (q->num_free_blocks < blocks || !q->first_free_entry)
572 e = qache_lru_get(q);
573 DBG("Insert <%s>: evicting entry %d to make room for %d blocks", format_key(key), e, blocks);
575 qache_ll_delete(q, e);
577 e = qache_alloc_entry(q);
578 struct qache_entry *entry = &q->entry_table[e];
579 entry->data_len = size;
580 memcpy(entry->key, key, sizeof(*key));
581 DBG("Insert <%s>: created entry %d with %d data blocks", format_key(key), e, blocks);
/*
 * The block chain is built back to front: each newly allocated block is
 * linked in front of the chain built so far and receives the data that
 * currently forms the tail of the buffer.
 */
583 entry->first_data_block = 0;
/* Size of the trailing chunk: the remainder modulo block_size, or a full block when the remainder is 0. */
586 uns chunk = (size & (q->hdr->block_size-1)) ? : q->hdr->block_size;
587 uns blk = qache_alloc_block(q);
588 q->next_table[blk] = entry->first_data_block;
589 memcpy(get_block_start(q, blk), data+size-chunk, chunk);
590 qache_msync_block(q, blk);
591 entry->first_data_block = blk;
595 qache_lru_insert(q, e);
596 qache_hash_insert(q, e);
/*
 * Copy part of an entry's data out of the cache.
 *
 * start: byte offset into the entry's data at which copying begins.
 *
 * The amount transferred is limited both by the requested size and by the
 * number of bytes available after `start'.  A buffer of that size is
 * allocated with xmalloc() and stored through datap.
 *
 * NOTE(review): how `size' and `data' are obtained from sizep/datap, and the
 * remainder of the copy loop, are not visible in this excerpt.
 */
602 copy_out(struct qache *q, struct qache_entry *entry, byte **datap, uns *sizep, uns start)
/* Nothing is available when start lies beyond the end of the data. */
607 uns avail = (start > entry->data_len) ? 0 : entry->data_len - start;
608 uns xfer = MIN(size, avail);
613 *datap = xmalloc(xfer);
614 uns blk = entry->first_data_block;
/* Skip the blocks that lie completely before the starting offset. */
615 while (start >= q->hdr->block_size)
617 blk = q->next_table[blk];
618 start -= q->hdr->block_size;
/* At most the rest of the current block, counted from the offset `start' within it. */
623 uns len = MIN(xfer, q->hdr->block_size - start);
/*
 * Read from the offset inside the block.  The length above already accounts
 * for `start', so the source address has to as well; copying from the
 * beginning of the block would return the wrong bytes for any start that is
 * not a multiple of the block size.
 * NOTE(review): for the following blocks the offset must be 0 -- confirm
 * that the rest of the loop resets `start'.
 */
624 memcpy(data, get_block_start(q, blk) + start, len);
625 blk = q->next_table[blk];
/*
 * Look up `key' (optionally using pos_hint) and copy its data out via
 * copy_out().  A hit also moves the entry within the LRU list by removing
 * and re-inserting it, which is why the cache is unlocked as dirty.
 *
 * NOTE(review): the locking call, the miss path's unlock and the return
 * value are not visible in this excerpt.
 */
637 qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start)
640 uns e = qache_hash_find(q, key, pos_hint);
643 struct qache_entry *entry = &q->entry_table[e];
644 DBG("Lookup <%s>: found entry %d", format_key(key), e);
645 qache_lru_remove(q, e);
646 qache_lru_insert(q, e);
647 copy_out(q, entry, datap, sizep, start);
648 qache_unlock(q, 1); /* Yes, modified -- we update the LRU */
652 DBG("Lookup <%s>: not found", format_key(key));
/*
 * Inspect the entry at table position `pos' directly, without going through
 * the hash table.  Positions 0 (the list-head entry) and anything at or
 * beyond max_entries are rejected.  For an occupied slot (data_len != ~0U)
 * the entry's key is copied to `key' and its data is copied out via
 * copy_out().
 *
 * NOTE(review): locking and return values are not visible in this excerpt.
 */
659 qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start)
661 if (!pos || pos >= q->hdr->max_entries)
663 DBG("Probe %d: Out of range", pos);
669 struct qache_entry *entry = &q->entry_table[pos];
670 if (entry->data_len != ~0U)
/* The format has two conversions, so `pos' must be passed for %d; the key is passed by address like in the other format_key() callers. */
672 DBG("Probe %d: Found key <%s>", pos, format_key(&entry->key));
674 memcpy(key, entry->key, sizeof(qache_key_t));
675 copy_out(q, entry, datap, sizep, start);
679 DBG("Probe %d: Empty", pos);
/*
 * Delete the entry stored under `key' (optionally located via pos_hint) by
 * handing it to qache_ll_delete().  A missing key is only reported in the
 * debug output.
 *
 * NOTE(review): locking and the return value are not visible in this excerpt.
 */
685 qache_delete(struct qache *q, qache_key_t *key, uns pos_hint)
688 uns e = qache_hash_find(q, key, pos_hint);
/* Message now closes the <key> bracket, matching the other debug messages. */
691 DBG("Delete <%s>: deleting entry %d", format_key(key), e);
692 qache_ll_delete(q, e);
695 DBG("Delete <%s>: No match", format_key(key));
/*
 * Dump the whole cache state to the log at debug level: the header
 * parameters, every entry of the entry table (including entry #0), every
 * hash bucket and the next pointer of every data block.
 */
701 qache_debug(struct qache *q)
/* block_size is passed twice: once for "block_size=" and once for "(%d data)". */
703 log(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
704 q->file_name, q->hdr->block_size, q->hdr->block_size, q->hdr->num_blocks, q->hdr->first_data_block,
705 q->hdr->max_entries, q->hdr->hash_size);
707 log(L_DEBUG, "Table of cache entries:");
708 log(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
709 for (uns e=0; e<q->hdr->max_entries; e++)
711 struct qache_entry *ent = &q->entry_table[e];
712 log(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
713 ent->first_data_block, ent->hash_next, format_key(&ent->key));
716 log(L_DEBUG, "Hash table:");
717 for (uns h=0; h<q->hdr->hash_size; h++)
718 log(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
720 log(L_DEBUG, "Next pointers:");
/* Only data blocks are listed. */
721 for (uns blk=q->hdr->first_data_block; blk<q->hdr->num_blocks; blk++)
722 log(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
/*
 * Run the full consistency audit on an open cache and terminate via die()
 * with the reported reason if an inconsistency is found.
 *
 * NOTE(review): locking around the audit is not visible in this excerpt.
 */
726 qache_audit(struct qache *q)
730 if (err = do_audit(q))
731 die("Cache %s: %s", q->file_name, err);
/*
 * Self-test driver: opens the cache "tmp/test", inserts a series of entries
 * with generated keys and data, then looks entries up again and verifies the
 * returned bytes against the generating formula.
 *
 * NOTE(review): several lines (remaining parameters, loop headers, variable
 * declarations) are not visible in this excerpt.
 */
737 int main(int argc UNUSED, char **argv UNUSED)
739 struct qache_params par = {
740 .file_name = "tmp/test",
745 .format_id = 0xfeedcafe
747 struct qache *q = qache_open(&par);
749 qache_key_t key = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
/* Key bytes 3 and 15 are derived from i so that each i gets its own key. */
755 key[3] = i / 16; key[15] = i % 16;
/* Entry i carries 11*i bytes following the pattern 0x33 + i*j. */
756 for (j=0; j<11*i; j++)
757 data[j] = 0x33 + i*j;
758 qache_insert(q, &key, 0, data, 11*i);
/* NOTE(review): this loop runs a fixed 100 times while the summary below reports N -- confirm they agree. */
764 for (i=0; i<100; i++)
766 key[3] = i / 16; key[15] = i % 16;
768 uns sz = sizeof(data);
769 uns e = qache_lookup(q, &key, 0, &dptr, &sz, 0);
/* Same formula as used when the data was generated, truncated to a byte. */
774 ASSERT(data[j] == (byte)(0x33 + i*j));
778 log(L_INFO, "Found %d of %d entries", found, N);