2 * Simple and Quick Shared Memory Cache
4 * (c) 2005 Martin Mares <mj@ucw.cz>
10 #include "ucw/bitops.h"
11 #include "ucw/fastbuf.h"
12 #include "ucw/ff-binary.h"
13 #include "ucw/qache.h"
22 * The cache lives in a mmapped file of the following format:
24 * qache_entry[max_entries] table of entries and their keys
25 * u32 qache_hash[hash_size] hash table pointing to keys
26 * u32 block_next[num_blocks] next block pointers
27 * padding to a multiple of block size
28 * blocks[] data blocks
32 u32 magic; /* QCACHE_MAGIC */
33 u32 block_size; /* Parameters as in qache_params */
34 u32 block_shift; /* block_size = 1 << block_shift */
37 u32 entry_table_start; /* Array of qache_entry's */
39 u32 hash_table_start; /* Hash table containing all keys */
41 u32 next_table_start; /* Array of next pointers */
45 #define QACHE_MAGIC 0xb79f6d12
48 u32 lru_prev, lru_next; /* Entry #0: head of the cyclic LRU list */
49 u32 data_len; /* Entry #0: number of free blocks, Free entries: ~0U */
50 u32 first_data_block; /* Entry #0: first free block */
52 u32 hash_next; /* Entry #0: first free entry, Free entries: next free */
56 struct qache_header *hdr;
57 struct qache_entry *entry_table;
67 #define first_free_entry entry_table[0].hash_next
68 #define first_free_block entry_table[0].first_data_block
69 #define num_free_blocks entry_table[0].data_len
72 format_key(qache_key_t *key)
74 static char keybuf[2*sizeof(qache_key_t)+1];
75 for (uns i=0; i<sizeof(qache_key_t); i++)
76 sprintf(keybuf+2*i, "%02x", (*key)[i]);
81 qache_msync(struct qache *q UNUSED, uns start UNUSED, uns len UNUSED)
84 /* We don't need msyncing on Linux, since the mappings are guaranteed to be coherent */
85 len += (start % CPU_PAGE_SIZE);
86 start -= start % CPU_PAGE_SIZE;
87 len = ALIGN_TO(len, CPU_PAGE_SIZE);
88 if (msync(q->mmap_data + start, len, MS_ASYNC | MS_INVALIDATE) < 0)
89 msg(L_ERROR, "Cache %s: msync failed: %m", q->file_name);
94 qache_msync_block(struct qache *q, uns blk)
96 DBG("\tSyncing block %d", blk);
97 qache_msync(q, blk << q->hdr->block_shift, q->hdr->block_size);
101 qache_lock(struct qache *q)
103 /* We cannot use flock() since it happily permits locking a shared fd (e.g., after fork()) multiple times */
105 struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
106 if (fcntl(q->fd, F_SETLKW, &fl) < 0)
107 die("fcntl lock on %s: %m", q->file_name);
109 DBG("Locked cache %s", q->file_name);
113 qache_unlock(struct qache *q, uns dirty)
116 if (dirty) /* Sync header, entry table and hash table */
117 qache_msync(q, 0, q->hdr->first_data_block << q->hdr->block_shift);
118 struct flock fl = { .l_type = F_UNLCK, .l_whence = SEEK_SET, .l_start = 0, .l_len = sizeof(struct qache_header) };
119 if (fcntl(q->fd, F_SETLKW, &fl) < 0)
120 die("fcntl unlock on %s: %m", q->file_name);
122 DBG("Unlocked cache %s (dirty=%d)", q->file_name, dirty);
125 enum entry_audit_flags {
132 audit_entries(struct qache *q, byte *entrymap)
136 DBG("Auditing entries");
138 /* Check the free list */
139 i = q->first_free_entry;
142 if (i >= q->hdr->max_entries || (entrymap[i] & ET_FREE_LIST) || q->entry_table[i].data_len != ~0U)
143 return "inconsistent free entry list";
144 entrymap[i] |= ET_FREE_LIST;
145 i = q->entry_table[i].hash_next;
148 /* Check the hash table */
149 for (i=0; i<q->hdr->hash_size; i++)
151 j = q->hash_table[i];
154 if (j >= q->hdr->max_entries || (entrymap[j] & (ET_HASH | ET_FREE_LIST)))
155 return "inconsistent hash chains";
156 entrymap[j] |= ET_HASH;
157 j = q->entry_table[j].hash_next;
165 j = q->entry_table[i].lru_next;
166 if ((entrymap[i] & (ET_LRU | ET_FREE_LIST)) || j >= q->hdr->max_entries || q->entry_table[j].lru_prev != i)
167 return "inconsistent LRU list";
168 entrymap[i] |= ET_LRU;
173 /* Check if all non-free items are in all lists */
174 for (i=1; i<q->hdr->max_entries; i++)
176 if (entrymap[i] != ((q->entry_table[i].data_len == ~0U) ? ET_FREE_LIST : (ET_LRU | ET_HASH)))
177 return "inconsistent lists";
182 enum block_audit_flags {
188 audit_blocks(struct qache *q, byte *entrymap, byte *blockmap)
192 DBG("Auditing blocks");
194 /* Check the free list */
195 for (i=q->first_free_block; i; i=q->next_table[i])
197 if (i < q->hdr->first_data_block || i >= q->hdr->num_blocks || (blockmap[i] & BT_FREE_LIST))
198 return "inconsistent free block list";
199 blockmap[i] |= BT_FREE_LIST;
202 /* Check allocation lists of entries */
203 for (i=1; i<q->hdr->max_entries; i++)
204 if (!(entrymap[i] & ET_FREE_LIST))
207 for (j=q->entry_table[i].first_data_block; j; j=q->next_table[j])
210 return "inconsistent entry block list";
211 blockmap[j] |= BT_ALLOC;
214 if (((q->entry_table[i].data_len + q->hdr->block_size - 1) >> q->hdr->block_shift) != blocks)
215 return "inconsistent entry data length";
218 /* Check if all blocks belong somewhere */
219 for (i=q->hdr->first_data_block; i < q->hdr->num_blocks; i++)
222 DBG("Block %d unreferenced", i);
223 return "unreferenced blocks found";
230 do_audit(struct qache *q)
232 byte *entry_map = xmalloc_zero(q->hdr->max_entries);
233 byte *block_map = xmalloc_zero(q->hdr->num_blocks);
234 byte *err = audit_entries(q, entry_map);
236 err = audit_blocks(q, entry_map, block_map);
243 qache_setup_pointers(struct qache *q)
245 q->hdr = (struct qache_header *) q->mmap_data;
246 q->entry_table = (struct qache_entry *) (q->mmap_data + q->hdr->entry_table_start);
247 q->hash_table = (u32 *) (q->mmap_data + q->hdr->hash_table_start);
248 q->next_table = (u32 *) (q->mmap_data + q->hdr->next_table_start);
252 qache_open_existing(struct qache *q, struct qache_params *par)
254 if ((q->fd = open(q->file_name, O_RDWR, 0)) < 0)
258 char *err = "stat failed";
259 if (fstat(q->fd, &st) < 0)
262 err = "invalid file size";
263 if (st.st_size < (int)sizeof(struct qache_header) || (st.st_size % par->block_size))
265 q->file_size = st.st_size;
267 err = "requested size change";
268 if (q->file_size != par->cache_size)
272 if ((q->mmap_data = mmap(NULL, q->file_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
274 struct qache_header *h = (struct qache_header *) q->mmap_data;
276 qache_setup_pointers(q);
279 err = "incompatible format";
280 if (h->magic != QACHE_MAGIC ||
281 h->block_size != par->block_size ||
282 h->max_entries != par->max_entries ||
283 h->format_id != par->format_id)
284 goto unlock_and_fail;
286 err = "incomplete file";
287 if (h->num_blocks*h->block_size != q->file_size)
288 goto unlock_and_fail;
290 if (err = do_audit(q))
291 goto unlock_and_fail;
294 msg(L_INFO, "Cache %s: using existing data", q->file_name);
299 munmap(q->mmap_data, q->file_size);
301 msg(L_INFO, "Cache %s: ignoring old contents (%s)", q->file_name, err);
307 qache_create(struct qache *q, struct qache_params *par)
309 q->fd = open(q->file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
311 die("Cache %s: unable to create (%m)", q->file_name);
312 struct fastbuf *fb = bfdopen_shared(q->fd, 16384);
314 struct qache_header h;
315 bzero(&h, sizeof(h));
316 h.magic = QACHE_MAGIC;
317 h.block_size = par->block_size;
318 h.block_shift = bit_fls(h.block_size);
319 h.num_blocks = par->cache_size >> h.block_shift;
320 h.format_id = par->format_id;
321 h.entry_table_start = sizeof(h);
322 h.max_entries = par->max_entries;
323 h.hash_table_start = h.entry_table_start + h.max_entries * sizeof(struct qache_entry);
325 while (h.hash_size < h.max_entries)
327 h.next_table_start = h.hash_table_start + h.hash_size * 4;
328 h.first_data_block = (h.next_table_start + 4*h.num_blocks + h.block_size - 1) >> h.block_shift;
329 if (h.first_data_block >= h.num_blocks)
330 die("Cache %s: Requested size is too small even to hold the maintenance structures", q->file_name);
331 bwrite(fb, &h, sizeof(h));
333 /* Entry #0: heads of all lists */
334 ASSERT(btell(fb) == (ucw_off_t)h.entry_table_start);
335 struct qache_entry ent;
336 bzero(&ent, sizeof(ent));
337 ent.first_data_block = h.first_data_block;
338 ent.data_len = h.num_blocks - h.first_data_block;
340 bwrite(fb, &ent, sizeof(ent));
343 bzero(&ent, sizeof(ent));
345 for (uns i=1; i<h.max_entries; i++)
347 ent.hash_next = (i == h.max_entries-1 ? 0 : i+1);
348 bwrite(fb, &ent, sizeof(ent));
352 ASSERT(btell(fb) == (ucw_off_t)h.hash_table_start);
353 for (uns i=0; i<h.hash_size; i++)
356 /* The next pointers */
357 ASSERT(btell(fb) == (ucw_off_t)h.next_table_start);
358 for (uns i=0; i<h.num_blocks; i++)
359 bputl(fb, (i < h.first_data_block || i == h.num_blocks-1) ? 0 : i+1);
362 ASSERT(btell(fb) <= (ucw_off_t)(h.first_data_block << h.block_shift));
363 while (btell(fb) < (ucw_off_t)(h.first_data_block << h.block_shift))
367 for (uns i=h.first_data_block; i<h.num_blocks; i++)
368 for (uns j=0; j<h.block_size; j+=4)
371 ASSERT(btell(fb) == (ucw_off_t)par->cache_size);
373 msg(L_INFO, "Cache %s: created (%d bytes, %d slots, %d buckets)", q->file_name, par->cache_size, h.max_entries, h.hash_size);
375 if ((q->mmap_data = mmap(NULL, par->cache_size, PROT_READ | PROT_WRITE, MAP_SHARED, q->fd, 0)) == MAP_FAILED)
376 die("Cache %s: mmap failed (%m)", par->file_name);
377 q->file_size = par->cache_size;
378 qache_setup_pointers(q);
382 qache_open(struct qache_params *par)
384 struct qache *q = xmalloc_zero(sizeof(*q));
385 q->file_name = xstrdup(par->file_name);
387 ASSERT(par->block_size >= 8 && !(par->block_size & (par->block_size-1)));
388 par->cache_size = ALIGN_TO(par->cache_size, par->block_size);
390 if (par->force_reset <= 0 && qache_open_existing(q, par))
392 else if (par->force_reset < 0)
393 die("Cache %s: read-only access requested, but no data available", q->file_name);
395 qache_create(q, par);
400 qache_close(struct qache *q, uns retain_data)
402 munmap(q->mmap_data, q->file_size);
404 if (!retain_data && unlink(q->file_name) < 0)
405 msg(L_ERROR, "Cache %s: unlink failed (%m)", q->file_name);
411 qache_hash(struct qache *q, qache_key_t *key)
413 uns h = ((*key)[0] << 24) | ((*key)[1] << 16) | ((*key)[2] << 8) | (*key)[3];
414 return h % q->hdr->hash_size;
418 qache_hash_find(struct qache *q, qache_key_t *key, uns pos_hint)
422 if (pos_hint && pos_hint < q->hdr->max_entries && q->entry_table[pos_hint].data_len != ~0U && !memcmp(q->entry_table[pos_hint].key, key, sizeof(*key)))
425 uns h = qache_hash(q, key);
426 for (uns e = q->hash_table[h]; e; e=q->entry_table[e].hash_next)
427 if (!memcmp(q->entry_table[e].key, key, sizeof(*key)))
433 qache_hash_insert(struct qache *q, uns e)
435 uns h = qache_hash(q, &q->entry_table[e].key);
436 q->entry_table[e].hash_next = q->hash_table[h];
437 q->hash_table[h] = e;
441 qache_hash_remove(struct qache *q, uns e)
443 struct qache_entry *entry = &q->entry_table[e];
445 for (hh=&q->hash_table[qache_hash(q, &entry->key)]; f=*hh; hh=&(q->entry_table[f].hash_next))
446 if (!memcmp(q->entry_table[f].key, entry->key, sizeof(qache_key_t)))
448 *hh = entry->hash_next;
455 qache_alloc_entry(struct qache *q)
457 uns e = q->first_free_entry;
458 ASSERT(q->locked && e);
459 struct qache_entry *entry = &q->entry_table[e];
460 ASSERT(entry->data_len == ~0U);
461 q->first_free_entry = entry->hash_next;
467 qache_free_entry(struct qache *q, uns e)
469 struct qache_entry *entry = &q->entry_table[e];
470 ASSERT(q->locked && entry->data_len != ~0U);
471 entry->data_len = ~0U;
472 entry->hash_next = q->first_free_entry;
473 q->first_free_entry = e;
477 get_block_start(struct qache *q, uns block)
479 ASSERT(block && block < q->hdr->num_blocks);
480 return q->mmap_data + (block << q->hdr->block_shift);
484 qache_alloc_block(struct qache *q)
486 ASSERT(q->locked && q->num_free_blocks);
487 uns blk = q->first_free_block;
488 q->first_free_block = q->next_table[blk];
489 q->num_free_blocks--;
490 DBG("\tAllocated block %d", blk);
495 qache_free_block(struct qache *q, uns blk)
498 q->next_table[blk] = q->first_free_block;
499 q->first_free_block = blk;
500 q->num_free_blocks++;
501 DBG("\tFreed block %d", blk);
505 qache_lru_insert(struct qache *q, uns e)
507 struct qache_entry *head = &q->entry_table[0];
508 struct qache_entry *entry = &q->entry_table[e];
509 ASSERT(q->locked && !entry->lru_prev && !entry->lru_next);
510 uns succe = head->lru_next;
511 struct qache_entry *succ = &q->entry_table[succe];
514 entry->lru_next = succe;
519 qache_lru_remove(struct qache *q, uns e)
522 struct qache_entry *entry = &q->entry_table[e];
523 q->entry_table[entry->lru_prev].lru_next = entry->lru_next;
524 q->entry_table[entry->lru_next].lru_prev = entry->lru_prev;
525 entry->lru_prev = entry->lru_next = 0;
529 qache_lru_get(struct qache *q)
531 return q->entry_table[0].lru_prev;
535 qache_ll_delete(struct qache *q, uns e)
537 struct qache_entry *entry = &q->entry_table[e];
538 uns blk = entry->first_data_block;
539 while (entry->data_len)
541 uns next = q->next_table[blk];
542 qache_free_block(q, blk);
544 if (entry->data_len >= q->hdr->block_size)
545 entry->data_len -= q->hdr->block_size;
549 qache_lru_remove(q, e);
550 qache_hash_remove(q, e);
551 qache_free_entry(q, e);
555 qache_insert(struct qache *q, qache_key_t *key, uns pos_hint, void *data, uns size)
559 uns e = qache_hash_find(q, key, pos_hint);
562 qache_ll_delete(q ,e);
563 DBG("Insert <%s>: deleting old entry %d", format_key(key), e);
566 uns blocks = (size + q->hdr->block_size - 1) >> q->hdr->block_shift;
567 if (blocks > q->hdr->num_blocks - q->hdr->first_data_block)
572 while (q->num_free_blocks < blocks || !q->first_free_entry)
574 e = qache_lru_get(q);
575 DBG("Insert <%s>: evicting entry %d to make room for %d blocks", format_key(key), e, blocks);
577 qache_ll_delete(q, e);
579 e = qache_alloc_entry(q);
580 struct qache_entry *entry = &q->entry_table[e];
581 entry->data_len = size;
582 memcpy(entry->key, key, sizeof(*key));
583 DBG("Insert <%s>: created entry %d with %d data blocks", format_key(key), e, blocks);
585 entry->first_data_block = 0;
588 uns chunk = (size & (q->hdr->block_size-1)) ? : q->hdr->block_size;
589 uns blk = qache_alloc_block(q);
590 q->next_table[blk] = entry->first_data_block;
591 memcpy(get_block_start(q, blk), data+size-chunk, chunk);
592 qache_msync_block(q, blk);
593 entry->first_data_block = blk;
597 qache_lru_insert(q, e);
598 qache_hash_insert(q, e);
604 copy_out(struct qache *q, struct qache_entry *entry, byte **datap, uns *sizep, uns start)
609 uns avail = (start > entry->data_len) ? 0 : entry->data_len - start;
610 uns xfer = MIN(size, avail);
615 *datap = xmalloc(xfer);
616 uns blk = entry->first_data_block;
617 while (start >= q->hdr->block_size)
619 blk = q->next_table[blk];
620 start -= q->hdr->block_size;
625 uns len = MIN(xfer, q->hdr->block_size - start);
626 memcpy(data, get_block_start(q, blk), len);
627 blk = q->next_table[blk];
639 qache_lookup(struct qache *q, qache_key_t *key, uns pos_hint, byte **datap, uns *sizep, uns start)
642 uns e = qache_hash_find(q, key, pos_hint);
645 struct qache_entry *entry = &q->entry_table[e];
646 DBG("Lookup <%s>: found entry %d", format_key(key), e);
647 qache_lru_remove(q, e);
648 qache_lru_insert(q, e);
649 copy_out(q, entry, datap, sizep, start);
650 qache_unlock(q, 1); /* Yes, modified -- we update the LRU */
654 DBG("Lookup <%s>: not found", format_key(key));
661 qache_probe(struct qache *q, qache_key_t *key, uns pos, byte **datap, uns *sizep, uns start)
663 if (!pos || pos >= q->hdr->max_entries)
665 DBG("Probe %d: Out of range", pos);
671 struct qache_entry *entry = &q->entry_table[pos];
672 if (entry->data_len != ~0U)
674 DBG("Probe %d: Found key <%s>", format_key(entry->key));
676 memcpy(key, entry->key, sizeof(qache_key_t));
677 copy_out(q, entry, datap, sizep, start);
681 DBG("Probe %d: Empty", pos);
687 qache_delete(struct qache *q, qache_key_t *key, uns pos_hint)
690 uns e = qache_hash_find(q, key, pos_hint);
693 DBG("Delete <%s: deleting entry %d", format_key(key), e);
694 qache_ll_delete(q, e);
697 DBG("Delete <%s>: No match", format_key(key));
703 qache_debug(struct qache *q)
705 msg(L_DEBUG, "Cache %s: block_size=%d (%d data), num_blocks=%d (%d first data), %d slots, %d hash buckets",
706 q->file_name, q->hdr->block_size, q->hdr->block_size, q->hdr->num_blocks, q->hdr->first_data_block,
707 q->hdr->max_entries, q->hdr->hash_size);
709 msg(L_DEBUG, "Table of cache entries:");
710 msg(L_DEBUG, "\tEntry\tLruPrev\tLruNext\tDataLen\tDataBlk\tHashNxt\tKey");
711 for (uns e=0; e<q->hdr->max_entries; e++)
713 struct qache_entry *ent = &q->entry_table[e];
714 msg(L_DEBUG, "\t%d\t%d\t%d\t%d\t%d\t%d\t%s", e, ent->lru_prev, ent->lru_next, ent->data_len,
715 ent->first_data_block, ent->hash_next, format_key(&ent->key));
718 msg(L_DEBUG, "Hash table:");
719 for (uns h=0; h<q->hdr->hash_size; h++)
720 msg(L_DEBUG, "\t%04x\t%d", h, q->hash_table[h]);
722 msg(L_DEBUG, "Next pointers:");
723 for (uns blk=q->hdr->first_data_block; blk<q->hdr->num_blocks; blk++)
724 msg(L_DEBUG, "\t%d\t%d", blk, q->next_table[blk]);
728 qache_audit(struct qache *q)
732 if (err = do_audit(q))
733 die("Cache %s: %s", q->file_name, err);
739 int main(int argc UNUSED, char **argv UNUSED)
741 struct qache_params par = {
742 .file_name = "tmp/test",
747 .format_id = 0xfeedcafe
749 struct qache *q = qache_open(&par);
751 qache_key_t key = { 0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
757 key[3] = i / 16; key[15] = i % 16;
758 for (j=0; j<11*i; j++)
759 data[j] = 0x33 + i*j;
760 qache_insert(q, &key, 0, data, 11*i);
766 for (i=0; i<100; i++)
768 key[3] = i / 16; key[15] = i % 16;
770 uns sz = sizeof(data);
771 uns e = qache_lookup(q, &key, 0, &dptr, &sz, 0);
776 ASSERT(data[j] == (byte)(0x33 + i*j));
780 msg(L_INFO, "Found %d of %d entries", found, N);