/*
* Sherlock Library -- Fast Database Management Routines
*
- * (c) 1999 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ * (c) 1999--2000 Martin Mares <mj@ucw.cz>
*/
/*
* and we assume it's sorted.
*/
+#include "lib/lib.h"
+#include "lib/lfs.h"
+#include "lib/pagecache.h"
+#include "lib/db.h"
+#include "lib/db_internal.h"
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
-#include "lib.h"
-#include "pagecache.h"
-#include "db.h"
-#include "db_internal.h"
-
struct sdbm *
sdbm_open(struct sdbm_options *o)
{
struct sdbm_root root, *r;
uns cache_size = o->cache_size ? o->cache_size : 16;
- d = xmalloc(sizeof(struct sdbm));
- bzero(d, sizeof(*d));
+ d = xmalloc_zero(sizeof(struct sdbm));
d->flags = o->flags;
- d->fd = open(o->name, ((d->flags & SDBM_WRITE) ? O_RDWR : O_RDONLY), 0666);
+ d->fd = sh_open(o->name, ((d->flags & SDBM_WRITE) ? O_RDWR : O_RDONLY), 0666);
if (d->fd >= 0) /* Already exists, let's check it */
{
if (read(d->fd, &root, sizeof(root)) != sizeof(root))
goto bad;
if (root.magic != SDBM_MAGIC || root.version != SDBM_VERSION)
goto bad;
- d->file_size = lseek(d->fd, 0, SEEK_END);
+ d->file_size = sh_seek(d->fd, 0, SEEK_END);
d->page_size = 1 << root.page_order;
d->cache = pgc_open(d->page_size, cache_size);
d->root_page = pgc_read(d->cache, d->fd, 0);
d->root = (void *) d->root_page->data;
}
- else if ((d->flags & SDBM_CREAT) && (d->fd = open(o->name, O_RDWR | O_CREAT, 0666)) >= 0)
+ else if ((d->flags & SDBM_CREAT) && (d->fd = sh_open(o->name, O_RDWR | O_CREAT, 0666)) >= 0)
{
struct page *q;
uns page_order = o->page_order;
pgc_close(d->cache);
if (d->fd >= 0)
close(d->fd);
- free(d);
+ xfree(d);
}
static uns
sdbm_alloc_pages(struct sdbm *d, uns number)
{
uns where = d->file_size;
- d->file_size += number << d->page_order;
+ uns size = number << d->page_order;
+ if (d->file_size + size < d->file_size) /* Wrap around? */
+ die("SDB: Database file too large, giving up");
+ d->file_size += size;
return where;
}
sdbm_hash(byte *key, uns keylen)
{
/*
- * This is the same hash function as GDBM uses.
- * It seems to work well.
+ * This used to be the same hash function as GDBM uses,
+ * but it turned out that it tends to give the same results
+ * on similar keys. Damn it.
*/
- u32 value;
- uns index;
-
- /* Set the initial value from key. */
- value = 0x238F13AF * keylen;
- for (index = 0; index < keylen; index++)
- value = value + (key[index] << (index*5 % 24));
+ u32 value = 0x238F13AF * keylen;
+ while (keylen--)
+ value = 37*value + *key++;
return (1103515243 * value + 12345);
}
}
}
+static inline uns
+sdbm_dirpos(struct sdbm *d, uns hash)
+{
+ if (d->dir_shift != 32) /* avoid shifting by 32 bits */
+ return (hash >> d->dir_shift) << 2; /* offset in the directory */
+ else
+ return 0;
+}
+
static struct page *
-sdbm_split_page(struct sdbm *d, struct page *b, u32 hash, uns dirpos)
+sdbm_split_page(struct sdbm *d, struct page *b, u32 hash)
{
struct page *p[2];
- uns i, rank, sigbit, rank_log;
+ uns i, rank, sigbit, rank_log, dirpos;
+ dirpos = sdbm_dirpos(d, hash);
rank = sdbm_page_rank(d, dirpos); /* rank = # of pointers to this page */
if (rank == 1)
{
p[0] = b;
p[1] = pgc_get(d->cache, d->fd, sdbm_alloc_page(d));
sdbm_split_data(d, (void *) b->data, (void *) p[0]->data, (void *) p[1]->data, sigbit);
- sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, pgc_page_pos(d->cache, p[1]));
+ sdbm_split_dir(d, (dirpos & ~(4*rank - 1))+2*rank, rank/2, p[1]->pos);
pgc_mark_dirty(d->cache, p[0]);
i = (hash & (1 << sigbit)) ? 1 : 0;
pgc_put(d->cache, p[!i]);
if ((d->key_size >= 0 && keylen != (uns) d->key_size) || keylen > 65535)
return SDBM_ERROR_BAD_KEY_SIZE;
- if (val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535))
+ if (val && ((d->val_size >= 0 && *vallen != (uns) d->val_size) || *vallen >= 65535) && mode)
return SDBM_ERROR_BAD_VAL_SIZE;
if (!mode && !(d->flags & SDBM_WRITE))
return SDBM_ERROR_READ_ONLY;
hash = sdbm_hash(key, keylen);
- if (d->dir_shift != 32) /* avoid shifting by 32 bits */
- h = (hash >> d->dir_shift) << 2; /* offset in the directory */
- else
- h = 0;
+ h = sdbm_dirpos(d, hash);
p = pgc_read(d->cache, d->fd, d->root->dir_start + (h & ~d->page_mask));
pos = GET32(p->data, h & d->page_mask);
pgc_put(d->cache, p);
case 0: /* fetch: found */
rc = sdbm_put_user(D, Dl, val, vallen);
pgc_put(d->cache, q);
- return rc ? 1 : SDBM_ERROR_TOO_LARGE;
+ return rc ? SDBM_ERROR_TOO_LARGE : 1;
case 1: /* store: already present */
pgc_put(d->cache, q);
return 0;
while (b->used + size > d->page_size - sizeof(struct sdbm_bucket))
{
/* Page overflow, need to split */
- q = sdbm_split_page(d, q, hash, h);
+ if (size >= d->page_size - sizeof(struct sdbm_bucket))
+ {
+ pgc_put(d->cache, q);
+ return SDBM_ERROR_GIANT;
+ }
+ q = sdbm_split_page(d, q, hash);
b = (void *) q->data;
}
sdbm_store_entry(d, b->data + b->used, key, keylen, val, *vallen);
if (d->flags & SDBM_FSYNC)
fsync(d->fd);
}
-
-#ifdef TEST
-
-int main(void)
-{
- struct sdbm *d;
- struct sdbm_options o = {
- name: "db.test",
- flags: SDBM_CREAT | SDBM_WRITE,
- page_order: 10,
- cache_size: 1024,
- key_size: -1,
- val_size: 4
- };
- byte buf[256];
- int i, j, k, l, m, n;
-#define BIGN 100000
-
- puts("OPEN");
- d = sdbm_open(&o);
- if (!d)
- die("failed: %m");
-
- puts("WRITE");
- for(i=0; i<BIGN; i++)
- {
- sprintf(buf, "%d", i);
- k = sdbm_store(d, buf, strlen(buf), (byte *) &i, sizeof(i));
-// printf("%s:%d\r", buf, k);
- fflush(stdout);
- }
- sdbm_sync(d);
-
- puts("REWRITE");
- for(i=0; i<BIGN; i++)
- {
- sprintf(buf, "%d", i);
- k = sdbm_replace(d, buf, strlen(buf), (byte *) &i, sizeof(i));
-// printf("%s:%d\r", buf, k);
- if (!k) { printf("XXX %s %d\n", buf, k); return 1; }
- fflush(stdout);
- }
- sdbm_sync(d);
-
- puts("READ");
- for(i=0; i<BIGN; i++)
- {
- sprintf(buf, "%d", i);
- j = sdbm_fetch(d, buf, strlen(buf), NULL, NULL);
-// printf("%s:%d\r", buf, j);
- fflush(stdout);
- if (!j)
- { printf("\nERR: %s %d %x %d\n", buf, j, sdbm_hash(buf, strlen(buf)), d->dir_shift); return 1; }
- }
-
- puts("FETCH");
- sdbm_rewind(d);
- l = 0;
- m = 0;
- n = 0;
- for(;;)
- {
- j = sizeof(buf);
- i = sdbm_get_next(d, buf, &j, (byte *) &k, NULL);
- if (i < 0) { puts("ERRRR\n"); return 1; }
- if (!i) break;
- buf[j] = 0;
-// printf("%s %d\n", buf, k);
- l += k;
- m += n++;
- }
- if (n != BIGN) { printf("MISMATCH COUNT %d\n", n); return 1; }
- if (l != m) { printf("MISMATCH %d != %d\n", l, m); return 1; }
-
- puts("DELETE");
- for(i=0; i<BIGN; i++)
- {
- sprintf(buf, "%d", i);
- j = sdbm_delete(d, buf, strlen(buf));
-// printf("%s:%d\r", buf, j);
- fflush(stdout);
- if (!j)
- { printf("\nERR: %s %d %x %d\n", buf, j, sdbm_hash(buf, strlen(buf)), d->dir_shift); return 1; }
- }
- sdbm_sync(d);
-
- puts("CHECK");
- sdbm_rewind(d);
- if (sdbm_get_next(d, buf, NULL, NULL, NULL)) { puts("NOT EMPTY!"); return 1; }
-
- puts("CLOSE");
- sdbm_close(d);
- puts("DONE");
- return 0;
-}
-
-#endif