X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=ucw%2Ffastbuf.h;h=4f0d99d3190be67c6fad8ae22e4e11a46760167d;hb=06cac26f0d8895b5cb8fbe03ca96444dc4ac8d51;hp=e74034f44d2dcc4c7c519ee44ec2188a19ccd8af;hpb=1d542df8e8b1ca49df7828f27c6c7a9d6bfb619b;p=libucw.git diff --git a/ucw/fastbuf.h b/ucw/fastbuf.h index e74034f4..4f0d99d3 100644 --- a/ucw/fastbuf.h +++ b/ucw/fastbuf.h @@ -1,7 +1,7 @@ /* * UCW Library -- Fast Buffered I/O * - * (c) 1997--2008 Martin Mares + * (c) 1997--2011 Martin Mares * (c) 2004 Robert Spalek * * This software may be freely distributed and used according to the terms @@ -19,52 +19,109 @@ * * Generally speaking, a fastbuf consists of a buffer and a set of callbacks. * All front-end functions operate on the buffer and if the buffer becomes - * empty or fills up, they ask the corresponding callback to solve the + * empty or fills up, they ask the corresponding callback to handle the * situation. Back-ends then differ just in the definition of the callbacks. - * The state of the fastbuf is represented by `struct fastbuf`, which - * is a simple structure describing the state of the buffer and pointers - * to the callback functions. - * - * When we are reading from the fastbuf, the buffer is laid out as follows: - * - * +----------------+---------------------------+ - * | read data | free space | - * +----------------+---------------------------+ - * ^ ^ ^ ^ - * buffer bptr bstop bufend - * - * Here `bptr` points to the next character to be read. After the last character is - * read, `bptr == bstop` and the `refill` callback gets called upon the next read - * attempt to bring further data. This gives us an easy way how to implement @bungetc().
- * - * When writing, the situation looks like: - * - * +--------+--------------+--------------------+ - * | unused | written data | free space | - * +--------+--------------+--------------------+ - * ^ ^ ^ ^ - * buffer bstop bptr bufend - * - * In this case, the `bptr` points at the position where the next character - * will be written to. When we want to write, but `bptr == bufend`, we call - * the `spout` hook to flush the data and get an empty buffer. - * - * Several dirty tricks can be played: - * - * - The `spout`/`refill` hooks can change not only `bptr` and `bstop`, but also - * the location and size of the buffer; the fb-mem back-end takes advantage of it. - * - In some cases, the user of the `bdirect` interface can be allowed to modify - * the data in the buffer to avoid unnecessary copying. If the back-end - * allows such modifications, it can set `fastbuf->can_overwrite_buffer` accordingly: - * * 0 if no modification is allowed, - * * 1 if the user can modify the buffer on the condition that - * the modifications will be undone before calling the next - * fastbuf operation - * * 2 if the user is allowed to overwrite the data in the buffer - * if @bdirect_read_commit_modified() is called afterwards. - * In this case, the back-end must be prepared for trimming - * of the buffer which is done by the commit function. * + * The state of the fastbuf is represented by a <>, + * which is a simple structure describing the state of the buffer (the pointers + * `buffer`, `bufend`), the front-end cursor (`bptr`), the back-end cursor (`bstop`), + * position of the back-end cursor in the file (`pos`), some flags (`flags`) + * and pointers to the callback functions. + * + * The buffer can be in one of the following states: + * + * 1. 
Flushed: + * + * +------------------------------------+---------------------------+ + * | unused | free space | + * +------------------------------------+---------------------------+ + * ^ ^ ^ ^ + * buffer <= bstop (BE pos) <= bptr (FE pos) <= bufend + * + * * This schema describes a fastbuf after its initialization or bflush(). + * * There is no cached data and we are ready for any read or write operation + * (well, only if the back-end supports it). + * * The interval `[bptr, bufend]` can be used by front-ends + * for writing. If it is empty, the `spout` callback gets called + * upon the first write attempt to allocate a new buffer. Otherwise + * the fastbuf silently comes to the writing mode. + * * When a front-end needs to read something, it calls the `refill` callback. + * * The pointers can be either all non-`NULL` or all NULL. + * * `bstop == bptr` in most back-ends, but it is not necessary. Some + * in-memory streams take advantage of this. + * + * 2. Reading: + * + * +------------------------------------+---------------------------+ + * | read data | unused | + * +------------------------------------+---------------------------+ + * ^ ^ ^ ^ + * buffer <= bptr (FE pos) <= bstop (BE pos) <= bufend + * + * * If we try to read something, we get to the reading mode. + * * No writing is allowed until a flush operation. But note that @bflush() + * will simply set `bptr` to `bstop` before `spout` + * and it breaks the position of the front-end's cursor, + * so the user should seek afwards. + * * The interval `[buffer, bstop]` contains a block of data read by the back-end. + * `bptr` is the front-end's cursor which points to the next character to be read. + * After the last character is read, `bptr == bstop` and the `refill` callback + * gets called upon the next read attempt to bring further data. + * This gives us an easy way how to implement @bungetc(). + * + * 3. 
Writing: + * + * +-----------------------+----------------+-----------------------+ + * | unused | written data | free space | + * +-----------------------+----------------+-----------------------+ + * ^ ^ ^ ^ + * buffer <= bstop (BE pos) < bptr (FE pos) <= bufend + * + * * This schema corresponds to the situation after a write attempt. + * * No reading is allowed until a flush operation. + * * The `bptr` points at the position where the next character + * will be written to. When we want to write, but `bptr == bufend`, we call + * the `spout` hook to flush the written data and get an empty buffer. + * * `bstop` usually points at the beginning of the written data, + * but it is not necessary. + * + * + * Rules for back-ends: + * + * - Front-ends are only allowed to change the value of `bptr`, some flags + * and if a fatal error occurs, then also `bstop`. Back-ends can rely on it. + * - `buffer <= bstop <= bufend` and `buffer <= bptr <= bufend`. + * - `pos` should be the real position in the file corresponding to the location of `bstop` in the buffer. + * It can be modified by any back-end's callback, but the position of `bptr` (`pos + (bptr - bstop)`) + * must stay unchanged after `refill` or `spout`. + * - Failed callbacks (except `close`) should use @bthrow(). + * - Any callback pointer may be NULL in case the callback is not implemented. + * - Callbacks can change not only `bptr` and `bstop`, but also the location and size of the buffer; + * the fb-mem back-end takes advantage of it. + * + * - Initialization: + * * out: `buffer <= bstop <= bptr <= bufend` (flushed). + * * @fb_tie() should be called on the newly created fastbuf. + * + * - `refill`: + * * in: `buffer <= bstop <= bptr <= bufend` (reading or flushed). + * * out: `buffer <= bptr <= bstop <= bufend` (reading). + * * Resulting `bptr == bstop` signals the end of file. + * The next reading attempt will again call `refill` which can succeed this time.
+ * * The callback must also return zero on EOF (iff `bptr == bstop`). + * + * - `spout`: + * * in: `buffer <= bstop <= bptr <= bufend` (writing or flushed). + * * out: `buffer <= bstop <= bptr < bufend` (flushed). + * + * - `seek`: + * * in: `buffer <= bstop <= bptr <= bufend` (flushed). + * * in: `(ofs >= 0 && whence == SEEK_SET) || (ofs <= 0 && whence == SEEK_END)`. + * * out: `buffer <= bstop <= bptr <= bufend` (flushed). + * + * - `close`: + * * in: `buffer <= bstop <= bptr <= bufend` (flushed or after @bthrow()). + * * `close` must always free all internal structures, even when it throws an exception. ***/ /** @@ -72,19 +129,31 @@ * for how it works. **/ struct fastbuf { - byte is_fastbuf[0]; /* Dummy field for checking of type casts */ byte *bptr, *bstop; /* State of the buffer */ byte *buffer, *bufend; /* Start and end of the buffer */ char *name; /* File name (used for error messages) */ ucw_off_t pos; /* Position of bstop in the file */ + uns flags; /* See enum fb_flags */ int (*refill)(struct fastbuf *); /* Get a buffer with new data, returns 0 on EOF */ void (*spout)(struct fastbuf *); /* Write buffer data to the file */ int (*seek)(struct fastbuf *, ucw_off_t, int);/* Slow path for @bseek(), buffer already flushed; returns success */ void (*close)(struct fastbuf *); /* Close the stream */ int (*config)(struct fastbuf *, uns, int); /* Configure the stream */ int can_overwrite_buffer; /* Can the buffer be altered? 0=never, 1=temporarily, 2=permanently */ + struct resource *res; /* The fastbuf can be tied to a resource pool */ }; +/** + * Fastbuf flags + */ +enum fb_flags { + FB_DEAD = 0x1, /* Some fastbuf's method has thrown an exception */ + FB_DIE_ON_EOF = 0x2, /* Most of read operations throw "fb.eof" on EOF */ +}; + +/** Tie a fastbuf to a resource in the current resource pool. Returns the pointer to the same fastbuf. 
**/ +struct fastbuf *fb_tie(struct fastbuf *b); /* Tie fastbuf to a resource if there is an active pool */ + /*** * === Fastbuf on files [[fbparam]] * @@ -117,7 +186,7 @@ struct fb_params { }; struct cf_section; -extern struct cf_section fbpar_cf; /** Configuration section with which you can fill the `fb_params` **/ +extern struct cf_section fbpar_cf; /** Configuration section with which you can fill the `fb_params` **/ extern struct fb_params fbpar_def; /** The default `fb_params` **/ /** @@ -125,7 +194,7 @@ extern struct fb_params fbpar_def; /** The default `fb_params` **/ * Use @params to select the fastbuf back-end and its parameters or * pass NULL if you are fine with defaults. * - * Dies if the file does not exist. + * Raises `ucw.fb.open` if the file does not exist. **/ struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params); struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params); /** Like bopen_file(), but returns NULL on failure. **/ @@ -223,9 +292,11 @@ void bfix_tmp_file(struct fastbuf *fb, const char *name); struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen); struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode); +#ifdef CONFIG_UCW_FB_DIRECT extern uns fbdir_cheat; struct asio_queue; struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back); +#endif void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file); @@ -269,19 +340,21 @@ struct fastbuf *fbmem_clone_read(struct fastbuf *f); /** Given a writing fastbuf * of the buffer temporarily. In this case, set @can_overwrite as described * in <>. If you do not care, keep @can_overwrite zero. * - * It is not possible to close this fastbuf. + * It is not possible to close this fastbuf. This implies that no tying to + * resources takes place. 
*/ void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite); /** * Creates a write-only fastbuf which writes into a provided memory buffer. * The fastbuf structure is allocated by the caller and pointed to by @f. - * An attempt to write behind the end of the buffer dies. + * An attempt to write behind the end of the buffer causes the `ucw.fb.write` exception. * * Data are written directly into the buffer, so it is not necessary to call @bflush() * at any moment. * - * It is not possible to close this fastbuf. + * It is not possible to close this fastbuf. This implies that no tying to + * resources takes place. */ void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size); @@ -300,10 +373,20 @@ static inline uns fbbuf_count_written(struct fastbuf *f) /** Calculates, how man * At every moment, you can use `fastbuf->buffer` to gain access to the stream. ***/ +struct mempool; + struct fastbuf *fbgrow_create(unsigned basic_size); /** Create the growing buffer pre-allocated to @basic_size bytes. **/ +struct fastbuf *fbgrow_create_mp(struct mempool *mp, unsigned basic_size); /** Create the growing buffer pre-allocated to @basic_size bytes. **/ void fbgrow_reset(struct fastbuf *b); /** Reset stream and prepare for writing. **/ void fbgrow_rewind(struct fastbuf *b); /** Prepare for reading (of already written data). **/ +/** + * Can be used in any state of @b (for example when writing or after + * @fbgrow_rewind()) to return the pointer to internal buffer and its length in + * bytes. The returned buffer can be invalidated by further requests. + **/ +uns fbgrow_get_buf(struct fastbuf *b, byte **buf); + /*** * === Fastbuf on memory pools [[fbpool]] * @@ -311,14 +394,14 @@ void fbgrow_rewind(struct fastbuf *b); /** Prepare for reading (of already wri * buffer, but this time the buffer is allocated from within a memory pool. ***/ -struct mempool; struct fbpool { /** Structure for fastbufs & mempools. 
**/ struct fastbuf fb; struct mempool *mp; }; /** - * Initialize a new `fbpool`. The structure is allocated by the caller. + * Initialize a new `fbpool`. The structure is allocated by the caller, + * so bclose() should not be called and no resource tying takes place. **/ void fbpool_init(struct fbpool *fb); /** Initialize a new mempool fastbuf. **/ /** @@ -361,7 +444,6 @@ struct fb_atomic { byte *expected_max_bptr; uns slack_size; }; -#define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf) /** * Open an atomic fastbuf. @@ -391,6 +473,53 @@ static inline void fbatomic_commit(struct fastbuf *b) fbatomic_internal_write(b); } +/*** + * === Fastbufs atop other fastbufs [[fbmulti]] + * + * Imagine some code which does massive string processing. It takes an input + * buffer, writes a part of it into an output buffer, then some other string + * and then the remaining part of the input buffer. Or anything else where you + * copy all the data at each stage of the complicated process. + * + * This backend takes multiple fastbufs and concatenates them formally into + * one. You may then read them consecutively as if they were a single fastbuf. + * + * This backend is read-only. + * + * This backend is seekable iff all of the supplied fastbufs are seekable. + * + * You aren't allowed to do anything with the underlying buffers while these + * are connected into fbmulti. + * + * The fbmulti is initialized by @fbmulti_create(). It returns an empty fbmulti. + * Then you call @fbmulti_append() for each underlying fastbuf. + * + * If @bclose() is called on fbmulti, all the underlying buffers get closed + * recursively. + * + * If you want to keep an underlying fastbuf open after @bclose, just remove it + * by @fbmulti_remove where the second parameter is a pointer to the removed + * fastbuf. If you pass NULL, all the underlying fastbufs are removed. + * + * After @fbmulti_remove, the state of the fbmulti is undefined.
The only allowed + * operation is either another @fbmulti_remove or @bclose on the fbmulti. + ***/ + +/** + * Create an empty fbmulti + **/ +struct fastbuf *fbmulti_create(void); + +/** + * Append a fb to fbmulti + **/ +void fbmulti_append(struct fastbuf *f, struct fastbuf *fb); + +/** + * Remove a fb from fbmulti + **/ +void fbmulti_remove(struct fastbuf *f, struct fastbuf *fb); + /*** === Configuring stream parameters [[bconfig]] ***/ enum bconfig_type { /** Parameters that could be configured. **/ @@ -407,6 +536,9 @@ int bconfig(struct fastbuf *f, uns type, int data); /** Configure a fastbuf. Ret * Can not be used for fastbufs not returned from function (initialized in a parameter, for example the one from `fbbuf_init_read`). */ void bclose(struct fastbuf *f); +void bthrow(struct fastbuf *f, const char *id, const char *fmt, ...) FORMAT_CHECK(printf,3,4) NONRET; /** Throw exception on a given fastbuf **/ +int brefill(struct fastbuf *f, int allow_eof); +void bspout(struct fastbuf *f); void bflush(struct fastbuf *f); /** Write data (if it makes any sense, do not use for in-memory buffers). **/ void bseek(struct fastbuf *f, ucw_off_t pos, int whence); /** Seek in the buffer. See `man fseek` for description of @whence. Only for seekable fastbufs. **/ void bsetpos(struct fastbuf *f, ucw_off_t pos); /** Set position to @pos bytes from beginning. Only for seekable fastbufs. **/ @@ -430,6 +562,12 @@ static inline int bpeekc(struct fastbuf *f) /** Return next character from the return (f->bptr < f->bstop) ? (int) *f->bptr : bpeekc_slow(f); } +int beof_slow(struct fastbuf *f); +static inline int beof(struct fastbuf *f) /** Have I reached EOF? **/ +{ + return (f->bptr < f->bstop) ? 0 : beof_slow(f); +} + static inline void bungetc(struct fastbuf *f) /** Return last read character back. Only one back is guaranteed to work. 
**/ { f->bptr--; @@ -444,12 +582,12 @@ static inline void bputc(struct fastbuf *f, uns c) /** Write a single character bputc_slow(f, c); } -static inline uns bavailr(struct fastbuf *f) +static inline uns bavailr(struct fastbuf *f) /** Return the length of the cached data to be read. Do not use directly. **/ { return f->bstop - f->bptr; } -static inline uns bavailw(struct fastbuf *f) +static inline uns bavailw(struct fastbuf *f) /** Return the length of the buffer available for writing. Do not use directly. **/ { return f->bufend - f->bptr; } @@ -475,7 +613,7 @@ static inline uns bread(struct fastbuf *f, void *b, uns l) /** * Reads exactly @l bytes of data into @b. * If at the end of file, it returns 0. - * If there are data, but less than @l, it dies. + * If there are data, but less than @l, it raises `ucw.fb.eof`. */ static inline uns breadb(struct fastbuf *f, void *b, uns l) { @@ -504,7 +642,7 @@ static inline void bwrite(struct fastbuf *f, const void *b, uns l) /** Writes bu /** * Reads a line into @b and strips trailing `\n`. * Returns pointer to the terminating 0 or NULL on `EOF`. - * Dies if the line is longer than @l. + * Raises `ucw.fb.toolong` if the line is longer than @l. **/ char *bgets(struct fastbuf *f, char *b, uns l); char *bgets0(struct fastbuf *f, char *b, uns l); /** The same as @bgets(), but for 0-terminated strings. **/ @@ -518,7 +656,7 @@ struct mempool; struct bb_t; /** * Read a string, strip the trailing `\n` and store it into growing buffer @b. - * Dies if the line is longer than @limit. + * Raises `ucw.fb.toolong` if the line is longer than @limit. **/ uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit); /** @@ -595,10 +733,28 @@ static inline int bskip(struct fastbuf *f, uns len) /** Skip @len bytes without } /*** === Direct I/O on buffers ***/ -// TODO Documentation -- what do they do? 
-static inline uns -bdirect_read_prepare(struct fastbuf *f, byte **buf) +/** + * Begin direct reading from fastbuf's internal buffer to avoid unnecessary copying. + * The function returns a buffer @buf together with its length in bytes (zero means EOF) + * with cached data to be read. + * + * Some back-ends allow the user to modify the data in the returned buffer to avoid unnecessary copying. + * If the back-end allows such modifications, it can set `f->can_overwrite_buffer` accordingly: + * + * - 0 if no modification is allowed, + * - 1 if the user can modify the buffer on the condition that + * the modifications will be undone before calling the next + * fastbuf operation + * - 2 if the user is allowed to overwrite the data in the buffer + * if @bdirect_read_commit_modified() is called afterwards. + * In this case, the back-end must be prepared for trimming + * of the buffer which is done by the commit function. + * + * The reading must be ended by @bdirect_read_commit() or @bdirect_read_commit_modified(), + * unless the user did not read or modify anything. + **/ +static inline uns bdirect_read_prepare(struct fastbuf *f, byte **buf) { if (f->bptr == f->bstop && !f->refill(f)) { @@ -609,21 +765,33 @@ bdirect_read_prepare(struct fastbuf *f, byte **buf) return bavailr(f); } -static inline void -bdirect_read_commit(struct fastbuf *f, byte *pos) +/** + * End direct reading started by @bdirect_read_prepare() and move the cursor to @pos. + * Data in the returned buffer must be the same as after @bdirect_read_prepare() and + * @pos must point somewhere inside the buffer. + **/ +static inline void bdirect_read_commit(struct fastbuf *f, byte *pos) { f->bptr = pos; } -static inline void -bdirect_read_commit_modified(struct fastbuf *f, byte *pos) +/** + * Similar to @bdirect_read_commit(), but accepts also modified data before @pos. + * Note that such modifications are supported only if `f->can_overwrite_buffer == 2`.
+ **/ +static inline void bdirect_read_commit_modified(struct fastbuf *f, byte *pos) { f->bptr = pos; f->buffer = pos; /* Avoid seeking backwards in the buffer */ } -static inline uns -bdirect_write_prepare(struct fastbuf *f, byte **buf) +/** + * Start direct writing to fastbuf's internal buffer to avoid copy overhead. + * The function returns the length of the buffer in @buf (at least one byte) + * where we can write to. The operation must be ended by @bdirect_write_commit(), + * unless nothing is written. + **/ +static inline uns bdirect_write_prepare(struct fastbuf *f, byte **buf) { if (f->bptr == f->bufend) f->spout(f); @@ -631,8 +799,12 @@ bdirect_write_prepare(struct fastbuf *f, byte **buf) return bavailw(f); } -static inline void -bdirect_write_commit(struct fastbuf *f, byte *pos) +/** + * Commit the data written to the buffer returned by @bdirect_write_prepare(). + * The length is specified by @pos which must point just after the written data. + * Also moves the cursor to @pos. + **/ +static inline void bdirect_write_commit(struct fastbuf *f, byte *pos) { f->bptr = pos; }