2 * UCW Library -- Fast Buffered I/O
4 * (c) 1997--2008 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
11 #ifndef _UCW_FASTBUF_H
12 #define _UCW_FASTBUF_H
18 * === Internal structure [[internal]]
20 * Generally speaking, a fastbuf consists of a buffer and a set of callbacks.
21 * All front-end functions operate on the buffer and if the buffer becomes
22 * empty or fills up, they ask the corresponding callback to handle the
23 * situation. Back-ends then differ just in the definition of the callbacks.
25 * The state of the fastbuf is represented by a `struct fastbuf`, which
26 * is a simple structure describing the state of the buffer (the pointers
27 * `buffer`, `bufend`), two front-end cursors (`bptr`, `bstop`), position in the file (`pos`)
28 * and pointers to the callback functions.
30 * The buffer can be in one of the following states:
34 * +----------------+---------------------------+
35 * | unused | free space |
36 * +----------------+---------------------------+
38 * buffer <= bptr == bstop (pos) <= bufend
40 * * If `bptr == bstop`, then there is no cached data and
41 * the fastbuf is ready for any read or write operation.
42 * Position of the back-end's cursor equals the front-end's one.
43 * * The interval `[bstop, bufend]` can be used by front-ends
44 * for writing. If it is empty, the `spout` callback gets called
45 * upon the first write attempt to allocate a new buffer.
46 * * When a front-end needs to read something, it calls the `refill` callback.
47 * * Any of the pointers can be NULL.
51 * +----------------+---------------------------+
52 * | read data | unused |
53 * +----------------+---------------------------+
55 * buffer <= bptr <= bstop (pos) <= bufend
57 * * If we try to read something, we get to the reading mode.
58 * * No writing is allowed until a flush operation. But note that @bflush()
59 * will simply set `bptr` to `bstop` and it breaks the position of the front-end's cursor.
60 * * The interval `[buffer, bstop]` contains a block of data read by the back-end.
61 * `bptr` is the front-end's cursor which points to the next character to be read.
62 * After the last character is read, `bptr == bstop` and the `refill` callback
63 * gets called upon the next read attempt to bring further data.
64 * This gives us an easy way to implement @bungetc().
68 * +---------+--------------+-------------------+
69 * | unused | written data | free space |
70 * +---------+--------------+-------------------+
72 * buffer <= bstop (pos) < bptr <= bufend
74 * * This schema corresponds to the situation after a write attempt.
75 * * No reading is allowed until a flush operation.
76 * * The `bptr` points at the position where the next character
77 * will be written to. When we want to write, but `bptr == bufend`, we call
78 * the `spout` hook to flush the data and get an empty buffer.
81 * Rules for back-ends:
83 * - Front-ends are only allowed to change the value of `bptr`, some flags
84 * and if a fatal error occurs, then also `bstop`.
85 * - `buffer <= bstop <= bufend`.
86 * - `pos` should be the position in the file corresponding to the location of `bstop` in the buffer.
87 * - Failed callbacks (except `close`) should use @bthrow().
88 * - Any callback pointers may be NULL in case the callback is not implemented.
91 * * out: `buffer <= bptr == bstop <= bufend` (flushed)
94 * * in: `buffer <= bptr == bstop <= bufend` (reading or flushed)
95 * * out: `buffer <= bptr < bstop <= bufend` (reading)
98 * * in: `buffer <= bstop <= bptr <= bufend` (writing or flushed)
99 * * out: `buffer <= bstop <= bufend` (flushed)
100 * * `bptr` is set automatically to `bstop`.
101 * * If the input `bptr` equals `bstop`, then the resulting `bstop` must be lower than `bufend`.
104 * * in: `buffer <= bstop == bptr <= bufend` (flushed)
105 * * out: `buffer <= bstop <= bufend` (flushed)
106 * * `bptr` is set automatically to `bstop`.
109 * * out: `buffer <= bptr == bstop <= bufend` (flushed)
110 * * `close` must always free all internal structures, even when it throws an exception.
113 * Several dirty tricks can be played:
115 * - The `spout`/`refill` hooks can change not only `bptr` and `bstop`, but also
116 * the location and size of the buffer; the fb-mem back-end takes advantage of it.
117 * - In some cases, the user of the `bdirect` interface can be allowed to modify
118 * the data in the buffer to avoid unnecessary copying. If the back-end
119 * allows such modifications, it can set `fastbuf->can_overwrite_buffer` accordingly:
120 * * 0 if no modification is allowed,
121 * * 1 if the user can modify the buffer on the condition that
122 * the modifications will be undone before calling the next
124 * * 2 if the user is allowed to overwrite the data in the buffer
125 * if @bdirect_read_commit_modified() is called afterwards.
126 * In this case, the back-end must be prepared for trimming
127 * of the buffer which is done by the commit function.
132 * This structure contains the state of the fastbuf. See the discussion above
136 byte is_fastbuf[0]; /* Dummy field for checking of type casts */
137 byte *bptr, *bstop; /* State of the buffer */
138 byte *buffer, *bufend; /* Start and end of the buffer */
139 char *name; /* File name (used for error messages) */
140 ucw_off_t pos; /* Position of bstop in the file */
141 int (*refill)(struct fastbuf *); /* Get a buffer with new data, returns 0 on EOF */
142 void (*spout)(struct fastbuf *); /* Write buffer data to the file */
143 int (*seek)(struct fastbuf *, ucw_off_t, int);/* Slow path for @bseek(), buffer already flushed; returns success */
144 void (*close)(struct fastbuf *); /* Close the stream */
145 int (*config)(struct fastbuf *, uns, int); /* Configure the stream */
146 int can_overwrite_buffer; /* Can the buffer be altered? 0=never, 1=temporarily, 2=permanently */
150 * === Fastbuf on files [[fbparam]]
152 * If you want to use fastbufs to access files, you can choose one of several
153 * back-ends and set their parameters.
160 FB_STD, /* Standard buffered I/O */
161 FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
162 FB_MMAP /* Memory mapped files */
166 * When you open a file fastbuf, you can use this structure to select a back-end
167 * and set its parameters. If you want just an "ordinary" file stream, you can
168 * happily pass NULL instead and the defaults from the configuration file (or
169 * hard-wired defaults if no config file has been read) will be used.
172 enum fb_type type; /* The chosen back-end */
173 uns buffer_size; /* 0 for default size */
174 uns keep_back_buf; /* FB_STD: optimize for bi-directional access */
175 uns read_ahead; /* FB_DIRECT options */
177 struct asio_queue *asio;
181 extern struct cf_section fbpar_cf; /** Configuration section with which you can fill the `fb_params` **/
182 extern struct fb_params fbpar_def; /** The default `fb_params` **/
185 * Opens a file with file mode @mode (see the man page of open()).
186 * Use @params to select the fastbuf back-end and its parameters or
187 * pass NULL if you are fine with defaults.
189 * Dies if the file does not exist.
191 struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params);
192 struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params); /** Like bopen_file(), but returns NULL on failure. **/
195 * Opens a temporary file.
196 * It is placed with other temp files and it is deleted when closed.
197 * Again, use NULL for @params if you want the defaults.
199 struct fastbuf *bopen_tmp_file(struct fb_params *params);
202 * Creates a fastbuf from a file descriptor @fd and sets its filename
203 * to @name (the name is used only in error messages).
204 * When the fastbuf is closed, the fd is closed as well. You can override
205 * this behavior by calling @bconfig().
207 struct fastbuf *bopen_fd_name(int fd, struct fb_params *params, const char *name);
208 static inline struct fastbuf *bopen_fd(int fd, struct fb_params *params) /** Same as above, but with an auto-generated filename. **/
210 return bopen_fd_name(fd, params, NULL);
214 * Flushes all buffers and makes sure that they are written to the disk.
216 void bfilesync(struct fastbuf *b);
219 * === Fastbufs on regular files [[fbfile]]
221 * If you want to use the `FB_STD` back-end and not worry about setting
222 * up any parameters, there are a couple of shortcuts.
225 struct fastbuf *bopen(const char *name, uns mode, uns buflen); /** Equivalent to @bopen_file() with `FB_STD` back-end. **/
226 struct fastbuf *bopen_try(const char *name, uns mode, uns buflen); /** Equivalent to @bopen_file_try() with `FB_STD` back-end. **/
227 struct fastbuf *bopen_tmp(uns buflen); /** Equivalent to @bopen_tmp_file() with `FB_STD` back-end. **/
228 struct fastbuf *bfdopen(int fd, uns buflen); /** Equivalent to @bopen_fd() with `FB_STD` back-end. **/
229 struct fastbuf *bfdopen_shared(int fd, uns buflen); /** Like @bfdopen(), but it does not close the @fd on @bclose(). **/
232 * === Temporary files [[fbtemp]]
234 * Usually, @bopen_tmp_file() is the best way to obtain a temporary file.
235 * However, in some specific cases you may need more, so there is also a set
236 * of more general functions.
239 #define TEMP_FILE_NAME_LEN 256
242 * Generates a temporary filename and stores it to the @name_buf (of size
243 * at least `TEMP_FILE_NAME_LEN`). If @open_flags are not NULL, flags that
244 * should be OR-ed with other flags to open() will be stored there.
246 * The location and style of temporary files is controlled by the configuration.
247 * By default, the system temp directory (`$TMPDIR` or `/tmp`) is used.
249 * If the location is a publicly writeable directory (like `/tmp`), the
250 * generated filename cannot be guaranteed to be unique, so @open_flags
251 * will include `O_EXCL` and you have to check the result of open() and
254 * This function is not specific to fastbufs, it can be used separately.
256 void temp_file_name(char *name_buf, int *open_flags);
259 * Opens a temporary file and returns its file descriptor.
260 * You specify the file @mode and @open_flags passed to open().
262 * If the @name_buf (of at least `TEMP_FILE_NAME_LEN` chars) is not NULL,
263 * the filename is also stored in it.
265 * This function is not specific to fastbufs, it can be used separately.
267 int open_tmp(char *name_buf, int open_flags, int mode);
270 * Sometimes, a file is created as temporary and then moved to a stable
271 * location. This function takes a fastbuf created by @bopen_tmp_file()
272 * or @bopen_tmp(), marks it as permanent, closes it and renames it to
275 * Please note that it assumes that the temporary file and the @name
276 * are on the same volume (otherwise, rename() fails), so you might
277 * want to configure a special location for the temporary files
280 void bfix_tmp_file(struct fastbuf *fb, const char *name);
282 /* Internal functions of some file back-ends */
284 struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
285 struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
287 extern uns fbdir_cheat;
289 struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
291 void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
294 * === Fastbufs on file fragments [[fblim]]
296 * The `fblim` back-end reads from a file handle, but at most a given
297 * number of bytes. This is frequently used for reading from sockets.
300 struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit); /** Create a fastbuf which reads at most @limit bytes from @fd. **/
303 * === Fastbufs on in-memory streams [[fbmem]]
305 * The `fbmem` back-end keeps the whole contents of the stream
306 * in memory (as a linked list of memory blocks, so address space
307 * fragmentation is avoided).
309 * First, you use @fbmem_create() to create the stream and the fastbuf
310 * used for writing to it. Then you can call @fbmem_clone_read() to get
311 * an arbitrary number of fastbufs for reading from the stream.
314 struct fastbuf *fbmem_create(uns blocksize); /** Create stream and return its writing fastbuf. **/
315 struct fastbuf *fbmem_clone_read(struct fastbuf *f); /** Given a writing fastbuf, create a new reading fastbuf. **/
318 * === Fastbufs on static buffers [[fbbuf]]
320 * The `fbbuf` back-end stores the stream in a given block of memory.
321 * This is useful for parsing and generating of complex data structures.
325 * Creates a read-only fastbuf that takes its data from a given buffer.
326 * The fastbuf structure is allocated by the caller and pointed to by @f.
327 * The @buffer and @size specify the location and size of the buffer.
329 * In some cases, the front-ends can take advantage of rewriting the contents
330 * of the buffer temporarily. In this case, set @can_overwrite as described
331 * in <<internal,Internals>>. If you do not care, keep @can_overwrite zero.
333 * It is not possible to close this fastbuf.
335 void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite);
338 * Creates a write-only fastbuf which writes into a provided memory buffer.
339 * The fastbuf structure is allocated by the caller and pointed to by @f.
340 * An attempt to write behind the end of the buffer dies.
342 * Data are written directly into the buffer, so it is not necessary to call @bflush()
345 * It is not possible to close this fastbuf.
347 void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size);
349 static inline uns fbbuf_count_written(struct fastbuf *f) /** Calculates, how many bytes were already written into the buffer. **/
351 return f->bptr - f->bstop;
355 * === Fastbuf on recyclable growing buffers [[fbgrow]]
357 * The `fbgrow` back-end keeps the stream in a contiguous buffer stored in the
358 * main memory, but unlike <<fbmem,`fbmem`>>, the buffer does not have a fixed
359 * size and it is expanded to accommodate all data.
361 * At every moment, you can use `fastbuf->buffer` to gain access to the stream.
364 struct fastbuf *fbgrow_create(unsigned basic_size); /** Create the growing buffer pre-allocated to @basic_size bytes. **/
365 void fbgrow_reset(struct fastbuf *b); /** Reset stream and prepare for writing. **/
366 void fbgrow_rewind(struct fastbuf *b); /** Prepare for reading (of already written data). **/
369 * === Fastbuf on memory pools [[fbpool]]
371 * The write-only `fbpool` back-end also keeps the stream in a contiguous
372 * buffer, but this time the buffer is allocated from within a memory pool.
376 struct fbpool { /** Structure for fastbufs & mempools. **/
382 * Initialize a new `fbpool`. The structure is allocated by the caller.
384 void fbpool_init(struct fbpool *fb); /** Initialize a new mempool fastbuf. **/
386 * Start a new continuous block and prepare for writing (see <<mempool:mp_start()>>).
387 * Provide the memory pool you want to use for this block as @mp.
389 void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
391 * Close the block and return the address of its start (see <<mempool:mp_end()>>).
392 * The length can be determined by calling <<mempool:mp_size(mp, ptr)>>.
394 void *fbpool_end(struct fbpool *fb);
397 * === Atomic files for multi-threaded programs [[fbatomic]]
399 * This fastbuf backend is designed for cases when several threads
400 * of a single program append records to a common file and while the
401 * records can mix in an arbitrary way, the bytes inside a single
402 * record must remain uninterrupted.
404 * In case of files with fixed record size, we just allocate the
405 * buffer to hold a whole number of records and take advantage
406 * of the atomicity of the write() system call.
408 * With variable-sized records, we need another solution: when
409 * writing a record, we keep the fastbuf in a locked state, which
410 * prevents buffer flushing (and if the buffer becomes full, we extend it),
411 * and we wait for an explicit commit operation which write()s the buffer
412 * if the free space in the buffer falls below the expected maximum record
415 * Please note that initialization of the clones is not thread-safe,
416 * so you have to serialize it yourself.
421 struct fb_atomic_file *af;
422 byte *expected_max_bptr;
425 #define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf)
428 * Open an atomic fastbuf.
429 * If @master is NULL, the file @name is opened. If it is non-null,
430 * a new clone of an existing atomic fastbuf is created.
432 * If the file has fixed record length, just set @record_len to it.
433 * Otherwise set @record_len to the expected maximum record length
434 * with a negative sign (you need not fit in this length, but as long
435 * as you do, the fastbuf is more efficient) and call @fbatomic_commit()
438 * You can specify @record_len, if it is known (for optimisations).
440 * The file is closed when all fastbufs using it are closed.
442 struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
443 void fbatomic_internal_write(struct fastbuf *b); /* Internal helper: invoked by fbatomic_commit() once `bptr` has reached `expected_max_bptr` */
446 * Declare that you have finished writing a record. This is required only
447 * if a fixed record size was not specified.
449 static inline void fbatomic_commit(struct fastbuf *b)
451 if (b->bptr >= ((struct fb_atomic *)b)->expected_max_bptr)
452 fbatomic_internal_write(b);
455 /*** === Configuring stream parameters [[bconfig]] ***/
457 enum bconfig_type { /** Parameters that could be configured. **/
458 BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */
459 BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */
462 int bconfig(struct fastbuf *f, uns type, int data); /** Configure a fastbuf. Returns previous value. **/
464 /*** === Universal functions working on all fastbuf's [[ffbasic]] ***/
467 * Close and free fastbuf.
468 * Cannot be used for fastbufs that were not returned by a function (i.e., those initialized in a caller-supplied structure, such as the one set up by `fbbuf_init_read`).
470 void bclose(struct fastbuf *f);
471 void bflush(struct fastbuf *f); /** Write data (if it makes any sense, do not use for in-memory buffers). **/
472 void bseek(struct fastbuf *f, ucw_off_t pos, int whence); /** Seek in the buffer. See `man fseek` for description of @whence. Only for seekable fastbufs. **/
473 void bsetpos(struct fastbuf *f, ucw_off_t pos); /** Set position to @pos bytes from beginning. Only for seekable fastbufs. **/
474 void brewind(struct fastbuf *f); /** Go to the beginning of the fastbuf. Only for seekable ones. **/
475 ucw_off_t bfilesize(struct fastbuf *f); /** How large is the file? -1 if not seekable. **/
477 static inline ucw_off_t btell(struct fastbuf *f) /** Where am I (from the beginning)? **/
479 return f->pos + (f->bptr - f->bstop);
482 int bgetc_slow(struct fastbuf *f); /* Out-of-line fallback of bgetc(): called when no unread data remain in the buffer (`bptr >= bstop`) */
483 static inline int bgetc(struct fastbuf *f) /** Return next character from the buffer. **/
485 return (f->bptr < f->bstop) ? (int) *f->bptr++ : bgetc_slow(f);
488 int bpeekc_slow(struct fastbuf *f); /* Out-of-line fallback of bpeekc(): called when no unread data remain in the buffer (`bptr >= bstop`) */
489 static inline int bpeekc(struct fastbuf *f) /** Return next character from the buffer, but keep the current position. **/
491 return (f->bptr < f->bstop) ? (int) *f->bptr : bpeekc_slow(f);
494 static inline void bungetc(struct fastbuf *f) /** Return last read character back. Only one back is guaranteed to work. **/
499 void bputc_slow(struct fastbuf *f, uns c);
500 static inline void bputc(struct fastbuf *f, uns c) /** Write a single character. **/
502 if (f->bptr < f->bufend)
508 static inline uns bavailr(struct fastbuf *f)
510 return f->bstop - f->bptr;
513 static inline uns bavailw(struct fastbuf *f)
515 return f->bufend - f->bptr;
518 uns bread_slow(struct fastbuf *f, void *b, uns l, uns check); /* Out-of-line part shared by bread() and breadb(); @check is 0 when called from bread() and 1 when called from breadb() */
520 * Read at most @l bytes of data into @b.
521 * Returns number of bytes read.
522 * 0 means end of file.
524 static inline uns bread(struct fastbuf *f, void *b, uns l)
528 memcpy(b, f->bptr, l);
533 return bread_slow(f, b, l, 0);
537 * Reads exactly @l bytes of data into @b.
538 * If at the end of file, it returns 0.
539 * If there are data, but less than @l, it dies.
541 static inline uns breadb(struct fastbuf *f, void *b, uns l)
545 memcpy(b, f->bptr, l);
550 return bread_slow(f, b, l, 1);
553 void bwrite_slow(struct fastbuf *f, const void *b, uns l);
554 static inline void bwrite(struct fastbuf *f, const void *b, uns l) /** Writes buffer @b of length @l into fastbuf. **/
558 memcpy(f->bptr, b, l);
562 bwrite_slow(f, b, l);
566 * Reads a line into @b and strips trailing `\n`.
567 * Returns pointer to the terminating 0 or NULL on `EOF`.
568 * Dies if the line is longer than @l.
570 char *bgets(struct fastbuf *f, char *b, uns l);
571 char *bgets0(struct fastbuf *f, char *b, uns l); /** The same as @bgets(), but for 0-terminated strings. **/
573 * Returns either length of read string (excluding the terminator) or -1 if it is too long.
574 * In such cases exactly @l bytes are read.
576 int bgets_nodie(struct fastbuf *f, char *b, uns l);
581 * Read a string, strip the trailing `\n` and store it into growing buffer @b.
582 * Dies if the line is longer than @limit.
584 uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
586 * Read a string, strip the trailing `\n` and store it into buffer allocated from a memory pool.
588 char *bgets_mp(struct fastbuf *f, struct mempool *mp);
590 struct bgets_stk_struct {
592 byte *old_buf, *cur_buf, *src;
593 uns old_len, cur_len, src_len;
595 void bgets_stk_init(struct bgets_stk_struct *s);
596 void bgets_stk_step(struct bgets_stk_struct *s);
599 * Read a string, strip the trailing `\n` and store it on the stack (allocated using alloca()).
601 #define bgets_stk(fb) \
602 ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
605 * Write a string, without 0 or `\n` at the end.
607 static inline void bputs(struct fastbuf *f, const char *b)
609 bwrite(f, b, strlen(b));
613 * Write string, including terminating 0.
615 static inline void bputs0(struct fastbuf *f, const char *b)
617 bwrite(f, b, strlen(b)+1);
621 * Write string and append a newline to the end.
623 static inline void bputsn(struct fastbuf *f, const char *b)
629 void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l);
631 * Copy @l bytes of data from fastbuf @f to fastbuf @t.
632 * `UINT_MAX` (`~0U`) means all data, even if more than `UINT_MAX` bytes remain.
634 static inline void bbcopy(struct fastbuf *f, struct fastbuf *t, uns l)
636 if (bavailr(f) >= l && bavailw(t) >= l)
638 memcpy(t->bptr, f->bptr, l);
643 bbcopy_slow(f, t, l);
646 int bskip_slow(struct fastbuf *f, uns len);
647 static inline int bskip(struct fastbuf *f, uns len) /** Skip @len bytes without reading them. **/
649 if (bavailr(f) >= len)
655 return bskip_slow(f, len);
658 /*** === Direct I/O on buffers ***/
659 // TODO Documentation -- what do they do?
662 bdirect_read_prepare(struct fastbuf *f, byte **buf)
664 if (f->bptr == f->bstop && !f->refill(f))
666 *buf = NULL; // This is not needed, but it helps to get rid of spurious warnings
674 bdirect_read_commit(struct fastbuf *f, byte *pos)
680 bdirect_read_commit_modified(struct fastbuf *f, byte *pos)
683 f->buffer = pos; /* Avoid seeking backwards in the buffer */
687 bdirect_write_prepare(struct fastbuf *f, byte **buf)
689 if (f->bptr == f->bufend)
696 bdirect_write_commit(struct fastbuf *f, byte *pos)
701 /*** === Formatted output ***/
704 * printf into a fastbuf.
706 int bprintf(struct fastbuf *b, const char *msg, ...)
707 FORMAT_CHECK(printf,2,3);
708 int vbprintf(struct fastbuf *b, const char *msg, va_list args); /** vprintf into a fastbuf. **/