2 * UCW Library -- Fast Buffered I/O
4 * (c) 1997--2008 Martin Mares <mj@ucw.cz>
5 * (c) 2004 Robert Spalek <robert@ucw.cz>
7 * This software may be freely distributed and used according to the terms
8 * of the GNU Lesser General Public License.
11 #ifndef _UCW_FASTBUF_H
12 #define _UCW_FASTBUF_H
18 * === Internal structure [[internal]]
20 * Generally speaking, a fastbuf consists of a buffer and a set of callbacks.
21 * All front-end functions operate on the buffer and if the buffer becomes
22 * empty or fills up, they ask the corresponding callback to solve the
23 * situation. Back-ends then differ just in the definition of the callbacks.
24 * The state of the fastbuf is represented by `struct fastbuf`, which
25 * is a simple structure describing the state of the buffer, cursor's position
26 * and pointers to the callback functions.
29 * The buffer can be in one of the following states:
33 * +----------------+---------------------------+
34 * | unused | free space |
35 * +----------------+---------------------------+
37 * buffer <= bptr == bstop (pos) <= bufend
39 * * If `bptr == bstop`, then there is no cached data and
40 * the fastbuf is ready for any read or write operation.
41 * Position of the back-end's cursor equals the front-end's one.
42 * * The interval `[bstop, bufend]` can be used by front-ends
43 * for writing. If it is empty, the `spout` callback gets called
44 * upon the first write attempt to allocate a new buffer.
45 * * When a front-end needs to read something, it calls the `refill` callback.
46 * * The pointers can be NULL.
50 * +----------------+---------------------------+
51 * | read data | unused |
52 * +----------------+---------------------------+
54 * buffer <= bptr <= bstop (pos) <= bufend
56 * * If we try to read something, we get to the reading mode.
57 * * No writing is allowed until a flush operation. But note that @bflush()
58 * will simply set `bptr` to `bstop` and break the position of the front-end's cursor.
59 * * The interval `[buffer, bstop]` contains a block of data read by the back-end.
60 * `bptr` is the front-end's cursor and points to the next character to be read.
61 * After the last character is read, `bptr == bstop` and the `refill` callback
62 * gets called upon the next read attempt to bring further data.
63 * This gives us an easy way to implement @bungetc().
67 * +---------+--------------+-------------------+
68 * | unused | written data | free space |
69 * +---------+--------------+-------------------+
71 * buffer <= bstop (pos) < bptr <= bufend
73 * * This schema corresponds to the situation after a write attempt.
74 * * No reading is allowed until a flush operation.
75 * * The `bptr` points at the position where the next character
76 * will be written to. When we want to write, but `bptr == bufend`, we call
77 * the `spout` hook to flush the data and get an empty buffer.
80 * Rules for back-ends:
82 * - Front-ends are only allowed to change the value of `bptr`, some flags
83 * and, if a fatal error occurs, also `bstop`.
84 * - `buffer <= bstop <= bufend`.
85 * - `pos` and `bstop` should correspond to the back-end's cursor.
86 * - Failed callbacks (except `close`) should use @bthrow().
87 * - All callback pointers can be NULL.
90 * * out: `buffer <= bptr == bstop <= bufend` (flushed)
93 * * in: `buffer <= bptr == bstop <= bufend` (reading or flushed)
94 * * out: `buffer <= bptr < bstop <= bufend` (reading)
97 * * in: `buffer <= bstop <= bptr <= bufend` (writing or flushed)
98 * * out: `buffer <= bstop <= bufend` (flushed)
99 * * `bptr` is set automatically to `bstop`.
100 * * If the input `bptr` equals `bstop`, then the resulting `bstop` must be lower than `bufend`.
103 * * in: `buffer <= bstop == bptr <= bufend` (flushed)
104 * * out: `buffer <= bstop <= bufend` (flushed)
105 * * `bptr` is set automatically to `bstop`.
108 * * out: `buffer <= bptr == bstop <= bufend` (flushed)
109 * * `close` must always free all internal structures, even when it throws an exception.
112 * Several dirty tricks can be played:
114 * - The `spout`/`refill` hooks can change not only `bptr` and `bstop`, but also
115 * the location and size of the buffer; the fb-mem back-end takes advantage of it.
116 * - In some cases, the user of the `bdirect` interface can be allowed to modify
117 * the data in the buffer to avoid unnecessary copying. If the back-end
118 * allows such modifications, it can set `fastbuf->can_overwrite_buffer` accordingly:
119 * * 0 if no modification is allowed,
120 * * 1 if the user can modify the buffer on the condition that
121 * the modifications will be undone before calling the next
123 * * 2 if the user is allowed to overwrite the data in the buffer
124 * if @bdirect_read_commit_modified() is called afterwards.
125 * In this case, the back-end must be prepared for trimming
126 * of the buffer which is done by the commit function.
131 * This structure contains the state of the fastbuf. See the discussion above
135 byte is_fastbuf[0]; /* Dummy field for checking of type casts */
136 byte *bptr, *bstop; /* State of the buffer */
137 byte *buffer, *bufend; /* Start and end of the buffer */
138 char *name; /* File name (used for error messages) */
139 ucw_off_t pos; /* Position of bstop in the file */
140 int (*refill)(struct fastbuf *); /* Get a buffer with new data, returns 0 on EOF */
141 void (*spout)(struct fastbuf *); /* Write buffer data to the file */
142 int (*seek)(struct fastbuf *, ucw_off_t, int);/* Slow path for @bseek(), buffer already flushed; returns success */
143 void (*close)(struct fastbuf *); /* Close the stream */
144 int (*config)(struct fastbuf *, uns, int); /* Configure the stream */
145 int can_overwrite_buffer; /* Can the buffer be altered? 0=never, 1=temporarily, 2=permanently */
149 * === Fastbuf on files [[fbparam]]
151 * If you want to use fastbufs to access files, you can choose one of several
152 * back-ends and set their parameters.
159 FB_STD, /* Standard buffered I/O */
160 FB_DIRECT, /* Direct I/O bypassing system caches (see fb-direct.c for a description) */
161 FB_MMAP /* Memory mapped files */
165 * When you open a file fastbuf, you can use this structure to select a back-end
166 * and set its parameters. If you want just an "ordinary" file stream, you can
167 * happily pass NULL instead and the defaults from the configuration file (or
168 * hard-wired defaults if no config file has been read) will be used.
171 enum fb_type type; /* The chosen back-end */
172 uns buffer_size; /* 0 for default size */
173 uns keep_back_buf; /* FB_STD: optimize for bi-directional access */
174 uns read_ahead; /* FB_DIRECT options */
176 struct asio_queue *asio;
180 extern struct cf_section fbpar_cf; /** Configuration section with which you can fill the `fb_params` **/
181 extern struct fb_params fbpar_def; /** The default `fb_params` **/
184 * Opens a file with file mode @mode (see the man page of open()).
185 * Use @params to select the fastbuf back-end and its parameters or
186 * pass NULL if you are fine with defaults.
188 * Dies if the file does not exist.
190 struct fastbuf *bopen_file(const char *name, int mode, struct fb_params *params);
191 struct fastbuf *bopen_file_try(const char *name, int mode, struct fb_params *params); /** Like bopen_file(), but returns NULL on failure. **/
194 * Opens a temporary file.
195 * It is placed with other temp files and it is deleted when closed.
196 * Again, use NULL for @params if you want the defaults.
198 struct fastbuf *bopen_tmp_file(struct fb_params *params);
201 * Creates a fastbuf from a file descriptor @fd and sets its filename
202 * to @name (the name is used only in error messages).
203 * When the fastbuf is closed, the fd is closed as well. You can override
204 * this behavior by calling @bconfig().
206 struct fastbuf *bopen_fd_name(int fd, struct fb_params *params, const char *name);
207 static inline struct fastbuf *bopen_fd(int fd, struct fb_params *params) /** Same as above, but with an auto-generated filename. **/
209 return bopen_fd_name(fd, params, NULL);
213 * Flushes all buffers and makes sure that they are written to the disk.
215 void bfilesync(struct fastbuf *b);
218 * === Fastbufs on regular files [[fbfile]]
220 * If you want to use the `FB_STD` back-end and not worry about setting
221 * up any parameters, there are a couple of shortcuts.
224 struct fastbuf *bopen(const char *name, uns mode, uns buflen); /** Equivalent to @bopen_file() with `FB_STD` back-end. **/
225 struct fastbuf *bopen_try(const char *name, uns mode, uns buflen); /** Equivalent to @bopen_file_try() with `FB_STD` back-end. **/
226 struct fastbuf *bopen_tmp(uns buflen); /** Equivalent to @bopen_tmp_file() with `FB_STD` back-end. **/
227 struct fastbuf *bfdopen(int fd, uns buflen); /** Equivalent to @bopen_fd() with `FB_STD` back-end. **/
228 struct fastbuf *bfdopen_shared(int fd, uns buflen); /** Like @bfdopen(), but it does not close the @fd on @bclose(). **/
231 * === Temporary files [[fbtemp]]
233 * Usually, @bopen_tmp_file() is the best way to obtain a temporary file.
234 * However, in some specific cases you may need more, so there is also a set
235 * of more general functions.
238 #define TEMP_FILE_NAME_LEN 256
241 * Generates a temporary filename and stores it to the @name_buf (of size
242 * at least `TEMP_FILE_NAME_LEN`). If @open_flags is not NULL, flags that
243 * should be OR-ed with other flags to open() will be stored there.
245 * The location and style of temporary files is controlled by the configuration.
246 * By default, the system temp directory (`$TMPDIR` or `/tmp`) is used.
248 * If the location is a publicly writeable directory (like `/tmp`), the
249 * generated filename cannot be guaranteed to be unique, so @open_flags
250 * will include `O_EXCL` and you have to check the result of open() and
253 * This function is not specific to fastbufs, it can be used separately.
255 void temp_file_name(char *name_buf, int *open_flags);
258 * Opens a temporary file and returns its file descriptor.
259 * You specify the file @mode and @open_flags passed to open().
261 * If the @name_buf (of at least `TEMP_FILE_NAME_LEN` chars) is not NULL,
262 * the filename is also stored in it.
264 * This function is not specific to fastbufs, it can be used separately.
266 int open_tmp(char *name_buf, int open_flags, int mode);
269 * Sometimes, a file is created as temporary and then moved to a stable
270 * location. This function takes a fastbuf created by @bopen_tmp_file()
271 * or @bopen_tmp(), marks it as permanent, closes it and renames it to
274 * Please note that it assumes that the temporary file and the @name
275 * are on the same volume (otherwise, rename() fails), so you might
276 * want to configure a special location for the temporary files
279 void bfix_tmp_file(struct fastbuf *fb, const char *name);
281 /* Internal functions of some file back-ends */
283 struct fastbuf *bfdopen_internal(int fd, const char *name, uns buflen);
284 struct fastbuf *bfmmopen_internal(int fd, const char *name, uns mode);
286 extern uns fbdir_cheat;
288 struct fastbuf *fbdir_open_fd_internal(int fd, const char *name, struct asio_queue *io_queue, uns buffer_size, uns read_ahead, uns write_back);
290 void bclose_file_helper(struct fastbuf *f, int fd, int is_temp_file);
293 * === Fastbufs on file fragments [[fblim]]
295 * The `fblim` back-end reads from a file handle, but at most a given
296 * number of bytes. This is frequently used for reading from sockets.
299 struct fastbuf *bopen_limited_fd(int fd, uns bufsize, uns limit); /** Create a fastbuf which reads at most @limit bytes from @fd. **/
302 * === Fastbufs on in-memory streams [[fbmem]]
304 * The `fbmem` back-end keeps the whole contents of the stream
305 * in memory (as a linked list of memory blocks, so address space
306 * fragmentation is avoided).
308 * First, you use @fbmem_create() to create the stream and the fastbuf
309 * used for writing to it. Then you can call @fbmem_clone_read() to get
310 * an arbitrary number of fastbufs for reading from the stream.
313 struct fastbuf *fbmem_create(uns blocksize); /** Create stream and return its writing fastbuf. **/
314 struct fastbuf *fbmem_clone_read(struct fastbuf *f); /** Given a writing fastbuf, create a new reading fastbuf. **/
317 * === Fastbufs on static buffers [[fbbuf]]
319 * The `fbbuf` back-end stores the stream in a given block of memory.
320 * This is useful for parsing and generating of complex data structures.
324 * Creates a read-only fastbuf that takes its data from a given buffer.
325 * The fastbuf structure is allocated by the caller and pointed to by @f.
326 * The @buffer and @size specify the location and size of the buffer.
328 * In some cases, the front-ends can take advantage of rewriting the contents
329 * of the buffer temporarily. In this case, set @can_overwrite as described
330 * in <<internal,Internals>>. If you do not care, keep @can_overwrite zero.
332 * It is not possible to close this fastbuf.
334 void fbbuf_init_read(struct fastbuf *f, byte *buffer, uns size, uns can_overwrite);
337 * Creates a write-only fastbuf which writes into a provided memory buffer.
338 * The fastbuf structure is allocated by the caller and pointed to by @f.
339 * An attempt to write past the end of the buffer dies.
341 * Data are written directly into the buffer, so it is not necessary to call @bflush()
344 * It is not possible to close this fastbuf.
346 void fbbuf_init_write(struct fastbuf *f, byte *buffer, uns size);
348 static inline uns fbbuf_count_written(struct fastbuf *f) /** Returns the number of bytes already written into the buffer, computed as `bptr - bstop`. **/
350 return f->bptr - f->bstop;
354 * === Fastbuf on recyclable growing buffers [[fbgrow]]
356 * The `fbgrow` back-end keeps the stream in a contiguous buffer stored in the
357 * main memory, but unlike <<fbmem,`fbmem`>>, the buffer does not have a fixed
358 * size and it is expanded to accommodate all data.
360 * At every moment, you can use `fastbuf->buffer` to gain access to the stream.
363 struct fastbuf *fbgrow_create(unsigned basic_size); /** Create the growing buffer pre-allocated to @basic_size bytes. **/
364 void fbgrow_reset(struct fastbuf *b); /** Reset stream and prepare for writing. **/
365 void fbgrow_rewind(struct fastbuf *b); /** Prepare for reading (of already written data). **/
368 * === Fastbuf on memory pools [[fbpool]]
370 * The write-only `fbpool` back-end also keeps the stream in a contiguous
371 * buffer, but this time the buffer is allocated from within a memory pool.
375 struct fbpool { /** Structure for fastbufs & mempools. **/
381 * Initialize a new `fbpool`. The structure is allocated by the caller.
383 void fbpool_init(struct fbpool *fb); /** Initialize a new mempool fastbuf. **/
385 * Start a new continuous block and prepare for writing (see <<mempool:mp_start()>>).
386 * Provide the memory pool you want to use for this block as @mp.
388 void fbpool_start(struct fbpool *fb, struct mempool *mp, uns init_size);
390 * Close the block and return the address of its start (see <<mempool:mp_end()>>).
391 * The length can be determined by calling <<mempool:mp_size(mp, ptr)>>.
393 void *fbpool_end(struct fbpool *fb);
396 * === Atomic files for multi-threaded programs [[fbatomic]]
398 * This fastbuf backend is designed for cases when several threads
399 * of a single program append records to a common file and while the
400 * records can mix in an arbitrary way, the bytes inside a single
401 * record must remain uninterrupted.
403 * In case of files with fixed record size, we just allocate the
404 * buffer to hold a whole number of records and take advantage
405 * of the atomicity of the write() system call.
407 * With variable-sized records, we need another solution: when
408 * writing a record, we keep the fastbuf in a locked state, which
409 * prevents buffer flushing (and if the buffer becomes full, we extend it),
410 * and we wait for an explicit commit operation which write()s the buffer
411 * if the free space in the buffer falls below the expected maximum record
414 * Please note that initialization of the clones is not thread-safe,
415 * so you have to serialize it yourself.
420 struct fb_atomic_file *af;
421 byte *expected_max_bptr;
424 #define FB_ATOMIC(f) ((struct fb_atomic *)(f)->is_fastbuf) /* Casts @f to `struct fb_atomic *`; the cast goes through the `is_fastbuf` member, so it compiles only if @f points to a structure that has that member */
427 * Open an atomic fastbuf.
428 * If @master is NULL, the file @name is opened. If it is non-null,
429 * a new clone of an existing atomic fastbuf is created.
431 * If the file has fixed record length, just set @record_len to it.
432 * Otherwise set @record_len to the expected maximum record length
433 * with a negative sign (you need not fit in this length, but as long
434 * as you do, the fastbuf is more efficient) and call @fbatomic_commit()
437 * You can specify @record_len, if it is known (for optimisations).
439 * The file is closed when all fastbufs using it are closed.
441 struct fastbuf *fbatomic_open(const char *name, struct fastbuf *master, uns bufsize, int record_len);
442 void fbatomic_internal_write(struct fastbuf *b);
445 * Declare that you have finished writing a record. This is required only
446 * if a fixed record size was not specified.
448 static inline void fbatomic_commit(struct fastbuf *b)
450 if (b->bptr >= ((struct fb_atomic *)b)->expected_max_bptr) /* The write cursor has reached the `expected_max_bptr` threshold ... */
451 fbatomic_internal_write(b); /* ... so call fbatomic_internal_write() on the fastbuf now; otherwise nothing is done */
454 /*** === Configuring stream parameters [[bconfig]] ***/
456 enum bconfig_type { /** Parameters that could be configured. **/
457 BCONFIG_IS_TEMP_FILE, /* 0=normal file, 1=temporary file, 2=shared fd */
458 BCONFIG_KEEP_BACK_BUF, /* Optimize for bi-directional access */
461 int bconfig(struct fastbuf *f, uns type, int data); /** Configure a fastbuf. Returns previous value. **/
463 /*** === Universal functions working on all fastbuf's [[ffbasic]] ***/
466 * Close and free fastbuf.
467 * Cannot be used for fastbufs that were not returned from a function (i.e. those initialized in a caller-provided structure, for example by `fbbuf_init_read`).
469 void bclose(struct fastbuf *f);
470 void bflush(struct fastbuf *f); /** Write data (if it makes any sense, do not use for in-memory buffers). **/
471 void bseek(struct fastbuf *f, ucw_off_t pos, int whence); /** Seek in the buffer. See `man fseek` for description of @whence. Only for seekable fastbufs. **/
472 void bsetpos(struct fastbuf *f, ucw_off_t pos); /** Set position to @pos bytes from beginning. Only for seekable fastbufs. **/
473 void brewind(struct fastbuf *f); /** Go to the beginning of the fastbuf. Only for seekable ones. **/
474 ucw_off_t bfilesize(struct fastbuf *f); /** How large is the file? -1 if not seekable. **/
476 static inline ucw_off_t btell(struct fastbuf *f) /** Where am I (from the beginning)? Computed from `pos`, the file position of `bstop`, adjusted by the signed distance of `bptr` from `bstop`. **/
478 return f->pos + (f->bptr - f->bstop);
481 int bgetc_slow(struct fastbuf *f); /* Out-of-line path of @bgetc(), called when `bptr` is not below `bstop` */
482 static inline int bgetc(struct fastbuf *f) /** Return next character from the buffer. **/
484 return (f->bptr < f->bstop) ? (int) *f->bptr++ : bgetc_slow(f); /* Fast path: take the byte at `bptr` and advance the cursor; otherwise defer to bgetc_slow() */
487 int bpeekc_slow(struct fastbuf *f); /* Out-of-line path of @bpeekc(), called when `bptr` is not below `bstop` */
488 static inline int bpeekc(struct fastbuf *f) /** Return next character from the buffer, but keep the current position. **/
490 return (f->bptr < f->bstop) ? (int) *f->bptr : bpeekc_slow(f); /* Fast path: read the byte at `bptr` without advancing; otherwise defer to bpeekc_slow() */
493 static inline void bungetc(struct fastbuf *f) /** Return last read character back. Only one back is guaranteed to work. **/
498 void bputc_slow(struct fastbuf *f, uns c);
499 static inline void bputc(struct fastbuf *f, uns c) /** Write a single character. **/
501 if (f->bptr < f->bufend)
507 static inline uns bavailr(struct fastbuf *f) /** Returns `bstop - bptr`: in reading mode, the number of buffered bytes not yet consumed. **/
509 return f->bstop - f->bptr;
512 static inline uns bavailw(struct fastbuf *f) /** Returns `bufend - bptr`: the number of bytes between the cursor and the end of the buffer, i.e. the room left for writing. **/
514 return f->bufend - f->bptr;
517 uns bread_slow(struct fastbuf *f, void *b, uns l, uns check);
519 * Read at most @l bytes of data into @b.
520 * Returns number of bytes read.
521 * 0 means end of file.
523 static inline uns bread(struct fastbuf *f, void *b, uns l)
527 memcpy(b, f->bptr, l);
532 return bread_slow(f, b, l, 0);
536 * Reads exactly @l bytes of data into @b.
537 * If at the end of file, it returns 0.
538 * If there are data, but less than @l, it dies.
540 static inline uns breadb(struct fastbuf *f, void *b, uns l)
544 memcpy(b, f->bptr, l);
549 return bread_slow(f, b, l, 1);
552 void bwrite_slow(struct fastbuf *f, const void *b, uns l);
553 static inline void bwrite(struct fastbuf *f, const void *b, uns l) /** Writes buffer @b of length @l into fastbuf. **/
557 memcpy(f->bptr, b, l);
561 bwrite_slow(f, b, l);
565 * Reads a line into @b and strips trailing `\n`.
566 * Returns pointer to the terminating 0 or NULL on `EOF`.
567 * Dies if the line is longer than @l.
569 char *bgets(struct fastbuf *f, char *b, uns l);
570 char *bgets0(struct fastbuf *f, char *b, uns l); /** The same as @bgets(), but for 0-terminated strings. **/
572 * Returns either length of read string (excluding the terminator) or -1 if it is too long.
573 * In such cases exactly @l bytes are read.
575 int bgets_nodie(struct fastbuf *f, char *b, uns l);
580 * Read a string, strip the trailing `\n` and store it into growing buffer @b.
581 * Dies if the line is longer than @limit.
583 uns bgets_bb(struct fastbuf *f, struct bb_t *b, uns limit);
585 * Read a string, strip the trailing `\n` and store it into buffer allocated from a memory pool.
587 char *bgets_mp(struct fastbuf *f, struct mempool *mp);
589 struct bgets_stk_struct {
591 byte *old_buf, *cur_buf, *src;
592 uns old_len, cur_len, src_len;
594 void bgets_stk_init(struct bgets_stk_struct *s);
595 void bgets_stk_step(struct bgets_stk_struct *s);
598 * Read a string, strip the trailing `\n` and store it on the stack (allocated using alloca()).
600 #define bgets_stk(fb) \
601 ({ struct bgets_stk_struct _s; _s.f = (fb); for (bgets_stk_init(&_s); _s.cur_len; _s.cur_buf = alloca(_s.cur_len), bgets_stk_step(&_s)); _s.cur_buf; })
604 * Write a string, without 0 or `\n` at the end.
606 static inline void bputs(struct fastbuf *f, const char *b)
608 bwrite(f, b, strlen(b));
612 * Write string, including terminating 0.
614 static inline void bputs0(struct fastbuf *f, const char *b)
616 bwrite(f, b, strlen(b)+1);
620 * Write string and append a newline to the end.
622 static inline void bputsn(struct fastbuf *f, const char *b)
628 void bbcopy_slow(struct fastbuf *f, struct fastbuf *t, uns l);
630 * Copy @l bytes of data from fastbuf @f to fastbuf @t.
631 * `UINT_MAX` (`~0U`) means all data, even if more than `UINT_MAX` bytes remain.
633 static inline void bbcopy(struct fastbuf *f, struct fastbuf *t, uns l)
635 if (bavailr(f) >= l && bavailw(t) >= l)
637 memcpy(t->bptr, f->bptr, l);
642 bbcopy_slow(f, t, l);
645 int bskip_slow(struct fastbuf *f, uns len);
646 static inline int bskip(struct fastbuf *f, uns len) /** Skip @len bytes without reading them. **/
648 if (bavailr(f) >= len)
654 return bskip_slow(f, len);
657 /*** === Direct I/O on buffers ***/
658 // TODO Documentation -- what do they do?
661 bdirect_read_prepare(struct fastbuf *f, byte **buf)
663 if (f->bptr == f->bstop && !f->refill(f))
665 *buf = NULL; // This is not needed, but it helps to get rid of spurious warnings
673 bdirect_read_commit(struct fastbuf *f, byte *pos)
679 bdirect_read_commit_modified(struct fastbuf *f, byte *pos)
682 f->buffer = pos; /* Avoid seeking backwards in the buffer */
686 bdirect_write_prepare(struct fastbuf *f, byte **buf)
688 if (f->bptr == f->bufend)
695 bdirect_write_commit(struct fastbuf *f, byte *pos)
700 /*** === Formatted output ***/
703 * printf into a fastbuf.
705 int bprintf(struct fastbuf *b, const char *msg, ...)
706 FORMAT_CHECK(printf,2,3);
707 int vbprintf(struct fastbuf *b, const char *msg, va_list args); /** vprintf into a fastbuf. **/