]> mj.ucw.cz Git - xsv.git/blobdiff - xsv.c
Convert a couple of remaining putchar's to unlocked
[xsv.git] / xsv.c
diff --git a/xsv.c b/xsv.c
index 6263da080216392d87dd42cf2a407ca2ecc3865c..0d2d972b055544ddab45ad06132ed09ea4e2b8c6 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -1,5 +1,5 @@
 /*
- *     A Swiss-Army Knife for CSV-like Files
+ *     The Swiss-Army Knife for CSV-like Files
  *
  *     (c) 2012 Martin Mares <mj@ucw.cz>
  */
@@ -9,28 +9,57 @@
 #include <string.h>
 #include <stdarg.h>
 #include <getopt.h>
+#include <wchar.h>
+#include <locale.h>
 
 #include <pcre.h>
 
+#ifdef __GNUC__
+#define NONRET __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#else
+#define NONRET
+#define UNUSED
+#endif
+
+static void select_fields(void);
+static void select_all_fields(void);
+
+/*** General functions ***/
+
+static void NONRET die(const char *msg, ...)   // Report a fatal error on stderr and exit(1); msg is a printf-style format
+{
+       va_list args;
+       va_start(args, msg);
+       fprintf(stderr, "xsv: ");               // every fatal message is prefixed with the program name
+       vfprintf(stderr, msg, args);
+       fputc('\n', stderr);                    // callers pass messages without a trailing newline
+       va_end(args);
+       exit(1);
+}
+
 /*** Memory allocation ***/
 
 static void *xmalloc(size_t bytes)
 {
        void *p = malloc(bytes);
-       if (!p) {
-               fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes);
-               exit(1);
-       }
+       if (!p)
+               die("Out of memory (cannot allocate %zu bytes)", bytes);
+       return p;
+}
+
+static void *xmalloc_zero(size_t bytes)
+{
+       void *p = xmalloc(bytes);
+       memset(p, 0, bytes);
        return p;
 }
 
 static void *xrealloc(void *old, size_t bytes)
 {
        void *p = realloc(old, bytes);
-       if (!p) {
-               fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes);
-               exit(1);
-       }
+       if (!p)
+               die("Out of memory (cannot allocate %zu bytes)", bytes);
        return p;
 }
 
@@ -51,6 +80,9 @@ static void *xrealloc(void *old, size_t bytes)
        static inline type *name##_nth(name##_t *b, int n) { return &b->start[n]; }             \
        // end
 
+DECLARE_BUF(intarray, int);
+DECLARE_BUF(stringarray, char *);
+
 /*** Formats and their parameters ***/
 
 enum format_id {
@@ -59,6 +91,8 @@ enum format_id {
        FORM_CSV,
        FORM_WS,
        FORM_REGEX,
+       FORM_TMP,
+       FORM_TABLE,
 };
 
 struct format {
@@ -66,18 +100,34 @@ struct format {
        int fs;
        int quote;
        int quiet;
-       int (*read_line)(void);
-       void (*write_line)(void);
+       int sloppy;
+       int (*read_line)(struct format *fmt);
+       void (*write_line)(struct format *fmt);
+       void (*write_grid)(struct format *fmt, int pos);        // -1=above, 1=below, 0=after header
+       int needs_stats;
+
+       // Field names
+       int has_header;
+       char *set_field_names;
+       struct field_names *field_names;
+
        // CSV backend:
        int always_quote;
-       // WS backend:
-       int strict_ws;
+
        // regex backend:
        pcre *pcre;
        pcre_extra *pcre_extra;
+
+       // Temporary file backend:
+       FILE *tmp_file;
+
+       // Table backend:
+       int table_sep;
+       int table_grid;
 };
 
 static struct format *in_format, *out_format;
+static int want_trim, want_equalize, want_stats;
 
 struct field {
        int start_pos;
@@ -92,6 +142,34 @@ static struct field *in_field;
 static line_t in_line;
 static int line_number;
 
+static int read_line(void)             // Read the next input line into in_line/in_fields; returns 0 at end of input
+{
+       fields_reset(&in_fields);
+       line_reset(&in_line);
+       in_field = NULL;
+       int got_line = in_format->read_line(in_format);
+       // A failed read looks like end of file to the back-ends, so check the stream even if no line came back
+       if (ferror_unlocked(stdin))
+               die("I/O error when reading standard input");
+       return got_line;
+}
+
+static void write_line(void)           // Emit the current out_fields through the output format's writer
+{
+       out_format->write_line(out_format);
+       if (ferror_unlocked(stdout))
+               die("I/O error when writing standard output");
+}
+
+static void write_grid(int pos)                // Draw a grid line if the output format has one; pos: -1=above, 0=after header, 1=below
+{
+       if (out_format->write_grid) {           // optional hook: only some formats set it
+               out_format->write_grid(out_format, pos);
+               if (ferror_unlocked(stdout))
+                       die("I/O error when writing standard output");
+       }
+}
+
 static void new_field(int pos)
 {
        in_field = fields_push(&in_fields);
@@ -105,13 +183,20 @@ static void ensure_field(int pos)
                new_field(pos);
 }
 
+static unsigned char *get_field(fields_t *fields, int i, int *len)     // Return the start of field i and store its byte length in *len
+{
+       struct field *f = fields_nth(fields, i);
+       *len = f->len;
+       return line_nth(&in_line, f->start_pos);        // the bytes are looked up in in_line, whichever field list is passed
+}
+
 static void warn(struct format *fmt, char *msg, ...)
 {
        if (!fmt->quiet) {
                fprintf(stderr, "Warning at line %d: ", line_number);
                va_list args;
                va_start(args, msg);
-               vfprintf(stderr, args, msg);
+               vfprintf(stderr, msg, args);
                va_end(args);
                fputc('\n', stderr);
        }
@@ -120,7 +205,7 @@ static void warn(struct format *fmt, char *msg, ...)
 static int next_line(void)
 {
        for (;;) {
-               int c = getchar();
+               int c = getchar_unlocked();
                if (c == '\r')
                        continue;
                if (c < 0)
@@ -131,37 +216,79 @@ static int next_line(void)
        }
 }
 
-static int csv_read(void)
+static int field_chars(struct field *f)        // Count the multibyte characters (not bytes) of field f in in_line
+{
+       unsigned char *s = line_nth(&in_line, f->start_pos);
+       int i = 0;
+       mbstate_t mbs;
+       memset(&mbs, 0, sizeof(mbs));           // zeroed state = initial conversion state
+
+       int chars = 0;
+       while (i < f->len) {
+               size_t k = mbrlen((char *) s + i, f->len - i, &mbs);
+               if ((int) k <= 0)               // 0 = NUL byte, (size_t) -1 / -2 = invalid / incomplete sequence: stop counting here
+                       break;
+               i += k;
+               chars++;
+       }
+
+       return chars;
+}
+
+/*** Field statistics ***/
+
+static intarray_t column_widths;
+
+static void update_stats(void)         // Widen column_widths so that every current out_field fits; no-op unless want_stats
+{
+       if (!want_stats)
+               return;
+
+       for (int i = 0; i < fields_count(&out_fields); i++) {
+               struct field *f = fields_nth(&out_fields, i);
+               intarray_t *w = &column_widths;
+
+               while (i >= intarray_count(w))  // column seen for the first time: start it at width 0
+                       *intarray_push(w) = 0;
+               int fw = field_chars(f);        // width is measured in characters, not bytes
+               if (*intarray_nth(w, i) < fw)   // keep the maximum seen so far
+                       *intarray_nth(w, i) = fw;
+       }
+}
+
+/*** CSV/TSV back-end ***/
+
+static int csv_read(struct format *fmt)
 {
        int quoted = 0;
        for (;;) {
-               int c = getchar();
+               int c = getchar_unlocked();
                int i = line_count(&in_line);
 restart:
                if (c == '\r')
                        continue;
                if (c < 0 || c == '\n') {
                        if (quoted)
-                               warn(in_format, "Missing closing quote.");
+                               warn(fmt, "Missing closing quote.");
                        if (c < 0)
                                return !!fields_count(&in_fields);
                        else
                                return 1;
                }
                if (quoted) {
-                       if (c == in_format->quote) {
-                               c = getchar();
-                               if (c != in_format->quote) {
+                       if (c == fmt->quote) {
+                               c = getchar_unlocked();
+                               if (c != fmt->quote) {
                                        quoted = 0;
                                        goto restart;
                                }
                                // Two quotes assimilate to one
                        }
                        // Fall through to pushing the character
-               } else if (c == in_format->quote) {
+               } else if (c == fmt->quote) {
                        quoted = 1;
                        continue;
-               } else if (c == in_format->fs && !quoted) {
+               } else if (c == fmt->fs && !quoted) {
                        ensure_field(i);
                        new_field(i);
                        continue;
@@ -177,40 +304,41 @@ static int is_ws(int c)
        return (c == ' ' || c == '\t' || c == '\f');
 }
 
-static void csv_write(void)
+static void csv_write(struct format *fmt)
 {
-       unsigned char *line = line_first(&in_line);
-       int n = fields_count(&out_fields);
-       for (int i=0; i<n; i++) {
-               struct field *f = fields_nth(&out_fields, i);
+       for (int i=0; i < fields_count(&out_fields); i++) {
+               int len;
+               unsigned char *p = get_field(&out_fields, i, &len);
+
                int need_quotes = 0;
-               if (out_format->quote >= 0) {
-                       need_quotes = out_format->always_quote;
-                       for (int j=0; !need_quotes && j < f->len; j++) {
-                               int c = line[f->start_pos + j];
-                               if (c == out_format->fs || c == out_format->quote)
+               if (fmt->quote >= 0) {
+                       need_quotes = fmt->always_quote;
+                       for (int j=0; !need_quotes && j < len; j++) {
+                               if (p[j] == fmt->fs || p[j] == fmt->quote)
                                        need_quotes = 1;
                        }
                }
                if (i)
-                       putchar(out_format->fs);
+                       putchar_unlocked(fmt->fs);
                if (need_quotes)
-                       putchar(out_format->quote);
-               for (int j=0; j < f->len; j++) {
-                       int c = line[f->start_pos + j];
-                       if (c == out_format->fs && !need_quotes)
-                               warn(out_format, "Field separator found inside field and quoting is turned off.");
-                       if (c == out_format->quote)
-                               putchar(c);
-                       putchar(c);
+                       putchar_unlocked(fmt->quote);
+               for (int j=0; j < len; j++) {
+                       int c = p[j];
+                       if (c == fmt->fs && !need_quotes)
+                               warn(fmt, "Field separator found inside field and quoting is turned off.");
+                       if (c == fmt->quote)
+                               putchar_unlocked(c);
+                       putchar_unlocked(c);
                }
                if (need_quotes)
-                       putchar(out_format->quote);
+                       putchar_unlocked(fmt->quote);
        }
-       putchar('\n');
+       putchar_unlocked('\n');
 }
 
-static int ws_read(void)
+/*** White-space back-end ***/
+
+static int ws_read(struct format *fmt)
 {
        if (!next_line())
                return 0;
@@ -230,7 +358,7 @@ static int ws_read(void)
                        if (ws) {
                                if (!in_field->start_pos &&
                                    !in_field->len &&
-                                   !in_format->strict_ws)
+                                   fmt->sloppy)
                                        in_field->start_pos = i;
                                else
                                        new_field(i);
@@ -240,11 +368,13 @@ static int ws_read(void)
                }
        }
 
-       if (ws && in_format->strict_ws)
+       if (ws && !fmt->sloppy)
                new_field(n);
        return 1;
 }
 
+/*** Regex back-end ***/
+
 static const char *regex_set(struct format *f, char *rx)
 {
        const char *err;
@@ -260,7 +390,7 @@ static const char *regex_set(struct format *f, char *rx)
        return NULL;
 }
 
-static int regex_read(void)
+static int regex_read(struct format *fmt)
 {
        if (!next_line())
                return 0;
@@ -273,21 +403,143 @@ static int regex_read(void)
        int i = 0;
        for (;;) {
                int ovec[3];
-               int sep = pcre_exec(in_format->pcre, in_format->pcre_extra, (char *) c, n, i, 0, ovec, 3);
-               if (sep < 0) {
-                       if (sep != PCRE_ERROR_NOMATCH)
-                               warn(in_format, "PCRE matching error %d", sep);
+               int err = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3);
+               if (err < 0) {
+                       if (err != PCRE_ERROR_NOMATCH)
+                               warn(fmt, "PCRE matching error %d", err);
                        // No further occurrence of the separator: the rest is a single field
+                       if (!fmt->sloppy || i < n) {
+                               new_field(i);
+                               in_field->len = n - i;
+                       }
+                       return 1;
+               }
+               if (ovec[0] == ovec[1]) {
+                       warn(fmt, "Regular expression matched an empty separator.");
                        new_field(i);
                        in_field->len = n - i;
                        return 1;
                }
-               new_field(i);
-               in_field->len = ovec[0] - i;
+               if (!fmt->sloppy || ovec[0]) {
+                       new_field(i);
+                       in_field->len = ovec[0] - i;
+               }
                i = ovec[1];
        }
 }
 
+/*** Table back-end ***/
+
+static void table_write(struct format *fmt)    // Write out_fields as one table row, each cell padded to its column width
+{
+       for (int i = 0; i < intarray_count(&column_widths); i++) {
+               if (fmt->table_grid) {
+                       putchar_unlocked('|');
+                       printf("%*s", fmt->table_sep / 2, "");  // left part of the cell padding
+               } else if (i)
+                       printf("%*s", fmt->table_sep, "");      // without a grid, the separator goes between columns only
+
+               int cw = *intarray_nth(&column_widths, i);
+               int fw = 0;
+               if (i < fields_count(&out_fields)) {
+                       int len;
+                       unsigned char *p = get_field(&out_fields, i, &len);
+                       fw = field_chars(fields_nth(&out_fields, i));
+                       if (fw > cw) {
+                               warn(fmt, "Internal error: Wrongly calculated width of column %d (%d > %d)", i, fw, cw);
+                               cw = fw;
+                       }
+                       while (len--)           // output is byte-wise, the padding below is character-wise
+                               putchar_unlocked(*p++);
+               }
+               while (fw < cw) {               // pad short fields; a missing field becomes cw spaces
+                       putchar_unlocked(' ');
+                       fw++;
+               }
+
+               if (fmt->table_grid)
+                       printf("%*s", fmt->table_sep - fmt->table_sep / 2, ""); // right part gets the odd space, if any
+       }
+
+       if (fmt->table_grid)
+               putchar_unlocked('|');
+       putchar_unlocked('\n');
+}
+
+static void table_write_grid(struct format *fmt, int pos UNUSED)       // Write one "+----+----+" rule; looks the same at every position
+{
+       if (!fmt->table_grid)
+               return;
+
+       for (int i = 0; i < intarray_count(&column_widths); i++) {
+               putchar_unlocked('+');
+               int w = fmt->table_sep + *intarray_nth(&column_widths, i);      // cell width = padding + column width
+               while (w--)                     // NOTE(review): does not stop at 0 if w is negative; --table-sep is not range-checked
+                       putchar_unlocked('-');
+       }
+       putchar_unlocked('+');
+       putchar_unlocked('\n');
+}
+
+/*** Temporary file back-end ***/
+
+static int tmp_read(struct format *fmt)        // Read back one record written by tmp_write(); returns 0 at end of file
+{
+       FILE *tf = fmt->tmp_file;
+
+       for (;;) {
+               int c = getc_unlocked(tf);
+               if (c < 0) {
+                       if (ferror_unlocked(tf))        // tell a read error apart from a clean end of file
+                               die("I/O error when reading temporary file");
+                       return 0;
+               }
+               if (c == 0xff)                  // end-of-line marker
+                       return 1;
+               if (c == 0xfe) {                // long field: the length follows as 4 big-endian bytes
+                       c = getc_unlocked(tf);
+                       c = (c << 8) | getc_unlocked(tf);
+                       c = (c << 8) | getc_unlocked(tf);
+                       c = (c << 8) | getc_unlocked(tf);
+               }
+               new_field(line_count(&in_line));
+               in_field->len = c;
+               while (c--) {                   // copy the field body into in_line
+                       int x = getc_unlocked(tf);
+                       if (x < 0)
+                               die("Truncated temporary file");
+                       *line_push(&in_line) = x;
+               }
+       }
+}
+
+static void tmp_write(struct format *fmt)      // Append out_fields to the temporary file as one length-prefixed record
+{
+       FILE *tf = fmt->tmp_file;
+
+       for (int i = 0; i < fields_count(&out_fields); i++) {
+               int len;
+               unsigned char *p = get_field(&out_fields, i, &len);
+
+               if (len < 0xfe)                 // short field: the length fits in a single byte
+                       putc_unlocked(len, tf);
+               else {
+                       putc_unlocked(0xfe, tf);        // escape byte, then the length as 4 big-endian bytes
+                       putc_unlocked((len >> 24) & 0xff, tf);
+                       putc_unlocked((len >> 16) & 0xff, tf);
+                       putc_unlocked((len >> 8) & 0xff, tf);
+                       putc_unlocked(len & 0xff, tf);
+               }
+
+               while (len--)
+                       putc_unlocked(*p++, tf);
+       }
+       putc_unlocked(0xff, tf);                // end-of-line marker; cannot be mistaken for a length byte
+
+       if (ferror_unlocked(tf))
+               die("I/O error when writing temporary file");
+}
+
 /*** Transforms ***/
 
 static void trim_fields(void)
@@ -302,15 +554,166 @@ static void trim_fields(void)
        }
 }
 
+static void equalize_fields(void)      // Pad out_fields with empty fields up to the number of columns in column_widths
+{
+       while (fields_count(&out_fields) < intarray_count(&column_widths)) {
+               struct field *f = fields_push(&out_fields);
+               f->start_pos = f->len = 0;      // zero length, so the start position does not matter
+       }
+}
+
+/*** Field names and headers ***/
+
+struct field_names {
+       stringarray_t names;            // NUL-terminated copies made by add_field(); names[i] belongs to field number i+1
+};
+
+static void add_field(struct field_names *fn, char *name, int namelen) // Append a copy of the first namelen bytes of name to fn
+{
+       char *n = xmalloc(namelen + 1);
+       memcpy(n, name, namelen);
+       n[namelen] = 0;                 // name need not be NUL-terminated, the stored copy always is
+       *stringarray_push(&fn->names) = n;
+}
+
+static void add_field_names(struct field_names *fn, char *names)       // Split a comma-separated list and add every item to fn
+{
+       char *p = names;
+       while (p) {
+               char *q = strchr(p, ',');
+               int len = q ? q-p : (int) strlen(p);    // the last item runs to the end of the string
+               add_field(fn, p, len);                  // empty items are added as empty names
+               p = q ? q+1 : NULL;
+       }
+}
+
+static void read_header(void)          // Fill in_format->field_names from the header line and/or the names given by --fields
+{
+       if (!(in_format->has_header || in_format->set_field_names))
+               return;
+
+       struct field_names *fn = xmalloc_zero(sizeof(*fn));
+       in_format->field_names = fn;
+
+       if (in_format->has_header) {
+               if (!read_line())               // NOTE(review): line_number is not advanced here, so warnings report line 0 -- confirm intended
+                       die("Missing input header");
+       }
+
+       if (in_format->set_field_names) {       // explicit names win; a header line was still consumed above
+               add_field_names(fn, in_format->set_field_names);
+       } else {
+               for (int i = 0; i < fields_count(&in_fields); i++) {
+                       int len;
+                       char *s = (char *) get_field(&in_fields, i, &len);
+                       add_field(fn, s, len);
+               }
+       }
+}
+
+static void write_header(void)         // Write the top grid line and, if requested, the header row of field names
+{
+       if (!out_format->has_header) {
+               write_grid(-1);
+               return;
+       }
+
+       int want_select_fields = 0;
+       if (out_format->set_field_names) {      // names given explicitly for the output: use them as they are
+               struct field_names *fn = xmalloc_zero(sizeof(*fn));
+               out_format->field_names = fn;
+               add_field_names(fn, out_format->set_field_names);
+       } else if (in_format->field_names) {    // inherit the input names and run them through the field selection
+               out_format->field_names = in_format->field_names;
+               want_select_fields = 1;
+       } else
+               die("Output header requested, but no field names specified");
+
+       line_reset(&in_line);                   // build a fake input line whose fields are the names
+       fields_reset(&in_fields);
+       struct field_names *fn = out_format->field_names;
+       for (int i = 0; i < stringarray_count(&fn->names); i++) {
+               struct field *f = fields_push(&in_fields);
+               f->start_pos = line_count(&in_line);
+               f->len = 0;
+               char *s = *stringarray_nth(&fn->names, i);
+               while (*s) {
+                       *line_push(&in_line) = *s++;
+                       f->len++;
+               }
+       }
+
+       fields_reset(&out_fields);
+       if (want_select_fields)
+               select_fields();
+       else
+               select_all_fields();
+
+       // This is tricky: when we are formatting a table, column widths are normally
+       // calculated in pass 1, but the header is written in pass 2, so we have to
+       // update column statistics here, because a field name can be too wide to fit.
+       want_stats++;
+       update_stats();
+       want_stats--;
+       if (want_equalize)
+               equalize_fields();
+       write_grid(-1);
+       write_line();
+       write_grid(0);
+}
+
+static void write_footer(void)         // Finish the output: only the grid line below the data, if the format draws one
+{
+       write_grid(1);
+}
+
+static int find_field_by_name(struct field_names *fn, char *name)      // Return the 1-based number of the first field called name, or -1
+{
+       for (int i = 0; i < stringarray_count(&fn->names); i++)
+               if (!strcmp(*stringarray_nth(&fn->names, i), name))     // exact, case-sensitive match
+                       return i + 1;
+       return -1;
+}
+
 /*** Field selection ***/
 
 struct selector {
-       int first_field, last_field;
+       int first_field, last_field;            // 0 means "boundary"
 };
 
 DECLARE_BUF(selectors, struct selector);
 static selectors_t selectors;
 
+static int parse_field_num(char *str)  // Parse an unsigned decimal number; returns -1 if str is not one (an empty str gives 0)
+{
+       int f = 0;
+
+       while (*str) {
+               if (*str < '0' || *str > '9')
+                       return -1;
+               if (f >= 100000000)             // at most 9 digits are accepted, which keeps f below 10^9
+                       return -1;
+               f = 10*f + *str - '0';
+               str++;
+       }
+       return f;
+}
+
+static int parse_field(char *str)      // Turn a field number or name into a field number; 0 for an empty string
+{
+       if (!*str)
+               return 0;
+
+       int f = parse_field_num(str);
+       if (f > 0)
+               return f;
+
+       if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0)
+               return f;
+
+       die("Unknown field `%s'", str);         // die() does not return, so no return value is needed here
+}
+
 static char *parse_selector(char *str)
 {
        char buf[strlen(str) + 1];
@@ -320,10 +723,10 @@ static char *parse_selector(char *str)
        char *sep = strchr(buf, '-');
        if (sep) {
                *sep++ = 0;
-               s->first_field = atoi(buf);
-               s->last_field = atoi(sep);
+               s->first_field = parse_field(buf);
+               s->last_field = parse_field(sep);
        } else
-               s->first_field = s->last_field = atoi(buf);
+               s->first_field = s->last_field = parse_field(buf);
 
        return NULL;
 }
@@ -354,32 +757,101 @@ static void select_fields(void)
        }
 }
 
+static void select_all_fields(void)    // Append a copy of every input field to out_fields, in input order
+{
+       for (int i = 0; i < fields_count(&in_fields); i++)
+               *fields_push(&out_fields) = *fields_nth(&in_fields, i);
+}
+
+/*** Processing of files ***/
+
+static void one_pass(int pass)         // pass is a bit mask: 1 = trim and select fields, 2 = write header/footer and equalize
+{
+       if (pass & 2)
+               write_header();
+
+       for (;;) {
+               line_number++;                  // counted before reading, so warnings refer to the line being read
+               if (!read_line())
+                       break;
+
+               if (want_trim && (pass & 1))
+                       trim_fields();
+
+               fields_reset(&out_fields);
+               if (pass & 1)
+                       select_fields();
+               else
+                       select_all_fields();    // without bit 1, all input fields are passed through
+
+               if (want_equalize && (pass & 2))
+                       equalize_fields();
+               update_stats();                 // does nothing unless want_stats is set
+               write_line();
+       }
+
+       if (pass & 2)
+               write_footer();
+}
+
+static void two_pass(void)             // Store the selected fields in a temporary file, then format them with known widths
+{
+       struct format *final_format = out_format;
+
+       // Pass 1: Use character set info from the current locale, set up writer of intermediate format
+       setlocale(LC_CTYPE, "");
+       out_format = xmalloc_zero(sizeof(*out_format));
+       out_format->id = FORM_TMP;
+       out_format->read_line = tmp_read;
+       out_format->write_line = tmp_write;
+       out_format->tmp_file = tmpfile();
+       if (!out_format->tmp_file)              // tmpfile() returns NULL on failure
+               die("Cannot create temporary file");
+       out_format->field_names = in_format->field_names;
+       one_pass(1);
+
+       // Pass 2: Set up reader of intermediate format
+       in_format = out_format;
+       rewind(in_format->tmp_file);
+       line_number = 0;
+       out_format = final_format;
+       want_stats = 0;                         // widths are complete after pass 1; write_header() re-enables stats for the header
+       one_pass(2);
+       fclose(in_format->tmp_file);
+}
+
 /*** Parsing of arguments ***/
 
-static void usage(void)
+static void NONRET usage(void)
 {
        printf("\
 Usage: xsv <in-format> [<out-format>] <options> [<fields>]\n\
 \n\
 Formats:\n\
--t, --tsv              TAB-separated values (default)\n\
+-t, --tsv              Tab-separated values (default)\n\
 -c, --csv              Comma-separated values\n\
 -w, --ws               Values separated by arbitrary whitespace\n\
--W, --strict-ws                Like --ws, but recognize empty columns at start/end\n\
 -r, --regex=<rx>       Separator given by Perl regular expression (input only)\n\
+    --table            Format a table (output only)\n\
 \n\
 Format parameters:\n\
 -d, --fs=<char>                Delimiter of fields\n\
+-f, --fields=<f>,...   Set field names\n\
+-h, --header           The first line contains field names\n\
 -q, --quiet            Do not show warnings\n\
     --always-quote     Put quotes around all fields (CSV output only)\n\
+    --table-sep=<n>    Separate table columns by <n> spaces (default: 2)\n\
+    --grid             Separate table columns by grid lines\n\
+-s, --sloppy           Ignore separators at the start/end of line (ws/regex only)\n\
 \n\
 Other options:\n\
     --trim             Trim leading and trailing whitespaces in fields\n\
+    --equalize         Pad all lines to the maximum number of fields\n\
 ");
        exit(0);
 }
 
-static void bad_args(const char *msg, ...)
+static void NONRET bad_args(const char *msg, ...)
 {
        if (msg) {
                va_list args;
@@ -393,32 +865,43 @@ static void bad_args(const char *msg, ...)
        exit(1);
 }
 
-static const char short_options[] = "cd:qr:twW";
+static const char short_options[] = "cd:f:hqr:stw";    // one letter per short-option case in main(); ':' = takes an argument
 
 enum long_options {
        OPT_HELP = 256,
+       OPT_VERSION,
        OPT_TRIM,
        OPT_ALWAYS_QUOTE,
+       OPT_TABLE,
+       OPT_TABLE_SEP,
+       OPT_GRID,
+       OPT_EQUALIZE,
 };
 
 static const struct option long_options[] = {
        { "always-quote",       0,      NULL,   OPT_ALWAYS_QUOTE },
        { "csv",                0,      NULL,   'c' },
+       { "equalize",           0,      NULL,   OPT_EQUALIZE },
+       { "fields",             1,      NULL,   'f' },
        { "fs",                 1,      NULL,   'd' },
+       { "grid",               0,      NULL,   OPT_GRID },
+       { "header",             0,      NULL,   'h' },
+       { "help",               0,      NULL,   OPT_HELP },
        { "quiet",              0,      NULL,   'q' },
        { "regex",              1,      NULL,   'r' },
-       { "strict-ws",          0,      NULL,   'W' },
+       { "sloppy",             0,      NULL,   's' },
+       { "table",              0,      NULL,   OPT_TABLE },
+       { "table-sep",          1,      NULL,   OPT_TABLE_SEP },
        { "trim",               0,      NULL,   OPT_TRIM },
        { "tsv",                0,      NULL,   't' },
+       { "version",            0,      NULL,   OPT_VERSION },
        { "ws",                 0,      NULL,   'w' },
-       { "help",               0,      NULL,   OPT_HELP },
        { NULL,                 0,      NULL,   0 },
 };
 
 static void set_format(int format_id)
 {
-       struct format *f = xmalloc(sizeof(*f));
-       memset(f, 0, sizeof(*f));
+       struct format *f = xmalloc_zero(sizeof(*f));
        f->id = format_id;
 
        switch (format_id) {
@@ -443,6 +926,12 @@ static void set_format(int format_id)
                case FORM_REGEX:
                        f->read_line = regex_read;
                        break;
+               case FORM_TABLE:
+                       f->write_line = table_write;
+                       f->write_grid = table_write_grid;
+                       f->needs_stats = 1;
+                       f->table_sep = 2;
+                       break;
        }
 
        if (!in_format)
@@ -450,7 +939,7 @@ static void set_format(int format_id)
        else if (!out_format)
                out_format = f;
        else
-               bad_args("At most two format may be given.");
+               bad_args("At most two formats may be given.");
 }
 
 static struct format *current_format(void)
@@ -466,7 +955,6 @@ static struct format *current_format(void)
 int main(int argc, char **argv)
 {
        int opt;
-       int want_trim = 0;
        const char *err;
 
        while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
@@ -480,6 +968,12 @@ int main(int argc, char **argv)
                                else
                                        bad_args("No field delimiter given.");
                                break;
+                       case 'f':
+                               current_format()->set_field_names = optarg;
+                               break;
+                       case 'h':
+                               current_format()->has_header = 1;
+                               break;
                        case 'q':
                                current_format()->quiet = 1;
                                break;
@@ -489,26 +983,42 @@ int main(int argc, char **argv)
                                if (err)
                                        bad_args("Error compiling regex: %s", err);
                                break;
+                       case 's':
+                               if (current_format()->id != FORM_WS && current_format()->id != FORM_REGEX)
+                                       bad_args("--sloppy makes sense only for --ws or --regex.");
+                               current_format()->sloppy = 1;
+                               break;
                        case 't':
                                set_format(FORM_TSV);
                                break;
                        case 'w':
                                set_format(FORM_WS);
                                break;
-                       case 'W':
-                               set_format(FORM_WS);
-                               current_format()->strict_ws = 1;
-                               break;
                        case OPT_ALWAYS_QUOTE:
                                if (current_format()->id != FORM_CSV)
-                                       bad_args("--always-quote makes sense only for CSV.");
+                                       bad_args("--always-quote makes sense only for --csv.");
                                current_format()->always_quote = 1;
                                break;
                        case OPT_HELP:
                                usage();
+                       case OPT_VERSION:
+                               puts("This is xsv version " VERSION ".");
+                               exit(0);
                        case OPT_TRIM:
                                want_trim = 1;
                                break;
+                       case OPT_TABLE:
+                               set_format(FORM_TABLE);
+                               break;
+                       case OPT_TABLE_SEP:
+                               current_format()->table_sep = atoi(optarg);
+                               break;
+                       case OPT_GRID:
+                               current_format()->table_grid = 1;
+                               break;
+                       case OPT_EQUALIZE:
+                               want_equalize = 1;
+                               break;
                        default:
                                bad_args(NULL);
                }
@@ -520,6 +1030,7 @@ int main(int argc, char **argv)
                bad_args("Write-only format selected for input.");
        if (!out_format->write_line)
                bad_args("Read-only format selected for output.");
+       read_header();
 
        for (int i = optind; i < argc; i++) {
                err = parse_selector(argv[i]);
@@ -528,26 +1039,10 @@ int main(int argc, char **argv)
        }
        finish_parse_selectors();
 
-       fields_init(&in_fields);
-       fields_init(&out_fields);
-       line_init(&in_line);
-
-       for (;;) {
-               line_number++;
-               fields_reset(&in_fields);
-               line_reset(&in_line);
-               in_field = NULL;
-               if (!in_format->read_line())
-                       break;
-
-               if (want_trim)
-                       trim_fields();
-
-               fields_reset(&out_fields);
-               select_fields();
-
-               out_format->write_line();
-       }
-
+       want_stats = out_format->needs_stats | want_equalize;
+       if (want_stats)
+               two_pass();
+       else
+               one_pass(3);
        return 0;
 }