X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=xsv.c;h=0d2d972b055544ddab45ad06132ed09ea4e2b8c6;hb=b21be4364c6b246bb13f0e448d7969be8651e248;hp=38f1fdeac88088859fae44f2835f080f971a4fbe;hpb=f13d72b8bb0640b1b52ab7257377e202f7520c4e;p=xsv.git diff --git a/xsv.c b/xsv.c index 38f1fde..0d2d972 100644 --- a/xsv.c +++ b/xsv.c @@ -1,5 +1,5 @@ /* - * A Swiss-Army Knife for CSV-like Files + * The Swiss-Army Knife for CSV-like Files * * (c) 2012 Martin Mares */ @@ -7,27 +7,59 @@ #include #include #include +#include #include +#include +#include + +#include + +#ifdef __GNUC__ +#define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#else +#define NONRET +#define UNUSED +#endif + +static void select_fields(void); +static void select_all_fields(void); + +/*** General functions ***/ + +static void NONRET die(char *msg, ...) +{ + va_list args; + va_start(args, msg); + fprintf(stderr, "xsv: "); + vfprintf(stderr, msg, args); + fputc('\n', stderr); + va_end(args); + exit(1); +} /*** Memory allocation ***/ static void *xmalloc(size_t bytes) { void *p = malloc(bytes); - if (!p) { - fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes); - exit(1); - } + if (!p) + die("Out of memory (cannot allocate %zu bytes)", bytes); + return p; +} + +static void *xmalloc_zero(size_t bytes) +{ + void *p = xmalloc(bytes); + memset(p, 0, bytes); return p; } static void *xrealloc(void *old, size_t bytes) { void *p = realloc(old, bytes); - if (!p) { - fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes); - exit(1); - } + if (!p) + die("Out of memory (cannot allocate %zu bytes)", bytes); return p; } @@ -44,9 +76,13 @@ static void *xrealloc(void *old, size_t bytes) if (b->count >= b->max) name##_extend(b); \ return &b->start[b->count++]; \ } \ + static inline type *name##_first(name##_t *b) { return b->start; } \ static inline type *name##_nth(name##_t *b, int n) { return &b->start[n]; } \ // end +DECLARE_BUF(intarray, int); +DECLARE_BUF(stringarray, char *); + /*** Formats and their parameters ***/ enum format_id { @@ -54,17 +90,44 @@ enum format_id { FORM_TSV, FORM_CSV, FORM_WS, + FORM_REGEX, + FORM_TMP, + FORM_TABLE, }; struct format { enum format_id id; int fs; int quote; - int (*read_line)(void); - void (*write_line)(void); + int quiet; + int sloppy; + int (*read_line)(struct format *fmt); + void (*write_line)(struct format *fmt); + void (*write_grid)(struct format *fmt, int pos); // -1=above, 1=below, 0=after header + int needs_stats; + + // Field names + int has_header; + char *set_field_names; + struct field_names *field_names; + + // CSV backend: + int always_quote; + + // regex backend: + pcre *pcre; + pcre_extra *pcre_extra; + + // Temporary file backend: + FILE *tmp_file; + + // Table backend: + int table_sep; + int table_grid; }; static struct format *in_format, *out_format; +static int want_trim, want_equalize, want_stats; struct field { int start_pos; @@ -77,123 +140,580 @@ DECLARE_BUF(line, unsigned char); static fields_t in_fields, out_fields; static struct field *in_field; static line_t in_line; +static int line_number; + +static int read_line(void) +{ + fields_reset(&in_fields); + line_reset(&in_line); + in_field = NULL; + if (!in_format->read_line(in_format)) + return 0; + if (ferror_unlocked(stdin)) + die("I/O error when reading standard input"); + return 1; +} + +static void write_line(void) +{ + out_format->write_line(out_format); + if (ferror_unlocked(stdout)) + die("I/O error when writing standard input"); +} + +static void write_grid(int pos) +{ + if (out_format->write_grid) { + out_format->write_grid(out_format, pos); + if (ferror_unlocked(stdout)) + die("I/O error when writing standard input"); + } +} -static void new_field(void) +static void new_field(int pos) { in_field = fields_push(&in_fields); - in_field->start_pos = line_count(&in_line); + in_field->start_pos = pos; in_field->len = 0; } -static void ensure_field(void) +static void ensure_field(int pos) { if (!in_field) - new_field(); + new_field(pos); } -static int csv_read(void) +static unsigned char *get_field(fields_t *fields, int i, int *len) +{ + struct field *f = fields_nth(fields, i); + *len = f->len; + return line_nth(&in_line, f->start_pos); +} + +static void warn(struct format *fmt, char *msg, ...) +{ + if (!fmt->quiet) { + fprintf(stderr, "Warning at line %d: ", line_number); + va_list args; + va_start(args, msg); + vfprintf(stderr, msg, args); + va_end(args); + fputc('\n', stderr); + } +} + +static int next_line(void) { - int quoted = 0; - // FIXME: Complain if closing quote is missing? for (;;) { - int c = getchar(); -restart: - if (c < 0) - return !!fields_count(&in_fields); + int c = getchar_unlocked(); if (c == '\r') continue; + if (c < 0) + return !!line_count(&in_line); if (c == '\n') return 1; + *line_push(&in_line) = c; + } +} + +static int field_chars(struct field *f) +{ + unsigned char *s = line_nth(&in_line, f->start_pos); + int i = 0; + mbstate_t mbs; + memset(&mbs, 0, sizeof(mbs)); + + int chars = 0; + while (i < f->len) { + size_t k = mbrlen((char *) s + i, f->len - i, &mbs); + if ((int) k <= 0) + break; + i += k; + chars++; + } + + return chars; +} + +/*** Field statistics ***/ + +static intarray_t column_widths; + +static void update_stats(void) +{ + if (!want_stats) + return; + + for (int i = 0; i < fields_count(&out_fields); i++) { + struct field *f = fields_nth(&out_fields, i); + intarray_t *w = &column_widths; + + while (i >= intarray_count(w)) + *intarray_push(w) = 0; + int fw = field_chars(f); + if (*intarray_nth(w, i) < fw) + *intarray_nth(w, i) = fw; + } +} + +/*** CSV/TSV back-end */ + +static int csv_read(struct format *fmt) +{ + int quoted = 0; + for (;;) { + int c = getchar_unlocked(); + int i = line_count(&in_line); +restart: + if (c == '\r') + continue; + if (c < 0 || c == '\n') { + if (quoted) + warn(fmt, "Missing closing quote."); + if (c < 0) + return !!fields_count(&in_fields); + else + return 1; + } if (quoted) { - if (c == in_format->quote) { - c = getchar(); - if (c != in_format->quote) { + if (c == fmt->quote) { + c = getchar_unlocked(); + if (c != fmt->quote) { quoted = 0; goto restart; } // Two quotes assimilate to one } // Fall through to pushing the character - } else if (c == in_format->quote) { + } else if (c == fmt->quote) { quoted = 1; continue; - } else if (c == in_format->fs && !quoted) { - ensure_field(); - new_field(); + } else if (c == fmt->fs && !quoted) { + ensure_field(i); + new_field(i); continue; } - ensure_field(); + ensure_field(i); *line_push(&in_line) = c; in_field->len++; } } -static void csv_write(void) +static int is_ws(int c) { - unsigned char *line = line_nth(&in_line, 0); - int n = fields_count(&out_fields); - for (int i=0; iquote >= 0) { - for (int j=0; j < f->len; j++) { - int c = line[f->start_pos + j]; - if (c == out_format->fs || c == out_format->quote) { + if (fmt->quote >= 0) { + need_quotes = fmt->always_quote; + for (int j=0; !need_quotes && j < len; j++) { + if (p[j] == fmt->fs || p[j] == fmt->quote) need_quotes = 1; - break; - } } } if (i) - putchar(out_format->fs); + putchar_unlocked(fmt->fs); if (need_quotes) - putchar(out_format->quote); - for (int j=0; j < f->len; j++) { - int c = line[f->start_pos + j]; - if (c == out_format->quote) - putchar(c); - putchar(c); + putchar_unlocked(fmt->quote); + for (int j=0; j < len; j++) { + int c = p[j]; + if (c == fmt->fs && !need_quotes) + warn(fmt, "Field separator found inside field and quoting is turned off."); + if (c == fmt->quote) + putchar_unlocked(c); + putchar_unlocked(c); } if (need_quotes) - putchar(out_format->quote); + putchar_unlocked(fmt->quote); } - putchar('\n'); + putchar_unlocked('\n'); } -static int ws_read(void) +/*** White-space back-end ***/ + +static int ws_read(struct format *fmt) { + if (!next_line()) + return 0; + + unsigned char *line = line_first(&in_line); + int n = line_count(&in_line); + if (!n) + return 1; + int ws = 0; - for (;;) { - int c = getchar(); - if (c < 0) - return !!fields_count(&in_fields); - if (c == '\r') - continue; - if (c == '\n') - return 1; - if (c == ' ' || c == '\t' || c == '\f') { - ensure_field(); - if (!ws) - new_field(); + new_field(0); + for (int i=0; istart_pos && + !in_field->len && + fmt->sloppy) + in_field->start_pos = i; + else + new_field(i); + ws = 0; + } in_field->len++; - ws = 0; } } + + if (ws && !fmt->sloppy) + new_field(n); + return 1; +} + +/*** Regex back-end ***/ + +static const char *regex_set(struct format *f, char *rx) +{ + const char *err; + int errpos; + f->pcre = pcre_compile(rx, PCRE_DOLLAR_ENDONLY, &err, &errpos, NULL); + if (!f->pcre) + return err; + + f->pcre_extra = pcre_study(f->pcre, 0, &err); + if (!f->pcre_extra) + return err; + + return NULL; +} + +static int regex_read(struct format *fmt) +{ + if (!next_line()) + return 0; + + unsigned char *c = line_first(&in_line); + int n = line_count(&in_line); + if (!n) + return 1; + + int i = 0; + for (;;) { + int ovec[3]; + int err = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3); + if (err < 0) { + if (err != PCRE_ERROR_NOMATCH) + warn(fmt, "PCRE matching error %d", err); + // No further occurrence of the separator: the rest is a single field + if (!fmt->sloppy || i < n) { + new_field(i); + in_field->len = n - i; + } + return 1; + } + if (ovec[0] == ovec[1]) { + warn(fmt, "Regular expression matched an empty separator."); + new_field(i); + in_field->len = n - i; + return 1; + } + if (!fmt->sloppy || ovec[0]) { + new_field(i); + in_field->len = ovec[0] - i; + } + i = ovec[1]; + } +} + +/*** Table back-end ***/ + +static void table_write(struct format *fmt) +{ + for (int i = 0; i < intarray_count(&column_widths); i++) { + if (fmt->table_grid) { + putchar_unlocked('|'); + printf("%*s", fmt->table_sep / 2, ""); + } else if (i) + printf("%*s", fmt->table_sep, ""); + + int cw = *intarray_nth(&column_widths, i); + int fw = 0; + if (i < fields_count(&out_fields)) { + int len; + unsigned char *p = get_field(&out_fields, i, &len); + fw = field_chars(fields_nth(&out_fields, i)); + if (fw > cw) { + warn(fmt, "Internal error: Wrongly calculated width of column %d (%d > %d)", i, fw, cw); + cw = fw; + } + while (len--) + putchar_unlocked(*p++); + } + while (fw < cw) { + putchar_unlocked(' '); + fw++; + } + + if (fmt->table_grid) + printf("%*s", fmt->table_sep - fmt->table_sep / 2, ""); + } + + if (fmt->table_grid) + putchar_unlocked('|'); + putchar_unlocked('\n'); +} + +static void table_write_grid(struct format *fmt, int pos UNUSED) +{ + if (!fmt->table_grid) + return; + + for (int i = 0; i < intarray_count(&column_widths); i++) { + putchar_unlocked('+'); + int w = fmt->table_sep + *intarray_nth(&column_widths, i); + while (w--) + putchar_unlocked('-'); + } + putchar_unlocked('+'); + putchar_unlocked('\n'); +} + +/*** Temporary file back-end ***/ + +static int tmp_read(struct format *fmt) +{ + FILE *tf = fmt->tmp_file; + + for (;;) { + int c = getc_unlocked(tf); + if (c < 0) + return 0; + if (c == 0xff) + return 1; + if (c == 0xfe) { + c = getc_unlocked(tf); + c = (c << 8) | getc_unlocked(tf); + c = (c << 8) | getc_unlocked(tf); + c = (c << 8) | getc_unlocked(tf); + } + new_field(line_count(&in_line)); + in_field->len = c; + while (c--) { + int x = getc_unlocked(tf); + if (x < 0) + die("Truncated temporary file"); + *line_push(&in_line) = x; + } + } + + if (ferror_unlocked(tf)) + die("I/O error when reading temporary file"); +} + +static void tmp_write(struct format *fmt) +{ + FILE *tf = fmt->tmp_file; + + for (int i = 0; i < fields_count(&out_fields); i++) { + int len; + unsigned char *p = get_field(&out_fields, i, &len); + + if (len < 0xfe) + putc_unlocked(len, tf); + else { + putc_unlocked(0xfe, tf); + putc_unlocked((len >> 24) & 0xff, tf); + putc_unlocked((len >> 16) & 0xff, tf); + putc_unlocked((len >> 8) & 0xff, tf); + putc_unlocked(len & 0xff, tf); + } + + while (len--) + putc_unlocked(*p++, tf); + } + putc_unlocked(0xff, tf); + + if (ferror_unlocked(tf)) + die("I/O error when writing temporary file"); +} + +/*** Transforms ***/ + +static void trim_fields(void) +{ + unsigned char *line = line_first(&in_line); + for (int i = 0; i < fields_count(&in_fields); i++) { + struct field *f = fields_nth(&in_fields, i); + while (f->len && is_ws(line[f->start_pos])) + f->start_pos++, f->len--; + while (f->len && is_ws(line[f->start_pos + f->len - 1])) + f->len--; + } +} + +static void equalize_fields(void) +{ + while (fields_count(&out_fields) < intarray_count(&column_widths)) { + struct field *f = fields_push(&out_fields); + f->start_pos = f->len = 0; + } +} + +/*** Field names and headers ***/ + +struct field_names { + stringarray_t names; +}; + +static void add_field(struct field_names *fn, char *name, int namelen) +{ + char *n = xmalloc(namelen + 1); + memcpy(n, name, namelen); + n[namelen] = 0; + *stringarray_push(&fn->names) = n; +} + +static void add_field_names(struct field_names *fn, char *names) +{ + char *p = names; + while (p) { + char *q = strchr(p, ','); + int len = q ? q-p : (int) strlen(p); + add_field(fn, p, len); + p = q ? q+1 : NULL; + } +} + +static void read_header(void) +{ + if (!(in_format->has_header || in_format->set_field_names)) + return; + + struct field_names *fn = xmalloc_zero(sizeof(*fn)); + in_format->field_names = fn; + + if (in_format->has_header) { + if (!read_line()) + die("Missing input header"); + } + + if (in_format->set_field_names) { + add_field_names(fn, in_format->set_field_names); + } else { + for (int i = 0; i < fields_count(&in_fields); i++) { + int len; + char *s = (char *) get_field(&in_fields, i, &len); + add_field(fn, s, len); + } + } +} + +static void write_header(void) +{ + if (!out_format->has_header) { + write_grid(-1); + return; + } + + int want_select_fields = 0; + if (out_format->set_field_names) { + struct field_names *fn = xmalloc_zero(sizeof(*fn)); + out_format->field_names = fn; + add_field_names(fn, out_format->set_field_names); + } else if (in_format->field_names) { + out_format->field_names = in_format->field_names; + want_select_fields = 1; + } else + die("Output header requested, but no field names specified"); + + line_reset(&in_line); + fields_reset(&in_fields); + struct field_names *fn = out_format->field_names; + for (int i = 0; i < stringarray_count(&fn->names); i++) { + struct field *f = fields_push(&in_fields); + f->start_pos = line_count(&in_line); + f->len = 0; + char *s = *stringarray_nth(&fn->names, i); + while (*s) { + *line_push(&in_line) = *s++; + f->len++; + } + } + + fields_reset(&out_fields); + if (want_select_fields) + select_fields(); + else + select_all_fields(); + + // This is tricky: when we are formatting a table, field names are normally + // calculated in pass 1, but the header is written in pass 2, so we have to + // update column statistics, because field name can be too wide to fit. + want_stats++; + update_stats(); + want_stats--; + if (want_equalize) + equalize_fields(); + write_grid(-1); + write_line(); + write_grid(0); +} + +static void write_footer(void) +{ + write_grid(1); +} + +static int find_field_by_name(struct field_names *fn, char *name) +{ + for (int i = 0; i < stringarray_count(&fn->names); i++) + if (!strcmp(*stringarray_nth(&fn->names, i), name)) + return i + 1; + return -1; } /*** Field selection ***/ struct selector { - int first_field, last_field; + int first_field, last_field; // 0 means "boundary" }; DECLARE_BUF(selectors, struct selector); static selectors_t selectors; +static int parse_field_num(char *str) +{ + int f = 0; + + while (*str) { + if (*str < '0' || *str > '9') + return -1; + if (f >= 100000000) + return -1; + f = 10*f + *str - '0'; + str++; + } + return f; +} + +static int parse_field(char *str) +{ + if (!*str) + return 0; + + int f = parse_field_num(str); + if (f > 0) + return f; + + if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0) + return f; + + die("Unknown field `%s'", str); +} + static char *parse_selector(char *str) { char buf[strlen(str) + 1]; @@ -203,10 +723,10 @@ static char *parse_selector(char *str) char *sep = strchr(buf, '-'); if (sep) { *sep++ = 0; - s->first_field = atoi(buf); - s->last_field = atoi(sep); + s->first_field = parse_field(buf); + s->last_field = parse_field(sep); } else - s->first_field = s->last_field = atoi(buf); + s->first_field = s->last_field = parse_field(buf); return NULL; } @@ -237,54 +757,151 @@ static void select_fields(void) } } +static void select_all_fields(void) +{ + for (int i = 0; i < fields_count(&in_fields); i++) + *fields_push(&out_fields) = *fields_nth(&in_fields, i); +} + +/*** Processing of files ***/ + +static void one_pass(int pass) +{ + if (pass & 2) + write_header(); + + for (;;) { + line_number++; + if (!read_line()) + break; + + if (want_trim && (pass & 1)) + trim_fields(); + + fields_reset(&out_fields); + if (pass & 1) + select_fields(); + else + select_all_fields(); + + if (want_equalize && (pass & 2)) + equalize_fields(); + update_stats(); + write_line(); + } + + if (pass & 2) + write_footer(); +} + +static void two_pass(void) +{ + struct format *final_format = out_format; + + // We need to use character set info from the current locale + setlocale(LC_CTYPE, ""); + + // Pass 1: Set up writer of intermediate format + out_format = xmalloc_zero(sizeof(*out_format)); + out_format->id = FORM_TMP; + out_format->read_line = tmp_read; + out_format->write_line = tmp_write; + out_format->tmp_file = tmpfile(); + out_format->field_names = in_format->field_names; + one_pass(1); + + // Pass 2: Set up reader of intermediate format + in_format = out_format; + rewind(in_format->tmp_file); + line_number = 0; + out_format = final_format; + want_stats = 0; + one_pass(2); + fclose(in_format->tmp_file); +} + /*** Parsing of arguments ***/ -static void usage(void) +static void NONRET usage(void) { printf("\ Usage: xsv [] []\n\ \n\ Formats:\n\ --t, --tsv TAB-separated values (default)\n\ +-t, --tsv Tab-separated values (default)\n\ -c, --csv Comma-separated values\n\ -w, --ws Values separated by arbitrary whitespace\n\ +-r, --regex= Separator given by Perl regular expression (input only)\n\ + --table Format a table (output only)\n\ \n\ Format parameters:\n\ --d, --fs= Delimiter of fields\n\ +-d, --fs= Delimiter of fields\n\ +-f, --fields=,... Set field names\n\ +-h, --header The first line contains field names\n\ +-q, --quiet Do not show warnings\n\ + --always-quote Put quotes around all fields (CSV output only)\n\ + --table-sep= Separate table columns by spaces (default: 2)\n\ + --grid Separate table columns by grid lines\n\ +-s, --sloppy Ignore separators at the start/end of line (ws/regex only)\n\ \n\ Other options:\n\ -(so far none)\n\ + --trim Trim leading and trailing whitespaces in fields\n\ + --equalize Pad all lines to the maximum number of fields\n\ "); exit(0); } -static void bad_args(char *msg) +static void NONRET bad_args(const char *msg, ...) { - if (msg) - fprintf(stderr, "xsv: %s\n", msg); + if (msg) { + va_list args; + va_start(args, msg); + fprintf(stderr, "xsv: "); + vfprintf(stderr, msg, args); + fputc('\n', stderr); + va_end(args); + } fprintf(stderr, "Try `xsv --help' for more information.\n"); exit(1); } -static const char short_options[] = "cd:tw"; +static const char short_options[] = "cd:f:hqr:twW"; enum long_options { OPT_HELP = 256, + OPT_VERSION, + OPT_TRIM, + OPT_ALWAYS_QUOTE, + OPT_TABLE, + OPT_TABLE_SEP, + OPT_GRID, + OPT_EQUALIZE, }; static const struct option long_options[] = { + { "always-quote", 0, NULL, OPT_ALWAYS_QUOTE }, { "csv", 0, NULL, 'c' }, + { "equalize", 0, NULL, OPT_EQUALIZE }, + { "fields", 1, NULL, 'f' }, { "fs", 1, NULL, 'd' }, + { "grid", 0, NULL, OPT_GRID }, + { "header", 0, NULL, 'h' }, + { "help", 0, NULL, OPT_HELP }, + { "quiet", 0, NULL, 'q' }, + { "regex", 1, NULL, 'r' }, + { "sloppy", 0, NULL, 's' }, + { "table", 0, NULL, OPT_TABLE }, + { "table-sep", 1, NULL, OPT_TABLE_SEP }, + { "trim", 0, NULL, OPT_TRIM }, { "tsv", 0, NULL, 't' }, + { "version", 0, NULL, OPT_VERSION }, { "ws", 0, NULL, 'w' }, - { "help", 0, NULL, OPT_HELP }, { NULL, 0, NULL, 0 }, }; static void set_format(int format_id) { - struct format *f = xmalloc(sizeof(*f)); - memset(f, 0, sizeof(*f)); + struct format *f = xmalloc_zero(sizeof(*f)); f->id = format_id; switch (format_id) { @@ -306,6 +923,15 @@ static void set_format(int format_id) f->read_line = ws_read; f->write_line = csv_write; break; + case FORM_REGEX: + f->read_line = regex_read; + break; + case FORM_TABLE: + f->write_line = table_write; + f->write_grid = table_write_grid; + f->needs_stats = 1; + f->table_sep = 2; + break; } if (!in_format) @@ -313,7 +939,7 @@ static void set_format(int format_id) else if (!out_format) out_format = f; else - bad_args("At most two format may be given."); + bad_args("At most two formats may be given."); } static struct format *current_format(void) @@ -329,6 +955,7 @@ static struct format *current_format(void) int main(int argc, char **argv) { int opt; + const char *err; while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0) switch (opt) { @@ -341,14 +968,57 @@ int main(int argc, char **argv) else bad_args("No field delimiter given."); break; + case 'f': + current_format()->set_field_names = optarg; + break; + case 'h': + current_format()->has_header = 1; + break; + case 'q': + current_format()->quiet = 1; + break; + case 'r': + set_format(FORM_REGEX); + err = regex_set(current_format(), optarg); + if (err) + bad_args("Error compiling regex: %s", err); + break; + case 's': + if (current_format()->id != FORM_WS && current_format()->id != FORM_REGEX) + bad_args("--sloppy makes sense only for --ws or --regex."); + current_format()->sloppy = 1; + break; case 't': set_format(FORM_TSV); break; case 'w': set_format(FORM_WS); break; + case OPT_ALWAYS_QUOTE: + if (current_format()->id != FORM_CSV) + bad_args("--always-quote makes sense only for --csv."); + current_format()->always_quote = 1; + break; case OPT_HELP: usage(); + case OPT_VERSION: + puts("This is xsv version " VERSION "."); + exit(0); + case OPT_TRIM: + want_trim = 1; + break; + case OPT_TABLE: + set_format(FORM_TABLE); + break; + case OPT_TABLE_SEP: + current_format()->table_sep = atoi(optarg); + break; + case OPT_GRID: + current_format()->table_grid = 1; + break; + case OPT_EQUALIZE: + want_equalize = 1; + break; default: bad_args(NULL); } @@ -356,30 +1026,23 @@ int main(int argc, char **argv) current_format(); if (!out_format) out_format = in_format; + if (!in_format->read_line) + bad_args("Write-only format selected for input."); + if (!out_format->write_line) + bad_args("Read-only format selected for output."); + read_header(); for (int i = optind; i < argc; i++) { - char *err = parse_selector(argv[i]); + err = parse_selector(argv[i]); if (err) bad_args(err); } finish_parse_selectors(); - fields_init(&in_fields); - fields_init(&out_fields); - line_init(&in_line); - - for (;;) { - fields_reset(&in_fields); - line_reset(&in_line); - in_field = NULL; - if (!in_format->read_line()) - break; - - fields_reset(&out_fields); - select_fields(); - - out_format->write_line(); - } - + want_stats = out_format->needs_stats | want_equalize; + if (want_stats) + two_pass(); + else + one_pass(3); return 0; }