From: Martin Mares
Date: Tue, 24 Jul 2012 13:23:02 +0000 (+0200)
Subject: Added support for headers and names fields
X-Git-Tag: v1.0~29
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=d745ce95bcb886cf1664e816601ca4fc11271e48;p=xsv.git

Added support for headers and names fields
---

diff --git a/xsv.c b/xsv.c
index df08c57..31a13ab 100644
--- a/xsv.c
+++ b/xsv.c
@@ -78,6 +78,7 @@ static void *xrealloc(void *old, size_t bytes)
 // end
 
 DECLARE_BUF(intarray, int);
+DECLARE_BUF(stringarray, char *);
 
 /*** Formats and their parameters ***/
 
@@ -100,6 +101,11 @@ struct format {
 	void (*write_line)(struct format *fmt);
 	int needs_stats;
 
+	// Field names
+	int has_header;
+	char *set_field_names;
+	struct field_names *field_names;
+
 	// CSV backend:
 	int always_quote;
 
@@ -133,6 +139,25 @@ static struct field *in_field;
 static line_t in_line;
 static int line_number;
 
+static int read_line(void)
+{
+	fields_reset(&in_fields);
+	line_reset(&in_line);
+	in_field = NULL;
+	if (!in_format->read_line(in_format))
+		return 0;
+	if (ferror_unlocked(stdin))
+		die("I/O error when reading standard input");
+	return 1;
+}
+
+static void write_line(void)
+{
+	out_format->write_line(out_format);
+	if (ferror_unlocked(stdout))
+		die("I/O error when writing standard input");
+}
+
 static void new_field(int pos)
 {
 	in_field = fields_push(&in_fields);
@@ -146,6 +171,14 @@ static void ensure_field(int pos)
 		new_field(pos);
 }
 
+// FIXME: Use elsewhere
+static unsigned char *get_field(fields_t *fields, int i, int *len)
+{
+	struct field *f = fields_nth(fields, i);
+	*len = f->len;
+	return line_nth(&in_line, f->start_pos);
+}
+
 static void warn(struct format *fmt, char *msg, ...)
 {
 	if (!fmt->quiet) {
@@ -471,15 +504,132 @@ static void trim_fields(void)
 	}
 }
 
+/*** Field names and headers ***/
+
+struct field_names {
+	stringarray_t names;
+};
+
+static void add_field(struct field_names *fn, char *name, int namelen)
+{
+	char *n = xmalloc(namelen + 1);
+	memcpy(n, name, namelen);
+	n[namelen] = 0;
+	*stringarray_push(&fn->names) = n;
+}
+
+static void add_field_names(struct field_names *fn, char *names)
+{
+	char *p = names;
+	while (p) {
+		char *q = strchr(p, ',');
+		int len = q ? q-p : (int) strlen(p);
+		add_field(fn, p, len);
+		p = q ? q+1 : NULL;
+	}
+}
+
+static void read_header(void)
+{
+	if (!(in_format->has_header || in_format->set_field_names))
+		return;
+
+	struct field_names *fn = xmalloc_zero(sizeof(*fn));
+	in_format->field_names = fn;
+
+	if (in_format->has_header) {
+		if (!read_line())
+			die("Missing input header");
+	}
+
+	if (in_format->set_field_names) {
+		add_field_names(fn, in_format->set_field_names);
+	} else {
+		for (int i = 0; i < fields_count(&in_fields); i++) {
+			int len;
+			char *s = (char *) get_field(&in_fields, i, &len);
+			add_field(fn, s, len);
+		}
+	}
+}
+
+static void write_header(void)
+{
+	if (!out_format->has_header)
+		return;
+
+	if (out_format->set_field_names) {
+		struct field_names *fn = xmalloc_zero(sizeof(*fn));
+		out_format->field_names = fn;
+		add_field_names(fn, out_format->set_field_names);
+	} else if (in_format->field_names)
+		out_format->field_names = in_format->field_names;
+	else
+		die("Output header requested, but no field names specified");
+
+	line_reset(&in_line);
+	fields_reset(&out_fields);
+	struct field_names *fn = out_format->field_names;
+	for (int i = 0; i < stringarray_count(&fn->names); i++) {
+		struct field *f = fields_push(&out_fields);
+		f->start_pos = line_count(&in_line);
+		f->len = 0;
+		char *s = *stringarray_nth(&fn->names, i);
+		while (*s) {
+			*line_push(&in_line) = *s++;
+			f->len++;
+		}
+	}
+	write_line();
+}
+
+static int find_field_by_name(struct field_names *fn, char *name)
+{
+	for (int i = 0; i < stringarray_count(&fn->names); i++)
+		if (!strcmp(*stringarray_nth(&fn->names, i), name))
+			return i + 1;
+	return -1;
+}
+
 /*** Field selection ***/
 
 struct selector {
-	int first_field, last_field;
+	int first_field, last_field; // 0 means "boundary"
 };
 
 DECLARE_BUF(selectors, struct selector);
 static selectors_t selectors;
 
+static int parse_field_num(char *str)
+{
+	int f = 0;
+
+	while (*str) {
+		if (*str < '0' || *str > '9')
+			return -1;
+		if (f >= 100000000)
+			return -1;
+		f = 10*f + *str - '0';
+		str++;
+	}
+	return f;
+}
+
+static int parse_field(char *str)
+{
+	if (!*str)
+		return 0;
+
+	int f = parse_field_num(str);
+	if (f > 0)
+		return f;
+
+	if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0)
+		return f;
+
+	die("Unknown field %s", str);
+}
+
 static char *parse_selector(char *str)
 {
 	char buf[strlen(str) + 1];
@@ -489,10 +639,10 @@ static char *parse_selector(char *str)
 	char *sep = strchr(buf, '-');
 	if (sep) {
 		*sep++ = 0;
-		s->first_field = atoi(buf);
-		s->last_field = atoi(sep);
+		s->first_field = parse_field(buf);
+		s->last_field = parse_field(sep);
 	} else
-		s->first_field = s->last_field = atoi(buf);
+		s->first_field = s->last_field = parse_field(buf);
 
 	return NULL;
 }
@@ -533,16 +683,10 @@ static void select_all_fields(void)
 
 static void one_pass(int pass)
 {
-	line_number = 0;
 	for (;;) {
 		line_number++;
-		fields_reset(&in_fields);
-		line_reset(&in_line);
-		in_field = NULL;
-		if (!in_format->read_line(in_format))
+		if (!read_line())
 			break;
-		if (ferror_unlocked(stdin))
-			die("I/O error when reading standard input");
 
 		if (want_trim && (pass & 1))
 			trim_fields();
@@ -555,9 +699,8 @@ static void one_pass(int pass)
 
 		if (out_format->needs_stats)
 			update_stats();
-		out_format->write_line(out_format);
-		if (ferror_unlocked(stdout))
-			die("I/O error when writing standard input");
+
+		write_line();
 	}
 }
 
@@ -580,6 +723,7 @@ static void two_pass(void)
 	// Pass 2: Set up reader of intermediate format
 	in_format = out_format;
 	rewind(in_format->tmp_file);
+	line_number = 0;
 	out_format = final_format;
 	out_format->needs_stats = 0;
 	one_pass(2);
@@ -603,6 +747,8 @@ Formats:\n\
 \n\
 Format parameters:\n\
 -d, --fs= Delimiter of fields\n\
+-f, --fields=,... Set field names\n\
+-h, --header The first line contains field names\n\
 -q, --quiet Do not show warnings\n\
 --always-quote Put quotes around all fields (CSV output only)\n\
 --table-sep= Separate table columns by spaces (default: 2)\n\
@@ -627,7 +773,7 @@ static void NONRET bad_args(const char *msg, ...)
 	exit(1);
 }
 
-static const char short_options[] = "cd:qr:twW";
+static const char short_options[] = "cd:f:hqr:twW";
 
 enum long_options {
 	OPT_HELP = 256,
@@ -640,7 +786,9 @@ enum long_options {
 static const struct option long_options[] = {
 	{ "always-quote", 0, NULL, OPT_ALWAYS_QUOTE },
 	{ "csv", 0, NULL, 'c' },
+	{ "fields", 1, NULL, 'f' },
 	{ "fs", 1, NULL, 'd' },
+	{ "header", 0, NULL, 'h' },
 	{ "quiet", 0, NULL, 'q' },
 	{ "regex", 1, NULL, 'r' },
 	{ "strict-ws", 0, NULL, 'W' },
@@ -721,6 +869,12 @@ int main(int argc, char **argv)
 				else
 					bad_args("No field delimiter given.");
 				break;
+			case 'f':
+				current_format()->set_field_names = optarg;
+				break;
+			case 'h':
+				current_format()->has_header = 1;
+				break;
 			case 'q':
 				current_format()->quiet = 1;
 				break;
@@ -767,6 +921,7 @@ int main(int argc, char **argv)
 		bad_args("Write-only format selected for input.");
 	if (!out_format->write_line)
 		bad_args("Read-only format selected for output.");
+	read_header();
 
 	for (int i = optind; i < argc; i++) {
 		err = parse_selector(argv[i]);
@@ -775,6 +930,7 @@ int main(int argc, char **argv)
 	}
 	finish_parse_selectors();
 
+	write_header();
 	if (out_format->needs_stats)
 		two_pass();
 	else