mj.ucw.cz Git - xsv.git/commitdiff
Added support for headers and named fields
author Martin Mares <mj@ucw.cz>
Tue, 24 Jul 2012 13:23:02 +0000 (15:23 +0200)
committer Martin Mares <mj@ucw.cz>
Tue, 24 Jul 2012 13:23:02 +0000 (15:23 +0200)
xsv.c

diff --git a/xsv.c b/xsv.c
index df08c570486ff7a775ba7a2e4a2c31ebb8acb150..31a13abc00482d571e3e30d9b4b9252672f2c81a 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -78,6 +78,7 @@ static void *xrealloc(void *old, size_t bytes)
        // end
 
 DECLARE_BUF(intarray, int);
+DECLARE_BUF(stringarray, char *);      // stringarray_t: buffer of char * entries (holds the field names)
 
 /*** Formats and their parameters ***/
 
@@ -100,6 +101,11 @@ struct format {
        void (*write_line)(struct format *fmt);
        int needs_stats;
 
+       // Field names
+       int has_header;
+       char *set_field_names;
+       struct field_names *field_names;
+
        // CSV backend:
        int always_quote;
 
@@ -133,6 +139,25 @@ static struct field *in_field;
 static line_t in_line;
 static int line_number;
 
+static int read_line(void)     // Read one line via in_format; returns 1 if a line was produced, 0 if not
+{
+       fields_reset(&in_fields);
+       line_reset(&in_line);
+       in_field = NULL;
+       int got_line = in_format->read_line(in_format);
+       // A failed read can look like end of input to the parser, so check even when no line came back
+       if (ferror_unlocked(stdin))
+               die("I/O error when reading standard input");
+       return got_line ? 1 : 0;
+}
+
+static void write_line(void)   // Emit the current line via out_format; dies if stdout is in an error state
+{
+       out_format->write_line(out_format);
+       if (ferror_unlocked(stdout))
+               die("I/O error when writing standard output");
+}
+
 static void new_field(int pos)
 {
        in_field = fields_push(&in_fields);
@@ -146,6 +171,14 @@ static void ensure_field(int pos)
                new_field(pos);
 }
 
+// Look up field i of <fields>: store its length in *len and return its start within in_line. FIXME: Use elsewhere
+static unsigned char *get_field(fields_t *fields, int i, int *len)
+{
+       struct field *f = fields_nth(fields, i);
+       *len = f->len;
+       return line_nth(&in_line, f->start_pos);        // positions always index in_line, whichever field list is passed
+}
+
 static void warn(struct format *fmt, char *msg, ...)
 {
        if (!fmt->quiet) {
@@ -471,15 +504,132 @@ static void trim_fields(void)
        }
 }
 
+/*** Field names and headers ***/
+
+struct field_names {
+       stringarray_t names;            // one NUL-terminated name per field, in field order (entry 0 names field 1)
+};
+
+// Append a freshly allocated, NUL-terminated copy of the first <namelen> bytes of <name> to fn->names
+static void add_field(struct field_names *fn, char *name, int namelen)
+{
+       char *copy = memcpy(xmalloc(namelen + 1), name, namelen);
+       copy[namelen] = 0;
+       *stringarray_push(&fn->names) = copy;
+}
+
+// Append every comma-separated item of <names> to fn; empty items are kept, a NULL list adds nothing
+static void add_field_names(struct field_names *fn, char *names)
+{
+       char *comma;
+       for (char *item = names; item; item = comma ? comma + 1 : NULL) {
+               comma = strchr(item, ',');
+               int item_len = comma ? comma - item : (int) strlen(item);
+               add_field(fn, item, item_len);
+       }
+}
+
+// Set up in_format->field_names from the header line and/or the names given explicitly
+static void read_header(void)
+{
+       struct format *fmt = in_format;
+       if (!fmt->has_header && !fmt->set_field_names)
+               return;
+
+       struct field_names *names = xmalloc_zero(sizeof(*names));
+       fmt->field_names = names;
+       // The header line is consumed even when explicitly given names override it
+       if (fmt->has_header && !read_line())
+               die("Missing input header");
+
+       if (fmt->set_field_names) {
+               add_field_names(names, fmt->set_field_names);
+               return;
+       }
+       for (int i = 0; i < fields_count(&in_fields); i++) {
+               int len;
+               char *text = (char *) get_field(&in_fields, i, &len);
+               add_field(names, text, len);
+       }
+}
+
+// Write the header line if out_format asks for one; dies when no field names are available
+static void write_header(void)
+{
+       struct format *out = out_format;
+       if (!out->has_header)
+               return;
+
+       // Names set on the output format win; otherwise the input's name list is shared
+       if (out->set_field_names) {
+               out->field_names = xmalloc_zero(sizeof(*out->field_names));
+               add_field_names(out->field_names, out->set_field_names);
+       } else if (in_format->field_names)
+               out->field_names = in_format->field_names;
+       else
+               die("Output header requested, but no field names specified");
+
+       // Build the header like a data line: text goes into in_line, field records into out_fields
+       line_reset(&in_line);
+       fields_reset(&out_fields);
+       stringarray_t *names = &out->field_names->names;
+       for (int i = 0; i < stringarray_count(names); i++) {
+               struct field *f = fields_push(&out_fields);
+               f->start_pos = line_count(&in_line);
+               f->len = 0;
+               for (char *s = *stringarray_nth(names, i); *s; s++, f->len++)
+                       *line_push(&in_line) = *s;
+       }
+       write_line();
+}
+
+static int find_field_by_name(struct field_names *fn, char *name)      // 1-based position of the first match, or -1
+{
+       for (int pos = 1; pos <= stringarray_count(&fn->names); pos++)
+               if (strcmp(*stringarray_nth(&fn->names, pos - 1), name) == 0)
+                       return pos;
+       return -1;
+}
+
 /*** Field selection ***/
 
 struct selector {
-       int first_field, last_field;
+       int first_field, last_field;            // 0 means "boundary"
 };
 
 DECLARE_BUF(selectors, struct selector);
 static selectors_t selectors;
 
+static int parse_field_num(char *str)  // Decimal digits -> value ("" gives 0); -1 on a non-digit or an over-long number
+{
+       int value = 0;
+
+       for (char *p = str; *p; p++) {
+               if (!(*p >= '0' && *p <= '9'))
+                       return -1;
+               // Refuse another digit once the value has nine digits, so 10*value stays in range (assuming int >= 32 bits)
+               if (value >= 100000000)
+                       return -1;
+               value = 10*value + (*p - '0');
+       }
+       return value;
+}
+
+static int parse_field(char *str)      // Field spec -> field number; "" gives 0; dies on an unknown field
+{
+       if (*str == 0)
+               return 0;
+
+       // A positive number is taken literally; anything else is looked up among the input field names
+       int num = parse_field_num(str);
+       if (num <= 0 && in_format->field_names)
+               num = find_field_by_name(in_format->field_names, str);
+       if (num > 0)
+               return num;
+
+       die("Unknown field %s", str);
+}
+
 static char *parse_selector(char *str)
 {
        char buf[strlen(str) + 1];
@@ -489,10 +639,10 @@ static char *parse_selector(char *str)
        char *sep = strchr(buf, '-');
        if (sep) {
                *sep++ = 0;
-               s->first_field = atoi(buf);
-               s->last_field = atoi(sep);
+               s->first_field = parse_field(buf);
+               s->last_field = parse_field(sep);
        } else
-               s->first_field = s->last_field = atoi(buf);
+               s->first_field = s->last_field = parse_field(buf);
 
        return NULL;
 }
@@ -533,16 +683,10 @@ static void select_all_fields(void)
 
 static void one_pass(int pass)
 {
-       line_number = 0;
        for (;;) {
                line_number++;
-               fields_reset(&in_fields);
-               line_reset(&in_line);
-               in_field = NULL;
-               if (!in_format->read_line(in_format))
+               if (!read_line())
                        break;
-               if (ferror_unlocked(stdin))
-                       die("I/O error when reading standard input");
 
                if (want_trim && (pass & 1))
                        trim_fields();
@@ -555,9 +699,8 @@ static void one_pass(int pass)
 
                if (out_format->needs_stats)
                        update_stats();
-               out_format->write_line(out_format);
-               if (ferror_unlocked(stdout))
-                       die("I/O error when writing standard input");
+
+               write_line();
        }
 }
 
@@ -580,6 +723,7 @@ static void two_pass(void)
        // Pass 2: Set up reader of intermediate format
        in_format = out_format;
        rewind(in_format->tmp_file);
+       line_number = 0;
        out_format = final_format;
        out_format->needs_stats = 0;
        one_pass(2);
@@ -603,6 +747,8 @@ Formats:\n\
 \n\
 Format parameters:\n\
 -d, --fs=<char>                Delimiter of fields\n\
+-f, --fields=<f>,...   Set field names\n\
+-h, --header           The first line contains field names\n\
 -q, --quiet            Do not show warnings\n\
     --always-quote     Put quotes around all fields (CSV output only)\n\
     --table-sep=<n>    Separate table columns by <n> spaces (default: 2)\n\
@@ -627,7 +773,7 @@ static void NONRET bad_args(const char *msg, ...)
        exit(1);
 }
 
-static const char short_options[] = "cd:qr:twW";
+static const char short_options[] = "cd:f:hqr:twW";
 
 enum long_options {
        OPT_HELP = 256,
@@ -640,7 +786,9 @@ enum long_options {
 static const struct option long_options[] = {
        { "always-quote",       0,      NULL,   OPT_ALWAYS_QUOTE },
        { "csv",                0,      NULL,   'c' },
+       { "fields",             1,      NULL,   'f' },
        { "fs",                 1,      NULL,   'd' },
+       { "header",             0,      NULL,   'h' },
        { "quiet",              0,      NULL,   'q' },
        { "regex",              1,      NULL,   'r' },
        { "strict-ws",          0,      NULL,   'W' },
@@ -721,6 +869,12 @@ int main(int argc, char **argv)
                                else
                                        bad_args("No field delimiter given.");
                                break;
+                       case 'f':
+                               current_format()->set_field_names = optarg;
+                               break;
+                       case 'h':
+                               current_format()->has_header = 1;
+                               break;
                        case 'q':
                                current_format()->quiet = 1;
                                break;
@@ -767,6 +921,7 @@ int main(int argc, char **argv)
                bad_args("Write-only format selected for input.");
        if (!out_format->write_line)
                bad_args("Read-only format selected for output.");
+       read_header();
 
        for (int i = optind; i < argc; i++) {
                err = parse_selector(argv[i]);
@@ -775,6 +930,7 @@ int main(int argc, char **argv)
        }
        finish_parse_selectors();
 
+       write_header();
        if (out_format->needs_stats)
                two_pass();
        else