X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=xsv.c;h=4615d708dadc869542510462f0f8b4d0b660c9d3;hb=24c52bb479bd520afb6618b3adefbc425ab93522;hp=8fd72779d23e5bd11ff80711e2453adc44e31853;hpb=60cd9ac013a349e45a50c0b2d17b0b30931d19c2;p=xsv.git diff --git a/xsv.c b/xsv.c index 8fd7277..4615d70 100644 --- a/xsv.c +++ b/xsv.c @@ -24,6 +24,9 @@ #define UNUSED #endif +static void select_fields(void); +static void select_all_fields(void); + /*** General functions ***/ static void NONRET die(char *msg, ...) @@ -99,6 +102,7 @@ struct format { int fs; int quote; int quiet; + int sloppy; int (*read_line)(struct format *fmt); void (*write_line)(struct format *fmt); void (*write_grid)(struct format *fmt, int pos); // -1=above, 1=below, 0=after header @@ -112,9 +116,6 @@ struct format { // CSV backend: int always_quote; - // WS backend: - int strict_ws; - // regex backend: pcre *pcre; pcre_extra *pcre_extra; @@ -359,7 +360,7 @@ static int ws_read(struct format *fmt) if (ws) { if (!in_field->start_pos && !in_field->len && - !fmt->strict_ws) + fmt->sloppy) in_field->start_pos = i; else new_field(i); @@ -369,7 +370,7 @@ static int ws_read(struct format *fmt) } } - if (ws && fmt->strict_ws) + if (ws && !fmt->sloppy) new_field(n); return 1; } @@ -404,17 +405,21 @@ static int regex_read(struct format *fmt) int i = 0; for (;;) { int ovec[3]; - int sep = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3); - if (sep < 0) { - if (sep != PCRE_ERROR_NOMATCH) - warn(fmt, "PCRE matching error %d", sep); + int err = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3); + if (err < 0) { + if (err != PCRE_ERROR_NOMATCH) + warn(fmt, "PCRE matching error %d", err); // No further occurrence of the separator: the rest is a single field - new_field(i); - in_field->len = n - i; + if (!fmt->sloppy || i < n) { + new_field(i); + in_field->len = n - i; + } return 1; } - new_field(i); - in_field->len = ovec[0] - i; + if (!fmt->sloppy || ovec[0]) { + new_field(i); + in_field->len = ovec[0] - i; + } i = ovec[1]; } } @@ -609,20 +614,22 @@ static void write_header(void) return; } + int want_select_fields = 0; if (out_format->set_field_names) { struct field_names *fn = xmalloc_zero(sizeof(*fn)); out_format->field_names = fn; add_field_names(fn, out_format->set_field_names); - } else if (in_format->field_names) + } else if (in_format->field_names) { out_format->field_names = in_format->field_names; - else + want_select_fields = 1; + } else die("Output header requested, but no field names specified"); line_reset(&in_line); - fields_reset(&out_fields); + fields_reset(&in_fields); struct field_names *fn = out_format->field_names; for (int i = 0; i < stringarray_count(&fn->names); i++) { - struct field *f = fields_push(&out_fields); + struct field *f = fields_push(&in_fields); f->start_pos = line_count(&in_line); f->len = 0; char *s = *stringarray_nth(&fn->names, i); @@ -632,6 +639,11 @@ static void write_header(void) } } + if (want_select_fields) + select_fields(); + else + select_all_fields(); + // This is tricky: when we are formatting a table, field names are normally // calculated in pass 1, but the header is written in pass 2, so we have to // update column statistics, because field name can be too wide to fit. @@ -694,7 +706,7 @@ static int parse_field(char *str) if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0) return f; - die("Unknown field %s", str); + die("Unknown field `%s'", str); } static char *parse_selector(char *str) @@ -814,7 +826,6 @@ Formats:\n\ -t, --tsv TAB-separated values (default)\n\ -c, --csv Comma-separated values\n\ -w, --ws Values separated by arbitrary whitespace\n\ --W, --strict-ws Like --ws, but recognize empty columns at start/end\n\ -r, --regex= Separator given by Perl regular expression (input only)\n\ --table Format a table (output only)\n\ \n\ @@ -826,6 +837,7 @@ Format parameters:\n\ --always-quote Put quotes around all fields (CSV output only)\n\ --table-sep= Separate table columns by spaces (default: 2)\n\ --grid Separate table columns by grid lines\n\ +-s, --sloppy Ignore separators at the start/end of line (ws/regex only)\n\ \n\ Other options:\n\ --trim Trim leading and trailing whitespaces in fields\n\ @@ -870,7 +882,7 @@ static const struct option long_options[] = { { "header", 0, NULL, 'h' }, { "quiet", 0, NULL, 'q' }, { "regex", 1, NULL, 'r' }, - { "strict-ws", 0, NULL, 'W' }, + { "sloppy", 0, NULL, 's' }, { "table", 0, NULL, OPT_TABLE }, { "table-sep", 1, NULL, OPT_TABLE_SEP }, { "trim", 0, NULL, OPT_TRIM }, @@ -964,19 +976,20 @@ int main(int argc, char **argv) if (err) bad_args("Error compiling regex: %s", err); break; + case 's': + if (current_format()->id != FORM_WS && current_format()->id != FORM_REGEX) + bad_args("--sloppy makes sense only for --ws or --regex."); + current_format()->sloppy = 1; + break; case 't': set_format(FORM_TSV); break; case 'w': set_format(FORM_WS); break; - case 'W': - set_format(FORM_WS); - current_format()->strict_ws = 1; - break; case OPT_ALWAYS_QUOTE: if (current_format()->id != FORM_CSV) - bad_args("--always-quote makes sense only for CSV."); + bad_args("--always-quote makes sense only for --csv."); current_format()->always_quote = 1; break; case OPT_HELP: