From: Martin Mares Date: Tue, 24 Jul 2012 14:53:43 +0000 (+0200) Subject: --strict-ws is gone, added a more general --sloppy switch X-Git-Tag: v1.0~23 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=64927d791c2d3f86b86aedf70f63abe046c8ed0a;p=xsv.git --strict-ws is gone, added a more general --sloppy switch --- diff --git a/xsv.c b/xsv.c index 8fd7277..f980634 100644 --- a/xsv.c +++ b/xsv.c @@ -99,6 +99,7 @@ struct format { int fs; int quote; int quiet; + int sloppy; int (*read_line)(struct format *fmt); void (*write_line)(struct format *fmt); void (*write_grid)(struct format *fmt, int pos); // -1=above, 1=below, 0=after header @@ -112,9 +113,6 @@ struct format { // CSV backend: int always_quote; - // WS backend: - int strict_ws; - // regex backend: pcre *pcre; pcre_extra *pcre_extra; @@ -359,7 +357,7 @@ static int ws_read(struct format *fmt) if (ws) { if (!in_field->start_pos && !in_field->len && - !fmt->strict_ws) + fmt->sloppy) in_field->start_pos = i; else new_field(i); @@ -369,7 +367,7 @@ static int ws_read(struct format *fmt) } } - if (ws && fmt->strict_ws) + if (ws && !fmt->sloppy) new_field(n); return 1; } @@ -404,17 +402,21 @@ static int regex_read(struct format *fmt) int i = 0; for (;;) { int ovec[3]; - int sep = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3); - if (sep < 0) { - if (sep != PCRE_ERROR_NOMATCH) - warn(fmt, "PCRE matching error %d", sep); + int err = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3); + if (err < 0) { + if (err != PCRE_ERROR_NOMATCH) + warn(fmt, "PCRE matching error %d", err); // No further occurrence of the separator: the rest is a single field - new_field(i); - in_field->len = n - i; + if (!fmt->sloppy || i < n) { + new_field(i); + in_field->len = n - i; + } return 1; } - new_field(i); - in_field->len = ovec[0] - i; + if (!fmt->sloppy || ovec[0]) { + new_field(i); + in_field->len = ovec[0] - i; + } i = ovec[1]; } } @@ -814,7 +816,6 @@ Formats:\n\ -t, --tsv TAB-separated values 
(default)\n\ -c, --csv Comma-separated values\n\ -w, --ws Values separated by arbitrary whitespace\n\ --W, --strict-ws Like --ws, but recognize empty columns at start/end\n\ -r, --regex=<rx> Separator given by Perl regular expression (input only)\n\ --table Format a table (output only)\n\ \n\ @@ -826,6 +827,7 @@ Format parameters:\n\ --always-quote Put quotes around all fields (CSV output only)\n\ --table-sep=<n> Separate table columns by <n> spaces (default: 2)\n\ --grid Separate table columns by grid lines\n\ +-s, --sloppy Ignore separators at the start/end of line (ws/regex only)\n\ \n\ Other options:\n\ --trim Trim leading and trailing whitespaces in fields\n\ @@ -870,7 +872,7 @@ static const struct option long_options[] = { { "header", 0, NULL, 'h' }, { "quiet", 0, NULL, 'q' }, { "regex", 1, NULL, 'r' }, - { "strict-ws", 0, NULL, 'W' }, + { "sloppy", 0, NULL, 's' }, { "table", 0, NULL, OPT_TABLE }, { "table-sep", 1, NULL, OPT_TABLE_SEP }, { "trim", 0, NULL, OPT_TRIM }, @@ -964,19 +966,20 @@ int main(int argc, char **argv) if (err) bad_args("Error compiling regex: %s", err); break; + case 's': + if (current_format()->id != FORM_WS && current_format()->id != FORM_REGEX) + bad_args("--sloppy makes sense only for --ws or --regex."); + current_format()->sloppy = 1; + break; case 't': set_format(FORM_TSV); break; case 'w': set_format(FORM_WS); break; - case 'W': - set_format(FORM_WS); - current_format()->strict_ws = 1; - break; case OPT_ALWAYS_QUOTE: if (current_format()->id != FORM_CSV) - bad_args("--always-quote makes sense only for CSV."); + bad_args("--always-quote makes sense only for --csv."); current_format()->always_quote = 1; break; case OPT_HELP: