From: Martin Mares Date: Mon, 23 Jul 2012 21:45:50 +0000 (+0200) Subject: Added --strict-ws (default is non-strict) X-Git-Tag: v1.0~41 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=ffc6c37752e7b632ef872aceb4124cfa366e4415;p=xsv.git Added --strict-ws (default is non-strict) --- diff --git a/xsv.c b/xsv.c index 4ad9e47..6263da0 100644 --- a/xsv.c +++ b/xsv.c @@ -70,6 +70,8 @@ struct format { void (*write_line)(void); // CSV backend: int always_quote; + // WS backend: + int strict_ws; // regex backend: pcre *pcre; pcre_extra *pcre_extra; @@ -226,14 +228,19 @@ static int ws_read(void) ws++; } else { if (ws) { - new_field(i); + if (!in_field->start_pos && + !in_field->len && + !in_format->strict_ws) + in_field->start_pos = i; + else + new_field(i); ws = 0; } in_field->len++; } } - if (ws) + if (ws && in_format->strict_ws) new_field(n); return 1; } @@ -358,6 +365,7 @@ Formats:\n\ -t, --tsv TAB-separated values (default)\n\ -c, --csv Comma-separated values\n\ -w, --ws Values separated by arbitrary whitespace\n\ +-W, --strict-ws Like --ws, but recognize empty columns at start/end\n\ -r, --regex= Separator given by Perl regular expression (input only)\n\ \n\ Format parameters:\n\ @@ -385,7 +393,7 @@ static void bad_args(const char *msg, ...) exit(1); } -static const char short_options[] = "cd:qr:tw"; +static const char short_options[] = "cd:qr:twW"; enum long_options { OPT_HELP = 256, @@ -399,6 +407,7 @@ static const struct option long_options[] = { { "fs", 1, NULL, 'd' }, { "quiet", 0, NULL, 'q' }, { "regex", 1, NULL, 'r' }, + { "strict-ws", 0, NULL, 'W' }, { "trim", 0, NULL, OPT_TRIM }, { "tsv", 0, NULL, 't' }, { "ws", 0, NULL, 'w' }, @@ -486,6 +495,10 @@ int main(int argc, char **argv) case 'w': set_format(FORM_WS); break; + case 'W': + set_format(FORM_WS); + current_format()->strict_ws = 1; + break; case OPT_ALWAYS_QUOTE: if (current_format()->id != FORM_CSV) bad_args("--always-quote makes sense only for CSV.");