mj.ucw.cz Git - xsv.git/commitdiff
Added --strict-ws (default is non-strict)
author: Martin Mares <mj@ucw.cz>
Mon, 23 Jul 2012 21:45:50 +0000 (23:45 +0200)
committer: Martin Mares <mj@ucw.cz>
Mon, 23 Jul 2012 21:45:50 +0000 (23:45 +0200)
xsv.c

diff --git a/xsv.c b/xsv.c
index 4ad9e477714db146b918eb600e8f5ba4d9f10301..6263da080216392d87dd42cf2a407ca2ecc3865c 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -70,6 +70,8 @@ struct format {
        void (*write_line)(void);
        // CSV backend:
        int always_quote;
+       // WS backend:
+       int strict_ws;
        // regex backend:
        pcre *pcre;
        pcre_extra *pcre_extra;
@@ -226,14 +228,19 @@ static int ws_read(void)
                        ws++;
                } else {
                        if (ws) {
-                               new_field(i);
+                               if (!in_field->start_pos &&
+                                   !in_field->len &&
+                                   !in_format->strict_ws)
+                                       in_field->start_pos = i;
+                               else
+                                       new_field(i);
                                ws = 0;
                        }
                        in_field->len++;
                }
        }
 
-       if (ws)
+       if (ws && in_format->strict_ws)
                new_field(n);
        return 1;
 }
@@ -358,6 +365,7 @@ Formats:\n\
 -t, --tsv              TAB-separated values (default)\n\
 -c, --csv              Comma-separated values\n\
 -w, --ws               Values separated by arbitrary whitespace\n\
+-W, --strict-ws                Like --ws, but recognize empty columns at start/end\n\
 -r, --regex=<rx>       Separator given by Perl regular expression (input only)\n\
 \n\
 Format parameters:\n\
@@ -385,7 +393,7 @@ static void bad_args(const char *msg, ...)
        exit(1);
 }
 
-static const char short_options[] = "cd:qr:tw";
+static const char short_options[] = "cd:qr:twW";
 
 enum long_options {
        OPT_HELP = 256,
@@ -399,6 +407,7 @@ static const struct option long_options[] = {
        { "fs",                 1,      NULL,   'd' },
        { "quiet",              0,      NULL,   'q' },
        { "regex",              1,      NULL,   'r' },
+       { "strict-ws",          0,      NULL,   'W' },
        { "trim",               0,      NULL,   OPT_TRIM },
        { "tsv",                0,      NULL,   't' },
        { "ws",                 0,      NULL,   'w' },
@@ -486,6 +495,10 @@ int main(int argc, char **argv)
                        case 'w':
                                set_format(FORM_WS);
                                break;
+                       case 'W':
+                               set_format(FORM_WS);
+                               current_format()->strict_ws = 1;
+                               break;
                        case OPT_ALWAYS_QUOTE:
                                if (current_format()->id != FORM_CSV)
                                        bad_args("--always-quote makes sense only for CSV.");