mj.ucw.cz Git - xsv.git/commitdiff
--strict-ws is gone, added a more general --sloppy switch
author Martin Mares <mj@ucw.cz>
Tue, 24 Jul 2012 14:53:43 +0000 (16:53 +0200)
committer Martin Mares <mj@ucw.cz>
Tue, 24 Jul 2012 14:53:43 +0000 (16:53 +0200)
xsv.c

diff --git a/xsv.c b/xsv.c
index 8fd72779d23e5bd11ff80711e2453adc44e31853..f980634d214036ef5e20199ea4e178bd4c235680 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -99,6 +99,7 @@ struct format {
        int fs;
        int quote;
        int quiet;
+       int sloppy;
        int (*read_line)(struct format *fmt);
        void (*write_line)(struct format *fmt);
        void (*write_grid)(struct format *fmt, int pos);        // -1=above, 1=below, 0=after header
@@ -112,9 +113,6 @@ struct format {
        // CSV backend:
        int always_quote;
 
-       // WS backend:
-       int strict_ws;
-
        // regex backend:
        pcre *pcre;
        pcre_extra *pcre_extra;
@@ -359,7 +357,7 @@ static int ws_read(struct format *fmt)
                        if (ws) {
                                if (!in_field->start_pos &&
                                    !in_field->len &&
-                                   !fmt->strict_ws)
+                                   fmt->sloppy)
                                        in_field->start_pos = i;
                                else
                                        new_field(i);
@@ -369,7 +367,7 @@ static int ws_read(struct format *fmt)
                }
        }
 
-       if (ws && fmt->strict_ws)
+       if (ws && !fmt->sloppy)
                new_field(n);
        return 1;
 }
@@ -404,17 +402,21 @@ static int regex_read(struct format *fmt)
        int i = 0;
        for (;;) {
                int ovec[3];
-               int sep = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3);
-               if (sep < 0) {
-                       if (sep != PCRE_ERROR_NOMATCH)
-                               warn(fmt, "PCRE matching error %d", sep);
+               int err = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3);
+               if (err < 0) {
+                       if (err != PCRE_ERROR_NOMATCH)
+                               warn(fmt, "PCRE matching error %d", err);
                        // No further occurrence of the separator: the rest is a single field
-                       new_field(i);
-                       in_field->len = n - i;
+                       if (!fmt->sloppy || i < n) {
+                               new_field(i);
+                               in_field->len = n - i;
+                       }
                        return 1;
                }
-               new_field(i);
-               in_field->len = ovec[0] - i;
+               if (!fmt->sloppy || ovec[0]) {
+                       new_field(i);
+                       in_field->len = ovec[0] - i;
+               }
                i = ovec[1];
        }
 }
@@ -814,7 +816,6 @@ Formats:\n\
 -t, --tsv              TAB-separated values (default)\n\
 -c, --csv              Comma-separated values\n\
 -w, --ws               Values separated by arbitrary whitespace\n\
--W, --strict-ws                Like --ws, but recognize empty columns at start/end\n\
 -r, --regex=<rx>       Separator given by Perl regular expression (input only)\n\
     --table            Format a table (output only)\n\
 \n\
@@ -826,6 +827,7 @@ Format parameters:\n\
     --always-quote     Put quotes around all fields (CSV output only)\n\
     --table-sep=<n>    Separate table columns by <n> spaces (default: 2)\n\
     --grid             Separate table columns by grid lines\n\
+-s, --sloppy           Ignore separators at the start/end of line (ws/regex only)\n\
 \n\
 Other options:\n\
     --trim             Trim leading and trailing whitespaces in fields\n\
@@ -870,7 +872,7 @@ static const struct option long_options[] = {
        { "header",             0,      NULL,   'h' },
        { "quiet",              0,      NULL,   'q' },
        { "regex",              1,      NULL,   'r' },
-       { "strict-ws",          0,      NULL,   'W' },
+       { "sloppy",             0,      NULL,   's' },
        { "table",              0,      NULL,   OPT_TABLE },
        { "table-sep",          1,      NULL,   OPT_TABLE_SEP },
        { "trim",               0,      NULL,   OPT_TRIM },
@@ -964,19 +966,20 @@ int main(int argc, char **argv)
                                if (err)
                                        bad_args("Error compiling regex: %s", err);
                                break;
+                       case 's':
+                               if (current_format()->id != FORM_WS && current_format()->id != FORM_REGEX)
+                                       bad_args("--sloppy makes sense only for --ws or --regex.");
+                               current_format()->sloppy = 1;
+                               break;
                        case 't':
                                set_format(FORM_TSV);
                                break;
                        case 'w':
                                set_format(FORM_WS);
                                break;
-                       case 'W':
-                               set_format(FORM_WS);
-                               current_format()->strict_ws = 1;
-                               break;
                        case OPT_ALWAYS_QUOTE:
                                if (current_format()->id != FORM_CSV)
-                                       bad_args("--always-quote makes sense only for CSV.");
+                                       bad_args("--always-quote makes sense only for --csv.");
                                current_format()->always_quote = 1;
                                break;
                        case OPT_HELP: