mj.ucw.cz Git - xsv.git/commitdiff
Simplified CSV and WS parsers
author Martin Mares <mj@ucw.cz>
Mon, 23 Jul 2012 21:37:08 +0000 (23:37 +0200)
committer Martin Mares <mj@ucw.cz>
Mon, 23 Jul 2012 21:37:08 +0000 (23:37 +0200)
xsv.c

diff --git a/xsv.c b/xsv.c
index 7ec94da6440aee49a39f600d3a714829354a6a92..4ad9e477714db146b918eb600e8f5ba4d9f10301 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -90,17 +90,17 @@ static struct field *in_field;
 static line_t in_line;
 static int line_number;
 
-static void new_field(void)
+static void new_field(int pos)
 {
        in_field = fields_push(&in_fields);
-       in_field->start_pos = line_count(&in_line);
+       in_field->start_pos = pos;
        in_field->len = 0;
 }
 
-static void ensure_field(void)
+static void ensure_field(int pos)
 {
        if (!in_field)
-               new_field();
+               new_field(pos);
 }
 
 static void warn(struct format *fmt, char *msg, ...)
@@ -134,6 +134,7 @@ static int csv_read(void)
        int quoted = 0;
        for (;;) {
                int c = getchar();
+               int i = line_count(&in_line);
 restart:
                if (c == '\r')
                        continue;
@@ -159,11 +160,11 @@ restart:
                        quoted = 1;
                        continue;
                } else if (c == in_format->fs && !quoted) {
-                       ensure_field();
-                       new_field();
+                       ensure_field(i);
+                       new_field(i);
                        continue;
                }
-               ensure_field();
+               ensure_field(i);
                *line_push(&in_line) = c;
                in_field->len++;
        }
@@ -209,27 +210,32 @@ static void csv_write(void)
 
 static int ws_read(void)
 {
+       if (!next_line())
+               return 0;
+
+       unsigned char *line = line_first(&in_line);
+       int n = line_count(&in_line);
+       if (!n)
+               return 1;
+
        int ws = 0;
-       for (;;) {
-               int c = getchar();
-               if (c < 0)
-                       return !!fields_count(&in_fields);
-               if (c == '\r')
-                       continue;
-               if (c == '\n')
-                       return 1;
+       new_field(0);
+       for (int i=0; i<n; i++) {
+               int c = line[i];
                if (is_ws(c)) {
-                       ensure_field();
-                       if (!ws)
-                               new_field();
                        ws++;
                } else {
-                       ensure_field();
-                       *line_push(&in_line) = c;
+                       if (ws) {
+                               new_field(i);
+                               ws = 0;
+                       }
                        in_field->len++;
-                       ws = 0;
                }
        }
+
+       if (ws)
+               new_field(n);
+       return 1;
 }
 
 static const char *regex_set(struct format *f, char *rx)
@@ -265,13 +271,11 @@ static int regex_read(void)
                        if (sep != PCRE_ERROR_NOMATCH)
                                warn(in_format, "PCRE matching error %d", sep);
                        // No further occurrence of the separator: the rest is a single field
-                       new_field();
-                       in_field->start_pos = i;
+                       new_field(i);
                        in_field->len = n - i;
                        return 1;
                }
-               new_field();
-               in_field->start_pos = i;
+               new_field(i);
                in_field->len = ovec[0] - i;
                i = ovec[1];
        }