mj.ucw.cz Git - xsv.git/blobdiff - xsv.c
Added warnings and --quiet
[xsv.git] / xsv.c
diff --git a/xsv.c b/xsv.c
index 38f1fdeac88088859fae44f2835f080f971a4fbe..c21a97f63aae9bfb41413a6c9bc09b334dd2a490 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -60,6 +60,7 @@ struct format {
        enum format_id id;
        int fs;
        int quote;
+       int quiet;
        int (*read_line)(void);
        void (*write_line)(void);
 };
@@ -77,6 +78,7 @@ DECLARE_BUF(line, unsigned char);
 static fields_t in_fields, out_fields;
 static struct field *in_field;
 static line_t in_line;
+static int line_number;
 
 static void new_field(void)
 {
@@ -91,19 +93,28 @@ static void ensure_field(void)
                new_field();
 }
 
+static void warn(struct format *fmt, char *msg)  // Print msg to stderr as a warning tagged with the current line_number, unless fmt->quiet is set
+{
+       if (!fmt->quiet)  // a non-zero quiet flag on the given format suppresses the warning
+               fprintf(stderr, "Warning at line %d: %s\n", line_number, msg);
+}
+
 static int csv_read(void)
 {
        int quoted = 0;
-       // FIXME: Complain if closing quote is missing?
        for (;;) {
                int c = getchar();
 restart:
-               if (c < 0)
-                       return !!fields_count(&in_fields);
                if (c == '\r')
                        continue;
-               if (c == '\n')
-                       return 1;
+               if (c < 0 || c == '\n') {
+                       if (quoted)
+                               warn(in_format, "Missing closing quote.");
+                       if (c < 0)
+                               return !!fields_count(&in_fields);
+                       else
+                               return 1;
+               }
                if (quoted) {
                        if (c == in_format->quote) {
                                c = getchar();
@@ -128,6 +139,11 @@ restart:
        }
 }
 
+static int is_ws(int c)  // Return non-zero iff c is a space, a tab or a form feed
+{
+       return (c == ' ' || c == '\t' || c == '\f');  // newline is not in this set; ws_read tests for it before calling is_ws
+}
+
 static void csv_write(void)
 {
        unsigned char *line = line_nth(&in_line, 0);
@@ -150,6 +166,8 @@ static void csv_write(void)
                        putchar(out_format->quote);
                for (int j=0; j < f->len; j++) {
                        int c = line[f->start_pos + j];
+                       if (c == out_format->fs && !need_quotes)
+                               warn(out_format, "Field separator found inside field and quoting is turned off.");
                        if (c == out_format->quote)
                                putchar(c);
                        putchar(c);
@@ -171,7 +189,7 @@ static int ws_read(void)
                        continue;
                if (c == '\n')
                        return 1;
-               if (c == ' ' || c == '\t' || c == '\f') {
+               if (is_ws(c)) {
                        ensure_field();
                        if (!ws)
                                new_field();
@@ -185,6 +203,20 @@ static int ws_read(void)
        }
 }
 
+/*** Transforms ***/
+
+static void trim_fields(void)  // Strip leading and trailing whitespace (as defined by is_ws) from every field in in_fields
+{
+       unsigned char *line = line_nth(&in_line, 0);  // element 0 of in_line; field start positions are used as offsets from it
+       for (int i = 0; i < fields_count(&in_fields); i++) {
+               struct field *f = fields_nth(&in_fields, i);
+               while (f->len && is_ws(line[f->start_pos]))  // leading: move the start forward and shrink the length together
+                       f->start_pos++, f->len--;
+               while (f->len && is_ws(line[f->start_pos + f->len - 1]))  // trailing: only the length shrinks
+                       f->len--;  // the bytes of line are never modified; an all-whitespace field ends up with len == 0
+       }
+}
+
 /*** Field selection ***/
 
 struct selector {
@@ -250,10 +282,11 @@ Formats:\n\
 -w, --ws               Values separated by arbitrary whitespace\n\
 \n\
 Format parameters:\n\
--d, --fs=<char>        Delimiter of fields\n\
+-d, --fs=<char>                Delimiter of fields\n\
+-q, --quiet            Do not show warnings\n\
 \n\
 Other options:\n\
-(so far none)\n\
+    --trim             Trim leading and trailing whitespaces in fields\n\
 ");
        exit(0);
 }
@@ -266,15 +299,18 @@ static void bad_args(char *msg)
        exit(1);
 }
 
-static const char short_options[] = "cd:tw";
+static const char short_options[] = "cd:qtw";
 
 enum long_options {  // Option codes used in the long_options[] table for options that have no single-letter form
        OPT_HELP = 256,  // presumably starts at 256 so it cannot collide with a short option character -- confirm
+       OPT_TRIM = 257,
 };
 
 static const struct option long_options[] = {
        { "csv",                0,      NULL,   'c' },
        { "fs",                 1,      NULL,   'd' },
+       { "quiet",              0,      NULL,   'q' },
+       { "trim",               0,      NULL,   OPT_TRIM },
        { "tsv",                0,      NULL,   't' },
        { "ws",                 0,      NULL,   'w' },
        { "help",               0,      NULL,   OPT_HELP },
@@ -329,6 +365,7 @@ static struct format *current_format(void)
 int main(int argc, char **argv)
 {
        int opt;
+       int want_trim = 0;
 
        while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
                switch (opt) {
@@ -341,6 +378,9 @@ int main(int argc, char **argv)
                                else
                                        bad_args("No field delimiter given.");
                                break;
+                       case 'q':
+                               current_format()->quiet = 1;
+                               break;
                        case 't':
                                set_format(FORM_TSV);
                                break;
@@ -349,6 +389,9 @@ int main(int argc, char **argv)
                                break;
                        case OPT_HELP:
                                usage();
+                       case OPT_TRIM:
+                               want_trim = 1;
+                               break;
                        default:
                                bad_args(NULL);
                }
@@ -369,12 +412,16 @@ int main(int argc, char **argv)
        line_init(&in_line);
 
        for (;;) {
+               line_number++;
                fields_reset(&in_fields);
                line_reset(&in_line);
                in_field = NULL;
                if (!in_format->read_line())
                        break;
 
+               if (want_trim)
+                       trim_fields();
+
                fields_reset(&out_fields);
                select_fields();