X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=xsv.c;h=c21a97f63aae9bfb41413a6c9bc09b334dd2a490;hb=1f2e6679a8cbcbe3d79055a85d35a31e8880d60e;hp=38f1fdeac88088859fae44f2835f080f971a4fbe;hpb=f13d72b8bb0640b1b52ab7257377e202f7520c4e;p=xsv.git diff --git a/xsv.c b/xsv.c index 38f1fde..c21a97f 100644 --- a/xsv.c +++ b/xsv.c @@ -60,6 +60,7 @@ struct format { enum format_id id; int fs; int quote; + int quiet; int (*read_line)(void); void (*write_line)(void); }; @@ -77,6 +78,7 @@ DECLARE_BUF(line, unsigned char); static fields_t in_fields, out_fields; static struct field *in_field; static line_t in_line; +static int line_number; static void new_field(void) { @@ -91,19 +93,28 @@ static void ensure_field(void) new_field(); } +static void warn(struct format *fmt, char *msg) +{ + if (!fmt->quiet) + fprintf(stderr, "Warning at line %d: %s\n", line_number, msg); +} + static int csv_read(void) { int quoted = 0; - // FIXME: Complain if closing quote is missing? for (;;) { int c = getchar(); restart: - if (c < 0) - return !!fields_count(&in_fields); if (c == '\r') continue; - if (c == '\n') - return 1; + if (c < 0 || c == '\n') { + if (quoted) + warn(in_format, "Missing closing quote."); + if (c < 0) + return !!fields_count(&in_fields); + else + return 1; + } if (quoted) { if (c == in_format->quote) { c = getchar(); @@ -128,6 +139,11 @@ restart: } } +static int is_ws(int c) +{ + return (c == ' ' || c == '\t' || c == '\f'); +} + static void csv_write(void) { unsigned char *line = line_nth(&in_line, 0); @@ -150,6 +166,8 @@ static void csv_write(void) putchar(out_format->quote); for (int j=0; j < f->len; j++) { int c = line[f->start_pos + j]; + if (c == out_format->fs && !need_quotes) + warn(out_format, "Field separator found inside field and quoting is turned off."); if (c == out_format->quote) putchar(c); putchar(c); @@ -171,7 +189,7 @@ static int ws_read(void) continue; if (c == '\n') return 1; - if (c == ' ' || c == '\t' || c == '\f') { + if (is_ws(c)) { ensure_field(); if (!ws) new_field(); @@ -185,6 +203,20 @@ static int ws_read(void) } } +/*** Transforms ***/ + +static void trim_fields(void) +{ + unsigned char *line = line_nth(&in_line, 0); + for (int i = 0; i < fields_count(&in_fields); i++) { + struct field *f = fields_nth(&in_fields, i); + while (f->len && is_ws(line[f->start_pos])) + f->start_pos++, f->len--; + while (f->len && is_ws(line[f->start_pos + f->len - 1])) + f->len--; + } +} + /*** Field selection ***/ struct selector { @@ -250,10 +282,11 @@ Formats:\n\ -w, --ws Values separated by arbitrary whitespace\n\ \n\ Format parameters:\n\ --d, --fs= Delimiter of fields\n\ +-d, --fs= Delimiter of fields\n\ +-q, --quiet Do not show warnings\n\ \n\ Other options:\n\ -(so far none)\n\ + --trim Trim leading and trailing whitespaces in fields\n\ "); exit(0); } @@ -266,15 +299,18 @@ static void bad_args(char *msg) exit(1); } -static const char short_options[] = "cd:tw"; +static const char short_options[] = "cd:qtw"; enum long_options { OPT_HELP = 256, + OPT_TRIM = 257, }; static const struct option long_options[] = { { "csv", 0, NULL, 'c' }, { "fs", 1, NULL, 'd' }, + { "quiet", 0, NULL, 'q' }, + { "trim", 0, NULL, OPT_TRIM }, { "tsv", 0, NULL, 't' }, { "ws", 0, NULL, 'w' }, { "help", 0, NULL, OPT_HELP }, @@ -329,6 +365,7 @@ static struct format *current_format(void) int main(int argc, char **argv) { int opt; + int want_trim = 0; while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0) switch (opt) { @@ -341,6 +378,9 @@ int main(int argc, char **argv) else bad_args("No field delimiter given."); break; + case 'q': + current_format()->quiet = 1; + break; case 't': set_format(FORM_TSV); break; @@ -349,6 +389,9 @@ int main(int argc, char **argv) break; case OPT_HELP: usage(); + case OPT_TRIM: + want_trim = 1; + break; default: bad_args(NULL); } @@ -369,12 +412,16 @@ int main(int argc, char **argv) line_init(&in_line); for (;;) { + line_number++; fields_reset(&in_fields); line_reset(&in_line); in_field = NULL; if (!in_format->read_line()) break; + if (want_trim) + trim_fields(); + fields_reset(&out_fields); select_fields();