--- /dev/null
+/*
+ * A Swiss-Army Knife for CSV-like Files
+ *
+ * (c) 2012 Martin Mares <mj@ucw.cz>
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+/*** Memory allocation ***/
+
+/*
+ * Allocate `bytes' bytes of memory or terminate the program.
+ *
+ * Never returns NULL: when the allocation fails, a message is printed
+ * to stderr and the process exits with status 1. The caller owns the
+ * returned block and releases it with free().
+ */
+static void *xmalloc(size_t bytes)
+{
+	// malloc(0) is allowed to return NULL even when memory is available;
+	// ask for a single byte instead so that it is not mistaken for OOM.
+	void *p = malloc(bytes ? bytes : 1);
+	if (!p) {
+		fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes);
+		exit(1);
+	}
+	return p;
+}
+
+/*
+ * Resize the block `old' (or allocate a fresh one if `old' is NULL)
+ * to `bytes' bytes, or terminate the program.
+ *
+ * Never returns NULL: when the allocation fails, a message is printed
+ * to stderr and the process exits with status 1. On success, `old'
+ * must no longer be used; the returned pointer replaces it.
+ */
+static void *xrealloc(void *old, size_t bytes)
+{
+	// realloc(p, 0) may free the block and return NULL, which would be
+	// misreported as running out of memory. Keep a one-byte block instead.
+	void *p = realloc(old, bytes ? bytes : 1);
+	if (!p) {
+		fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes);
+		exit(1);
+	}
+	return p;
+}
+
+/*
+ * DECLARE_BUF(name, type) generates a growable array of `type':
+ *
+ *	name_t			the buffer itself (start pointer, number of used
+ *				elements, number of allocated elements)
+ *	name_init(b)		make the buffer empty with no storage
+ *	name_reset(b)		forget all elements, keep the storage
+ *	name_count(b)		number of elements currently stored
+ *	name_push(b)		append one uninitialized element, return a pointer to it
+ *	name_nth(b, n)		pointer to the n-th element (0-based, not range-checked)
+ *
+ * Storage starts at 16 elements and doubles on demand via xrealloc(), so
+ * pointers returned earlier may become invalid after any name_push().
+ */
+#define DECLARE_BUF(name, type)							\
+	typedef struct { type *start; int count; int max; } name##_t;		\
+	static inline void name##_init(name##_t *b) { b->start = NULL; b->count = b->max = 0; }	\
+	static inline void name##_reset(name##_t *b) { b->count = 0; }		\
+	static inline int name##_count(name##_t *b) { return b->count; }	\
+	static void name##_extend(name##_t *b) {				\
+		/* Doubling past INT_MAX/2 would overflow the int capacity */	\
+		if (b->max > INT_MAX / 2) {					\
+			fprintf(stderr, "xsv: Buffer too large\n");		\
+			exit(1);						\
+		}								\
+		b->max = b->max ? 2*b->max : 16;				\
+		b->start = xrealloc(b->start, b->max * sizeof(type));		\
+	}									\
+	static inline type *name##_push(name##_t *b) {				\
+		if (b->count >= b->max) name##_extend(b);			\
+		return &b->start[b->count++];					\
+	}									\
+	static inline type *name##_nth(name##_t *b, int n) { return &b->start[n]; } \
+	// end
+
+/*** Formats and their parameters ***/
+
+/* Identifiers of the file formats selectable on the command line */
+enum format_id {
+	FORM_UNSPEC = 0,	// No format chosen
+	FORM_TSV = 1,		// Selected by -t / --tsv
+	FORM_CSV = 2,		// Selected by -c / --csv
+	FORM_WS = 3,		// Selected by -w / --ws
+};
+
+/* A format together with its parameters and the routines implementing it */
+struct format {
+	enum format_id id;		// Which format this is
+	int fs;				// Character separating fields
+	int quote;			// Quoting character, negative if the format has none
+	int (*read_line)(void);		// Reads one input line; returns zero when there is no more input
+	void (*write_line)(void);	// Writes the selected fields as one output line
+};
+
+/* Formats used for reading and for writing; NULL until chosen */
+static struct format *in_format;
+static struct format *out_format;
+
+/* One field of a line, described as a slice of the in_line buffer */
+struct field {
+	int start_pos;		// Offset of the first byte within in_line
+	int len;		// Number of bytes
+};
+
+DECLARE_BUF(fields, struct field);
+DECLARE_BUF(line, unsigned char);
+
+/* Fields found in the current input line */
+static fields_t in_fields;
+/* Fields chosen for output; they refer to the text in in_line as well */
+static fields_t out_fields;
+/* The field currently being read, or NULL if the line has no field yet */
+static struct field *in_field;
+/* Contents of all fields of the current input line, stored back to back */
+static line_t in_line;
+
+/*
+ * Append an empty field to in_fields and make it the current one.
+ * The field starts at the present end of the in_line buffer.
+ */
+static void new_field(void)
+{
+	struct field *fresh = fields_push(&in_fields);
+
+	fresh->len = 0;
+	fresh->start_pos = line_count(&in_line);
+	in_field = fresh;
+}
+
+/* Make sure a current field exists, creating the first one when needed */
+static void ensure_field(void)
+{
+	if (in_field != NULL)
+		return;
+	new_field();
+}
+
+/*
+ * Read one line from standard input and split it to fields according to
+ * in_format. Field contents are appended to in_line and their positions
+ * to in_fields; the caller resets both (and in_field) before each call.
+ *
+ * Carriage returns are dropped and a newline always ends the line, even
+ * inside quotes. Inside a quoted section, the field separator is ordinary
+ * text and a doubled quote stands for a single quote character.
+ *
+ * Returns 1 if a line was read (possibly with no fields at all) and 0 at
+ * the end of input. A final line lacking the newline is still returned
+ * as long as it contains at least one field.
+ */
+static int csv_read(void)
+{
+	int quoted = 0;
+	// FIXME: Complain if closing quote is missing?
+	for (;;) {
+		int c = getchar();
+restart:
+		if (c < 0)
+			return !!fields_count(&in_fields);
+		if (c == '\r')
+			continue;
+		if (c == '\n')
+			return 1;
+		if (quoted) {
+			if (c == in_format->quote) {
+				// Look one character ahead: anything except another quote
+				// closes the quoted section and is then handled as usual.
+				c = getchar();
+				if (c != in_format->quote) {
+					quoted = 0;
+					goto restart;
+				}
+				// Two quotes assimilate to one
+			}
+			// Fall through to pushing the character
+		} else if (c == in_format->quote) {
+			// An opening quote starts a field even if nothing follows it,
+			// so that a lone "" counts as one empty field.
+			ensure_field();
+			quoted = 1;
+			continue;
+		} else if (c == in_format->fs) {
+			// The separator finishes the current field (possibly an empty
+			// one at the start of the line) and opens the next one.
+			ensure_field();
+			new_field();
+			continue;
+		}
+		ensure_field();
+		*line_push(&in_line) = c;
+		in_field->len++;
+	}
+}
+
+/*
+ * Print the fields listed in out_fields as one line on standard output,
+ * separated by the field separator of out_format.
+ *
+ * If the format has a quote character, every field containing either the
+ * separator or the quote character gets enclosed in quotes. Occurrences
+ * of the quote character inside a field are always doubled.
+ */
+static void csv_write(void)
+{
+	const int sep = out_format->fs;
+	const int q = out_format->quote;
+	unsigned char *text = line_nth(&in_line, 0);
+	int total = fields_count(&out_fields);
+
+	for (int idx = 0; idx < total; idx++) {
+		struct field *fld = fields_nth(&out_fields, idx);
+
+		// Decide whether this field has to be wrapped in quotes
+		int wrap = 0;
+		if (q >= 0) {
+			int k = 0;
+			while (!wrap && k < fld->len) {
+				int ch = text[fld->start_pos + k];
+				wrap = (ch == sep || ch == q);
+				k++;
+			}
+		}
+
+		if (idx > 0)
+			putchar(sep);
+		if (wrap)
+			putchar(q);
+
+		for (int k = 0; k < fld->len; k++) {
+			int ch = text[fld->start_pos + k];
+			if (ch == q)
+				putchar(ch);	// Emit quote characters twice
+			putchar(ch);
+		}
+
+		if (wrap)
+			putchar(q);
+	}
+	putchar('\n');
+}
+
+/*** Field selection ***/
+
+/*
+ * A range of fields requested for output. Fields are numbered from 1;
+ * a bound which is zero or negative means "from the first field" or
+ * "up to the last field" respectively.
+ */
+struct selector {
+	int first_field;
+	int last_field;
+};
+
+DECLARE_BUF(selectors, struct selector);
+
+/* All selectors in the order in which they will be applied */
+static selectors_t selectors;
+
+/*
+ * Convert the `len' characters starting at `s' to a non-negative decimal
+ * number stored to `*num'. An empty string yields 0. Returns 1 on success,
+ * 0 if a non-digit is present or the value does not fit in an int.
+ */
+static int parse_field_number(const char *s, size_t len, int *num)
+{
+	int val = 0;
+
+	for (size_t i = 0; i < len; i++) {
+		if (s[i] < '0' || s[i] > '9')
+			return 0;
+		int digit = s[i] - '0';
+		if (val > (INT_MAX - digit) / 10)
+			return 0;
+		val = 10*val + digit;
+	}
+
+	*num = val;
+	return 1;
+}
+
+/*
+ * Parse one field selector and append it to the list of selectors.
+ *
+ * Accepted forms are "N" (a single field) and "N-M" (a range), where
+ * either number of a range may be left out to get an open end: "N-",
+ * "-M" or just "-". Omitted numbers are stored as 0.
+ *
+ * Returns NULL on success. If the selector is malformed, nothing is
+ * added and a static error message is returned; the caller must not
+ * modify or free it.
+ */
+static char *parse_selector(char *str)
+{
+	int first, last;
+	char *sep = strchr(str, '-');
+
+	if (sep) {
+		if (!parse_field_number(str, sep - str, &first) ||
+		    !parse_field_number(sep + 1, strlen(sep + 1), &last))
+			return "Invalid field selector";
+	} else {
+		if (!parse_field_number(str, strlen(str), &first))
+			return "Invalid field selector";
+		last = first;
+	}
+
+	// Push only after validation, so a rejected selector leaves no trace
+	struct selector *s = selectors_push(&selectors);
+	s->first_field = first;
+	s->last_field = last;
+
+	return NULL;
+}
+
+/*
+ * Called once all selectors from the command line have been parsed:
+ * if there were none, fall back to the open range "-".
+ */
+static void finish_parse_selectors(void)
+{
+	if (selectors_count(&selectors) != 0)
+		return;
+	parse_selector("-");
+}
+
+/*
+ * Fill out_fields with the fields of the current input line picked by
+ * the selectors, in selector order. Bounds that are zero or negative
+ * are replaced by the first or the last input field respectively.
+ * Requested fields beyond the end of the line come out empty.
+ */
+static void select_fields(void)
+{
+	int num_sel = selectors_count(&selectors);
+	int avail = fields_count(&in_fields);
+
+	for (int k = 0; k < num_sel; k++) {
+		struct selector *sel = selectors_nth(&selectors, k);
+		int from = (sel->first_field > 0) ? sel->first_field : 1;
+		int to = (sel->last_field > 0) ? sel->last_field : avail;
+
+		for (int pos = from; pos <= to; pos++) {
+			struct field *dst = fields_push(&out_fields);
+			if (pos < 1 || pos > avail) {
+				// No such input field: emit an empty one
+				dst->start_pos = dst->len = 0;
+				continue;
+			}
+			*dst = *fields_nth(&in_fields, pos - 1);
+		}
+	}
+}
+
+/*** Parsing of arguments ***/
+
+/* Print the help text to standard output and exit successfully */
+static void usage(void)
+{
+	static const char help_text[] =
+		"Usage: xsv <in-format> [<out-format>] <options> [<fields>]\n"
+		"\n"
+		"Formats:\n"
+		"-t, --tsv TAB-separated values (default)\n"
+		"-c, --csv Comma-separated values\n"
+		"-w, --ws Values separated by arbitrary whitespace\n"
+		"\n"
+		"Format parameters:\n"
+		"-d, --fs=<char> Delimiter of fields\n"
+		"\n"
+		"Other options:\n"
+		"(so far none)\n";
+
+	fputs(help_text, stdout);
+	exit(0);
+}
+
+/*
+ * Report a problem with the command line and exit with status 1.
+ * `msg' describes the problem; pass NULL to print only the hint
+ * pointing to --help.
+ */
+static void bad_args(char *msg)
+{
+	if (msg != NULL)
+		fprintf(stderr, "xsv: %s\n", msg);
+	fputs("Try `xsv --help' for more information.\n", stderr);
+	exit(1);
+}
+
+/* Short options in getopt() syntax; only -d takes an argument */
+static const char short_options[] = "cd:tw";
+
+/* Codes of options which have only a long form */
+enum long_options {
+	OPT_HELP = 256,
+};
+
+/* Long options; all except --help map to the code of their short form */
+static const struct option long_options[] = {
+	{ "csv",	no_argument,		NULL,	'c' },
+	{ "fs",		required_argument,	NULL,	'd' },
+	{ "tsv",	no_argument,		NULL,	't' },
+	{ "ws",		no_argument,		NULL,	'w' },
+	{ "help",	no_argument,		NULL,	OPT_HELP },
+	{ NULL,		0,			NULL,	0 },
+};
+
+/*
+ * Create a format of the given kind (one of enum format_id) with its
+ * default parameters and install it: the first call sets the input
+ * format, the second one the output format.
+ *
+ * Exits via bad_args() if the format has no reader or writer, or if
+ * both the input and the output format have already been set.
+ */
+static void set_format(int format_id)
+{
+	struct format *f = xmalloc(sizeof(*f));
+	memset(f, 0, sizeof(*f));
+	f->id = format_id;
+
+	switch (format_id) {
+	case FORM_TSV:
+		f->fs = '\t';
+		f->quote = -1;		// TSV has no quoting
+		f->read_line = csv_read;
+		f->write_line = csv_write;
+		break;
+	case FORM_CSV:
+		f->fs = ',';
+		f->quote = '"';
+		f->read_line = csv_read;
+		f->write_line = csv_write;
+		break;
+	case FORM_WS:
+	default:
+		// No reader nor writer exists for these; rejected below
+		break;
+	}
+
+	// Refuse the format right away instead of letting main() call
+	// a NULL function pointer later.
+	if (!f->read_line || !f->write_line)
+		bad_args("This format is not implemented yet.");
+
+	if (!in_format)
+		in_format = f;
+	else if (!out_format)
+		out_format = f;
+	else
+		bad_args("At most two format may be given.");
+}
+
+/*
+ * Return the format which format parameters should apply to, i.e. the
+ * one given most recently: the output format if it is set, otherwise
+ * the input format. When no format has been given so far, TSV becomes
+ * the input format.
+ */
+static struct format *current_format(void)
+{
+	if (!out_format && !in_format)
+		set_format(FORM_TSV);
+	return out_format ? out_format : in_format;
+}
+
+/*
+ * Entry point: parse options and field selectors, then copy standard
+ * input to standard output line by line, keeping only the selected
+ * fields and converting from the input format to the output format.
+ *
+ * Returns 0 on success and 1 if writing the output failed. Invalid
+ * arguments terminate the program through bad_args().
+ */
+int main(int argc, char **argv)
+{
+	int opt;
+
+	while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
+		switch (opt) {
+		case 'c':
+			set_format(FORM_CSV);
+			break;
+		case 'd':
+			// Input bytes are handled as values 0..255, while plain char
+			// may be signed, so convert the delimiter the same way.
+			if (optarg[0])
+				current_format()->fs = (unsigned char) optarg[0];
+			else
+				bad_args("No field delimiter given.");
+			break;
+		case 't':
+			set_format(FORM_TSV);
+			break;
+		case 'w':
+			set_format(FORM_WS);
+			break;
+		case OPT_HELP:
+			usage();
+			// usage() does not return
+		default:
+			bad_args(NULL);
+		}
+
+	// Default to TSV if no format was given; write in the input format
+	// unless a separate output format was requested.
+	current_format();
+	if (!out_format)
+		out_format = in_format;
+
+	for (int i = optind; i < argc; i++) {
+		char *err = parse_selector(argv[i]);
+		if (err)
+			bad_args(err);
+	}
+	finish_parse_selectors();
+
+	fields_init(&in_fields);
+	fields_init(&out_fields);
+	line_init(&in_line);
+
+	for (;;) {
+		fields_reset(&in_fields);
+		line_reset(&in_line);
+		in_field = NULL;
+		if (!in_format->read_line())
+			break;
+
+		fields_reset(&out_fields);
+		select_fields();
+
+		out_format->write_line();
+	}
+
+	// Output is buffered, so failures (full disk, closed pipe) may show
+	// up only now; do not pretend success if anything was lost.
+	if (fflush(stdout) != 0 || ferror(stdout)) {
+		fprintf(stderr, "xsv: Error writing output\n");
+		return 1;
+	}
+
+	return 0;
+}