]> mj.ucw.cz Git - xsv.git/blob - xsv.c
872090f02abdb73fa19675cd8e8902895a89fb3a
[xsv.git] / xsv.c
1 /*
2  *      A Swiss-Army Knife for CSV-like Files
3  *
4  *      (c) 2012 Martin Mares <mj@ucw.cz>
5  */
6
7 #define _GNU_SOURCE
8
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <locale.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include <pcre.h>
18
/*
 * Function and parameter attributes understood by GNU-compatible compilers.
 * On compilers that do not define __GNUC__ they expand to nothing.
 */
#ifndef __GNUC__
#define NONRET
#define UNUSED
#else
#define NONRET __attribute__((noreturn))
#define UNUSED __attribute__((unused))
#endif
26
27 /*** General functions ***/
28
29 static void NONRET die(char *msg, ...)
30 {
31         va_list args;
32         va_start(args, msg);
33         fprintf(stderr, "xsv: ");
34         vfprintf(stderr, msg, args);
35         fputc('\n', stderr);
36         va_end(args);
37         exit(1);
38 }
39
40 /*** Memory allocation ***/
41
/*
 * malloc() wrapper which never returns NULL: when the allocation fails,
 * the program is terminated via die().
 */
static void *xmalloc(size_t bytes)
{
	void *block = malloc(bytes);

	if (block == NULL)
		die("Out of memory (cannot allocate %zu bytes)", bytes);
	return block;
}
49
/* Like xmalloc(), but the returned block is filled with zero bytes. */
static void *xmalloc_zero(size_t bytes)
{
	// memset() returns its first argument, i.e. the freshly allocated block
	return memset(xmalloc(bytes), 0, bytes);
}
56
/*
 * realloc() wrapper: resizes the block `old' to `bytes' bytes and
 * terminates the program via die() when realloc() returns NULL.
 */
static void *xrealloc(void *old, size_t bytes)
{
	void *block = realloc(old, bytes);

	if (block == NULL)
		die("Out of memory (cannot allocate %zu bytes)", bytes);
	return block;
}
64
/*
 * DECLARE_BUF(name, type) defines a growable array of <type> called
 * name##_t and the functions operating on it:
 *
 *	name##_init(b)		make the buffer empty, with no storage
 *	name##_reset(b)		drop all items, but keep the storage
 *	name##_count(b)		number of items stored
 *	name##_push(b)		append one uninitialized item, return a pointer to it
 *	name##_first(b)		pointer to item 0 (NULL if nothing was ever pushed)
 *	name##_nth(b, n)	pointer to item n (no bounds checking)
 *
 * The storage starts at 16 items and doubles whenever it is full, so
 * pointers obtained earlier may be invalidated by a later push. Running
 * out of memory or exceeding the range of the int capacity is fatal.
 */
#define DECLARE_BUF(name, type) \
	typedef struct { type *start; int count; int max; } name##_t;				\
	static inline void name##_init(name##_t *b) { b->start = NULL; b->count = b->max = 0; }	\
	static inline void name##_reset(name##_t *b) { b->count = 0; }				\
	static inline int name##_count(name##_t *b) { return b->count; }			\
	static void name##_extend(name##_t *b) {						\
		/* Doubling past INT_MAX would be a signed overflow */				\
		if (b->max > INT_MAX / 2)							\
			die("Buffer too large (cannot grow beyond %d items)", b->max);		\
		b->max = b->max ? 2*b->max : 16;						\
		b->start = xrealloc(b->start, b->max * sizeof(type));				\
	}											\
	static inline type *name##_push(name##_t *b) {						\
		if (b->count >= b->max) name##_extend(b);					\
		return &b->start[b->count++];							\
	}											\
	static inline type *name##_first(name##_t *b) { return b->start; }			\
	static inline type *name##_nth(name##_t *b, int n) { return &b->start[n]; }

DECLARE_BUF(intarray, int);
DECLARE_BUF(stringarray, char *);
84
85 /*** Formats and their parameters ***/
86
/*
 * Identifiers of the known formats. FORM_UNSPEC is 0, which is what a
 * zero-filled struct format contains.
 */
enum format_id {
	FORM_UNSPEC = 0,
	FORM_TSV,		// TAB-separated values
	FORM_CSV,		// Comma-separated values
	FORM_WS,		// Values separated by whitespace
	FORM_REGEX,		// Separator given by a regular expression
	FORM_TMP,		// Intermediate temporary file used by two_pass()
	FORM_TABLE,		// Formatted table
};
96
97 struct format {
98         enum format_id id;
99         int fs;
100         int quote;
101         int quiet;
102         int (*read_line)(struct format *fmt);
103         void (*write_line)(struct format *fmt);
104         void (*write_grid)(struct format *fmt, int pos);        // -1=above, 1=below, 0=after header
105         int needs_stats;
106
107         // Field names
108         int has_header;
109         char *set_field_names;
110         struct field_names *field_names;
111
112         // CSV backend:
113         int always_quote;
114
115         // WS backend:
116         int strict_ws;
117
118         // regex backend:
119         pcre *pcre;
120         pcre_extra *pcre_extra;
121
122         // Temporary file backend:
123         FILE *tmp_file;
124
125         // Table backend:
126         int table_sep;
127         int table_grid;
128 };
129
130 static struct format *in_format, *out_format;
131 static int want_trim;
132
133 struct field {
134         int start_pos;
135         int len;
136 };
137
138 DECLARE_BUF(fields, struct field);
139 DECLARE_BUF(line, unsigned char);
140
141 static fields_t in_fields, out_fields;
142 static struct field *in_field;
143 static line_t in_line;
144 static int line_number;
145
146 static int read_line(void)
147 {
148         fields_reset(&in_fields);
149         line_reset(&in_line);
150         in_field = NULL;
151         if (!in_format->read_line(in_format))
152                 return 0;
153         if (ferror_unlocked(stdin))
154                 die("I/O error when reading standard input");
155         return 1;
156 }
157
158 static void write_line(void)
159 {
160         out_format->write_line(out_format);
161         if (ferror_unlocked(stdout))
162                 die("I/O error when writing standard input");
163 }
164
165 static void write_grid(int pos)
166 {
167         if (out_format->write_grid) {
168                 out_format->write_grid(out_format, pos);
169                 if (ferror_unlocked(stdout))
170                         die("I/O error when writing standard input");
171         }
172 }
173
174 static void new_field(int pos)
175 {
176         in_field = fields_push(&in_fields);
177         in_field->start_pos = pos;
178         in_field->len = 0;
179 }
180
181 static void ensure_field(int pos)
182 {
183         if (!in_field)
184                 new_field(pos);
185 }
186
187 // FIXME: Use elsewhere
188 static unsigned char *get_field(fields_t *fields, int i, int *len)
189 {
190         struct field *f = fields_nth(fields, i);
191         *len = f->len;
192         return line_nth(&in_line, f->start_pos);
193 }
194
195 static void warn(struct format *fmt, char *msg, ...)
196 {
197         if (!fmt->quiet) {
198                 fprintf(stderr, "Warning at line %d: ", line_number);
199                 va_list args;
200                 va_start(args, msg);
201                 vfprintf(stderr, msg, args);
202                 va_end(args);
203                 fputc('\n', stderr);
204         }
205 }
206
207 static int next_line(void)
208 {
209         for (;;) {
210                 int c = getchar_unlocked();
211                 if (c == '\r')
212                         continue;
213                 if (c < 0)
214                         return !!line_count(&in_line);
215                 if (c == '\n')
216                         return 1;
217                 *line_push(&in_line) = c;
218         }
219 }
220
221 static int field_chars(struct field *f)
222 {
223         unsigned char *s = line_nth(&in_line, f->start_pos);
224         int i = 0;
225         mbstate_t mbs;
226         memset(&mbs, 0, sizeof(mbs));
227
228         int chars = 0;
229         while (i < f->len) {
230                 size_t k = mbrlen((char *) s + i, f->len - i, &mbs);
231                 if ((int) k <= 0)
232                         break;
233                 i += k;
234                 chars++;
235         }
236
237         return chars;
238 }
239
240 /*** Field statistics ***/
241
242 static intarray_t column_widths;
243
244 static void update_stats(void)
245 {
246         for (int i = 0; i < fields_count(&out_fields); i++) {
247                 struct field *f = fields_nth(&out_fields, i);
248                 intarray_t *w = &column_widths;
249
250                 while (i >= intarray_count(w))
251                         *intarray_push(w) = 0;
252                 int fw = field_chars(f);
253                 if (*intarray_nth(w, i) < fw)
254                         *intarray_nth(w, i) = fw;
255         }
256 }
257
/*** CSV/TSV back-end ***/
259
260 static int csv_read(struct format *fmt)
261 {
262         int quoted = 0;
263         for (;;) {
264                 int c = getchar_unlocked();
265                 int i = line_count(&in_line);
266 restart:
267                 if (c == '\r')
268                         continue;
269                 if (c < 0 || c == '\n') {
270                         if (quoted)
271                                 warn(fmt, "Missing closing quote.");
272                         if (c < 0)
273                                 return !!fields_count(&in_fields);
274                         else
275                                 return 1;
276                 }
277                 if (quoted) {
278                         if (c == fmt->quote) {
279                                 c = getchar_unlocked();
280                                 if (c != fmt->quote) {
281                                         quoted = 0;
282                                         goto restart;
283                                 }
284                                 // Two quotes assimilate to one
285                         }
286                         // Fall through to pushing the character
287                 } else if (c == fmt->quote) {
288                         quoted = 1;
289                         continue;
290                 } else if (c == fmt->fs && !quoted) {
291                         ensure_field(i);
292                         new_field(i);
293                         continue;
294                 }
295                 ensure_field(i);
296                 *line_push(&in_line) = c;
297                 in_field->len++;
298         }
299 }
300
/* Return 1 if c is a space, a tab or a form feed, 0 otherwise. */
static int is_ws(int c)
{
	switch (c) {
		case ' ':
		case '\t':
		case '\f':
			return 1;
		default:
			return 0;
	}
}
305
306 static void csv_write(struct format *fmt)
307 {
308         unsigned char *line = line_first(&in_line);
309         int n = fields_count(&out_fields);
310         for (int i=0; i<n; i++) {
311                 struct field *f = fields_nth(&out_fields, i);
312                 int need_quotes = 0;
313                 if (fmt->quote >= 0) {
314                         need_quotes = fmt->always_quote;
315                         for (int j=0; !need_quotes && j < f->len; j++) {
316                                 int c = line[f->start_pos + j];
317                                 if (c == fmt->fs || c == fmt->quote)
318                                         need_quotes = 1;
319                         }
320                 }
321                 if (i)
322                         putchar_unlocked(fmt->fs);
323                 if (need_quotes)
324                         putchar_unlocked(fmt->quote);
325                 for (int j=0; j < f->len; j++) {
326                         int c = line[f->start_pos + j];
327                         if (c == fmt->fs && !need_quotes)
328                                 warn(fmt, "Field separator found inside field and quoting is turned off.");
329                         if (c == fmt->quote)
330                                 putchar_unlocked(c);
331                         putchar_unlocked(c);
332                 }
333                 if (need_quotes)
334                         putchar_unlocked(fmt->quote);
335         }
336         putchar_unlocked('\n');
337 }
338
339 /*** White-space back-end ***/
340
341 static int ws_read(struct format *fmt)
342 {
343         if (!next_line())
344                 return 0;
345
346         unsigned char *line = line_first(&in_line);
347         int n = line_count(&in_line);
348         if (!n)
349                 return 1;
350
351         int ws = 0;
352         new_field(0);
353         for (int i=0; i<n; i++) {
354                 int c = line[i];
355                 if (is_ws(c)) {
356                         ws++;
357                 } else {
358                         if (ws) {
359                                 if (!in_field->start_pos &&
360                                     !in_field->len &&
361                                     !fmt->strict_ws)
362                                         in_field->start_pos = i;
363                                 else
364                                         new_field(i);
365                                 ws = 0;
366                         }
367                         in_field->len++;
368                 }
369         }
370
371         if (ws && fmt->strict_ws)
372                 new_field(n);
373         return 1;
374 }
375
376 /*** Regex back-end ***/
377
378 static const char *regex_set(struct format *f, char *rx)
379 {
380         const char *err;
381         int errpos;
382         f->pcre = pcre_compile(rx, PCRE_DOLLAR_ENDONLY, &err, &errpos, NULL);
383         if (!f->pcre)
384                 return err;
385
386         f->pcre_extra = pcre_study(f->pcre, 0, &err);
387         if (!f->pcre_extra)
388                 return err;
389
390         return NULL;
391 }
392
393 static int regex_read(struct format *fmt)
394 {
395         if (!next_line())
396                 return 0;
397
398         unsigned char *c = line_first(&in_line);
399         int n = line_count(&in_line);
400         if (!n)
401                 return 1;
402
403         int i = 0;
404         for (;;) {
405                 int ovec[3];
406                 int sep = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3);
407                 if (sep < 0) {
408                         if (sep != PCRE_ERROR_NOMATCH)
409                                 warn(fmt, "PCRE matching error %d", sep);
410                         // No further occurrence of the separator: the rest is a single field
411                         new_field(i);
412                         in_field->len = n - i;
413                         return 1;
414                 }
415                 new_field(i);
416                 in_field->len = ovec[0] - i;
417                 i = ovec[1];
418         }
419 }
420
421 /*** Table back-end ***/
422
423 static void table_write(struct format *fmt)
424 {
425         for (int i = 0; i < intarray_count(&column_widths); i++) {
426                 if (fmt->table_grid) {
427                         putchar_unlocked('|');
428                         printf("%*s", fmt->table_sep / 2, "");
429                 } else if (i)
430                         printf("%*s", fmt->table_sep, "");
431
432                 int cw = *intarray_nth(&column_widths, i);
433                 int fw = 0;
434                 if (i < fields_count(&out_fields)) {
435                         int len;
436                         unsigned char *p = get_field(&out_fields, i, &len);
437                         fw = field_chars(fields_nth(&out_fields, i));
438                         if (fw > cw) {
439                                 warn(fmt, "Internal error: Wrongly calculated column width (%d > %d)", fw, cw);
440                                 cw = fw;
441                         }
442                         while (len--)
443                                 putchar(*p++);
444                 }
445                 while (fw < cw) {
446                         putchar_unlocked(' ');
447                         fw++;
448                 }
449
450                 if (fmt->table_grid)
451                         printf("%*s", fmt->table_sep - fmt->table_sep / 2, "");
452         }
453
454         if (fmt->table_grid)
455                 putchar_unlocked('|');
456         putchar_unlocked('\n');
457 }
458
459 static void table_write_grid(struct format *fmt, int pos UNUSED)
460 {
461         if (!fmt->table_grid)
462                 return;
463
464         for (int i = 0; i < intarray_count(&column_widths); i++) {
465                 putchar_unlocked('+');
466                 int w = fmt->table_sep + *intarray_nth(&column_widths, i);      // FIXME: Avoid the *
467                 while (w--)
468                         putchar('-');
469         }
470         putchar_unlocked('+');
471         putchar_unlocked('\n');
472 }
473
474 /*** Temporary file back-end ***/
475
476 static int tmp_read(struct format *fmt)
477 {
478         FILE *tf = fmt->tmp_file;
479
480         for (;;) {
481                 int c = getc_unlocked(tf);
482                 if (c < 0)
483                         return 0;
484                 if (c == 0xff)
485                         return 1;
486                 if (c == 0xfe) {
487                         c = getc_unlocked(tf);
488                         c = (c << 8) | getc_unlocked(tf);
489                         c = (c << 8) | getc_unlocked(tf);
490                         c = (c << 8) | getc_unlocked(tf);
491                 }
492                 new_field(line_count(&in_line));
493                 in_field->len = c;
494                 while (c--) {
495                         int x = getc_unlocked(tf);
496                         if (x < 0) {
497                                 warn(fmt, "Truncated temporary file");
498                                 return 0;
499                         }
500                         *line_push(&in_line) = x;
501                 }
502         }
503
504         if (ferror_unlocked(tf))
505                 die("I/O error when reading temporary file");
506 }
507
508 static void tmp_write(struct format *fmt)
509 {
510         FILE *tf = fmt->tmp_file;
511
512         for (int i = 0; i < fields_count(&out_fields); i++) {
513                 struct field *f = fields_nth(&out_fields, i);
514                 if (f->len < 0xfe)
515                         putc_unlocked(f->len, tf);
516                 else {
517                         putc_unlocked(0xfe, tf);
518                         putc_unlocked((f->len >> 24) & 0xff, tf);
519                         putc_unlocked((f->len >> 16) & 0xff, tf);
520                         putc_unlocked((f->len >> 8) & 0xff, tf);
521                         putc_unlocked(f->len & 0xff, tf);
522                 }
523
524                 unsigned char *p = line_nth(&in_line, f->start_pos);
525                 for (int j = 0; j < f->len; j++)
526                         putc_unlocked(*p++, tf);
527         }
528         putc_unlocked(0xff, tf);
529
530         if (ferror_unlocked(tf))
531                 die("I/O error when writing temporary file");
532 }
533
534 /*** Transforms ***/
535
536 static void trim_fields(void)
537 {
538         unsigned char *line = line_first(&in_line);
539         for (int i = 0; i < fields_count(&in_fields); i++) {
540                 struct field *f = fields_nth(&in_fields, i);
541                 while (f->len && is_ws(line[f->start_pos]))
542                         f->start_pos++, f->len--;
543                 while (f->len && is_ws(line[f->start_pos + f->len - 1]))
544                         f->len--;
545         }
546 }
547
548 /*** Field names and headers ***/
549
550 struct field_names {
551         stringarray_t names;
552 };
553
554 static void add_field(struct field_names *fn, char *name, int namelen)
555 {
556         char *n = xmalloc(namelen + 1);
557         memcpy(n, name, namelen);
558         n[namelen] = 0;
559         *stringarray_push(&fn->names) = n;
560 }
561
/*
 * Split `names' at commas and append every part (including empty ones)
 * to the list of field names. The input string is not modified.
 */
static void add_field_names(struct field_names *fn, char *names)
{
	char *cur = names;

	while (cur) {
		char *comma = strchr(cur, ',');
		if (comma) {
			add_field(fn, cur, (int) (comma - cur));
			cur = comma + 1;
		} else {
			add_field(fn, cur, (int) strlen(cur));
			cur = NULL;
		}
	}
}
572
573 static void read_header(void)
574 {
575         if (!(in_format->has_header || in_format->set_field_names))
576                 return;
577
578         struct field_names *fn = xmalloc_zero(sizeof(*fn));
579         in_format->field_names = fn;
580
581         if (in_format->has_header) {
582                 if (!read_line())
583                         die("Missing input header");
584         }
585
586         if (in_format->set_field_names) {
587                 add_field_names(fn, in_format->set_field_names);
588         } else {
589                 for (int i = 0; i < fields_count(&in_fields); i++) {
590                         int len;
591                         char *s = (char *) get_field(&in_fields, i, &len);
592                         add_field(fn, s, len);
593                 }
594         }
595 }
596
597 static void write_header(void)
598 {
599         if (!out_format->has_header) {
600                 write_grid(-1);
601                 return;
602         }
603
604         if (out_format->set_field_names) {
605                 struct field_names *fn = xmalloc_zero(sizeof(*fn));
606                 out_format->field_names = fn;
607                 add_field_names(fn, out_format->set_field_names);
608         } else if (in_format->field_names)
609                 out_format->field_names = in_format->field_names;
610         else
611                 die("Output header requested, but no field names specified");
612
613         line_reset(&in_line);
614         fields_reset(&out_fields);
615         struct field_names *fn = out_format->field_names;
616         for (int i = 0; i < stringarray_count(&fn->names); i++) {
617                 struct field *f = fields_push(&out_fields);
618                 f->start_pos = line_count(&in_line);
619                 f->len = 0;
620                 char *s = *stringarray_nth(&fn->names, i);
621                 while (*s) {
622                         *line_push(&in_line) = *s++;
623                         f->len++;
624                 }
625         }
626
627         // This is tricky: when we are formatting a table, field names are normally
628         // calculated in pass 1, but the header is written in pass 2, so we have to
629         // update column statistics, because field name can be too wide to fit.
630         update_stats();
631         write_grid(-1);
632         write_line();
633         write_grid(0);
634 }
635
/* Write the end of the output, which is just the bottom grid line. */
static void write_footer(void)
{
	const int below = 1;

	write_grid(below);
}
640
641 static int find_field_by_name(struct field_names *fn, char *name)
642 {
643         for (int i = 0; i < stringarray_count(&fn->names); i++)
644                 if (!strcmp(*stringarray_nth(&fn->names, i), name))
645                         return i + 1;
646         return -1;
647 }
648
649 /*** Field selection ***/
650
651 struct selector {
652         int first_field, last_field;            // 0 means "boundary"
653 };
654
655 DECLARE_BUF(selectors, struct selector);
656 static selectors_t selectors;
657
/*
 * Parse a non-negative decimal number consisting of digits only.
 *
 * Returns the value, or -1 if the string contains a character other than
 * a digit or if the number is too large (another digit follows a value
 * of 100000000 or more). An empty string yields 0.
 */
static int parse_field_num(char *str)
{
	int value = 0;

	for (char *p = str; *p; p++) {
		if (*p < '0' || *p > '9' || value >= 100000000)
			return -1;
		value = 10*value + (*p - '0');
	}
	return value;
}
672
673 static int parse_field(char *str)
674 {
675         if (!*str)
676                 return 0;
677
678         int f = parse_field_num(str);
679         if (f > 0)
680                 return f;
681
682         if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0)
683                 return f;
684
685         die("Unknown field %s", str);
686 }
687
688 static char *parse_selector(char *str)
689 {
690         char buf[strlen(str) + 1];
691         strcpy(buf, str);
692
693         struct selector *s = selectors_push(&selectors);
694         char *sep = strchr(buf, '-');
695         if (sep) {
696                 *sep++ = 0;
697                 s->first_field = parse_field(buf);
698                 s->last_field = parse_field(sep);
699         } else
700                 s->first_field = s->last_field = parse_field(buf);
701
702         return NULL;
703 }
704
705 static void finish_parse_selectors(void)
706 {
707         if (!selectors_count(&selectors))
708                 parse_selector("-");
709 }
710
711 static void select_fields(void)
712 {
713         for (int i = 0; i < selectors_count(&selectors); i++) {
714                 struct selector *s = selectors_nth(&selectors, i);
715                 int first = s->first_field;
716                 if (first <= 0)
717                         first = 1;
718                 int last = s->last_field;
719                 if (last <= 0)
720                         last = fields_count(&in_fields);
721                 for (int j = first; j <= last; j++) {
722                         struct field *f = fields_push(&out_fields);
723                         if (j >= 1 && j <= fields_count(&in_fields))
724                                 *f = *fields_nth(&in_fields, j-1);
725                         else
726                                 f->start_pos = f->len = 0;
727                 }
728         }
729 }
730
731 static void select_all_fields(void)
732 {
733         for (int i = 0; i < fields_count(&in_fields); i++)
734                 *fields_push(&out_fields) = *fields_nth(&in_fields, i);
735 }
736
737 /*** Processing of files ***/
738
739 static void one_pass(int pass)
740 {
741         if (pass & 2)
742                 write_header();
743
744         for (;;) {
745                 line_number++;
746                 if (!read_line())
747                         break;
748
749                 if (want_trim && (pass & 1))
750                         trim_fields();
751
752                 fields_reset(&out_fields);
753                 if (pass & 1)
754                         select_fields();
755                 else
756                         select_all_fields();
757
758                 if (out_format->needs_stats)
759                         update_stats();
760
761                 write_line();
762         }
763
764         if (pass & 2)
765                 write_footer();
766 }
767
768 static void two_pass(void)
769 {
770         struct format *final_format = out_format;
771
772         // We need to use character set info from the current locale
773         setlocale(LC_CTYPE, "");
774
775         // Pass 1: Set up writer of intermediate format
776         out_format = xmalloc_zero(sizeof(*out_format));
777         out_format->id = FORM_TMP;
778         out_format->read_line = tmp_read;
779         out_format->write_line = tmp_write;
780         out_format->tmp_file = tmpfile();
781         out_format->needs_stats = final_format->needs_stats;
782         out_format->field_names = in_format->field_names;
783         one_pass(1);
784
785         // Pass 2: Set up reader of intermediate format
786         in_format = out_format;
787         rewind(in_format->tmp_file);
788         line_number = 0;
789         out_format = final_format;
790         out_format->needs_stats = 0;
791         one_pass(2);
792         fclose(in_format->tmp_file);
793 }
794
795 /*** Parsing of arguments ***/
796
797 static void NONRET usage(void)
798 {
799         printf("\
800 Usage: xsv <in-format> [<out-format>] <options> [<fields>]\n\
801 \n\
802 Formats:\n\
803 -t, --tsv               TAB-separated values (default)\n\
804 -c, --csv               Comma-separated values\n\
805 -w, --ws                Values separated by arbitrary whitespace\n\
806 -W, --strict-ws         Like --ws, but recognize empty columns at start/end\n\
807 -r, --regex=<rx>        Separator given by Perl regular expression (input only)\n\
808     --table             Format a table (output only)\n\
809 \n\
810 Format parameters:\n\
811 -d, --fs=<char>         Delimiter of fields\n\
812 -f, --fields=<f>,...    Set field names\n\
813 -h, --header            The first line contains field names\n\
814 -q, --quiet             Do not show warnings\n\
815     --always-quote      Put quotes around all fields (CSV output only)\n\
816     --table-sep=<n>     Separate table columns by <n> spaces (default: 2)\n\
817     --grid              Separate table columns by grid lines\n\
818 \n\
819 Other options:\n\
820     --trim              Trim leading and trailing whitespaces in fields\n\
821 ");
822         exit(0);
823 }
824
825 static void NONRET bad_args(const char *msg, ...)
826 {
827         if (msg) {
828                 va_list args;
829                 va_start(args, msg);
830                 fprintf(stderr, "xsv: ");
831                 vfprintf(stderr, msg, args);
832                 fputc('\n', stderr);
833                 va_end(args);
834         }
835         fprintf(stderr, "Try `xsv --help' for more information.\n");
836         exit(1);
837 }
838
/* Short options; a colon marks an option which takes an argument */
static const char short_options[] = "cd:f:hqr:twW";

/* Codes of options which have no short form, chosen above all character codes */
enum long_options {
	OPT_HELP = 256,
	OPT_TRIM,
	OPT_ALWAYS_QUOTE,
	OPT_TABLE,
	OPT_TABLE_SEP,
	OPT_GRID,
};

/* Long options and the codes returned for them by getopt_long() */
static const struct option long_options[] = {
	{ "always-quote",	no_argument,		NULL,	OPT_ALWAYS_QUOTE },
	{ "csv",		no_argument,		NULL,	'c' },
	{ "fields",		required_argument,	NULL,	'f' },
	{ "fs",			required_argument,	NULL,	'd' },
	{ "grid",		no_argument,		NULL,	OPT_GRID },
	{ "header",		no_argument,		NULL,	'h' },
	{ "quiet",		no_argument,		NULL,	'q' },
	{ "regex",		required_argument,	NULL,	'r' },
	{ "strict-ws",		no_argument,		NULL,	'W' },
	{ "table",		no_argument,		NULL,	OPT_TABLE },
	{ "table-sep",		required_argument,	NULL,	OPT_TABLE_SEP },
	{ "trim",		no_argument,		NULL,	OPT_TRIM },
	{ "tsv",		no_argument,		NULL,	't' },
	{ "ws",			no_argument,		NULL,	'w' },
	{ "help",		no_argument,		NULL,	OPT_HELP },
	{ NULL,			0,			NULL,	0 },
};
868
869 static void set_format(int format_id)
870 {
871         struct format *f = xmalloc_zero(sizeof(*f));
872         f->id = format_id;
873
874         switch (format_id) {
875                 case FORM_TSV:
876                         f->fs = '\t';
877                         f->quote = -1;
878                         f->read_line = csv_read;
879                         f->write_line = csv_write;
880                         break;
881                 case FORM_CSV:
882                         f->fs = ',';
883                         f->quote = '"';
884                         f->read_line = csv_read;
885                         f->write_line = csv_write;
886                         break;
887                 case FORM_WS:
888                         f->fs = ' ';
889                         f->quote = -1;
890                         f->read_line = ws_read;
891                         f->write_line = csv_write;
892                         break;
893                 case FORM_REGEX:
894                         f->read_line = regex_read;
895                         break;
896                 case FORM_TABLE:
897                         f->write_line = table_write;
898                         f->write_grid = table_write_grid;
899                         f->needs_stats = 1;
900                         f->table_sep = 2;
901                         break;
902         }
903
904         if (!in_format)
905                 in_format = f;
906         else if (!out_format)
907                 out_format = f;
908         else
909                 bad_args("At most two formats may be given.");
910 }
911
912 static struct format *current_format(void)
913 {
914         if (out_format)
915                 return out_format;
916         if (in_format)
917                 return in_format;
918         set_format(FORM_TSV);
919         return in_format;
920 }
921
922 int main(int argc, char **argv)
923 {
924         int opt;
925         const char *err;
926
927         while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
928                 switch (opt) {
929                         case 'c':
930                                 set_format(FORM_CSV);
931                                 break;
932                         case 'd':
933                                 if (optarg[0])
934                                         current_format()->fs = optarg[0];
935                                 else
936                                         bad_args("No field delimiter given.");
937                                 break;
938                         case 'f':
939                                 current_format()->set_field_names = optarg;
940                                 break;
941                         case 'h':
942                                 current_format()->has_header = 1;
943                                 break;
944                         case 'q':
945                                 current_format()->quiet = 1;
946                                 break;
947                         case 'r':
948                                 set_format(FORM_REGEX);
949                                 err = regex_set(current_format(), optarg);
950                                 if (err)
951                                         bad_args("Error compiling regex: %s", err);
952                                 break;
953                         case 't':
954                                 set_format(FORM_TSV);
955                                 break;
956                         case 'w':
957                                 set_format(FORM_WS);
958                                 break;
959                         case 'W':
960                                 set_format(FORM_WS);
961                                 current_format()->strict_ws = 1;
962                                 break;
963                         case OPT_ALWAYS_QUOTE:
964                                 if (current_format()->id != FORM_CSV)
965                                         bad_args("--always-quote makes sense only for CSV.");
966                                 current_format()->always_quote = 1;
967                                 break;
968                         case OPT_HELP:
969                                 usage();
970                         case OPT_TRIM:
971                                 want_trim = 1;
972                                 break;
973                         case OPT_TABLE:
974                                 set_format(FORM_TABLE);
975                                 break;
976                         case OPT_TABLE_SEP:
977                                 current_format()->table_sep = atoi(optarg);
978                                 break;
979                         case OPT_GRID:
980                                 current_format()->table_grid = 1;
981                                 break;
982                         default:
983                                 bad_args(NULL);
984                 }
985
986         current_format();
987         if (!out_format)
988                 out_format = in_format;
989         if (!in_format->read_line)
990                 bad_args("Write-only format selected for input.");
991         if (!out_format->write_line)
992                 bad_args("Read-only format selected for output.");
993         read_header();
994
995         for (int i = optind; i < argc; i++) {
996                 err = parse_selector(argv[i]);
997                 if (err)
998                         bad_args(err);
999         }
1000         finish_parse_selectors();
1001
1002         if (out_format->needs_stats)
1003                 two_pass();
1004         else
1005                 one_pass(3);
1006         return 0;
1007 }