]> mj.ucw.cz Git - xsv.git/blob - xsv.c
Added support for headers and names fields
[xsv.git] / xsv.c
1 /*
2  *      A Swiss-Army Knife for CSV-like Files
3  *
4  *      (c) 2012 Martin Mares <mj@ucw.cz>
5  */
6
7 #define _GNU_SOURCE
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdarg.h>
13 #include <getopt.h>
14 #include <wchar.h>
15 #include <locale.h>
16
17 #include <pcre.h>
18
/* NONRET marks a function that never returns; it expands to nothing on non-GNU compilers. */
#if defined(__GNUC__)
#  define NONRET __attribute__((noreturn))
#else
#  define NONRET
#endif
24
25 /*** General functions ***/
26
27 static void NONRET die(char *msg, ...)
28 {
29         va_list args;
30         va_start(args, msg);
31         fprintf(stderr, "xsv: ");
32         vfprintf(stderr, msg, args);
33         fputc('\n', stderr);
34         va_end(args);
35         exit(1);
36 }
37
38 /*** Memory allocation ***/
39
/*
 * malloc() that never returns NULL: when the allocation fails,
 * the program is terminated through die().
 */
static void *xmalloc(size_t bytes)
{
	void *mem = malloc(bytes);

	if (mem)
		return mem;
	die("Out of memory (cannot allocate %zu bytes)", bytes);
}
47
/* Like xmalloc(), but the returned block is filled with zero bytes. */
static void *xmalloc_zero(size_t bytes)
{
	// memset() returns its first argument, i.e. the freshly allocated block
	return memset(xmalloc(bytes), 0, bytes);
}
54
/*
 * realloc() that never returns NULL: when the reallocation fails,
 * the program is terminated through die().
 */
static void *xrealloc(void *old, size_t bytes)
{
	void *mem = realloc(old, bytes);

	if (mem)
		return mem;
	die("Out of memory (cannot allocate %zu bytes)", bytes);
}
62
/*
 * DECLARE_BUF(name, type) generates a growable array of <type>:
 *
 *   name_t          the array: start = storage, count = used slots, max = allocated slots
 *   name_init()     make the array empty, with no storage
 *   name_reset()    forget all elements, keep the storage
 *   name_count()    number of used slots
 *   name_extend()   grow the storage: 16 slots at first, doubled on every later call
 *   name_push()     append one slot (growing the storage when full), return a pointer to it
 *   name_first()    pointer to the start of the storage
 *   name_nth()      pointer to slot n (no bounds checking)
 *
 * The storage is reallocated when it grows, so pointers obtained from
 * push/first/nth must not be kept across a later push.
 */
#define DECLARE_BUF(name, type) \
	typedef struct { type *start; int count; int max; } name##_t;			\
	static inline void name##_init(name##_t *buf)					\
	{										\
		buf->start = NULL;							\
		buf->count = 0;								\
		buf->max = 0;								\
	}										\
	static inline void name##_reset(name##_t *buf)					\
	{										\
		buf->count = 0;								\
	}										\
	static inline int name##_count(name##_t *buf)					\
	{										\
		return buf->count;							\
	}										\
	static void name##_extend(name##_t *buf)					\
	{										\
		if (buf->max)								\
			buf->max *= 2;							\
		else									\
			buf->max = 16;							\
		buf->start = xrealloc(buf->start, buf->max * sizeof(type));		\
	}										\
	static inline type *name##_push(name##_t *buf)					\
	{										\
		if (buf->count >= buf->max)						\
			name##_extend(buf);						\
		return &buf->start[buf->count++];					\
	}										\
	static inline type *name##_first(name##_t *buf)				\
	{										\
		return buf->start;							\
	}										\
	static inline type *name##_nth(name##_t *buf, int n)				\
	{										\
		return &buf->start[n];							\
	}										\
	// end

DECLARE_BUF(intarray, int);
DECLARE_BUF(stringarray, char *);
82
83 /*** Formats and their parameters ***/
84
/* Identifiers of the supported file formats. */
enum format_id {
	FORM_UNSPEC = 0,	// no format selected
	FORM_TSV,		// TAB-separated values
	FORM_CSV,		// comma-separated values
	FORM_WS,		// values separated by whitespace
	FORM_REGEX,		// separator given by a regular expression
	FORM_TMP,		// intermediate temporary file
	FORM_TABLE,		// formatted table
};
94
95 struct format {
96         enum format_id id;
97         int fs;
98         int quote;
99         int quiet;
100         int (*read_line)(struct format *fmt);
101         void (*write_line)(struct format *fmt);
102         int needs_stats;
103
104         // Field names
105         int has_header;
106         char *set_field_names;
107         struct field_names *field_names;
108
109         // CSV backend:
110         int always_quote;
111
112         // WS backend:
113         int strict_ws;
114
115         // regex backend:
116         pcre *pcre;
117         pcre_extra *pcre_extra;
118
119         // Temporary file backend:
120         FILE *tmp_file;
121
122         // Table backend:
123         int table_sep;
124 };
125
126 static struct format *in_format, *out_format;
127 static int want_trim;
128
129 struct field {
130         int start_pos;
131         int len;
132 };
133
134 DECLARE_BUF(fields, struct field);
135 DECLARE_BUF(line, unsigned char);
136
137 static fields_t in_fields, out_fields;
138 static struct field *in_field;
139 static line_t in_line;
140 static int line_number;
141
142 static int read_line(void)
143 {
144         fields_reset(&in_fields);
145         line_reset(&in_line);
146         in_field = NULL;
147         if (!in_format->read_line(in_format))
148                 return 0;
149         if (ferror_unlocked(stdin))
150                 die("I/O error when reading standard input");
151         return 1;
152 }
153
154 static void write_line(void)
155 {
156         out_format->write_line(out_format);
157         if (ferror_unlocked(stdout))
158                 die("I/O error when writing standard input");
159 }
160
161 static void new_field(int pos)
162 {
163         in_field = fields_push(&in_fields);
164         in_field->start_pos = pos;
165         in_field->len = 0;
166 }
167
168 static void ensure_field(int pos)
169 {
170         if (!in_field)
171                 new_field(pos);
172 }
173
174 // FIXME: Use elsewhere
175 static unsigned char *get_field(fields_t *fields, int i, int *len)
176 {
177         struct field *f = fields_nth(fields, i);
178         *len = f->len;
179         return line_nth(&in_line, f->start_pos);
180 }
181
182 static void warn(struct format *fmt, char *msg, ...)
183 {
184         if (!fmt->quiet) {
185                 fprintf(stderr, "Warning at line %d: ", line_number);
186                 va_list args;
187                 va_start(args, msg);
188                 vfprintf(stderr, msg, args);
189                 va_end(args);
190                 fputc('\n', stderr);
191         }
192 }
193
194 static int next_line(void)
195 {
196         for (;;) {
197                 int c = getchar_unlocked();
198                 if (c == '\r')
199                         continue;
200                 if (c < 0)
201                         return !!line_count(&in_line);
202                 if (c == '\n')
203                         return 1;
204                 *line_push(&in_line) = c;
205         }
206 }
207
208 static int field_chars(struct field *f)
209 {
210         unsigned char *s = line_nth(&in_line, f->start_pos);
211         int i = 0;
212         mbstate_t mbs;
213         memset(&mbs, 0, sizeof(mbs));
214
215         int chars = 0;
216         while (i < f->len) {
217                 size_t k = mbrlen((char *) s + i, f->len - i, &mbs);
218                 if ((int) k <= 0)
219                         break;
220                 i += k;
221                 chars++;
222         }
223
224         return chars;
225 }
226
227 /*** Field statistics ***/
228
229 static intarray_t column_widths;
230
231 static void update_stats(void)
232 {
233         for (int i = 0; i < fields_count(&out_fields); i++) {
234                 struct field *f = fields_nth(&out_fields, i);
235                 intarray_t *w = &column_widths;
236
237                 while (i >= intarray_count(w))
238                         *intarray_push(w) = 0;
239                 int fw = field_chars(f);
240                 if (*intarray_nth(w, i) < fw)
241                         *intarray_nth(w, i) = fw;
242         }
243 }
244
/*** CSV/TSV back-end ***/
246
247 static int csv_read(struct format *fmt)
248 {
249         int quoted = 0;
250         for (;;) {
251                 int c = getchar_unlocked();
252                 int i = line_count(&in_line);
253 restart:
254                 if (c == '\r')
255                         continue;
256                 if (c < 0 || c == '\n') {
257                         if (quoted)
258                                 warn(fmt, "Missing closing quote.");
259                         if (c < 0)
260                                 return !!fields_count(&in_fields);
261                         else
262                                 return 1;
263                 }
264                 if (quoted) {
265                         if (c == fmt->quote) {
266                                 c = getchar_unlocked();
267                                 if (c != fmt->quote) {
268                                         quoted = 0;
269                                         goto restart;
270                                 }
271                                 // Two quotes assimilate to one
272                         }
273                         // Fall through to pushing the character
274                 } else if (c == fmt->quote) {
275                         quoted = 1;
276                         continue;
277                 } else if (c == fmt->fs && !quoted) {
278                         ensure_field(i);
279                         new_field(i);
280                         continue;
281                 }
282                 ensure_field(i);
283                 *line_push(&in_line) = c;
284                 in_field->len++;
285         }
286 }
287
/* Return 1 if c is a space, a TAB or a form feed, 0 otherwise. */
static int is_ws(int c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\f':
		return 1;
	default:
		return 0;
	}
}
292
293 static void csv_write(struct format *fmt)
294 {
295         unsigned char *line = line_first(&in_line);
296         int n = fields_count(&out_fields);
297         for (int i=0; i<n; i++) {
298                 struct field *f = fields_nth(&out_fields, i);
299                 int need_quotes = 0;
300                 if (fmt->quote >= 0) {
301                         need_quotes = fmt->always_quote;
302                         for (int j=0; !need_quotes && j < f->len; j++) {
303                                 int c = line[f->start_pos + j];
304                                 if (c == fmt->fs || c == fmt->quote)
305                                         need_quotes = 1;
306                         }
307                 }
308                 if (i)
309                         putchar_unlocked(fmt->fs);
310                 if (need_quotes)
311                         putchar_unlocked(fmt->quote);
312                 for (int j=0; j < f->len; j++) {
313                         int c = line[f->start_pos + j];
314                         if (c == fmt->fs && !need_quotes)
315                                 warn(fmt, "Field separator found inside field and quoting is turned off.");
316                         if (c == fmt->quote)
317                                 putchar_unlocked(c);
318                         putchar_unlocked(c);
319                 }
320                 if (need_quotes)
321                         putchar_unlocked(fmt->quote);
322         }
323         putchar_unlocked('\n');
324 }
325
326 /*** White-space back-end ***/
327
328 static int ws_read(struct format *fmt)
329 {
330         if (!next_line())
331                 return 0;
332
333         unsigned char *line = line_first(&in_line);
334         int n = line_count(&in_line);
335         if (!n)
336                 return 1;
337
338         int ws = 0;
339         new_field(0);
340         for (int i=0; i<n; i++) {
341                 int c = line[i];
342                 if (is_ws(c)) {
343                         ws++;
344                 } else {
345                         if (ws) {
346                                 if (!in_field->start_pos &&
347                                     !in_field->len &&
348                                     !fmt->strict_ws)
349                                         in_field->start_pos = i;
350                                 else
351                                         new_field(i);
352                                 ws = 0;
353                         }
354                         in_field->len++;
355                 }
356         }
357
358         if (ws && fmt->strict_ws)
359                 new_field(n);
360         return 1;
361 }
362
363 /*** Regex back-end ***/
364
365 static const char *regex_set(struct format *f, char *rx)
366 {
367         const char *err;
368         int errpos;
369         f->pcre = pcre_compile(rx, PCRE_DOLLAR_ENDONLY, &err, &errpos, NULL);
370         if (!f->pcre)
371                 return err;
372
373         f->pcre_extra = pcre_study(f->pcre, 0, &err);
374         if (!f->pcre_extra)
375                 return err;
376
377         return NULL;
378 }
379
380 static int regex_read(struct format *fmt)
381 {
382         if (!next_line())
383                 return 0;
384
385         unsigned char *c = line_first(&in_line);
386         int n = line_count(&in_line);
387         if (!n)
388                 return 1;
389
390         int i = 0;
391         for (;;) {
392                 int ovec[3];
393                 int sep = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3);
394                 if (sep < 0) {
395                         if (sep != PCRE_ERROR_NOMATCH)
396                                 warn(fmt, "PCRE matching error %d", sep);
397                         // No further occurrence of the separator: the rest is a single field
398                         new_field(i);
399                         in_field->len = n - i;
400                         return 1;
401                 }
402                 new_field(i);
403                 in_field->len = ovec[0] - i;
404                 i = ovec[1];
405         }
406 }
407
408 /*** Table back-end ***/
409
410 static void table_write(struct format *fmt)
411 {
412         for (int i = 0; i < fields_count(&out_fields); i++) {
413                 if (i)
414                         printf("%*s", fmt->table_sep, "");
415                 struct field *f = fields_nth(&out_fields, i);
416                 int fw = field_chars(f);
417                 int cw = *intarray_nth(&column_widths, i);
418                 if (fw > cw) {
419                         warn(fmt, "Internal error: Wrongly calculated column width (%d > %d)", fw, cw);
420                         cw = fw;
421                 }
422                 unsigned char *p = line_nth(&in_line, f->start_pos);
423                 for (int j = 0; j < f->len; j++)
424                         putchar_unlocked(p[j]);
425                 while (fw < cw) {
426                         putchar_unlocked(' ');
427                         fw++;
428                 }
429         }
430         putchar_unlocked('\n');
431 }
432
433 /*** Temporary file back-end ***/
434
435 static int tmp_read(struct format *fmt)
436 {
437         FILE *tf = fmt->tmp_file;
438
439         for (;;) {
440                 int c = getc_unlocked(tf);
441                 if (c < 0)
442                         return 0;
443                 if (c == 0xff)
444                         return 1;
445                 if (c == 0xfe) {
446                         c = getc_unlocked(tf);
447                         c = (c << 8) | getc_unlocked(tf);
448                         c = (c << 8) | getc_unlocked(tf);
449                         c = (c << 8) | getc_unlocked(tf);
450                 }
451                 new_field(line_count(&in_line));
452                 in_field->len = c;
453                 while (c--) {
454                         int x = getc_unlocked(tf);
455                         if (x < 0) {
456                                 warn(fmt, "Truncated temporary file");
457                                 return 0;
458                         }
459                         *line_push(&in_line) = x;
460                 }
461         }
462
463         if (ferror_unlocked(tf))
464                 die("I/O error when reading temporary file");
465 }
466
467 static void tmp_write(struct format *fmt)
468 {
469         FILE *tf = fmt->tmp_file;
470
471         for (int i = 0; i < fields_count(&out_fields); i++) {
472                 struct field *f = fields_nth(&out_fields, i);
473                 if (f->len < 0xfe)
474                         putc_unlocked(f->len, tf);
475                 else {
476                         putc_unlocked(0xfe, tf);
477                         putc_unlocked((f->len >> 24) & 0xff, tf);
478                         putc_unlocked((f->len >> 16) & 0xff, tf);
479                         putc_unlocked((f->len >> 8) & 0xff, tf);
480                         putc_unlocked(f->len & 0xff, tf);
481                 }
482
483                 unsigned char *p = line_nth(&in_line, f->start_pos);
484                 for (int j = 0; j < f->len; j++)
485                         putc_unlocked(*p++, tf);
486         }
487         putc_unlocked(0xff, tf);
488
489         if (ferror_unlocked(tf))
490                 die("I/O error when writing temporary file");
491 }
492
493 /*** Transforms ***/
494
495 static void trim_fields(void)
496 {
497         unsigned char *line = line_first(&in_line);
498         for (int i = 0; i < fields_count(&in_fields); i++) {
499                 struct field *f = fields_nth(&in_fields, i);
500                 while (f->len && is_ws(line[f->start_pos]))
501                         f->start_pos++, f->len--;
502                 while (f->len && is_ws(line[f->start_pos + f->len - 1]))
503                         f->len--;
504         }
505 }
506
507 /*** Field names and headers ***/
508
509 struct field_names {
510         stringarray_t names;
511 };
512
513 static void add_field(struct field_names *fn, char *name, int namelen)
514 {
515         char *n = xmalloc(namelen + 1);
516         memcpy(n, name, namelen);
517         n[namelen] = 0;
518         *stringarray_push(&fn->names) = n;
519 }
520
/*
 * Append all names from a comma-separated list to the list of field names.
 * Empty items are added as empty names; an empty string yields one empty name.
 */
static void add_field_names(struct field_names *fn, char *names)
{
	for (char *p = names; p; ) {
		char *comma = strchr(p, ',');
		if (comma) {
			add_field(fn, p, comma - p);
			p = comma + 1;
		} else {
			add_field(fn, p, (int) strlen(p));
			p = NULL;
		}
	}
}
531
532 static void read_header(void)
533 {
534         if (!(in_format->has_header || in_format->set_field_names))
535                 return;
536
537         struct field_names *fn = xmalloc_zero(sizeof(*fn));
538         in_format->field_names = fn;
539
540         if (in_format->has_header) {
541                 if (!read_line())
542                         die("Missing input header");
543         }
544
545         if (in_format->set_field_names) {
546                 add_field_names(fn, in_format->set_field_names);
547         } else {
548                 for (int i = 0; i < fields_count(&in_fields); i++) {
549                         int len;
550                         char *s = (char *) get_field(&in_fields, i, &len);
551                         add_field(fn, s, len);
552                 }
553         }
554 }
555
556 static void write_header(void)
557 {
558         if (!out_format->has_header)
559                 return;
560
561         if (out_format->set_field_names) {
562                 struct field_names *fn = xmalloc_zero(sizeof(*fn));
563                 out_format->field_names = fn;
564                 add_field_names(fn, out_format->set_field_names);
565         } else if (in_format->field_names)
566                 out_format->field_names = in_format->field_names;
567         else
568                 die("Output header requested, but no field names specified");
569
570         line_reset(&in_line);
571         fields_reset(&out_fields);
572         struct field_names *fn = out_format->field_names;
573         for (int i = 0; i < stringarray_count(&fn->names); i++) {
574                 struct field *f = fields_push(&out_fields);
575                 f->start_pos = line_count(&in_line);
576                 f->len = 0;
577                 char *s = *stringarray_nth(&fn->names, i);
578                 while (*s) {
579                         *line_push(&in_line) = *s++;
580                         f->len++;
581                 }
582         }
583         write_line();
584 }
585
586 static int find_field_by_name(struct field_names *fn, char *name)
587 {
588         for (int i = 0; i < stringarray_count(&fn->names); i++)
589                 if (!strcmp(*stringarray_nth(&fn->names, i), name))
590                         return i + 1;
591         return -1;
592 }
593
594 /*** Field selection ***/
595
596 struct selector {
597         int first_field, last_field;            // 0 means "boundary"
598 };
599
600 DECLARE_BUF(selectors, struct selector);
601 static selectors_t selectors;
602
/*
 * Parse a string of decimal digits.
 * Returns its value, 0 for an empty string, or -1 if the string contains
 * anything but digits or the number is too large (the value collected so far
 * has reached 100000000 and another digit follows).
 */
static int parse_field_num(char *str)
{
	int value = 0;

	for (char *p = str; *p; p++) {
		if (*p < '0' || *p > '9' || value >= 100000000)
			return -1;
		value = 10*value + (*p - '0');
	}
	return value;
}
617
618 static int parse_field(char *str)
619 {
620         if (!*str)
621                 return 0;
622
623         int f = parse_field_num(str);
624         if (f > 0)
625                 return f;
626
627         if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0)
628                 return f;
629
630         die("Unknown field %s", str);
631 }
632
633 static char *parse_selector(char *str)
634 {
635         char buf[strlen(str) + 1];
636         strcpy(buf, str);
637
638         struct selector *s = selectors_push(&selectors);
639         char *sep = strchr(buf, '-');
640         if (sep) {
641                 *sep++ = 0;
642                 s->first_field = parse_field(buf);
643                 s->last_field = parse_field(sep);
644         } else
645                 s->first_field = s->last_field = parse_field(buf);
646
647         return NULL;
648 }
649
650 static void finish_parse_selectors(void)
651 {
652         if (!selectors_count(&selectors))
653                 parse_selector("-");
654 }
655
656 static void select_fields(void)
657 {
658         for (int i = 0; i < selectors_count(&selectors); i++) {
659                 struct selector *s = selectors_nth(&selectors, i);
660                 int first = s->first_field;
661                 if (first <= 0)
662                         first = 1;
663                 int last = s->last_field;
664                 if (last <= 0)
665                         last = fields_count(&in_fields);
666                 for (int j = first; j <= last; j++) {
667                         struct field *f = fields_push(&out_fields);
668                         if (j >= 1 && j <= fields_count(&in_fields))
669                                 *f = *fields_nth(&in_fields, j-1);
670                         else
671                                 f->start_pos = f->len = 0;
672                 }
673         }
674 }
675
676 static void select_all_fields(void)
677 {
678         for (int i = 0; i < fields_count(&in_fields); i++)
679                 *fields_push(&out_fields) = *fields_nth(&in_fields, i);
680 }
681
682 /*** Processing of files ***/
683
684 static void one_pass(int pass)
685 {
686         for (;;) {
687                 line_number++;
688                 if (!read_line())
689                         break;
690
691                 if (want_trim && (pass & 1))
692                         trim_fields();
693
694                 fields_reset(&out_fields);
695                 if (pass & 1)
696                         select_fields();
697                 else
698                         select_all_fields();
699
700                 if (out_format->needs_stats)
701                         update_stats();
702
703                 write_line();
704         }
705 }
706
707 static void two_pass(void)
708 {
709         struct format *final_format = out_format;
710
711         // We need to use character set info from the current locale
712         setlocale(LC_CTYPE, "");
713
714         // Pass 1: Set up writer of intermediate format
715         out_format = xmalloc_zero(sizeof(*out_format));
716         out_format->id = FORM_TMP;
717         out_format->read_line = tmp_read;
718         out_format->write_line = tmp_write;
719         out_format->tmp_file = tmpfile();
720         out_format->needs_stats = final_format->needs_stats;
721         one_pass(1);
722
723         // Pass 2: Set up reader of intermediate format
724         in_format = out_format;
725         rewind(in_format->tmp_file);
726         line_number = 0;
727         out_format = final_format;
728         out_format->needs_stats = 0;
729         one_pass(2);
730         fclose(in_format->tmp_file);
731 }
732
733 /*** Parsing of arguments ***/
734
735 static void NONRET usage(void)
736 {
737         printf("\
738 Usage: xsv <in-format> [<out-format>] <options> [<fields>]\n\
739 \n\
740 Formats:\n\
741 -t, --tsv               TAB-separated values (default)\n\
742 -c, --csv               Comma-separated values\n\
743 -w, --ws                Values separated by arbitrary whitespace\n\
744 -W, --strict-ws         Like --ws, but recognize empty columns at start/end\n\
745 -r, --regex=<rx>        Separator given by Perl regular expression (input only)\n\
746     --table             Format a table (output only)\n\
747 \n\
748 Format parameters:\n\
749 -d, --fs=<char>         Delimiter of fields\n\
750 -f, --fields=<f>,...    Set field names\n\
751 -h, --header            The first line contains field names\n\
752 -q, --quiet             Do not show warnings\n\
753     --always-quote      Put quotes around all fields (CSV output only)\n\
754     --table-sep=<n>     Separate table columns by <n> spaces (default: 2)\n\
755 \n\
756 Other options:\n\
757     --trim              Trim leading and trailing whitespaces in fields\n\
758 ");
759         exit(0);
760 }
761
762 static void NONRET bad_args(const char *msg, ...)
763 {
764         if (msg) {
765                 va_list args;
766                 va_start(args, msg);
767                 fprintf(stderr, "xsv: ");
768                 vfprintf(stderr, msg, args);
769                 fputc('\n', stderr);
770                 va_end(args);
771         }
772         fprintf(stderr, "Try `xsv --help' for more information.\n");
773         exit(1);
774 }
775
/* Short options in getopt() syntax; a colon marks an option taking an argument. */
static const char short_options[] = "cd:f:hqr:twW";

/* Codes of options which have no short form; they lie above all character codes. */
enum long_options {
	OPT_HELP = 256,
	OPT_TRIM,
	OPT_ALWAYS_QUOTE,
	OPT_TABLE,
	OPT_TABLE_SEP,
};

/* Long options: name, whether an argument is taken, (unused) flag pointer, returned code. */
static const struct option long_options[] = {
	{ "always-quote",	no_argument,		NULL,	OPT_ALWAYS_QUOTE },
	{ "csv",		no_argument,		NULL,	'c' },
	{ "fields",		required_argument,	NULL,	'f' },
	{ "fs",			required_argument,	NULL,	'd' },
	{ "header",		no_argument,		NULL,	'h' },
	{ "quiet",		no_argument,		NULL,	'q' },
	{ "regex",		required_argument,	NULL,	'r' },
	{ "strict-ws",		no_argument,		NULL,	'W' },
	{ "table",		no_argument,		NULL,	OPT_TABLE },
	{ "table-sep",		required_argument,	NULL,	OPT_TABLE_SEP },
	{ "trim",		no_argument,		NULL,	OPT_TRIM },
	{ "tsv",		no_argument,		NULL,	't' },
	{ "ws",			no_argument,		NULL,	'w' },
	{ "help",		no_argument,		NULL,	OPT_HELP },
	{ NULL,			0,			NULL,	0 },
};
803
804 static void set_format(int format_id)
805 {
806         struct format *f = xmalloc_zero(sizeof(*f));
807         f->id = format_id;
808
809         switch (format_id) {
810                 case FORM_TSV:
811                         f->fs = '\t';
812                         f->quote = -1;
813                         f->read_line = csv_read;
814                         f->write_line = csv_write;
815                         break;
816                 case FORM_CSV:
817                         f->fs = ',';
818                         f->quote = '"';
819                         f->read_line = csv_read;
820                         f->write_line = csv_write;
821                         break;
822                 case FORM_WS:
823                         f->fs = ' ';
824                         f->quote = -1;
825                         f->read_line = ws_read;
826                         f->write_line = csv_write;
827                         break;
828                 case FORM_REGEX:
829                         f->read_line = regex_read;
830                         break;
831                 case FORM_TABLE:
832                         f->write_line = table_write;
833                         f->needs_stats = 1;
834                         f->table_sep = 2;
835                         break;
836         }
837
838         if (!in_format)
839                 in_format = f;
840         else if (!out_format)
841                 out_format = f;
842         else
843                 bad_args("At most two formats may be given.");
844 }
845
846 static struct format *current_format(void)
847 {
848         if (out_format)
849                 return out_format;
850         if (in_format)
851                 return in_format;
852         set_format(FORM_TSV);
853         return in_format;
854 }
855
856 int main(int argc, char **argv)
857 {
858         int opt;
859         const char *err;
860
861         while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
862                 switch (opt) {
863                         case 'c':
864                                 set_format(FORM_CSV);
865                                 break;
866                         case 'd':
867                                 if (optarg[0])
868                                         current_format()->fs = optarg[0];
869                                 else
870                                         bad_args("No field delimiter given.");
871                                 break;
872                         case 'f':
873                                 current_format()->set_field_names = optarg;
874                                 break;
875                         case 'h':
876                                 current_format()->has_header = 1;
877                                 break;
878                         case 'q':
879                                 current_format()->quiet = 1;
880                                 break;
881                         case 'r':
882                                 set_format(FORM_REGEX);
883                                 err = regex_set(current_format(), optarg);
884                                 if (err)
885                                         bad_args("Error compiling regex: %s", err);
886                                 break;
887                         case 't':
888                                 set_format(FORM_TSV);
889                                 break;
890                         case 'w':
891                                 set_format(FORM_WS);
892                                 break;
893                         case 'W':
894                                 set_format(FORM_WS);
895                                 current_format()->strict_ws = 1;
896                                 break;
897                         case OPT_ALWAYS_QUOTE:
898                                 if (current_format()->id != FORM_CSV)
899                                         bad_args("--always-quote makes sense only for CSV.");
900                                 current_format()->always_quote = 1;
901                                 break;
902                         case OPT_HELP:
903                                 usage();
904                         case OPT_TRIM:
905                                 want_trim = 1;
906                                 break;
907                         case OPT_TABLE:
908                                 set_format(FORM_TABLE);
909                                 break;
910                         case OPT_TABLE_SEP:
911                                 current_format()->table_sep = atoi(optarg);
912                                 break;
913                         default:
914                                 bad_args(NULL);
915                 }
916
917         current_format();
918         if (!out_format)
919                 out_format = in_format;
920         if (!in_format->read_line)
921                 bad_args("Write-only format selected for input.");
922         if (!out_format->write_line)
923                 bad_args("Read-only format selected for output.");
924         read_header();
925
926         for (int i = optind; i < argc; i++) {
927                 err = parse_selector(argv[i]);
928                 if (err)
929                         bad_args(err);
930         }
931         finish_parse_selectors();
932
933         write_header();
934         if (out_format->needs_stats)
935                 two_pass();
936         else
937                 one_pass(3);
938         return 0;
939 }