]> mj.ucw.cz Git - xsv.git/blob - xsv.c
8fd72779d23e5bd11ff80711e2453adc44e31853
[xsv.git] / xsv.c
1 /*
2  *      A Swiss-Army Knife for CSV-like Files
3  *
4  *      (c) 2012 Martin Mares <mj@ucw.cz>
5  */
6
7 #define _GNU_SOURCE
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdarg.h>
13 #include <getopt.h>
14 #include <wchar.h>
15 #include <locale.h>
16
17 #include <pcre.h>
18
// Function/parameter attributes. They are only available when the compiler
// defines __GNUC__; elsewhere both macros expand to nothing.
#ifdef __GNUC__
#define NONRET __attribute__((noreturn))        // The function never returns to its caller
#define UNUSED __attribute__((unused))          // The parameter is intentionally unused
#else
#define NONRET
#define UNUSED
#endif
26
27 /*** General functions ***/
28
29 static void NONRET die(char *msg, ...)
30 {
31         va_list args;
32         va_start(args, msg);
33         fprintf(stderr, "xsv: ");
34         vfprintf(stderr, msg, args);
35         fputc('\n', stderr);
36         va_end(args);
37         exit(1);
38 }
39
40 /*** Memory allocation ***/
41
/*
 * malloc() wrapper: when malloc() returns NULL, the program is terminated
 * via die(), so callers never have to check the result.
 */
static void *xmalloc(size_t bytes)
{
        void *mem = malloc(bytes);

        if (mem == NULL)
                die("Out of memory (cannot allocate %zu bytes)", bytes);
        return mem;
}
49
/*
 * Like xmalloc(), but the returned block is filled with zero bytes.
 */
static void *xmalloc_zero(size_t bytes)
{
        // memset() hands back its first argument, i.e. the fresh block
        return memset(xmalloc(bytes), 0, bytes);
}
56
/*
 * realloc() wrapper: when realloc() returns NULL, the program is terminated
 * via die(). The returned pointer replaces `old`.
 */
static void *xrealloc(void *old, size_t bytes)
{
        void *mem = realloc(old, bytes);

        if (mem == NULL)
                die("Out of memory (cannot allocate %zu bytes)", bytes);
        return mem;
}
64
/*
 * DECLARE_BUF(name, type) generates a growable array of <type>:
 *
 *      name_t                  the buffer (storage pointer, used count, capacity)
 *      name_init(buf)          set the buffer to empty with no storage
 *      name_reset(buf)         drop all items, keep the storage for reuse
 *      name_count(buf)         number of items currently stored
 *      name_push(buf)          append one uninitialized item and return its address
 *      name_first(buf)         address of the start of the storage
 *      name_nth(buf, n)        address of item n (no bounds checking)
 *
 * The capacity starts at 16 items and doubles whenever the buffer is full.
 * Growing goes through xrealloc(), so addresses returned earlier must not be
 * kept across a push.
 */
#define DECLARE_BUF(name, type) \
        typedef struct { type *start; int count; int max; } name##_t;                           \
        static inline void name##_init(name##_t *buf) {                                         \
                buf->start = NULL;                                                              \
                buf->count = 0;                                                                 \
                buf->max = 0;                                                                   \
        }                                                                                       \
        static inline void name##_reset(name##_t *buf) { buf->count = 0; }                      \
        static inline int name##_count(name##_t *buf) { return buf->count; }                    \
        static void name##_extend(name##_t *buf) {                                              \
                if (buf->max) buf->max *= 2; else buf->max = 16;                                \
                buf->start = xrealloc(buf->start, buf->max * sizeof(type));                     \
        }                                                                                       \
        static inline type *name##_push(name##_t *buf) {                                        \
                if (buf->count >= buf->max) name##_extend(buf);                                 \
                buf->count++;                                                                   \
                return &buf->start[buf->count - 1];                                             \
        }                                                                                       \
        static inline type *name##_first(name##_t *buf) { return buf->start; }                  \
        static inline type *name##_nth(name##_t *buf, int n) { return &buf->start[n]; }

DECLARE_BUF(intarray, int);
DECLARE_BUF(stringarray, char *);
84
85 /*** Formats and their parameters ***/
86
// Identifies which syntax a struct format describes
enum format_id {
        FORM_UNSPEC,            // Zero value
        FORM_TSV,               // TAB-separated values
        FORM_CSV,               // Comma-separated values with quoting
        FORM_WS,                // Values separated by whitespace
        FORM_REGEX,             // Separator given by a regular expression (no writer)
        FORM_TMP,               // Intermediate file used between the two passes
        FORM_TABLE,             // Aligned table (no reader)
};
96
/*
 * Description of one data format together with its parameters.
 * Instances are allocated zero-filled, so members a back-end does not set
 * explicitly are 0/NULL.
 */
struct format {
        enum format_id id;
        int fs;                                                 // Field separator character
        int quote;                                              // Quote character, -1 when the format has no quoting
        int quiet;                                              // Non-zero suppresses warnings
        int (*read_line)(struct format *fmt);                   // NULL for formats which cannot be read
        void (*write_line)(struct format *fmt);                 // NULL for formats which cannot be written
        void (*write_grid)(struct format *fmt, int pos);        // -1=above, 1=below, 0=after header
        int needs_stats;                                        // Non-zero if column widths must be known before writing

        // Field names
        int has_header;                                         // The first line contains field names
        char *set_field_names;                                  // Comma-separated names given on the command line, or NULL
        struct field_names *field_names;

        // CSV backend:
        int always_quote;

        // WS backend:
        int strict_ws;

        // regex backend:
        pcre *pcre;
        pcre_extra *pcre_extra;

        // Temporary file backend:
        FILE *tmp_file;

        // Table backend:
        int table_sep;                                          // Number of spaces between columns
        int table_grid;                                         // Non-zero draws grid lines
};
129
// Format of the input and of the output; both are chosen while parsing arguments
static struct format *in_format, *out_format;
// Switches: --trim, --equalize, and whether column widths are being collected
static int want_trim, want_equalize, want_stats;
132
// One field of the current line, described as a slice of in_line
struct field {
        int start_pos;          // Offset of the first byte within in_line
        int len;                // Length in bytes
};

DECLARE_BUF(fields, struct field);
DECLARE_BUF(line, unsigned char);

static fields_t in_fields, out_fields;  // Fields as parsed by the reader / as selected for output
static struct field *in_field;          // Field currently being built by a reader, NULL if none yet
static line_t in_line;                  // Bytes of the current line; fields of both lists point into it
static int line_number;                 // Number of the line being processed, shown in warnings
145
146 static int read_line(void)
147 {
148         fields_reset(&in_fields);
149         line_reset(&in_line);
150         in_field = NULL;
151         if (!in_format->read_line(in_format))
152                 return 0;
153         if (ferror_unlocked(stdin))
154                 die("I/O error when reading standard input");
155         return 1;
156 }
157
158 static void write_line(void)
159 {
160         out_format->write_line(out_format);
161         if (ferror_unlocked(stdout))
162                 die("I/O error when writing standard input");
163 }
164
165 static void write_grid(int pos)
166 {
167         if (out_format->write_grid) {
168                 out_format->write_grid(out_format, pos);
169                 if (ferror_unlocked(stdout))
170                         die("I/O error when writing standard input");
171         }
172 }
173
174 static void new_field(int pos)
175 {
176         in_field = fields_push(&in_fields);
177         in_field->start_pos = pos;
178         in_field->len = 0;
179 }
180
181 static void ensure_field(int pos)
182 {
183         if (!in_field)
184                 new_field(pos);
185 }
186
187 static unsigned char *get_field(fields_t *fields, int i, int *len)
188 {
189         struct field *f = fields_nth(fields, i);
190         *len = f->len;
191         return line_nth(&in_line, f->start_pos);
192 }
193
194 static void warn(struct format *fmt, char *msg, ...)
195 {
196         if (!fmt->quiet) {
197                 fprintf(stderr, "Warning at line %d: ", line_number);
198                 va_list args;
199                 va_start(args, msg);
200                 vfprintf(stderr, msg, args);
201                 va_end(args);
202                 fputc('\n', stderr);
203         }
204 }
205
206 static int next_line(void)
207 {
208         for (;;) {
209                 int c = getchar_unlocked();
210                 if (c == '\r')
211                         continue;
212                 if (c < 0)
213                         return !!line_count(&in_line);
214                 if (c == '\n')
215                         return 1;
216                 *line_push(&in_line) = c;
217         }
218 }
219
220 static int field_chars(struct field *f)
221 {
222         unsigned char *s = line_nth(&in_line, f->start_pos);
223         int i = 0;
224         mbstate_t mbs;
225         memset(&mbs, 0, sizeof(mbs));
226
227         int chars = 0;
228         while (i < f->len) {
229                 size_t k = mbrlen((char *) s + i, f->len - i, &mbs);
230                 if ((int) k <= 0)
231                         break;
232                 i += k;
233                 chars++;
234         }
235
236         return chars;
237 }
238
239 /*** Field statistics ***/
240
// Largest width (in characters) seen so far in each output column; maintained by update_stats()
static intarray_t column_widths;
242
243 static void update_stats(void)
244 {
245         if (!want_stats)
246                 return;
247
248         for (int i = 0; i < fields_count(&out_fields); i++) {
249                 struct field *f = fields_nth(&out_fields, i);
250                 intarray_t *w = &column_widths;
251
252                 while (i >= intarray_count(w))
253                         *intarray_push(w) = 0;
254                 int fw = field_chars(f);
255                 if (*intarray_nth(w, i) < fw)
256                         *intarray_nth(w, i) = fw;
257         }
258 }
259
/*** CSV/TSV back-end ***/
261
/*
 * Read one record in CSV/TSV syntax from standard input.
 *
 * Unquoted occurrences of fmt->fs separate fields. The character fmt->quote
 * switches to quoted mode, in which separators are taken literally and
 * a doubled quote stands for a single quote character. A negative fmt->quote
 * never matches, because end of input is handled before the comparison.
 * Carriage returns are dropped everywhere and a newline always ends the
 * record, even inside quotes (a warning is issued in that case).
 *
 * Returns 1 if a record was read (an empty line yields a record with no
 * fields), 0 at end of input when no field has been started.
 */
static int csv_read(struct format *fmt)
{
        int quoted = 0;
        for (;;) {
                int c = getchar_unlocked();
                int i = line_count(&in_line);   // Offset at which a field started now would begin
restart:
                // Re-entered with a character which has been read ahead after a closing quote
                if (c == '\r')
                        continue;
                if (c < 0 || c == '\n') {
                        if (quoted)
                                warn(fmt, "Missing closing quote.");
                        if (c < 0)
                                return !!fields_count(&in_fields);
                        else
                                return 1;
                }
                if (quoted) {
                        if (c == fmt->quote) {
                                // Look ahead: either a doubled quote or the end of the quoted part
                                c = getchar_unlocked();
                                if (c != fmt->quote) {
                                        quoted = 0;
                                        goto restart;
                                }
                                // Two quotes assimilate to one
                        }
                        // Fall through to pushing the character
                } else if (c == fmt->quote) {
                        quoted = 1;
                        continue;
                } else if (c == fmt->fs && !quoted) {
                        // Close the current field (creating it if the line starts with a separator) and open the next one
                        ensure_field(i);
                        new_field(i);
                        continue;
                }
                ensure_field(i);
                *line_push(&in_line) = c;
                in_field->len++;
        }
}
302
/*
 * Return 1 if `c` is a space, a TAB or a form feed, 0 otherwise.
 */
static int is_ws(int c)
{
        switch (c) {
        case ' ':
        case '\t':
        case '\f':
                return 1;
        default:
                return 0;
        }
}
307
308 static void csv_write(struct format *fmt)
309 {
310         for (int i=0; i < fields_count(&out_fields); i++) {
311                 int len;
312                 unsigned char *p = get_field(&out_fields, i, &len);
313
314                 int need_quotes = 0;
315                 if (fmt->quote >= 0) {
316                         need_quotes = fmt->always_quote;
317                         for (int j=0; !need_quotes && j < len; j++) {
318                                 if (p[j] == fmt->fs || p[j] == fmt->quote)
319                                         need_quotes = 1;
320                         }
321                 }
322                 if (i)
323                         putchar_unlocked(fmt->fs);
324                 if (need_quotes)
325                         putchar_unlocked(fmt->quote);
326                 for (int j=0; j < len; j++) {
327                         int c = p[j];
328                         if (c == fmt->fs && !need_quotes)
329                                 warn(fmt, "Field separator found inside field and quoting is turned off.");
330                         if (c == fmt->quote)
331                                 putchar_unlocked(c);
332                         putchar_unlocked(c);
333                 }
334                 if (need_quotes)
335                         putchar_unlocked(fmt->quote);
336         }
337         putchar_unlocked('\n');
338 }
339
340 /*** White-space back-end ***/
341
342 static int ws_read(struct format *fmt)
343 {
344         if (!next_line())
345                 return 0;
346
347         unsigned char *line = line_first(&in_line);
348         int n = line_count(&in_line);
349         if (!n)
350                 return 1;
351
352         int ws = 0;
353         new_field(0);
354         for (int i=0; i<n; i++) {
355                 int c = line[i];
356                 if (is_ws(c)) {
357                         ws++;
358                 } else {
359                         if (ws) {
360                                 if (!in_field->start_pos &&
361                                     !in_field->len &&
362                                     !fmt->strict_ws)
363                                         in_field->start_pos = i;
364                                 else
365                                         new_field(i);
366                                 ws = 0;
367                         }
368                         in_field->len++;
369                 }
370         }
371
372         if (ws && fmt->strict_ws)
373                 new_field(n);
374         return 1;
375 }
376
377 /*** Regex back-end ***/
378
379 static const char *regex_set(struct format *f, char *rx)
380 {
381         const char *err;
382         int errpos;
383         f->pcre = pcre_compile(rx, PCRE_DOLLAR_ENDONLY, &err, &errpos, NULL);
384         if (!f->pcre)
385                 return err;
386
387         f->pcre_extra = pcre_study(f->pcre, 0, &err);
388         if (!f->pcre_extra)
389                 return err;
390
391         return NULL;
392 }
393
394 static int regex_read(struct format *fmt)
395 {
396         if (!next_line())
397                 return 0;
398
399         unsigned char *c = line_first(&in_line);
400         int n = line_count(&in_line);
401         if (!n)
402                 return 1;
403
404         int i = 0;
405         for (;;) {
406                 int ovec[3];
407                 int sep = pcre_exec(fmt->pcre, fmt->pcre_extra, (char *) c, n, i, 0, ovec, 3);
408                 if (sep < 0) {
409                         if (sep != PCRE_ERROR_NOMATCH)
410                                 warn(fmt, "PCRE matching error %d", sep);
411                         // No further occurrence of the separator: the rest is a single field
412                         new_field(i);
413                         in_field->len = n - i;
414                         return 1;
415                 }
416                 new_field(i);
417                 in_field->len = ovec[0] - i;
418                 i = ovec[1];
419         }
420 }
421
422 /*** Table back-end ***/
423
424 static void table_write(struct format *fmt)
425 {
426         for (int i = 0; i < intarray_count(&column_widths); i++) {
427                 if (fmt->table_grid) {
428                         putchar_unlocked('|');
429                         printf("%*s", fmt->table_sep / 2, "");
430                 } else if (i)
431                         printf("%*s", fmt->table_sep, "");
432
433                 int cw = *intarray_nth(&column_widths, i);
434                 int fw = 0;
435                 if (i < fields_count(&out_fields)) {
436                         int len;
437                         unsigned char *p = get_field(&out_fields, i, &len);
438                         fw = field_chars(fields_nth(&out_fields, i));
439                         if (fw > cw) {
440                                 warn(fmt, "Internal error: Wrongly calculated width of column %d (%d > %d)", i, fw, cw);
441                                 cw = fw;
442                         }
443                         while (len--)
444                                 putchar(*p++);
445                 }
446                 while (fw < cw) {
447                         putchar_unlocked(' ');
448                         fw++;
449                 }
450
451                 if (fmt->table_grid)
452                         printf("%*s", fmt->table_sep - fmt->table_sep / 2, "");
453         }
454
455         if (fmt->table_grid)
456                 putchar_unlocked('|');
457         putchar_unlocked('\n');
458 }
459
460 static void table_write_grid(struct format *fmt, int pos UNUSED)
461 {
462         if (!fmt->table_grid)
463                 return;
464
465         for (int i = 0; i < intarray_count(&column_widths); i++) {
466                 putchar_unlocked('+');
467                 int w = fmt->table_sep + *intarray_nth(&column_widths, i);
468                 while (w--)
469                         putchar('-');
470         }
471         putchar_unlocked('+');
472         putchar_unlocked('\n');
473 }
474
475 /*** Temporary file back-end ***/
476
477 static int tmp_read(struct format *fmt)
478 {
479         FILE *tf = fmt->tmp_file;
480
481         for (;;) {
482                 int c = getc_unlocked(tf);
483                 if (c < 0)
484                         return 0;
485                 if (c == 0xff)
486                         return 1;
487                 if (c == 0xfe) {
488                         c = getc_unlocked(tf);
489                         c = (c << 8) | getc_unlocked(tf);
490                         c = (c << 8) | getc_unlocked(tf);
491                         c = (c << 8) | getc_unlocked(tf);
492                 }
493                 new_field(line_count(&in_line));
494                 in_field->len = c;
495                 while (c--) {
496                         int x = getc_unlocked(tf);
497                         if (x < 0)
498                                 die("Truncated temporary file");
499                         *line_push(&in_line) = x;
500                 }
501         }
502
503         if (ferror_unlocked(tf))
504                 die("I/O error when reading temporary file");
505 }
506
507 static void tmp_write(struct format *fmt)
508 {
509         FILE *tf = fmt->tmp_file;
510
511         for (int i = 0; i < fields_count(&out_fields); i++) {
512                 int len;
513                 unsigned char *p = get_field(&out_fields, i, &len);
514
515                 if (len < 0xfe)
516                         putc_unlocked(len, tf);
517                 else {
518                         putc_unlocked(0xfe, tf);
519                         putc_unlocked((len >> 24) & 0xff, tf);
520                         putc_unlocked((len >> 16) & 0xff, tf);
521                         putc_unlocked((len >> 8) & 0xff, tf);
522                         putc_unlocked(len & 0xff, tf);
523                 }
524
525                 while (len--)
526                         putc_unlocked(*p++, tf);
527         }
528         putc_unlocked(0xff, tf);
529
530         if (ferror_unlocked(tf))
531                 die("I/O error when writing temporary file");
532 }
533
534 /*** Transforms ***/
535
536 static void trim_fields(void)
537 {
538         unsigned char *line = line_first(&in_line);
539         for (int i = 0; i < fields_count(&in_fields); i++) {
540                 struct field *f = fields_nth(&in_fields, i);
541                 while (f->len && is_ws(line[f->start_pos]))
542                         f->start_pos++, f->len--;
543                 while (f->len && is_ws(line[f->start_pos + f->len - 1]))
544                         f->len--;
545         }
546 }
547
548 static void equalize_fields(void)
549 {
550         while (fields_count(&out_fields) < intarray_count(&column_widths)) {
551                 struct field *f = fields_push(&out_fields);
552                 f->start_pos = f->len = 0;
553         }
554 }
555
556 /*** Field names and headers ***/
557
// Names of fields, in column order; each name is a separately allocated string
struct field_names {
        stringarray_t names;
};
561
562 static void add_field(struct field_names *fn, char *name, int namelen)
563 {
564         char *n = xmalloc(namelen + 1);
565         memcpy(n, name, namelen);
566         n[namelen] = 0;
567         *stringarray_push(&fn->names) = n;
568 }
569
/*
 * Split a comma-separated list of names and append each item to `fn`.
 * Empty items (including the one of an empty list) are added as empty names.
 */
static void add_field_names(struct field_names *fn, char *names)
{
        for (char *item = names; item; ) {
                char *comma = strchr(item, ',');
                if (comma) {
                        add_field(fn, item, comma - item);
                        item = comma + 1;
                } else {
                        add_field(fn, item, (int) strlen(item));
                        item = NULL;
                }
        }
}
580
581 static void read_header(void)
582 {
583         if (!(in_format->has_header || in_format->set_field_names))
584                 return;
585
586         struct field_names *fn = xmalloc_zero(sizeof(*fn));
587         in_format->field_names = fn;
588
589         if (in_format->has_header) {
590                 if (!read_line())
591                         die("Missing input header");
592         }
593
594         if (in_format->set_field_names) {
595                 add_field_names(fn, in_format->set_field_names);
596         } else {
597                 for (int i = 0; i < fields_count(&in_fields); i++) {
598                         int len;
599                         char *s = (char *) get_field(&in_fields, i, &len);
600                         add_field(fn, s, len);
601                 }
602         }
603 }
604
605 static void write_header(void)
606 {
607         if (!out_format->has_header) {
608                 write_grid(-1);
609                 return;
610         }
611
612         if (out_format->set_field_names) {
613                 struct field_names *fn = xmalloc_zero(sizeof(*fn));
614                 out_format->field_names = fn;
615                 add_field_names(fn, out_format->set_field_names);
616         } else if (in_format->field_names)
617                 out_format->field_names = in_format->field_names;
618         else
619                 die("Output header requested, but no field names specified");
620
621         line_reset(&in_line);
622         fields_reset(&out_fields);
623         struct field_names *fn = out_format->field_names;
624         for (int i = 0; i < stringarray_count(&fn->names); i++) {
625                 struct field *f = fields_push(&out_fields);
626                 f->start_pos = line_count(&in_line);
627                 f->len = 0;
628                 char *s = *stringarray_nth(&fn->names, i);
629                 while (*s) {
630                         *line_push(&in_line) = *s++;
631                         f->len++;
632                 }
633         }
634
635         // This is tricky: when we are formatting a table, field names are normally
636         // calculated in pass 1, but the header is written in pass 2, so we have to
637         // update column statistics, because field name can be too wide to fit.
638         want_stats++;
639         update_stats();
640         want_stats--;
641         if (want_equalize)
642                 equalize_fields();
643         write_grid(-1);
644         write_line();
645         write_grid(0);
646 }
647
/*
 * Finish the output: draw the grid line below the last row (position 1),
 * if the output format draws grids at all.
 */
static void write_footer(void)
{
        write_grid(1);
}
652
653 static int find_field_by_name(struct field_names *fn, char *name)
654 {
655         for (int i = 0; i < stringarray_count(&fn->names); i++)
656                 if (!strcmp(*stringarray_nth(&fn->names, i), name))
657                         return i + 1;
658         return -1;
659 }
660
661 /*** Field selection ***/
662
// A range of fields to output; fields are numbered from 1
struct selector {
        int first_field, last_field;            // 0 means "boundary"
};

DECLARE_BUF(selectors, struct selector);
// All selectors given on the command line, in the order of their appearance
static selectors_t selectors;
669
/*
 * Parse a string consisting solely of decimal digits.
 *
 * Returns the value, or -1 if the string contains any other character or
 * if another digit follows once the value has reached 100000000. An empty
 * string yields 0.
 */
static int parse_field_num(char *str)
{
        int value = 0;

        for (char *p = str; *p; p++) {
                if (*p < '0' || *p > '9' || value >= 100000000)
                        return -1;
                value = 10 * value + (*p - '0');
        }
        return value;
}
684
685 static int parse_field(char *str)
686 {
687         if (!*str)
688                 return 0;
689
690         int f = parse_field_num(str);
691         if (f > 0)
692                 return f;
693
694         if (in_format->field_names && (f = find_field_by_name(in_format->field_names, str)) > 0)
695                 return f;
696
697         die("Unknown field %s", str);
698 }
699
700 static char *parse_selector(char *str)
701 {
702         char buf[strlen(str) + 1];
703         strcpy(buf, str);
704
705         struct selector *s = selectors_push(&selectors);
706         char *sep = strchr(buf, '-');
707         if (sep) {
708                 *sep++ = 0;
709                 s->first_field = parse_field(buf);
710                 s->last_field = parse_field(sep);
711         } else
712                 s->first_field = s->last_field = parse_field(buf);
713
714         return NULL;
715 }
716
717 static void finish_parse_selectors(void)
718 {
719         if (!selectors_count(&selectors))
720                 parse_selector("-");
721 }
722
723 static void select_fields(void)
724 {
725         for (int i = 0; i < selectors_count(&selectors); i++) {
726                 struct selector *s = selectors_nth(&selectors, i);
727                 int first = s->first_field;
728                 if (first <= 0)
729                         first = 1;
730                 int last = s->last_field;
731                 if (last <= 0)
732                         last = fields_count(&in_fields);
733                 for (int j = first; j <= last; j++) {
734                         struct field *f = fields_push(&out_fields);
735                         if (j >= 1 && j <= fields_count(&in_fields))
736                                 *f = *fields_nth(&in_fields, j-1);
737                         else
738                                 f->start_pos = f->len = 0;
739                 }
740         }
741 }
742
743 static void select_all_fields(void)
744 {
745         for (int i = 0; i < fields_count(&in_fields); i++)
746                 *fields_push(&out_fields) = *fields_nth(&in_fields, i);
747 }
748
749 /*** Processing of files ***/
750
751 static void one_pass(int pass)
752 {
753         if (pass & 2)
754                 write_header();
755
756         for (;;) {
757                 line_number++;
758                 if (!read_line())
759                         break;
760
761                 if (want_trim && (pass & 1))
762                         trim_fields();
763
764                 fields_reset(&out_fields);
765                 if (pass & 1)
766                         select_fields();
767                 else
768                         select_all_fields();
769
770                 if (want_equalize && (pass & 2))
771                         equalize_fields();
772                 update_stats();
773                 write_line();
774         }
775
776         if (pass & 2)
777                 write_footer();
778 }
779
780 static void two_pass(void)
781 {
782         struct format *final_format = out_format;
783
784         // We need to use character set info from the current locale
785         setlocale(LC_CTYPE, "");
786
787         // Pass 1: Set up writer of intermediate format
788         out_format = xmalloc_zero(sizeof(*out_format));
789         out_format->id = FORM_TMP;
790         out_format->read_line = tmp_read;
791         out_format->write_line = tmp_write;
792         out_format->tmp_file = tmpfile();
793         out_format->field_names = in_format->field_names;
794         one_pass(1);
795
796         // Pass 2: Set up reader of intermediate format
797         in_format = out_format;
798         rewind(in_format->tmp_file);
799         line_number = 0;
800         out_format = final_format;
801         want_stats = 0;
802         one_pass(2);
803         fclose(in_format->tmp_file);
804 }
805
806 /*** Parsing of arguments ***/
807
808 static void NONRET usage(void)
809 {
810         printf("\
811 Usage: xsv <in-format> [<out-format>] <options> [<fields>]\n\
812 \n\
813 Formats:\n\
814 -t, --tsv               TAB-separated values (default)\n\
815 -c, --csv               Comma-separated values\n\
816 -w, --ws                Values separated by arbitrary whitespace\n\
817 -W, --strict-ws         Like --ws, but recognize empty columns at start/end\n\
818 -r, --regex=<rx>        Separator given by Perl regular expression (input only)\n\
819     --table             Format a table (output only)\n\
820 \n\
821 Format parameters:\n\
822 -d, --fs=<char>         Delimiter of fields\n\
823 -f, --fields=<f>,...    Set field names\n\
824 -h, --header            The first line contains field names\n\
825 -q, --quiet             Do not show warnings\n\
826     --always-quote      Put quotes around all fields (CSV output only)\n\
827     --table-sep=<n>     Separate table columns by <n> spaces (default: 2)\n\
828     --grid              Separate table columns by grid lines\n\
829 \n\
830 Other options:\n\
831     --trim              Trim leading and trailing whitespaces in fields\n\
832     --equalize          Pad all lines to the maximum number of fields\n\
833 ");
834         exit(0);
835 }
836
837 static void NONRET bad_args(const char *msg, ...)
838 {
839         if (msg) {
840                 va_list args;
841                 va_start(args, msg);
842                 fprintf(stderr, "xsv: ");
843                 vfprintf(stderr, msg, args);
844                 fputc('\n', stderr);
845                 va_end(args);
846         }
847         fprintf(stderr, "Try `xsv --help' for more information.\n");
848         exit(1);
849 }
850
// Short options in getopt syntax; a colon marks options taking an argument
static const char short_options[] = "cd:f:hqr:twW";
852
// Codes of options which have no short form; they start at 256, above any character code
enum long_options {
        OPT_HELP = 256,
        OPT_TRIM,
        OPT_ALWAYS_QUOTE,
        OPT_TABLE,
        OPT_TABLE_SEP,
        OPT_GRID,
        OPT_EQUALIZE,
};
862
// Long options for getopt_long(): name, has an argument (0/1), flag pointer
// (always NULL, so the last member is returned), and the code to return --
// either the equivalent short option or a value of enum long_options.
static const struct option long_options[] = {
        { "always-quote",       0,      NULL,   OPT_ALWAYS_QUOTE },
        { "csv",                0,      NULL,   'c' },
        { "equalize",           0,      NULL,   OPT_EQUALIZE },
        { "fields",             1,      NULL,   'f' },
        { "fs",                 1,      NULL,   'd' },
        { "grid",               0,      NULL,   OPT_GRID },
        { "header",             0,      NULL,   'h' },
        { "quiet",              0,      NULL,   'q' },
        { "regex",              1,      NULL,   'r' },
        { "strict-ws",          0,      NULL,   'W' },
        { "table",              0,      NULL,   OPT_TABLE },
        { "table-sep",          1,      NULL,   OPT_TABLE_SEP },
        { "trim",               0,      NULL,   OPT_TRIM },
        { "tsv",                0,      NULL,   't' },
        { "ws",                 0,      NULL,   'w' },
        { "help",               0,      NULL,   OPT_HELP },
        { NULL,                 0,      NULL,   0 },    // Terminator
};
882
883 static void set_format(int format_id)
884 {
885         struct format *f = xmalloc_zero(sizeof(*f));
886         f->id = format_id;
887
888         switch (format_id) {
889                 case FORM_TSV:
890                         f->fs = '\t';
891                         f->quote = -1;
892                         f->read_line = csv_read;
893                         f->write_line = csv_write;
894                         break;
895                 case FORM_CSV:
896                         f->fs = ',';
897                         f->quote = '"';
898                         f->read_line = csv_read;
899                         f->write_line = csv_write;
900                         break;
901                 case FORM_WS:
902                         f->fs = ' ';
903                         f->quote = -1;
904                         f->read_line = ws_read;
905                         f->write_line = csv_write;
906                         break;
907                 case FORM_REGEX:
908                         f->read_line = regex_read;
909                         break;
910                 case FORM_TABLE:
911                         f->write_line = table_write;
912                         f->write_grid = table_write_grid;
913                         f->needs_stats = 1;
914                         f->table_sep = 2;
915                         break;
916         }
917
918         if (!in_format)
919                 in_format = f;
920         else if (!out_format)
921                 out_format = f;
922         else
923                 bad_args("At most two formats may be given.");
924 }
925
926 static struct format *current_format(void)
927 {
928         if (out_format)
929                 return out_format;
930         if (in_format)
931                 return in_format;
932         set_format(FORM_TSV);
933         return in_format;
934 }
935
936 int main(int argc, char **argv)
937 {
938         int opt;
939         const char *err;
940
941         while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
942                 switch (opt) {
943                         case 'c':
944                                 set_format(FORM_CSV);
945                                 break;
946                         case 'd':
947                                 if (optarg[0])
948                                         current_format()->fs = optarg[0];
949                                 else
950                                         bad_args("No field delimiter given.");
951                                 break;
952                         case 'f':
953                                 current_format()->set_field_names = optarg;
954                                 break;
955                         case 'h':
956                                 current_format()->has_header = 1;
957                                 break;
958                         case 'q':
959                                 current_format()->quiet = 1;
960                                 break;
961                         case 'r':
962                                 set_format(FORM_REGEX);
963                                 err = regex_set(current_format(), optarg);
964                                 if (err)
965                                         bad_args("Error compiling regex: %s", err);
966                                 break;
967                         case 't':
968                                 set_format(FORM_TSV);
969                                 break;
970                         case 'w':
971                                 set_format(FORM_WS);
972                                 break;
973                         case 'W':
974                                 set_format(FORM_WS);
975                                 current_format()->strict_ws = 1;
976                                 break;
977                         case OPT_ALWAYS_QUOTE:
978                                 if (current_format()->id != FORM_CSV)
979                                         bad_args("--always-quote makes sense only for CSV.");
980                                 current_format()->always_quote = 1;
981                                 break;
982                         case OPT_HELP:
983                                 usage();
984                         case OPT_TRIM:
985                                 want_trim = 1;
986                                 break;
987                         case OPT_TABLE:
988                                 set_format(FORM_TABLE);
989                                 break;
990                         case OPT_TABLE_SEP:
991                                 current_format()->table_sep = atoi(optarg);
992                                 break;
993                         case OPT_GRID:
994                                 current_format()->table_grid = 1;
995                                 break;
996                         case OPT_EQUALIZE:
997                                 want_equalize = 1;
998                                 break;
999                         default:
1000                                 bad_args(NULL);
1001                 }
1002
1003         current_format();
1004         if (!out_format)
1005                 out_format = in_format;
1006         if (!in_format->read_line)
1007                 bad_args("Write-only format selected for input.");
1008         if (!out_format->write_line)
1009                 bad_args("Read-only format selected for output.");
1010         read_header();
1011
1012         for (int i = optind; i < argc; i++) {
1013                 err = parse_selector(argv[i]);
1014                 if (err)
1015                         bad_args(err);
1016         }
1017         finish_parse_selectors();
1018
1019         want_stats = out_format->needs_stats | want_equalize;
1020         if (want_stats)
1021                 two_pass();
1022         else
1023                 one_pass(3);
1024         return 0;
1025 }