2 * A Swiss-Army Knife for CSV-like Files
4 * (c) 2012 Martin Mares <mj@ucw.cz>
12 /*** Memory allocation ***/
/*
 * Allocate `bytes` bytes with malloc().
 *
 * The visible lines show the allocation itself and an "Out of memory"
 * diagnostic written to stderr that reports the requested size.
 * NOTE(review): the condition guarding the diagnostic and whatever follows
 * it are not visible in this excerpt; presumably the message is printed
 * only when malloc() returns NULL and the program then terminates -- confirm.
 */
14 static void *xmalloc(size_t bytes)
16 void *p = malloc(bytes);
18 fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes);
/*
 * Resize the allocation `old` to `bytes` bytes with realloc().
 *
 * The visible lines show the realloc() call and the same "Out of memory"
 * diagnostic that xmalloc() prints.
 * NOTE(review): the guard around the diagnostic and the return statement are
 * not visible in this excerpt; presumably the failure path exits -- confirm.
 */
24 static void *xrealloc(void *old, size_t bytes)
26 void *p = realloc(old, bytes);
28 fprintf(stderr, "xsv: Out of memory (cannot allocate %zu bytes)\n", bytes);
/*
 * DECLARE_BUF(name, type) generates a growable array of `type`:
 *
 *   name_t        struct holding the storage pointer (start), the number of
 *                 used elements (count) and the allocated capacity (max)
 *   name_init()   sets start to NULL and count/max to 0
 *   name_reset()  sets count to 0; the storage is kept
 *   name_count()  returns count
 *   name_extend() doubles max (or sets it to 16 when it is 0) and resizes
 *                 the storage through xrealloc()
 *   name_push()   extends the storage when count >= max, then returns a
 *                 pointer to the next free element and increments count
 *   name_nth()    returns a pointer to element n; no bounds check is done
 *
 * Because growth goes through xrealloc(), pointers previously obtained from
 * name_push()/name_nth() should be treated as invalid after a later push.
 */
34 #define DECLARE_BUF(name, type) \
35 typedef struct { type *start; int count; int max; } name##_t; \
36 static inline void name##_init(name##_t *b) { b->start = NULL; b->count = b->max = 0; } \
37 static inline void name##_reset(name##_t *b) { b->count = 0; } \
38 static inline int name##_count(name##_t *b) { return b->count; } \
39 static void name##_extend(name##_t *b) { \
40 b->max = b->max ? 2*b->max : 16; \
41 b->start = xrealloc(b->start, b->max * sizeof(type)); \
43 static inline type *name##_push(name##_t *b) { \
44 if (b->count >= b->max) name##_extend(b); \
45 return &b->start[b->count++]; \
47 static inline type *name##_nth(name##_t *b, int n) { return &b->start[n]; } \
50 /*** Formats and their parameters ***/
// Per-format callbacks. The enclosing declaration is not visible in this
// excerpt; elsewhere in the file they are accessed through `struct format *`.
// read_line: reader callback; main() tests its return value for zero.
64 int (*read_line)(void);
// write_line: writer callback; main() invokes it through out_format.
65 void (*write_line)(void);
// Currently selected input and output formats (assigned in set_format() and main()).
68 static struct format *in_format, *out_format;
// Growable array types: fields_t holds struct field entries,
// line_t holds the raw bytes of a line.
75 DECLARE_BUF(fields, struct field);
76 DECLARE_BUF(line, unsigned char);
// Fields of the input line and the fields chosen for output.
78 static fields_t in_fields, out_fields;
// Most recently created input field (set by new_field()).
79 static struct field *in_field;
// Bytes of the input line; fields refer to it by start_pos.
80 static line_t in_line;
// Line number reported by warn(); where it is updated is not visible in this excerpt.
81 static int line_number;
/*
 * Append a new entry to in_fields, remember it in in_field and record the
 * current length of in_line as the position where the field's data starts.
 * NOTE(review): any further initialisation (e.g. of the field's len) is not
 * visible in this excerpt.
 */
83 static void new_field(void)
85 in_field = fields_push(&in_fields);
86 in_field->start_pos = line_count(&in_line);
/*
 * Only the signature is visible in this excerpt.
 * NOTE(review): presumably makes sure a current input field exists before
 * data is appended to it -- confirm against the full body.
 */
90 static void ensure_field(void)
/*
 * Print a warning to stderr in the form "Warning at line <line_number>: <msg>".
 *
 * fmt: format the warning relates to. It is not used on the visible lines.
 *      NOTE(review): presumably the warning is suppressed when the format's
 *      quiet flag is set -- confirm.
 * msg: warning text.
 */
96 static void warn(struct format *fmt, char *msg)
99 fprintf(stderr, "Warning at line %d: %s\n", line_number, msg);
/*
 * Reader for delimiter-separated input using in_format->fs as the separator
 * and in_format->quote as the quote character.
 *
 * Returns 1 when at least one field was collected in in_fields at the point
 * the line ends, 0 otherwise.
 *
 * NOTE(review): only part of the body is visible in this excerpt; the
 * character-fetching loop, the `quoted` state handling and the field
 * bookkeeping are incomplete here.
 */
102 static int csv_read(void)
// End of input (negative c) or end of line.
110 if (c < 0 || c == '\n') {
// NOTE(review): the guard for this warning is not visible; presumably it
// fires when the line ends while still inside quotes -- confirm.
112 warn(in_format, "Missing closing quote.");
// !! normalises the field count to 0 or 1.
114 return !!fields_count(&in_fields);
119 if (c == in_format->quote) {
121 if (c != in_format->quote) {
125 // Two quotes assimilate to one
127 // Fall through to pushing the character
128 } else if (c == in_format->quote) {
// A separator is only checked for when not inside quotes.
131 } else if (c == in_format->fs && !quoted) {
// Append the character to the line buffer.
137 *line_push(&in_line) = c;
/*
 * Return non-zero when c is a space, a horizontal tab or a form feed.
 * Newline, carriage return and vertical tab are not treated as whitespace here.
 */
142 static int is_ws(int c)
144 return (c == ' ' || c == '\t' || c == '\f');
/*
 * Writer: emit the fields listed in out_fields to stdout using
 * out_format->fs as the separator and out_format->quote as the quote
 * character. Field contents are read from in_line via each field's
 * start_pos and len.
 *
 * NOTE(review): only part of the body is visible in this excerpt; the
 * declaration and assignment of need_quotes, the conditions around the
 * putchar() calls and the line terminator are not shown.
 */
147 static void csv_write(void)
// Base pointer of the line buffer; fields index into it.
149 unsigned char *line = line_nth(&in_line, 0);
150 int n = fields_count(&out_fields);
151 for (int i=0; i<n; i++) {
152 struct field *f = fields_nth(&out_fields, i);
// Quoting is only considered when the format has a non-negative quote character.
154 if (out_format->quote >= 0) {
// Scan the field for characters equal to the separator or the quote.
// NOTE(review): presumably this sets need_quotes -- confirm.
155 for (int j=0; j < f->len; j++) {
156 int c = line[f->start_pos + j];
157 if (c == out_format->fs || c == out_format->quote) {
// NOTE(review): guard not visible; presumably written between fields only.
164 putchar(out_format->fs);
// NOTE(review): guard not visible; presumably the opening quote when need_quotes is set.
166 putchar(out_format->quote);
167 for (int j=0; j < f->len; j++) {
168 int c = line[f->start_pos + j];
// A separator inside an unquoted field cannot be represented; warn about it.
169 if (c == out_format->fs && !need_quotes)
170 warn(out_format, "Field separator found inside field and quoting is turned off.");
// NOTE(review): action not visible; presumably the quote character is doubled.
171 if (c == out_format->quote)
// NOTE(review): guard not visible; presumably the closing quote.
176 putchar(out_format->quote);
/*
 * Reader used for the whitespace-separated format (installed by set_format()
 * together with csv_write).
 *
 * Returns 1 when at least one field was collected in in_fields, 0 otherwise.
 *
 * NOTE(review): only the return statement and the append to the line buffer
 * are visible in this excerpt; the splitting logic is not shown.
 */
181 static int ws_read(void)
// !! normalises the field count to 0 or 1.
187 return !!fields_count(&in_fields);
// Append the character to the line buffer.
199 *line_push(&in_line) = c;
/*
 * Strip whitespace (as defined by is_ws()) from the edges of every input
 * field by adjusting start_pos and len; the line buffer itself is not modified
 * on the visible lines.
 */
208 static void trim_fields(void)
210 unsigned char *line = line_nth(&in_line, 0);
211 for (int i = 0; i < fields_count(&in_fields); i++) {
212 struct field *f = fields_nth(&in_fields, i);
// Leading whitespace: move the start forward and shorten the field.
213 while (f->len && is_ws(line[f->start_pos]))
214 f->start_pos++, f->len--;
// Trailing whitespace: test the last character of the field.
// NOTE(review): the loop body is not visible; presumably it decrements len.
215 while (f->len && is_ws(line[f->start_pos + f->len - 1]))
220 /*** Field selection ***/
// Bounds of a field selection; the enclosing declaration is not visible in
// this excerpt (elsewhere these are accessed through `struct selector *`).
// select_fields() treats the numbers as 1-based and the range as inclusive.
223 int first_field, last_field;
// Growable array of selectors and the list built from the command line.
226 DECLARE_BUF(selectors, struct selector);
227 static selectors_t selectors;
/*
 * Parse one field selector argument and append it to `selectors`.
 *
 * str: selector text; the visible code handles a single number and a
 *      '-'-separated pair of numbers.
 *
 * Returns a char pointer that main() stores in a variable named `err`.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably NULL means success and non-NULL is an error message -- confirm.
 * NOTE(review): atoi() gives no error indication, so malformed numbers are
 * not detected on the visible lines.
 */
229 static char *parse_selector(char *str)
// Local buffer sized to hold a copy of str including the terminator.
231 char buf[strlen(str) + 1];
234 struct selector *s = selectors_push(&selectors);
235 char *sep = strchr(buf, '-');
// Range form: the part before the separator is the first field, the part
// after it is the last. NOTE(review): the lines that split buf at sep are not visible.
238 s->first_field = atoi(buf);
239 s->last_field = atoi(sep);
// Single-number form: first and last are the same field.
241 s->first_field = s->last_field = atoi(buf);
/*
 * Called by main() after all selector arguments have been parsed.
 * The visible line checks whether no selector was given.
 * NOTE(review): the action taken in that case is not visible in this excerpt;
 * presumably a default selector covering all fields is added -- confirm.
 */
246 static void finish_parse_selectors(void)
248 if (!selectors_count(&selectors))
/*
 * Fill out_fields according to the selector list: for every selector, one
 * output field is appended for each index in its inclusive range.
 *
 * NOTE(review): the conditions under which `first` and `last` are adjusted
 * are only partly visible in this excerpt.
 */
252 static void select_fields(void)
254 for (int i = 0; i < selectors_count(&selectors); i++) {
255 struct selector *s = selectors_nth(&selectors, i);
256 int first = s->first_field;
259 int last = s->last_field;
// NOTE(review): guard not visible; presumably an open-ended range is
// clamped to the number of input fields here.
261 last = fields_count(&in_fields);
262 for (int j = first; j <= last; j++) {
263 struct field *f = fields_push(&out_fields);
// Selector indices are 1-based; an existing input field is copied ...
264 if (j >= 1 && j <= fields_count(&in_fields))
265 *f = *fields_nth(&in_fields, j-1);
// ... and an index outside the input yields an empty field.
267 f->start_pos = f->len = 0;
272 /*** Parsing of arguments ***/
/*
 * Help text for the program. The lines below the signature belong to a
 * backslash-continued string literal; the call that prints it and whatever
 * follows are not visible in this excerpt.
 */
274 static void usage(void)
277 Usage: xsv <in-format> [<out-format>] <options> [<fields>]\n\
280 -t, --tsv TAB-separated values (default)\n\
281 -c, --csv Comma-separated values\n\
282 -w, --ws Values separated by arbitrary whitespace\n\
284 Format parameters:\n\
285 -d, --fs=<char> Delimiter of fields\n\
286 -q, --quiet Do not show warnings\n\
289 --trim Trim leading and trailing whitespaces in fields\n\
/*
 * Report a command-line error: print "xsv: <msg>" and a hint to run
 * `xsv --help' to stderr.
 * NOTE(review): what happens after the two messages is not visible in this
 * excerpt; presumably the program exits with a failure status -- confirm.
 */
294 static void bad_args(char *msg)
297 fprintf(stderr, "xsv: %s\n", msg);
298 fprintf(stderr, "Try `xsv --help' for more information.\n");
// Short options passed to getopt_long(): c, q, t and w take no argument,
// d requires one (marked by the trailing colon).
302 static const char short_options[] = "cd:qtw";
// Long options passed to getopt_long(). Each entry maps a long name to the
// code returned by getopt_long(); "fs" is the only one declared with an
// argument. OPT_TRIM and OPT_HELP are defined outside this excerpt.
// The all-zero entry terminates the table.
309 static const struct option long_options[] = {
310 { "csv", 0, NULL, 'c' },
311 { "fs", 1, NULL, 'd' },
312 { "quiet", 0, NULL, 'q' },
313 { "trim", 0, NULL, OPT_TRIM },
314 { "tsv", 0, NULL, 't' },
315 { "ws", 0, NULL, 'w' },
316 { "help", 0, NULL, OPT_HELP },
317 { NULL, 0, NULL, 0 },
/*
 * Create a new zero-filled struct format for `format_id`, install its
 * reader/writer callbacks and register it as a selected format.
 *
 * NOTE(review): the branch labels selecting between the three callback
 * assignments below, the other per-format settings and the first arm of the
 * registration chain are not visible in this excerpt.
 */
320 static void set_format(int format_id)
322 struct format *f = xmalloc(sizeof(*f));
323 memset(f, 0, sizeof(*f));
// Two of the visible variants use the csv reader and writer ...
330 f->read_line = csv_read;
331 f->write_line = csv_write;
336 f->read_line = csv_read;
337 f->write_line = csv_write;
// ... and one uses the whitespace reader with the csv writer.
342 f->read_line = ws_read;
343 f->write_line = csv_write;
// Registration: NOTE(review) presumably the first format given becomes
// in_format and the second out_format; a third is rejected below.
349 else if (!out_format)
// NOTE(review): the message reads "format" where "formats" is meant; it is a
// runtime string and is left unchanged here.
352 bad_args("At most two format may be given.");
/*
 * Return the format that format-parameter options (such as the field
 * delimiter and the quiet flag set in main()) apply to.
 * NOTE(review): only the call below is visible in this excerpt; presumably a
 * TSV format is created as the default when no format has been selected
 * yet -- confirm.
 */
355 static struct format *current_format(void)
361 set_format(FORM_TSV);
/*
 * Program entry point. The visible lines parse options with getopt_long(),
 * parse the remaining arguments as field selectors, initialise the field
 * buffers, and then read through in_format->read_line() and write through
 * out_format->write_line().
 *
 * NOTE(review): only part of the body is visible in this excerpt; the option
 * case labels, the processing loop and the return value are not shown.
 */
365 int main(int argc, char **argv)
// getopt_long() returns a negative value once all options are consumed.
370 while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
373 set_format(FORM_CSV);
// Only the first character of the option argument is used as the delimiter.
377 current_format()->fs = optarg[0];
// NOTE(review): guard not visible; presumably reached when the argument is empty.
379 bad_args("No field delimiter given.");
382 current_format()->quiet = 1;
385 set_format(FORM_TSV);
// NOTE(review): guard not visible; presumably used when no separate output
// format was given, so output uses the input format.
401 out_format = in_format;
// Remaining command-line arguments are field selectors.
403 for (int i = optind; i < argc; i++) {
404 char *err = parse_selector(argv[i]);
408 finish_parse_selectors();
410 fields_init(&in_fields);
411 fields_init(&out_fields);
// Start each read with empty field and line buffers (storage is reused).
416 fields_reset(&in_fields);
417 line_reset(&in_line);
// A zero return from the reader is handled specially.
// NOTE(review): the action is not visible; presumably processing stops.
419 if (!in_format->read_line())
425 fields_reset(&out_fields);
428 out_format->write_line();