// end
DECLARE_BUF(intarray, int);
+DECLARE_BUF(stringarray, char *);
/*** Formats and their parameters ***/
void (*write_line)(struct format *fmt);
int needs_stats;
+ // Field names
+ int has_header;
+ char *set_field_names;
+ struct field_names *field_names;
+
// CSV backend:
int always_quote;
static line_t in_line;
static int line_number;
+/*
+ * Read one line of input using the reader of the current input format.
+ *
+ * Resets the parsed-field list and the line buffer, clears the current
+ * field pointer and then calls in_format->read_line().
+ *
+ * Returns 1 if the reader produced a line, 0 if it did not.
+ * Dies if the error indicator of stdin is set afterwards.
+ */
+static int read_line(void)
+{
+	fields_reset(&in_fields);
+	line_reset(&in_line);
+	in_field = NULL;
+
+	int got_line = in_format->read_line(in_format);
+
+	// Check the error indicator even when the reader returned 0: from here
+	// a return of 0 cannot be told apart from a failed read, and a failed
+	// read must not be mistaken for a clean end of input.
+	if (ferror_unlocked(stdin))
+		die("I/O error when reading standard input");
+
+	return got_line ? 1 : 0;
+}
+
+/*
+ * Emit the current line using the writer of the current output format.
+ * Dies if the error indicator of stdout is set afterwards.
+ */
+static void write_line(void)
+{
+	out_format->write_line(out_format);
+	if (ferror_unlocked(stdout))
+		die("I/O error when writing standard output");
+}
+
static void new_field(int pos)
{
in_field = fields_push(&in_fields);
new_field(pos);
}
+// FIXME: Use elsewhere
+/*
+ * Look up the i-th entry of @fields. Its length is stored to *len and a
+ * pointer to its first byte is returned. The bytes always live in the
+ * global in_line buffer, whichever field list is passed in.
+ */
+static unsigned char *get_field(fields_t *fields, int i, int *len)
+{
+	struct field *fld = fields_nth(fields, i);
+	unsigned char *start = line_nth(&in_line, fld->start_pos);
+
+	*len = fld->len;
+	return start;
+}
+
static void warn(struct format *fmt, char *msg, ...)
{
if (!fmt->quiet) {
}
}
+/*** Field names and headers ***/
+
+// Ordered list of field names attached to a format.
+// find_field_by_name() reports the entry at index i as field number i+1.
+struct field_names {
+ stringarray_t names;	// entries pushed by add_field() are xmalloc'ed, NUL-terminated copies
+};
+
+/*
+ * Append one name to @fn. The first @namelen bytes of @name are copied
+ * to a freshly allocated, NUL-terminated string, so @name itself does
+ * not have to be terminated.
+ */
+static void add_field(struct field_names *fn, char *name, int namelen)
+{
+	char *copy = xmalloc(namelen + 1);
+
+	memcpy(copy, name, namelen);
+	copy[namelen] = 0;
+
+	char **slot = stringarray_push(&fn->names);
+	*slot = copy;
+}
+
+/*
+ * Split the comma-separated list @names and append every item to @fn.
+ * Empty items (e.g. between two adjacent commas) are added as empty names.
+ * A NULL @names adds nothing.
+ */
+static void add_field_names(struct field_names *fn, char *names)
+{
+	for (char *cur = names; cur; ) {
+		char *comma = strchr(cur, ',');
+
+		if (comma) {
+			add_field(fn, cur, comma - cur);
+			cur = comma + 1;
+		} else {
+			// Last item: runs up to the end of the string
+			add_field(fn, cur, (int) strlen(cur));
+			cur = NULL;
+		}
+	}
+}
+
+/*
+ * Set up the field names of the input format.
+ *
+ * Does nothing unless the input format has a header line or an explicit
+ * list of names. If a header line is expected, it is read (and its
+ * absence is fatal). An explicit list of names takes precedence over
+ * the names found in the header line.
+ */
+static void read_header(void)
+{
+	if (!in_format->has_header && !in_format->set_field_names)
+		return;
+
+	struct field_names *fn = xmalloc_zero(sizeof(*fn));
+	in_format->field_names = fn;
+
+	if (in_format->has_header && !read_line())
+		die("Missing input header");
+
+	if (in_format->set_field_names) {
+		add_field_names(fn, in_format->set_field_names);
+		return;
+	}
+
+	// No explicit names: every field of the header line becomes a name
+	int i = 0;
+	while (i < fields_count(&in_fields)) {
+		int len;
+		char *s = (char *) get_field(&in_fields, i, &len);
+		add_field(fn, s, len);
+		i++;
+	}
+}
+
+/*
+ * Write a header line if the output format asks for one.
+ *
+ * The names come from the output format's explicit list if it has one,
+ * otherwise from the input format's field names. It is a fatal error
+ * if neither is available.
+ */
+static void write_header(void)
+{
+	if (!out_format->has_header)
+		return;
+
+	if (out_format->set_field_names) {
+		struct field_names *own = xmalloc_zero(sizeof(*own));
+		out_format->field_names = own;
+		add_field_names(own, out_format->set_field_names);
+	} else {
+		if (!in_format->field_names)
+			die("Output header requested, but no field names specified");
+		out_format->field_names = in_format->field_names;
+	}
+
+	// Assemble the header in the in_line buffer; out_fields point into it
+	line_reset(&in_line);
+	fields_reset(&out_fields);
+	struct field_names *fn = out_format->field_names;
+	for (int i = 0; i < stringarray_count(&fn->names); i++) {
+		struct field *f = fields_push(&out_fields);
+		f->start_pos = line_count(&in_line);
+		f->len = 0;
+		for (char *c = *stringarray_nth(&fn->names, i); *c; c++) {
+			*line_push(&in_line) = *c;
+			f->len++;
+		}
+	}
+	write_line();
+}
+
+/*
+ * Search @fn for an exact match of @name.
+ * Returns the 1-based number of the first matching field, or -1 if
+ * there is none.
+ */
+static int find_field_by_name(struct field_names *fn, char *name)
+{
+	int idx = 0;
+
+	while (idx < stringarray_count(&fn->names)) {
+		char *candidate = *stringarray_nth(&fn->names, idx);
+		idx++;		// from now on, idx is the 1-based field number
+		if (strcmp(candidate, name) == 0)
+			return idx;
+	}
+	return -1;
+}
+
/*** Field selection ***/
struct selector {
- int first_field, last_field;
+ int first_field, last_field; // 0 means "boundary"
};
DECLARE_BUF(selectors, struct selector);
static selectors_t selectors;
+/*
+ * Parse @str as a non-negative decimal number.
+ * Returns the number, or -1 if the string contains a non-digit or the
+ * number has more than 9 digits' worth of magnitude. An empty string
+ * yields 0.
+ */
+static int parse_field_num(char *str)
+{
+	int value = 0;
+
+	for (char *p = str; *p; p++) {
+		// A single unsigned comparison rejects both sides of '0'..'9'
+		unsigned digit = (unsigned) (*p - '0');
+		if (digit > 9)
+			return -1;
+		// Refuse before multiplying, so that value never overflows
+		if (value >= 100000000)
+			return -1;
+		value = 10*value + (int) digit;
+	}
+	return value;
+}
+
+/*
+ * Translate one end of a selector to a field number.
+ *
+ * An empty string gives 0. A positive decimal number is returned as is.
+ * Anything else is looked up among the field names of the input format
+ * (if it has any); an unsuccessful lookup is a fatal error.
+ */
+static int parse_field(char *str)
+{
+	if (*str == 0)
+		return 0;
+
+	int num = parse_field_num(str);
+	if (num > 0)
+		return num;
+
+	struct field_names *names = in_format->field_names;
+	if (names) {
+		int idx = find_field_by_name(names, str);
+		if (idx > 0)
+			return idx;
+	}
+
+	// NOTE(review): no return below, so die() is presumably declared noreturn; confirm
+	die("Unknown field %s", str);
+}
+
static char *parse_selector(char *str)
{
char buf[strlen(str) + 1];
char *sep = strchr(buf, '-');
if (sep) {
*sep++ = 0;
- s->first_field = atoi(buf);
- s->last_field = atoi(sep);
+ s->first_field = parse_field(buf);
+ s->last_field = parse_field(sep);
} else
- s->first_field = s->last_field = atoi(buf);
+ s->first_field = s->last_field = parse_field(buf);
return NULL;
}
static void one_pass(int pass)
{
- line_number = 0;
for (;;) {
line_number++;
- fields_reset(&in_fields);
- line_reset(&in_line);
- in_field = NULL;
- if (!in_format->read_line(in_format))
+ if (!read_line())
break;
- if (ferror_unlocked(stdin))
- die("I/O error when reading standard input");
if (want_trim && (pass & 1))
trim_fields();
if (out_format->needs_stats)
update_stats();
- out_format->write_line(out_format);
- if (ferror_unlocked(stdout))
- die("I/O error when writing standard input");
+
+ write_line();
}
}
// Pass 2: Set up reader of intermediate format
in_format = out_format;
rewind(in_format->tmp_file);
+ line_number = 0;
out_format = final_format;
out_format->needs_stats = 0;
one_pass(2);
\n\
Format parameters:\n\
-d, --fs=<char> Delimiter of fields\n\
+-f, --fields=<f>,... Set field names\n\
+-h, --header The first line contains field names\n\
-q, --quiet Do not show warnings\n\
--always-quote Put quotes around all fields (CSV output only)\n\
--table-sep=<n> Separate table columns by <n> spaces (default: 2)\n\
exit(1);
}
-static const char short_options[] = "cd:qr:twW";
+static const char short_options[] = "cd:f:hqr:twW";
enum long_options {
OPT_HELP = 256,
static const struct option long_options[] = {
{ "always-quote", 0, NULL, OPT_ALWAYS_QUOTE },
{ "csv", 0, NULL, 'c' },
+ { "fields", 1, NULL, 'f' },
{ "fs", 1, NULL, 'd' },
+ { "header", 0, NULL, 'h' },
{ "quiet", 0, NULL, 'q' },
{ "regex", 1, NULL, 'r' },
{ "strict-ws", 0, NULL, 'W' },
else
bad_args("No field delimiter given.");
break;
+ case 'f':
+ current_format()->set_field_names = optarg;
+ break;
+ case 'h':
+ current_format()->has_header = 1;
+ break;
case 'q':
current_format()->quiet = 1;
break;
bad_args("Write-only format selected for input.");
if (!out_format->write_line)
bad_args("Read-only format selected for output.");
+ read_header();
for (int i = optind; i < argc; i++) {
err = parse_selector(argv[i]);
}
finish_parse_selectors();
+ write_header();
if (out_format->needs_stats)
two_pass();
else