return p;
}
+// Allocate `bytes` bytes with xmalloc() and clear them to zero.
+// Returns the pointer obtained from xmalloc().
+static void *xmalloc_zero(size_t bytes)
+{
+	void *mem = xmalloc(bytes);
+
+	// memset() returns its first argument, i.e. the freshly zeroed block
+	return memset(mem, 0, bytes);
+}
+
static void *xrealloc(void *old, size_t bytes)
{
void *p = realloc(old, bytes);
static inline type *name##_nth(name##_t *b, int n) { return &b->start[n]; } \
// end
+DECLARE_BUF(intarray, int);	// growable buffer of ints: intarray_t plus intarray_init/count/push/nth helpers used below
+
/*** Formats and their parameters ***/
enum format_id {
FORM_CSV,
FORM_WS,
FORM_REGEX,
+ FORM_TMP,	// intermediate temporary-file format set up internally by two_pass()
+ FORM_TABLE,	// aligned table output, selected by --table
};
struct format {
int quiet;
int (*read_line)(void);
void (*write_line)(void);
+ int needs_two_passes;	// non-zero: main() runs two_pass() instead of one_pass()
+
// CSV backend:
int always_quote;
+
// WS backend:
int strict_ws;
+
// regex backend:
pcre *pcre;
pcre_extra *pcre_extra;
+
+ // Temporary file backend:
+ FILE *tmp_file;	// created by tmpfile() in two_pass(); written by tmp_write(), read by tmp_read()
+ intarray_t column_widths;	// per-column maximum field length, maintained by tmp_write()
+
+ // Table backend:
+ int table_sep;	// number of spaces printed between columns (set to 2 in set_format())
};
static struct format *in_format, *out_format;
+static int want_trim;	// set to 1 by --trim; one_pass() then calls trim_fields() on every line
struct field {
int start_pos;
}
}
+/*** CSV/TSV back-end ***/
+
static int csv_read(void)
{
int quoted = 0;
putchar('\n');
}
+/*** White-space back-end ***/
+
static int ws_read(void)
{
if (!next_line())
return 1;
}
+/*** Regex back-end ***/
+
static const char *regex_set(struct format *f, char *rx)
{
const char *err;
}
}
+/*** Table back-end ***/
+
+// Write the current line as one row of an aligned table.
+// Every field of in_fields is followed by spaces up to the width stored for
+// its column in in_format->column_widths; adjacent columns are separated by
+// out_format->table_sep spaces and the row is terminated by a newline.
+static void table_write(void)
+{
+	int ncols = fields_count(&in_fields);
+
+	for (int col = 0; col < ncols; col++) {
+		if (col > 0)
+			printf("%*s", out_format->table_sep, "");
+
+		struct field *fld = fields_nth(&in_fields, col);
+		int width = *intarray_nth(&in_format->column_widths, col);
+		if (fld->len > width) {
+			// The stored width is expected to be the column maximum
+			// collected by tmp_write(); widen it rather than truncate.
+			warn(out_format, "Internal error: Wrongly calculated column width (%d > %d)", fld->len, width);
+			width = fld->len;
+		}
+
+		unsigned char *src = line_nth(&in_line, fld->start_pos);
+		int pos;
+
+		// Field contents first ...
+		for (pos = 0; pos < fld->len; pos++)
+			putchar(src[pos]);
+		// ... then padding up to the column width
+		for (; pos < width; pos++)
+			putchar(' ');
+	}
+	putchar('\n');
+}
+
+/*** Temporary file back-end ***/
+
+// Read one line back from the temporary file written by tmp_write().
+//
+// Record layout (as produced by tmp_write()): for every field a length
+// header -- a single byte below 0xfe, or 0xfe followed by four bytes,
+// most significant first -- and then that many data bytes; the byte 0xff
+// terminates the line.
+//
+// Returns 1 when a complete line has been read, 0 on end of file or when
+// the file turns out to be truncated or corrupted (a warning is issued in
+// the latter cases).
+static int tmp_read(void)
+{
+	FILE *tf = in_format->tmp_file;
+
+	for (;;) {
+		int c = fgetc(tf);
+		if (c < 0)
+			return 0;
+		if (c == 0xff)
+			return 1;
+
+		int len = c;
+		if (c == 0xfe) {
+			// Decode in unsigned arithmetic and check every byte:
+			// shifting an EOF (-1) or overflowing a signed int
+			// would be undefined behaviour.
+			unsigned int wide = 0;
+			for (int k = 0; k < 4; k++) {
+				int b = fgetc(tf);
+				if (b < 0) {
+					warn(in_format, "Truncated temporary file");
+					return 0;
+				}
+				wide = (wide << 8) | (unsigned int) b;
+			}
+			// tmp_write() stores a non-negative int, so the top bit
+			// can be set only if the file is damaged.
+			if (wide > 0x7fffffffU) {
+				warn(in_format, "Corrupted temporary file");
+				return 0;
+			}
+			len = (int) wide;
+		}
+
+		new_field(line_count(&in_line));
+		in_field->len = len;
+		for (int k = 0; k < len; k++) {
+			int x = fgetc(tf);
+			if (x < 0) {
+				warn(in_format, "Truncated temporary file");
+				return 0;
+			}
+			*line_push(&in_line) = x;
+		}
+	}
+}
+
+// Append the current line to the temporary file of out_format and update
+// the per-column maximum widths in out_format->column_widths.
+//
+// Each field is stored as a length header (one byte if the length is below
+// 0xfe, otherwise 0xfe followed by the length as four bytes, most
+// significant first) and the raw field bytes; 0xff ends the line.
+static void tmp_write(void)
+{
+	FILE *tf = out_format->tmp_file;
+	intarray_t *widths = &out_format->column_widths;
+	int nfields = fields_count(&in_fields);
+
+	for (int col = 0; col < nfields; col++) {
+		struct field *fld = fields_nth(&in_fields, col);
+
+		// Length header
+		if (fld->len >= 0xfe) {
+			fputc(0xfe, tf);
+			for (int shift = 24; shift >= 0; shift -= 8)
+				fputc((fld->len >> shift) & 0xff, tf);
+		} else {
+			fputc(fld->len, tf);
+		}
+
+		// Field data
+		unsigned char *src = line_nth(&in_line, fld->start_pos);
+		for (int k = 0; k < fld->len; k++)
+			fputc(src[k], tf);
+
+		// Make sure the column has a width slot, then raise it if needed
+		while (intarray_count(widths) <= col)
+			*intarray_push(widths) = 0;
+		int *slot = intarray_nth(widths, col);
+		if (*slot < fld->len)
+			*slot = fld->len;
+	}
+	fputc(0xff, tf);
+}
+
/*** Transforms ***/
static void trim_fields(void)
}
}
+/*** Processing of files ***/
+
+// Process the whole input once: read lines with in_format->read_line()
+// until it returns 0, optionally trim the fields, select the output fields
+// and hand every line to out_format->write_line().
+// line_number is restarted from 1 and counts the line being read.
+static void one_pass(void)
+{
+	for (line_number = 1; ; line_number++) {
+		// Start every line with empty field and character buffers
+		fields_reset(&in_fields);
+		line_reset(&in_line);
+		in_field = NULL;
+
+		if (!in_format->read_line())
+			break;
+
+		if (want_trim)
+			trim_fields();
+
+		fields_reset(&out_fields);
+		select_fields();
+
+		out_format->write_line();
+	}
+}
+
+// Process the input in two passes, for output formats which need to see
+// all data before they can print anything (needs_two_passes is set).
+//
+// Pass 1 reads the real input and stores it through tmp_write() in a
+// temporary file, collecting column widths on the way. Pass 2 reads the
+// temporary file back with tmp_read() and writes it with the output format
+// originally requested. On return, in_format points to the intermediate
+// format, whose temporary file has already been closed.
+//
+// If the temporary file cannot be created or written, an error message is
+// printed to stderr and the program exits with status 1.
+static void two_pass(void)
+{
+	struct format *final_format = out_format;
+
+	// Pass 1: Set up writer of intermediate format
+	struct format *tmp_format = xmalloc_zero(sizeof(*tmp_format));
+	tmp_format->id = FORM_TMP;
+	tmp_format->read_line = tmp_read;
+	tmp_format->write_line = tmp_write;
+	tmp_format->tmp_file = tmpfile();
+	if (!tmp_format->tmp_file) {
+		// tmpfile() returns NULL on failure; writing to it would crash
+		perror("Cannot create temporary file");
+		exit(1);
+	}
+	intarray_init(&tmp_format->column_widths);
+	out_format = tmp_format;
+	one_pass();
+
+	// Detect write errors (e.g. a full disk) now: rewind() below clears
+	// the stream's error indicator and pass 2 would silently print a
+	// truncated table.
+	if (fflush(tmp_format->tmp_file) != 0 || ferror(tmp_format->tmp_file)) {
+		fprintf(stderr, "Error writing temporary file\n");
+		exit(1);
+	}
+
+	// Pass 2: Set up reader of intermediate format
+	in_format = tmp_format;
+	rewind(in_format->tmp_file);
+	out_format = final_format;
+	one_pass();
+	fclose(in_format->tmp_file);
+}
+
/*** Parsing of arguments ***/
static void usage(void)
-w, --ws Values separated by arbitrary whitespace\n\
-W, --strict-ws Like --ws, but recognize empty columns at start/end\n\
-r, --regex=<rx> Separator given by Perl regular expression (input only)\n\
+ --table Format a table (output only)\n\
\n\
Format parameters:\n\
-d, --fs=<char> Delimiter of fields\n\
-q, --quiet Do not show warnings\n\
--always-quote Put quotes around all fields (CSV output only)\n\
+ --table-sep=<n> Separate table columns by <n> spaces (default: 2)\n\
\n\
Other options:\n\
--trim Trim leading and trailing whitespaces in fields\n\
OPT_HELP = 256,
OPT_TRIM,
OPT_ALWAYS_QUOTE,
+ OPT_TABLE,
+ OPT_TABLE_SEP,
};
static const struct option long_options[] = {
{ "quiet", 0, NULL, 'q' },
{ "regex", 1, NULL, 'r' },
{ "strict-ws", 0, NULL, 'W' },
+ { "table", 0, NULL, OPT_TABLE },
+ { "table-sep", 1, NULL, OPT_TABLE_SEP },
{ "trim", 0, NULL, OPT_TRIM },
{ "tsv", 0, NULL, 't' },
{ "ws", 0, NULL, 'w' },
static void set_format(int format_id)
{
- struct format *f = xmalloc(sizeof(*f));
- memset(f, 0, sizeof(*f));
+ struct format *f = xmalloc_zero(sizeof(*f));
f->id = format_id;
switch (format_id) {
case FORM_REGEX:
f->read_line = regex_read;
break;
+ case FORM_TABLE:
+ f->write_line = table_write;
+ f->needs_two_passes = 1;
+ f->table_sep = 2;
+ break;
}
if (!in_format)
else if (!out_format)
out_format = f;
else
- bad_args("At most two format may be given.");
+ bad_args("At most two formats may be given.");
}
static struct format *current_format(void)
int main(int argc, char **argv)
{
int opt;
- int want_trim = 0;
const char *err;
while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
case OPT_TRIM:
want_trim = 1;
break;
+ case OPT_TABLE:
+ set_format(FORM_TABLE);
+ break;
+ case OPT_TABLE_SEP:
+ current_format()->table_sep = atoi(optarg);
+ break;
default:
bad_args(NULL);
}
fields_init(&out_fields);
line_init(&in_line);
- for (;;) {
- line_number++;
- fields_reset(&in_fields);
- line_reset(&in_line);
- in_field = NULL;
- if (!in_format->read_line())
- break;
-
- if (want_trim)
- trim_fields();
-
- fields_reset(&out_fields);
- select_fields();
-
- out_format->write_line();
- }
-
+ if (out_format->needs_two_passes)
+ two_pass();
+ else
+ one_pass();
return 0;
}