enum format_id id;
int fs;
int quote;
+ int quiet;
int (*read_line)(void);
void (*write_line)(void);
};
static fields_t in_fields, out_fields;
static struct field *in_field;
static line_t in_line;
+static int line_number; /* Input line counter: main() increments it before each read_line() call, and warn() prints it. */
static void new_field(void)
{
new_field();
}
+static void warn(struct format *fmt, char *msg) /* Report msg on stderr, tagged with the current line_number, unless fmt->quiet is set. */
+{
+ if (!fmt->quiet) /* quiet is set by the -q/--quiet option */
+ fprintf(stderr, "Warning at line %d: %s\n", line_number, msg);
+}
+
static int csv_read(void)
{
int quoted = 0;
- // FIXME: Complain if closing quote is missing?
for (;;) {
int c = getchar();
restart:
- if (c < 0)
- return !!fields_count(&in_fields);
if (c == '\r')
continue;
- if (c == '\n')
- return 1;
+ if (c < 0 || c == '\n') {
+ if (quoted)
+ warn(in_format, "Missing closing quote.");
+ if (c < 0)
+ return !!fields_count(&in_fields);
+ else
+ return 1;
+ }
if (quoted) {
if (c == in_format->quote) {
c = getchar();
}
}
+static int is_ws(int c) /* Return nonzero iff c is a space, tab, or form feed; newline and carriage return do not count. */
+{
+ return (c == ' ' || c == '\t' || c == '\f');
+}
+
static void csv_write(void)
{
unsigned char *line = line_nth(&in_line, 0);
putchar(out_format->quote);
for (int j=0; j < f->len; j++) {
int c = line[f->start_pos + j];
+ if (c == out_format->fs && !need_quotes)
+ warn(out_format, "Field separator found inside field and quoting is turned off.");
if (c == out_format->quote)
putchar(c);
putchar(c);
continue;
if (c == '\n')
return 1;
- if (c == ' ' || c == '\t' || c == '\f') {
+ if (is_ws(c)) {
ensure_field();
if (!ws)
new_field();
}
}
+/*** Transforms ***/
+
+static void trim_fields(void) /* Shrink every field in in_fields so it no longer starts or ends with is_ws() characters; only start_pos/len are adjusted, the line bytes are untouched. */
+{
+ unsigned char *line = line_nth(&in_line, 0);
+ for (int i = 0; i < fields_count(&in_fields); i++) {
+ struct field *f = fields_nth(&in_fields, i);
+ while (f->len && is_ws(line[f->start_pos])) /* strip leading whitespace */
+ f->start_pos++, f->len--;
+ while (f->len && is_ws(line[f->start_pos + f->len - 1])) /* strip trailing whitespace; the f->len guard stops at an empty field */
+ f->len--;
+ }
+}
+
/*** Field selection ***/
struct selector {
-w, --ws Values separated by arbitrary whitespace\n\
\n\
Format parameters:\n\
--d, --fs=<char> Delimiter of fields\n\
+-d, --fs=<char> Delimiter of fields\n\
+-q, --quiet Do not show warnings\n\
\n\
Other options:\n\
-(so far none)\n\
+ --trim Trim leading and trailing whitespaces in fields\n\
");
exit(0);
}
exit(1);
}
-static const char short_options[] = "cd:tw";
+static const char short_options[] = "cd:qtw";
enum long_options {
OPT_HELP = 256,
+ OPT_TRIM = 257, /* value returned by getopt_long() for --trim, which has no short-option letter */
};
static const struct option long_options[] = {
{ "csv", 0, NULL, 'c' },
{ "fs", 1, NULL, 'd' },
+ { "quiet", 0, NULL, 'q' },
+ { "trim", 0, NULL, OPT_TRIM },
{ "tsv", 0, NULL, 't' },
{ "ws", 0, NULL, 'w' },
{ "help", 0, NULL, OPT_HELP },
int main(int argc, char **argv)
{
int opt;
+ int want_trim = 0;
while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) >= 0)
switch (opt) {
else
bad_args("No field delimiter given.");
break;
+ case 'q':
+ current_format()->quiet = 1;
+ break;
case 't':
set_format(FORM_TSV);
break;
break;
case OPT_HELP:
usage();
+ case OPT_TRIM:
+ want_trim = 1;
+ break;
default:
bad_args(NULL);
}
line_init(&in_line);
for (;;) {
+ line_number++;
fields_reset(&in_fields);
line_reset(&in_line);
in_field = NULL;
if (!in_format->read_line())
break;
+ if (want_trim)
+ trim_fields();
+
fields_reset(&out_fields);
select_fields();