mj.ucw.cz Git - xsv.git/blobdiff - xsv.c
Use unlocked stdio, it is faster
[xsv.git] / xsv.c
diff --git a/xsv.c b/xsv.c
index 5bf4f01537a3d211d8575b3c3abd5b6609d42138..6456410b4eebd32162912cae3de45203d66e8cde 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -4,11 +4,15 @@
  *     (c) 2012 Martin Mares <mj@ucw.cz>
  */
 
+#define _GNU_SOURCE
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 #include <getopt.h>
+#include <wchar.h>
+#include <locale.h>
 
 #include <pcre.h>
 
@@ -134,7 +138,7 @@ static void warn(struct format *fmt, char *msg, ...)
                fprintf(stderr, "Warning at line %d: ", line_number);
                va_list args;
                va_start(args, msg);
-               vfprintf(stderr, args, msg);
+               vfprintf(stderr, msg, args);
                va_end(args);
                fputc('\n', stderr);
        }
@@ -143,7 +147,7 @@ static void warn(struct format *fmt, char *msg, ...)
 static int next_line(void)
 {
        for (;;) {
-               int c = getchar();
+               int c = getchar_unlocked();
                if (c == '\r')
                        continue;
                if (c < 0)
@@ -154,13 +158,32 @@ static int next_line(void)
        }
 }
 
+static int field_chars(struct field *f)  // Count the multibyte characters (as decoded by mbrlen) in field f of in_line
+{
+       unsigned char *s = line_nth(&in_line, f->start_pos);  // First byte of the field
+       int i = 0;  // Byte offset into the field
+       mbstate_t mbs;
+       memset(&mbs, 0, sizeof(mbs));  // A zeroed mbstate_t is the initial conversion state
+
+       int chars = 0;
+       while (i < f->len) {
+               size_t k = mbrlen((char *) s + i, f->len - i, &mbs);  // Byte length of the next character
+               if ((int) k <= 0)  // 0 = NUL character; (size_t)-1 / (size_t)-2 (invalid / incomplete) become negative after the cast
+                       break;  // NOTE(review): the remaining bytes of the field are not counted -- confirm this is intended
+               i += k;
+               chars++;
+       }
+
+       return chars;  // Characters decoded before the field ended or decoding stopped
+}
+
 /*** CSV/TSV back-end */
 
 static int csv_read(void)
 {
        int quoted = 0;
        for (;;) {
-               int c = getchar();
+               int c = getchar_unlocked();
                int i = line_count(&in_line);
 restart:
                if (c == '\r')
@@ -175,7 +198,7 @@ restart:
                }
                if (quoted) {
                        if (c == in_format->quote) {
-                               c = getchar();
+                               c = getchar_unlocked();
                                if (c != in_format->quote) {
                                        quoted = 0;
                                        goto restart;
@@ -218,21 +241,21 @@ static void csv_write(void)
                        }
                }
                if (i)
-                       putchar(out_format->fs);
+                       putchar_unlocked(out_format->fs);
                if (need_quotes)
-                       putchar(out_format->quote);
+                       putchar_unlocked(out_format->quote);
                for (int j=0; j < f->len; j++) {
                        int c = line[f->start_pos + j];
                        if (c == out_format->fs && !need_quotes)
                                warn(out_format, "Field separator found inside field and quoting is turned off.");
                        if (c == out_format->quote)
-                               putchar(c);
-                       putchar(c);
+                               putchar_unlocked(c);
+                       putchar_unlocked(c);
                }
                if (need_quotes)
-                       putchar(out_format->quote);
+                       putchar_unlocked(out_format->quote);
        }
-       putchar('\n');
+       putchar_unlocked('\n');
 }
 
 /*** White-space back-end ***/
@@ -325,23 +348,21 @@ static void table_write(void)
                if (i)
                        printf("%*s", out_format->table_sep, "");
                struct field *f = fields_nth(&in_fields, i);
-               int w = *intarray_nth(&in_format->column_widths, i);
-               if (f->len > w) {
-                       warn(out_format, "Internal error: Wrongly calculated column width (%d > %d)", f->len, w);
-                       w = f->len;
+               int fw = field_chars(f);
+               int cw = *intarray_nth(&in_format->column_widths, i);
+               if (fw > cw) {
+                       warn(out_format, "Internal error: Wrongly calculated column width (%d > %d)", fw, cw);
+                       cw = fw;
                }
-               int j = 0;
                unsigned char *p = line_nth(&in_line, f->start_pos);
-               while (j < f->len) {
-                       putchar(*p++);
-                       j++;
-               }
-               while (j < w) {
-                       putchar(' ');
-                       j++;
+               for (int j = 0; j < f->len; j++)
+                       putchar_unlocked(p[j]);
+               while (fw < cw) {
+                       putchar_unlocked(' ');
+                       fw++;
                }
        }
-       putchar('\n');
+       putchar_unlocked('\n');
 }
 
 /*** Temporary file back-end ***/
@@ -351,21 +372,21 @@ static int tmp_read(void)
        FILE *tf = in_format->tmp_file;
 
        for (;;) {
-               int c = fgetc(tf);
+               int c = getc_unlocked(tf);
                if (c < 0)
                        return 0;
                if (c == 0xff)
                        return 1;
                if (c == 0xfe) {
-                       c = fgetc(tf);
-                       c = (c << 8) | fgetc(tf);
-                       c = (c << 8) | fgetc(tf);
-                       c = (c << 8) | fgetc(tf);
+                       c = getc_unlocked(tf);
+                       c = (c << 8) | getc_unlocked(tf);
+                       c = (c << 8) | getc_unlocked(tf);
+                       c = (c << 8) | getc_unlocked(tf);
                }
                new_field(line_count(&in_line));
                in_field->len = c;
                while (c--) {
-                       int x = fgetc(tf);
+                       int x = getc_unlocked(tf);
                        if (x < 0) {
                                warn(in_format, "Truncated temporary file");
                                return 0;
@@ -382,26 +403,27 @@ static void tmp_write(void)
        for (int i = 0; i < fields_count(&in_fields); i++) {
                struct field *f = fields_nth(&in_fields, i);
                if (f->len < 0xfe)
-                       fputc(f->len, tf);
+                       putc_unlocked(f->len, tf);
                else {
-                       fputc(0xfe, tf);
-                       fputc((f->len >> 24) & 0xff, tf);
-                       fputc((f->len >> 16) & 0xff, tf);
-                       fputc((f->len >> 8) & 0xff, tf);
-                       fputc(f->len & 0xff, tf);
+                       putc_unlocked(0xfe, tf);
+                       putc_unlocked((f->len >> 24) & 0xff, tf);
+                       putc_unlocked((f->len >> 16) & 0xff, tf);
+                       putc_unlocked((f->len >> 8) & 0xff, tf);
+                       putc_unlocked(f->len & 0xff, tf);
                }
 
                unsigned char *p = line_nth(&in_line, f->start_pos);
                for (int j = 0; j < f->len; j++)
-                       fputc(*p++, tf);
+                       putc_unlocked(*p++, tf);
 
                intarray_t *w = &out_format->column_widths;
                while (i >= intarray_count(w))
                        *intarray_push(w) = 0;
-               if (*intarray_nth(w, i) < f->len)
-                       *intarray_nth(w, i) = f->len;
+               int fw = field_chars(f);
+               if (*intarray_nth(w, i) < fw)
+                       *intarray_nth(w, i) = fw;
        }
-       fputc(0xff, tf);
+       putc_unlocked(0xff, tf);
 }
 
 /*** Transforms ***/
@@ -497,6 +519,9 @@ static void two_pass(void)
 {
        struct format *final_format = out_format;
 
+       // We need to use character set info from the current locale
+       setlocale(LC_CTYPE, "");
+
        // Pass 1: Set up writer of intermediate format
        out_format = xmalloc_zero(sizeof(*out_format));
        out_format->id = FORM_TMP;