mj.ucw.cz Git - xsv.git/commitdiff
Table backend is locale-aware
author Martin Mares <mj@ucw.cz>
Mon, 23 Jul 2012 22:46:41 +0000 (00:46 +0200)
committer Martin Mares <mj@ucw.cz>
Mon, 23 Jul 2012 22:46:41 +0000 (00:46 +0200)
xsv.c

diff --git a/xsv.c b/xsv.c
index 5bf4f01537a3d211d8575b3c3abd5b6609d42138..1c6dada778a925c3b7eb49859c990ca580987236 100644 (file)
--- a/xsv.c
+++ b/xsv.c
@@ -9,6 +9,8 @@
 #include <string.h>
 #include <stdarg.h>
 #include <getopt.h>
+#include <wchar.h>
+#include <locale.h>
 
 #include <pcre.h>
 
@@ -154,6 +156,25 @@ static int next_line(void)
        }
 }
 
+static int field_chars(struct field *f)
+{
+       unsigned char *s = line_nth(&in_line, f->start_pos);
+       int i = 0;
+       mbstate_t mbs;
+       memset(&mbs, 0, sizeof(mbs));
+
+       int chars = 0;
+       while (i < f->len) {
+               size_t k = mbrlen((char *) s + i, f->len - i, &mbs);
+               if ((int) k <= 0)
+                       break;
+               i += k;
+               chars++;
+       }
+
+       return chars;
+}
+
 /*** CSV/TSV back-end */
 
 static int csv_read(void)
@@ -325,20 +346,18 @@ static void table_write(void)
                if (i)
                        printf("%*s", out_format->table_sep, "");
                struct field *f = fields_nth(&in_fields, i);
-               int w = *intarray_nth(&in_format->column_widths, i);
-               if (f->len > w) {
-                       warn(out_format, "Internal error: Wrongly calculated column width (%d > %d)", f->len, w);
-                       w = f->len;
+               int fw = field_chars(f);
+               int cw = *intarray_nth(&in_format->column_widths, i);
+               if (fw > cw) {
+                       warn(out_format, "Internal error: Wrongly calculated column width (%d > %d)", fw, cw);
+                       cw = fw;
                }
-               int j = 0;
                unsigned char *p = line_nth(&in_line, f->start_pos);
-               while (j < f->len) {
-                       putchar(*p++);
-                       j++;
-               }
-               while (j < w) {
+               for (int j = 0; j < f->len; j++)
+                       putchar(p[j]);
+               while (fw < cw) {
                        putchar(' ');
-                       j++;
+                       fw++;
                }
        }
        putchar('\n');
@@ -398,8 +417,9 @@ static void tmp_write(void)
                intarray_t *w = &out_format->column_widths;
                while (i >= intarray_count(w))
                        *intarray_push(w) = 0;
-               if (*intarray_nth(w, i) < f->len)
-                       *intarray_nth(w, i) = f->len;
+               int fw = field_chars(f);
+               if (*intarray_nth(w, i) < fw)
+                       *intarray_nth(w, i) = fw;
        }
        fputc(0xff, tf);
 }
@@ -497,6 +517,9 @@ static void two_pass(void)
 {
        struct format *final_format = out_format;
 
+       // We need to use character set info from the current locale
+       setlocale(LC_CTYPE, "");
+
        // Pass 1: Set up writer of intermediate format
        out_format = xmalloc_zero(sizeof(*out_format));
        out_format->id = FORM_TMP;