From 7df595d014ccce57d1a9e810b884b4fb945281ed Mon Sep 17 00:00:00 2001 From: Pavel Charvat Date: Fri, 29 Dec 2017 14:17:08 +0100 Subject: [PATCH] UTF-8: More generic sanitization function. It can now be used directly in Gigamail's parser. --- ucw/unicode.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ucw/unicode.h b/ucw/unicode.h index c47d436a..8966167c 100644 --- a/ucw/unicode.h +++ b/ucw/unicode.h @@ -282,7 +282,7 @@ static inline void *utf16_be_get(const void *p, uint *uu) } // Internal helper for functions below -static inline uint unicode_sanitize_char_internal(uint u, uint limit, uint allowed_ctl) +static inline uint unicode_sanitize_char_internal(uint u, uint limit, uint allowed_ctl, uint repl) { if (u - 0x20 >= 0x60) // Otherwise printable ASCII 0x20..0x7f (optimized for this case) { @@ -301,7 +301,7 @@ static inline uint unicode_sanitize_char_internal(uint u, uint limit, uint allow else if (u < 0xa0) { // 0x80..0x9f Latin-1 control chars bad: - u = UNI_REPLACEMENT; + u = repl; } } return u; @@ -314,7 +314,7 @@ bad: **/ static inline uint unicode_sanitize_char(uint u) { - return unicode_sanitize_char_internal(u, 0x10000, 1U<<'\t'); + return unicode_sanitize_char_internal(u, 0x10000, 1U<<'\t', UNI_REPLACEMENT); } /** @@ -324,7 +324,7 @@ static inline uint unicode_sanitize_char(uint u) **/ static inline uint unicode_full_sanitize_char(uint u) { - return unicode_sanitize_char_internal(u, 0x110000, 1U<<'\t'); + return unicode_sanitize_char_internal(u, 0x110000, 1U<<'\t', UNI_REPLACEMENT); } /* unicode-utf8.c */ -- 2.39.2