}
// Internal helper for functions below
-static inline uint unicode_sanitize_char_internal(uint u, uint limit, uint allowed_ctl, uint repl)
+static inline bool unicode_check_char_internal(uint u, uint limit, uint allowed_ctl) // true iff u is acceptable; allowed_ctl bit n permits control char n (< 0x20); limit is the highest code point accepted
{
if (u - 0x20 >= 0x60) // Otherwise printable ASCII 0x20..0x7f (optimized for this case)
{
if (u < 0x20)
{
- if (!(1U << u) & allowed_ctl)
- goto bad; // Latin-1 control chars except the allowed ones
+ if (!((1U << u) & allowed_ctl)) // bit u clear in allowed_ctl => this control char is not permitted
+ return false; // Latin-1 control chars except the allowed ones
}
else if (u >= 0xd800)
{
if (u < 0xf900) // 0xd800..0xf8ff Surrogate pair
- goto bad;
- if (u >= limit)
- goto bad; // Outside Unicode range
+ return false; // rejects surrogates (0xd800..0xdfff) and the private use area (0xe000..0xf8ff)
+ if (u > limit) // limit is inclusive: u == limit is still accepted
+ return false; // Outside allowed range
}
else if (u < 0xa0)
- { // 0x80..0x9f Latin-1 control chars
-bad:
- u = repl;
- }
+ return false; // 0x80..0x9f Latin-1 control chars
}
- return u;
+ return true; // none of the checks above rejected u
}
/**
**/
static inline uint unicode_sanitize_char(uint u)
{
- return unicode_sanitize_char_internal(u, 0x10000, 1U<<'\t', UNI_REPLACEMENT);
+ if (!unicode_check_char_internal(u, 0xffff, 1U << '\t')) // accept nothing above 0xffff; tab is the only control char below 0x20 let through
+ u = UNI_REPLACEMENT; // substitute any character that fails the check
+ return u; // either the unchanged input or UNI_REPLACEMENT
}
/**
**/
static inline uint unicode_full_sanitize_char(uint u)
{
- return unicode_sanitize_char_internal(u, 0x110000, 1U<<'\t', UNI_REPLACEMENT);
+ if (!unicode_check_char_internal(u, 0x10ffff, 1U << '\t')) // accept nothing above 0x10ffff; tab is the only control char below 0x20 let through
+ u = UNI_REPLACEMENT; // substitute any character that fails the check
+ return u; // either the unchanged input or UNI_REPLACEMENT
}
/* unicode-utf8.c */