}
#define UTF8_GET_NEXT if (unlikely((*p & 0xc0) != 0x80)) goto bad; u = (u << 6) | (*p++ & 0x3f)
+#define UTF8_CHECK_AVAIL(n) if (unlikely(avail < n)) goto bad /* Bail out when fewer than n bytes remain; the expanding function must provide a local `avail` and a `bad` label */
#define UTF8_CHECK_RANGE(r) if (unlikely(u < r)) goto bad
/**
return (byte *)p;
}
+/**
+ * Decode a single UTF-8 sequence encoding a value from the range `[0, 0x7FFFFFFF]`
+ * and store the result to @uu, or store @repl there if the encoding has been
+ * corrupted (a stray continuation byte, a truncated or overlong sequence,
+ * a malformed continuation byte, or a lead byte of `0xFE`/`0xFF`).
+ * This function never reads at or beyond @stop.
+ * At least one byte must be available (@stop > @p).
+ * Returns the pointer just past the bytes consumed. On a corrupted sequence
+ * decoding stops early, so a malformed continuation byte is not consumed.
+ **/
+static inline byte *utf8_32_get_repl_safe(const byte *p, const byte *stop, uint *uu, uint repl)
+{
+ uint u = *p++;
+ if (u < 0x80) /* Single-byte (ASCII) value, nothing more to read */
+ goto ok;
+ else if (unlikely(u < 0xc0)) /* 0x80..0xBF is a continuation byte, invalid as a lead byte */
+ goto bad;
+ uint limit; /* Smallest value acceptable for the detected sequence length */
+ size_t avail = stop - p; /* Bytes remaining after the lead byte (p was already advanced) */
+ if (u < 0xe0) /* 2-byte sequence */
+ {
+ UTF8_CHECK_AVAIL(1);
+ u &= 0x1f;
+ limit = 0x80;
+ goto get1;
+ }
+ else if (u < 0xf0) /* 3-byte sequence */
+ {
+ UTF8_CHECK_AVAIL(2);
+ u &= 0x0f;
+ limit = 0x800;
+ goto get2;
+ }
+ else if (u < 0xf8) /* 4-byte sequence */
+ {
+ UTF8_CHECK_AVAIL(3);
+ u &= 0x07;
+ limit = 1 << 16;
+ goto get3;
+ }
+ else if (u < 0xfc) /* 5-byte sequence */
+ {
+ UTF8_CHECK_AVAIL(4);
+ u &= 0x03;
+ limit = 1 << 21;
+ goto get4;
+ }
+ else if (u < 0xfe) /* 6-byte sequence */
+ {
+ UTF8_CHECK_AVAIL(5);
+ u &= 0x01;
+ limit = 1 << 26;
+ UTF8_GET_NEXT; /* First of five continuation bytes; the labels below are entry points for shorter sequences */
+get4: UTF8_GET_NEXT;
+get3: UTF8_GET_NEXT;
+get2: UTF8_GET_NEXT;
+get1: UTF8_GET_NEXT;
+ if (unlikely(u < limit)) /* Reject overlong encodings; shared by all multi-byte lengths, which jump into this block */
+ goto bad;
+ }
+ else /* 0xFE and 0xFF never start a sequence */
+ goto bad;
+
+ok:
+ *uu = u;
+ return (byte *)p;
+
+bad:
+ /* Incorrect byte sequence */
+ *uu = repl;
+ return (byte *)p;
+}
+
/**
* Decode a value from the range `[0, 0xFFFF]` (basic multilingual plane)
* or return `UNI_REPLACEMENT` if the encoding has been corrupted.