From: Daniel Fiala Date: Tue, 11 May 2010 02:38:49 +0000 (+0200) Subject: Added functions for conversions from string to uns or uintmax_t . X-Git-Tag: holmes-import~10 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=2c7483b13f06f97b8a3d98f27ebe5ebf003be1c2;p=libucw.git Added functions for conversions from string to uns or uintmax_t . --- diff --git a/ucw/Makefile b/ucw/Makefile index 6593310f..4bb7f8e3 100644 --- a/ucw/Makefile +++ b/ucw/Makefile @@ -31,7 +31,8 @@ LIBUCW_MODS= \ qache \ string str-esc str-split str-match str-imatch str-hex \ bbuf \ - getopt + getopt \ + strtonum LIBUCW_MAIN_INCLUDES= \ lib.h log.h threads.h \ @@ -56,7 +57,8 @@ LIBUCW_MAIN_INCLUDES= \ base64.h base224.h \ qache.h \ kmp.h kmp-search.h binsearch.h \ - partmap.h + partmap.h \ + strtonum.h ifdef CONFIG_UCW_THREADS # Some modules require threading @@ -95,6 +97,7 @@ $(o)/ucw/redblack-test: $(o)/ucw/redblack-test.o $(LIBUCW) $(o)/ucw/binheap-test: $(o)/ucw/binheap-test.o $(LIBUCW) $(o)/ucw/lizard-test: $(o)/ucw/lizard-test.o $(LIBUCW) $(o)/ucw/kmp-test: $(o)/ucw/kmp-test.o $(LIBUCW) +$(o)/ucw/strtonum-test: $(o)/ucw/strtonum-test.o $(LIBUCW) ifdef CONFIG_CHARSET $(o)/ucw/kmp-test: $(LIBCHARSET) endif @@ -105,7 +108,7 @@ TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test eltpool.test \ fb-socket.test trie-test.test string.test sha1.test asort-test.test binheap-test.test \ redblack-test.test fb-file.test fb-grow.test fb-pool.test fb-atomic.test \ - fb-limfd.test fb-temp.test fb-mem.test fb-buffer.test fb-mmap.test url.test) + fb-limfd.test fb-temp.test fb-mem.test fb-buffer.test fb-mmap.test url.test strtonum-test.test) $(o)/ucw/regex.test: $(o)/ucw/regex-t $(o)/ucw/unicode.test: $(o)/ucw/unicode-t @@ -125,6 +128,7 @@ $(o)/ucw/trie-test.test: $(o)/ucw/trie-test $(o)/ucw/asort-test.test: $(o)/ucw/asort-test $(o)/ucw/binheap-test.test: $(o)/ucw/binheap-test $(o)/ucw/redblack-test.test: 
$(o)/ucw/redblack-test +$(o)/ucw/strtonum-test.test: $(o)/ucw/strtonum-test $(addprefix $(o)/ucw/fb-,file.test grow.test pool.test socket.test atomic.test \ limfd.test temp.test mem.test buffer.test mmap.test): %.test: %-t $(o)/ucw/url.test: $(o)/ucw/url-t diff --git a/ucw/strtonum-gen.h b/ucw/strtonum-gen.h new file mode 100644 index 00000000..60dcca7b --- /dev/null +++ b/ucw/strtonum-gen.h @@ -0,0 +1,116 @@ +/* + * UCW Library -- Conversions of Strings to Numbers + * + * (c) 2010 Daniel Fiala + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + */ + +/* This is not a normal header file; it is a generator of a function for converting strings to integers + * of a certain type. This file should be used only by ucw/strtonum.c, which defines STN_TYPE and STN_SUFFIX before each inclusion. + */ + +#define STN_DECLARE(type, suffix) STN_DECLARE_CONVERTOR(type, suffix) + +#define S_HELPER2(name, suffix) name##suffix +#define S_HELPER1(name, suffix) S_HELPER2(name, suffix) +#define S(name) S_HELPER1(name, STN_SUFFIX) /* Appends the expanded STN_SUFFIX to name */ + +#define STN_MAX ((STN_TYPE)(-1)) /* All bits set: the largest value when STN_TYPE is unsigned */ +static const STN_TYPE S(tops)[STN_DBASES_MASK+1] = { [2] = STN_MAX/2, [8] = STN_MAX/8, [10] = STN_MAX/10, [16] = STN_MAX/16 }; /* Per-base STN_MAX/base; entries left at zero mark unsupported bases */ + +STN_DECLARE(STN_TYPE, STN_SUFFIX) +{ + const char *p = str; + const char *err = NULL; + + uns sign, base; + err = str_to_num_init(&p, flags, &sign, &base); + const char *parse_string(void) /* Nested function (a GNU C extension); it works directly on p, flags, sign, base and num of the enclosing function */ + { + const STN_TYPE max = STN_MAX; + const STN_TYPE top = S(tops)[base]; + if (!top) + { + return err_unknown_base; + } + + const STN_TYPE sign_max = ((flags & STN_SIGNED) || sign) ? 
max/2 + sign : max; /* Largest accepted magnitude: max/2 for a signed result, max/2 + 1 when a minus sign was consumed, otherwise max */ + + STN_TYPE val = 0; + uns digits = 0; + int overflow = 0; + for (;; p++) + { + const uns c = (byte)*p; + + if (c == '_') + { + if (flags & STN_UNDERSCORE) + continue; /* Underscores are skipped and do not count as digits */ + else + break; + } + + const uns d = get_digit(c); + if (d >= base) + break; + digits++; + if (overflow) + continue; /* After an overflow the remaining digits are still consumed, so p ends behind the whole number */ + + STN_TYPE v = val; + if ( (overflow = (v > top || (v *= base) > sign_max - d)) ) /* v > top is tested first, so v *= base runs only when the product cannot wrap around (given an unsigned STN_TYPE) */ + continue; + val = v + d; + } + + if (!overflow) + { + if ((flags & STN_ZCHAR) && *p) + { + return err_invalid_character; + } + + if (!digits) + { + return err_no_digits; + } + + if (sign) + { + val = -val; + } + } + else + { + if (flags & STN_TRUNC) + val = sign_max; /* The sign is not applied on this path; with a minus sign sign_max is max/2 + 1, which equals its own negation in an unsigned STN_TYPE */ + else + { + return err_numeric_overflow; + } + } + + if (num) + *num = val; + + return NULL; + } + if (!err) + err = parse_string(); /* Skipped when str_to_num_init() reported an error */ + + if (next) + *next = p; /* Stored on both success and failure */ + + return err; +} + +#undef STN_MAX +#undef S +#undef S_HELPER1 +#undef S_HELPER2 +#undef STN_TYPE +#undef STN_SUFFIX +#undef STN_DECLARE diff --git a/ucw/strtonum-test.c b/ucw/strtonum-test.c new file mode 100644 index 00000000..fd15714d --- /dev/null +++ b/ucw/strtonum-test.c @@ -0,0 +1,95 @@ +/* + * UCW Library -- Conversions of Strings to Numbers: Testing + * + * (c) 2010 Daniel Fiala + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "ucw/lib.h" +#include "ucw/chartype.h" +#include "ucw/strtonum.h" + +#include + +static uns str_to_flags(const char *str) +{ + uns flags = 0; + for(const char *p = str; *p; ++p) + { + switch(*p) + { + case 'h': + flags = (flags & ~STN_DBASES_MASK) | 16; + break; + case '8': + flags = (flags & ~STN_DBASES_MASK) | 8; + break; + case '2': + flags = (flags & ~STN_DBASES_MASK) | 2; + break; + case '0': + flags = (flags & ~STN_DBASES_MASK) | 10; + break; + + case 'X': + flags |= STN_HEX; + break; + case 'o': + flags |= STN_OCT; + break; + case 'B': + flags |= STN_BIN; + break; + case 'D': + flags |= STN_DEC; + break; + + case '_': + flags |= STN_UNDERSCORE; + break; + case 't': + flags |= STN_TRUNC; + break; + case '+': + flags |= STN_PLUS; + break; + case '-': + flags |= STN_MINUS; + break; + case 's': + flags |= STN_SIGNED; + break; + case 'Z': + flags |= STN_ZCHAR; + break; + } + } + + return flags; +} + +static void convert(const char *str_flags, const char *str_num) +{ + const uns flags = str_to_flags(str_flags); + + const char *next1, *next2; + uns ux = 1234567890; + uintmax_t um = 1234567890; + const char *err1 = str_to_uns(&ux, str_num, &next1, flags); + const char *err2 = str_to_uintmax(&um, str_num, &next2, flags); + + if (flags & STN_SIGNED) + printf("i%d\nh%x\ne[%s]\nc[%s]\nb%td:0x%x\nI%jd\nH%jx\nE[%s]\nC[%s]\nB%td:0x%x\n", ux, ux, err1, str_num, next1 - str_num, *next1, um, um, err2, str_num, next2 - str_num, *next2); + else + printf("i%u\nh%x\ne[%s]\nc[%s]\nb%td:0x%x\nI%ju\nH%jx\nE[%s]\nC[%s]\nB%td:0x%x\n", ux, ux, err1, str_num, next1 - str_num, *next1, um, um, err2, str_num, next2 - str_num, *next2); +} + +int main(int argc, char *argv[]) +{ + if (argc >= 3) + convert(argv[1], argv[2]); + + return 0; +} diff --git a/ucw/strtonum-test.t b/ucw/strtonum-test.t new file mode 100644 index 00000000..f8747b51 --- /dev/null +++ b/ucw/strtonum-test.t @@ -0,0 +1,209 @@ +# Test for the strtonum module + +## Testing str_to_uns(.) 
(It is supposed that uns is a 4-byte integer) + +# 1 +Run: ../obj/ucw/strtonum-test '0o+-_' '0o100_000xc' | grep '^i' +Out: i32768 + +# 2 +Run: ../obj/ucw/strtonum-test '0XoB+-_' '4_294_967_295xcyv' | grep '^i' +Out: i4294967295 + +# 3 +Run: ../obj/ucw/strtonum-test '0XoB+-_' '4_294_967_296xc' | grep '^e' +Out: e[Numeric overflow] + +# 4 +Run: ../obj/ucw/strtonum-test '0XoBt+-_' '4_294_967_296xc' | grep '^h' +Out: hffffffff + +# 5 +Run: ../obj/ucw/strtonum-test '0XoBt+-_' '4_294_967_296xc' | grep '^b' +Out: b13:0x78 + +# 6 +Run: ../obj/ucw/strtonum-test '0XoBt+-_' '0x__4_' | grep '^i' +Out: i4 + +# 7 +Run: ../obj/ucw/strtonum-test '0XoBt+-_' '0x__4_' | grep '^b' +Out: b6:0x0 + +# 8 +Run: ../obj/ucw/strtonum-test '0XoBt+-_' '0xXW' | grep '^e' +Out: e[Number contains no digits] + +# 9 +Run: ../obj/ucw/strtonum-test '0XoBt+-_' '0xXW' | grep '^b' +Out: b2:0x58 + +# 10 +Run: ../obj/ucw/strtonum-test '0oBt+-_' '0xXW' | grep '^i' +Out: i0 + +# 11 +Run: ../obj/ucw/strtonum-test '0oBt+-_' '0xXW' | grep '^b' +Out: b1:0x78 + +# 12 +Run: ../obj/ucw/strtonum-test '0oBt+-_Z' '0xXW' | grep '^e' +Out: e[Invalid character] + +# 13 +Run: ../obj/ucw/strtonum-test '0oBt+-_Z' '0xXW' | grep '^b' +Out: b1:0x78 + +# 14 +Run: ../obj/ucw/strtonum-test '0Bs+-_' '2_147_483_647xxx' | grep '^i' +Out: i2147483647 + +# 15 +Run: ../obj/ucw/strtonum-test '0Bs+-_' '2_147_483_647xxx' | grep '^i' +Out: i2147483647 + +# 16 +Run: ../obj/ucw/strtonum-test '0Bs+-_' '2_147_483_648xxx' | grep '^e' +Out: e[Numeric overflow] + +# 17 +Run: ../obj/ucw/strtonum-test '0Bs+-_' '-2_147_483_648xxx' | grep '^i' +Out: i-2147483648 + +# 18 +Run: ../obj/ucw/strtonum-test '0Bs+-_' '-2_147_483_649xxx' | grep '^e' +Out: e[Numeric overflow] + +# 19 +Run: ../obj/ucw/strtonum-test '0Bts+-_' '2_147_483_648xxx' | grep '^i' +Out: i2147483647 + +# 20 +Run: ../obj/ucw/strtonum-test '0Bts+-_' '-2_147_483_649xxx' | grep '^i' +Out: i-2147483648 + +# 21 +Run: ../obj/ucw/strtonum-test '0Bts+-_' '-2_147_483_649xxx' | grep '^i' +Out: 
i-2147483648 + +# 22 +Run: ../obj/ucw/strtonum-test '0X+-' '0xABCDEFxxx' | grep '^h' +Out: habcdef + +# 23 +Run: ../obj/ucw/strtonum-test '0X+-_' '0x00_AB_CD_EFxxx' | grep '^h' +Out: habcdef + +# 24 +Run: ../obj/ucw/strtonum-test '0Xs+-_' '-0x00AB_CDEFxxx' | grep '^h' +Out: hff543211 + +# 25 +Run: ../obj/ucw/strtonum-test '0XBs+-_' '-0x00AB_CDEFxxx' | grep '^h' +Out: hff543211 + +# 26 +Run: ../obj/ucw/strtonum-test '0B+-_' '0B1111_0000_1000_0101_1010xxx' | grep '^h' +Out: hf085a + +# 27 +Run: ../obj/ucw/strtonum-test '0+-_' '0B1111_0000_1000_0101_1010xxx' | grep '^b' +Out: b1:0x42 + +# 28 +Run: ../obj/ucw/strtonum-test '0o+-_' '0o70105xxx' | grep '^i' +Out: i28741 + +# 29 +Run: ../obj/ucw/strtonum-test '0os+-_' '-0o70105xxx' | grep '^i' +Out: i-28741 + +# 30 +Run: ../obj/ucw/strtonum-test '0os+-_' '-0o___________xxx' | grep '^e' +Out: e[Number contains no digits] + +# 31 +Run: ../obj/ucw/strtonum-test '2+-_' '10578ABCG' | grep '^i' +Out: i2 + +# 32 +Run: ../obj/ucw/strtonum-test '2s+-_' '-10578ABCG' | grep '^i' +Out: i-2 + +# 33 +Run: ../obj/ucw/strtonum-test '8+-_' '10578ABCG' | grep '^i' +Out: i559 + +# 34 +Run: ../obj/ucw/strtonum-test '8s+-_' '-10578ABCG' | grep '^i' +Out: i-559 + +# 35 +Run: ../obj/ucw/strtonum-test '0+-_' '10578ABCG' | grep '^i' +Out: i10578 + +# 36 +Run: ../obj/ucw/strtonum-test '0s+-_' '-10578ABCG' | grep '^i' +Out: i-10578 + +# 37 +Run: ../obj/ucw/strtonum-test 'h+-_' '10578ABCG' | grep '^i' +Out: i274172604 + +# 38 +Run: ../obj/ucw/strtonum-test 'hs+-_' '-10578ABCG' | grep '^i' +Out: i-274172604 + +## Testing str_to_uintmax(.) 
(It is supposed that uintmax_t is an 8-byte integer) +# 39 +Run: ../obj/ucw/strtonum-test 'h+-_' 'FFFF_FFFF_ffff_ffFF' | grep '^H' +Out: Hffffffffffffffff + +# 40 +Run: ../obj/ucw/strtonum-test 'h+-_' 'FFFF_FFFF_ffff_ffFF' | grep '^I' +Out: I18446744073709551615 + +#41 +Run: ../obj/ucw/strtonum-test '0t+-_' '1844674407370000009551616' | grep '^I' +Out: I18446744073709551615 + +#42 +Run: ../obj/ucw/strtonum-test '0+-_' '18446744073709551616' | grep '^E' +Out: E[Numeric overflow] + +# 43 +Run: ../obj/ucw/strtonum-test '0+-_' '18446744073709551614' | grep '^H' +Out: Hfffffffffffffffe + +# 44 +Run: ../obj/ucw/strtonum-test '0s+-_' '9223372036854775807L' | grep '^I' +Out: I9223372036854775807 + +# 45 +Run: ../obj/ucw/strtonum-test '0s+-_' '9223372036854775806L' | grep '^I' +Out: I9223372036854775806 + +# 46 +Run: ../obj/ucw/strtonum-test '0st+-_' '92233720368547758000000L' | grep '^I' +Out: I9223372036854775807 + +# 47 +Run: ../obj/ucw/strtonum-test '0s+-_' '9223372036854775808L' | grep '^E' +Out: E[Numeric overflow] + +# 48 +Run: ../obj/ucw/strtonum-test '0s+-_' '-9223372036854775808L' | grep '^I' +Out: I-9223372036854775808 + +# 49 +Run: ../obj/ucw/strtonum-test '0s+-_' '-9223372036854775807L' | grep '^I' +Out: I-9223372036854775807 + +# 50 +Run: ../obj/ucw/strtonum-test '0st+-_' '-9223372036854775800000L' | grep '^I' +Out: I-9223372036854775808 + +# 51 +Run: ../obj/ucw/strtonum-test '0s+-_' '-9223372036854775809L' | grep '^E' +Out: E[Numeric overflow] + diff --git a/ucw/strtonum.c b/ucw/strtonum.c new file mode 100644 index 00000000..5a494165 --- /dev/null +++ b/ucw/strtonum.c @@ -0,0 +1,109 @@ +/* + * UCW Library -- Conversions of Strings to Numbers + * + * (c) 2010 Daniel Fiala + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#include "ucw/lib.h" +#include "ucw/string.h" +#include "ucw/chartype.h" +#include "ucw/strtonum.h" + +static const char err_numeric_overflow[] = "Numeric overflow"; +static const char err_no_digits[] = "Number contains no digits"; +static const char err_invalid_character[] = "Invalid character"; +static const char err_unknown_base[] = "Unknown base"; + +static uns detect_base(const char *p, const uns flags) /* Returns 16, 2 or 8 when p starts with '0' followed by x, b or o (either case) and the matching STN_HEX, STN_BIN or STN_OCT flag is set; otherwise returns 0 */ +{ + if ((flags & STN_BASES) && *p == '0') + { + switch (p[1] & 0xDF) /* Clearing bit 0x20 folds ASCII lower-case letters to upper case */ + { + case 'X': + if (flags & STN_HEX) + { + return 16; + } + break; + + case 'B': + if (flags & STN_BIN) + { + return 2; + } + break; + + case 'O': + if (flags & STN_OCT) + { + return 8; + } + break; + } + } + + return 0; +} + +static const char *str_to_num_init(const char **pp, const uns flags, uns *sign, uns *base) /* Consumes an optional sign and base prefix at *pp, stores 0 or 1 to *sign and the base to *base, and advances *pp; as written it always returns NULL */ +{ + ASSERT(*pp); + + const char *err = NULL; + const char *p = *pp; + + // Parse sign + *sign = 0; + if (flags & (STN_SIGNS)) + { + if (*p == '-' && (flags & STN_MINUS)) + { + *sign = 1; + p++; + } + else if (*p == '+' && (flags & STN_PLUS)) + p++; + } + + const uns prefix_base = detect_base(p, flags); + if (prefix_base) + { + p += 2; /* Skip the two-character base prefix */ + *base = prefix_base; + } + else + { + *base = flags & STN_DBASES_MASK; /* No prefix: the default base is taken from the low bits of flags */ + } + + *pp = p; + return err; +} + +static inline uns get_digit(const uns c) /* Maps '0'..'9' to 0..9 and letters of either case to 10 and up; characters below '0' wrap around to large unsigned values */ +{ + if (c <= '9') + { + return c - '0'; + } + else + { + const int a = c & 0xDF; + unsigned d = a - 'A'; + d &= 0xFF; /* Keeps the result non-negative and bounded for characters that sort before 'A' */ + d += 10; + return d; + } +} + +#define STN_TYPE uns +#define STN_SUFFIX uns +#include "ucw/strtonum-gen.h" /* Generates str_to_uns() */ + +#define STN_TYPE uintmax_t +#define STN_SUFFIX uintmax +#include "ucw/strtonum-gen.h" /* Generates str_to_uintmax() */ diff --git a/ucw/strtonum.h b/ucw/strtonum.h new file mode 100644 index 00000000..b067b3fa --- /dev/null +++ b/ucw/strtonum.h @@ -0,0 +1,42 @@ +/* + * UCW Library -- Conversions of Strings to Numbers: Declarations + * + * (c) 2010 Daniel Fiala + * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. 
+ */ + +#ifndef _STRTONUM_H +#define _STRTONUM_H + +enum str_to_num_flags { + STN_SIGNED = 0x20, // The resulting range is signed + STN_MINUS = 0x40, // Allow optional '-' sign + STN_PLUS = 0x80, // Allow optional '+' sign + STN_TRUNC = 0x100, // Allow range overflow -> truncate to the resulting range + STN_DEC = 0x200, // Support decimal numbers + STN_HEX = 0x400, // Support hexadecimal numbers (0x...) + STN_BIN = 0x800, // Support binary numbers (0b...) + STN_OCT = 0x1000, // Support octal numbers (0o...) + STN_UNDERSCORE = 0x2000, // Number can contain underscore characters to increase readability (eg. 1_000_000) + STN_ZCHAR = 0x4000, // Number can be terminated only by \0 character +}; + +#define STN_DBASES_MASK 0x1F /* Low five bits of flags: the default base, used by strtonum.c when no base prefix is detected */ +#define STN_SIGNS (STN_MINUS | STN_PLUS) +#define STN_BASES (STN_DEC | STN_HEX | STN_BIN | STN_OCT) +#define STN_FLAGS (STN_MINUS | STN_PLUS | STN_BASES) +#define STN_UFLAGS (STN_FLAGS | STN_UNDERSCORE) +#define STN_SFLAGS (STN_FLAGS | STN_SIGNED) +#define STN_USFLAGS (STN_SFLAGS | STN_UNDERSCORE) + +#define STN_DECLARE_CONVERTOR(type, suffix) \ +const char *str_to_##suffix(type *num, const char *str, const char **next, const uns flags) /* Returns NULL on success or a static error message; num and next may be NULL (both are checked before being written in strtonum-gen.h) */ + +STN_DECLARE_CONVERTOR(uns, uns); +STN_DECLARE_CONVERTOR(long, long); /* NOTE(review): strtonum.c in this commit instantiates only the uns and uintmax variants -- confirm str_to_long is defined somewhere */ +STN_DECLARE_CONVERTOR(uintmax_t, uintmax); +STN_DECLARE_CONVERTOR(unsigned long long, ull); /* NOTE(review): no str_to_ull instantiation is visible in this commit either -- confirm */ + +#endif