From dca5a9e58bee396d155b7c34aff5758e40c4c3d5 Mon Sep 17 00:00:00 2001
From: Anicka Bernathova
Date: Thu, 3 Jul 2008 23:01:14 +0200
Subject: [PATCH] lex reloaded

---
Review notes (placed after the "---" marker, so git am does not put them
in the commit message):

The copy of this patch under review had lost its line breaks and every
"<...>" span: the sender address, the %token type tags, the #include
header names, and everything between "i<" in the removed yylex() and
the first #include of lex.c.  What the visible code determines has been
restored; the rest is left as found:
 - cond.y: the %token tags are restored from the yylval members the
   lexer sets.  The names on the two removed #include lines and the
   tail of the removed yylex() are lost, so the last cond.y hunk is
   incomplete and no longer matches its header.
 - lex.c: the file header is rebuilt without an index line.  The system
   headers listed are the ones the code needs; the original diffstat
   (213 lines against 212 visible ones) implies there was a fifth.

Changes to lex.c relative to the patch as posted:
 - yylex() pushes back the first character after a run of newlines
   instead of dropping it.
 - The character that ends a $variable or a keyword is pushed back
   instead of being swallowed.
 - yylex() peeks at a second character only after an operator lead
   character, so it never needs more than one character of pushback.
 - get_string_out() reports an unterminated string at EOF and counts
   newlines inside strings.
 - Line numbers in error messages start at 1.
 - CC() parenthesizes its arguments, the scanf() result is checked, and
   die()/parse_err() carry the printf format attribute.

 Makefile |  20 ++++--
 cond.y   | 195 ++------------------------------------------------
 lex.c    | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 282 insertions(+), 196 deletions(-)
 create mode 100644 lex.c

diff --git a/Makefile b/Makefile
index a58753a..549b9b5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,18 @@
-all: bison
-	gcc -Wall -O2 -o cond -g -lpcre cond.tab.c
+all: cond
 
-bison:
-	bison -t cond.y
+CC=gcc
+CFLAGS=-Wall -W -O2 -g
+LDLIBS=-lpcre
+
+cond: cond.tab.o lex.o
+	gcc -o $@ $^ $(LDLIBS)
+
+cond.tab.o: cond.tab.c
+
+lex.o: lex.c cond.tab.c
+
+cond.tab.c: cond.y
+	bison -dvt cond.y
 
 clean:
-	rm -rf cond.tab.c cond
+	rm -rf cond.tab.[ch] cond.output cond *.o
diff --git a/cond.y b/cond.y
index d5b2934..6473557 100644
--- a/cond.y
+++ b/cond.y
@@ -20,8 +20,6 @@ int regex_cmp(char* s, char* r);
 }
 
 %token <str> CONST
-%token <str> REGEX
-%token ERR
 %token <n> NUM
 %token <str> VAR
 %token KW_IF KW_ELSE KW_PIPE KW_MAIL KW_COPY
@@ -47,8 +45,8 @@ line: '\n'
 
 boo: CONST EQ CONST { $$ = ! strcmp($1, $3); }
 	| CONST NEQ CONST { $$ = !! strcmp($1, $3); }
-	| CONST RE REGEX { $$ = regex_cmp($1,$3) >= 0 }
-	| CONST NRE REGEX { $$ = regex_cmp($1,$3) < 0 }
+	| CONST RE CONST { $$ = regex_cmp($1,$3) >= 0 }
+	| CONST NRE CONST { $$ = regex_cmp($1,$3) < 0 }
 	| NUM EQ NUM { $$ = $1 == $3 }
 	| NUM NEQ NUM { $$ = $1 != $3 }
 	| NUM GE NUM { $$ = $1 >= $3 }
@@ -64,11 +62,6 @@ boo: CONST EQ CONST { $$ = ! strcmp($1, $3); }
 	;
 
 %%
-#include
-#include
-
-#define BUFSIZE 4096
-
 int
 regex_cmp(char* s, char* r)
 {
@@ -77,197 +70,17 @@ regex_cmp(char* s, char* r)
 	const char* error;
 	int ovector[OVECCOUNT];
 
-	brum=pcre_compile(r,0,&error,&erroroffset,NULL);
+	brum = pcre_compile(r,0,&error,&erroroffset,NULL);
 	if (!brum)
 		return -1;
 
-	int res=pcre_exec(brum,NULL,s,strlen(s),0,0,ovector,OVECCOUNT);
+	int res = pcre_exec(brum,NULL,s,strlen(s),0,0,ovector,OVECCOUNT);
 
 	pcre_free(brum);
 
 	return res;
 }
 
-char*
-get_string_out(char delim)
-{
-	int last = delim;
-	int i = 0;
-	char* s;
-	int c;
-
-	if (!(s = malloc(BUFSIZE))){
-		puts("Low memory");
-		exit(0);
-	}
-
-	while ((c = getchar()) != delim || last == '\\'){
-		if (last=='\\' && c != delim)
-			s[i-1] = c;
-		else {
-			s[i] = c;
-			i++;
-		}
-		last = c;
-		if (i >= BUFSIZE-1)
-			break;
-	}
-	s[i] = '\0';
-
-	return s;
-}
-
-int
-safe_unget(char c)
-{
-	if (c==EOF)
-		return 0;
-
-	ungetc(c,stdin);
-	return 1;
-}
-
-int
-yylex(void)
-{
-
-	int c, last;
-
-	while ((c = getchar ()) == ' ' || c == '\t');
-
-	if (c == '"'){
-		last = '"';
-		yylval.str = get_string_out('"');
-		return CONST;
-	}
-
-	if (c == '\''){
-		last = '\'';
-		yylval.str = get_string_out('\'');
-		return CONST;
-	}
-
-	if (c == '/'){
-		last = '/';
-		yylval.str = get_string_out('/');
-		return REGEX;
-	}
-
-	if (isdigit(c)){
-		ungetc(c,stdin);
-		scanf("%d",&yylval.n);
-		return NUM;
-	}
-
-	if (c == '!'){
-		if ((c = getchar ()) == '=')
-			return NEQ;
-		else if (c == '~')
-			return NRE;
-		else {
-			safe_unget(c);
-			return '!';
-		}
-	}
-
-	if (c == '<'){
-		if ((c = getchar ()) == '=')
-			return LE;
-		else {
-			safe_unget(c);
-			return '<';
-		}
-	}
-
-	if (c == '>'){
-		if ((c = getchar ()) == '=')
-			return GE;
-		else {
-			safe_unget(c);
-			return '>';
-		}
-	}
-
-	if (c == '='){
-		if ((c = getchar ()) == '=')
-			return EQ;
-		else {
-			safe_unget(c);
-			return '=';
-		}
-	}
-
-	if (c == '~'){
-		if ((c = getchar ()) == '~')
-			return RE;
-		else {
-			safe_unget(c);
-			return ERR;
-		}
-	}
-
-	if (c == '-'){
-		if ((c = getchar ()) == '>')
-			return ARROW;
-		else {
-			safe_unget(c);
-			return '-';
-		}
-	}
-
-	if (c == '$'){
-		int i=0;
-
-		if (!(yylval.str=malloc(BUFSIZE))){
-			puts("Low memory");
-			exit (0);
-		}
-
-		while (isalnum(c = getchar()) || c == '_' || c == '-'){
-			yylval.str[i]=c;
-			i++;
-			if (i >= BUFSIZE)
-				break;
-		}
-
-		return VAR;
-	}
-
-	if (c == '\n' || c == '+' || c == '*' || c == '/' ||
-			c == '(' || c == ')' || c == '{' || c == '}')
-		return c;
-
-#define KLEN 10
-
-	if (isalpha(c)){
-		char buf[KLEN];
-		int i=0;
-
-		ungetc(c,stdin);
-		while (isalpha(c = getchar()) && i
diff --git a/lex.c b/lex.c
new file mode 100644
--- /dev/null
+++ b/lex.c
@@ -0,0 +1,263 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cond.tab.h"
+
+#define BUFSIZE 4096
+#define KLEN 10
+
+/* Pack two characters into one int so operator pairs can be case labels. */
+#define CC(a,b) (((a) << 8) | (b))
+
+/* A reserved word and the token yylex() returns for it. */
+struct keys {
+	char* keywords;
+	enum yytokentype keytoks;
+};
+
+/* Current input line, starting at 1; used only in parse_err() messages. */
+static int line = 1;
+
+static struct keys k[] =
+	{ {"copy", KW_COPY},
+	  {"else", KW_ELSE},
+	  {"if", KW_IF},
+	  {"mail", KW_MAIL},
+	  {"pipe", KW_PIPE}
+	};
+
+/* Print a printf-style message plus a newline to stderr and exit(1). */
+void __attribute__ ((noreturn, format (printf, 1, 2)))
+die(char* msg, ...)
+{
+	va_list args;
+
+	va_start(args, msg);
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+	va_end(args);
+	exit(1);
+}
+
+/* malloc() that never returns NULL: dies with "Low memory" instead. */
+void*
+xmalloc(size_t size)
+{
+	void* ret;
+
+	if (!(ret = malloc(size)))
+		die("Low memory");
+
+	return ret;
+}
+
+/* Like die(), but the message is prefixed with the current line number. */
+static void __attribute__ ((noreturn, format (printf, 1, 2)))
+parse_err(char* msg, ...)
+{
+	va_list args;
+
+	va_start(args, msg);
+	fprintf(stderr, "Line %d: ", line);
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+	va_end(args);
+	exit(1);
+}
+
+/* strdup() that never returns NULL: dies with "Low memory" instead. */
+static char*
+xstrdup(const char* s)
+{
+	char* ret;
+
+	if (!(ret = strdup(s)))
+		die("Low memory");
+
+	return ret;
+}
+
+/*
+ * Read the rest of a quoted string from stdin, up to the closing delim
+ * (the opening one is already consumed), and return a heap copy of it.
+ *
+ * A backslash followed by a character other than delim is replaced by
+ * that character; a backslash followed by delim keeps both characters
+ * and does not end the string.
+ * NOTE(review): this escape handling is deliberately left as it was.  It
+ * looks inverted (one would expect \" to yield "), and a delim right
+ * after an escaped backslash is still taken as escaped -- confirm the
+ * intent before changing it.
+ */
+static char*
+get_string_out(int delim)
+{
+	int last = delim;
+	int i = 0;
+	int c;
+	char buf[BUFSIZE];
+
+	while ((c = getchar()) != delim || last == '\\'){
+		/* EOF never equals delim, so stop here instead of storing it. */
+		if (c == EOF)
+			parse_err("Unterminated string");
+		if (c == '\n')
+			line++;
+		if (last=='\\' && c != delim)
+			buf[i-1] = c;
+		else {
+			buf[i] = c;
+			i++;
+		}
+		last = c;
+		if (i >= BUFSIZE-1)
+			parse_err("Too long string, max allowed length is %d",BUFSIZE-1);
+	}
+	buf[i] = '\0';
+
+	return xstrdup(buf);
+}
+
+/* Characters allowed in a $variable name: digits, letters, '_' and '-'. */
+static int
+is_var_id(int c)
+{
+	return (c >= '0' && c <= '9' ) ||
+		(c >= 'a' && c <= 'z') ||
+		(c >= 'A' && c <= 'Z') ||
+		c == '_' ||
+		c == '-';
+}
+
+/* Letters a-z and A-Z only; no locale lookup. */
+static int
+is_alpha(int c)
+{
+	return (c >= 'a' && c <= 'z') ||
+		(c >= 'A' && c <= 'Z');
+}
+
+/*
+ * Lexer entry point for the bison parser.  Returns 0 at end of input,
+ * a token from cond.tab.h, or the character itself for one-character
+ * operators.  CONST and VAR set yylval.str, NUM sets yylval.n; bad
+ * input ends the program through parse_err().
+ */
+int
+yylex(void)
+{
+	int c, nl = 0;
+
+	/* Skip blanks; a run that contains newlines yields one '\n' token. */
+	while ((c = getchar ()) == ' ' || c == '\t' || c =='\n'){
+		if (c == '\n'){
+			nl = 1;
+			line++;
+		}
+	}
+
+	if (nl){
+		/* c starts the next token; give it back instead of dropping it. */
+		if (c != EOF)
+			ungetc(c,stdin);
+		return '\n';
+	}
+
+	if (c == EOF)
+		return 0;
+
+	/*
+	 * Two-character operators.  Peek only when c can start one: ungetc()
+	 * guarantees just one character of pushback, and the number branch
+	 * below needs it for c.
+	 */
+	if (c == '!' || c == '<' || c == '>' || c == '=' || c == '~' || c == '-'){
+		int d = getchar();
+
+		if (d != EOF){
+			switch (CC(c,d)) {
+			case CC('!','='): return NEQ;
+			case CC('!','~'): return NRE;
+			case CC('<','='): return LE;
+			case CC('>','='): return GE;
+			case CC('=','='): return EQ;
+			case CC('~','~'): return RE;
+			case CC('-','>'): return ARROW;
+			}
+			ungetc(d,stdin);
+		}
+	}
+
+	switch (c) {
+	case '!':
+	case '(':
+	case ')':
+	case '-':
+	case '+':
+	case '*':
+	case '/':
+	case '{':
+	case '}':
+	case '<':
+	case '>':
+	case '=':
+		return c;
+
+	case '"':
+	case '\'':
+		yylval.str = get_string_out(c);
+		return CONST;
+	}
+
+	if (c >= '0' && c <= '9'){
+		ungetc(c,stdin);
+		if (scanf("%d",&yylval.n) != 1)
+			parse_err("Malformed number");
+		return NUM;
+	}
+
+	if (c == '$'){
+		int i = 0;
+		char buf[BUFSIZE];
+
+		while (is_var_id(c = getchar())){
+			buf[i]=c;
+			i++;
+			if (i >= BUFSIZE-1)
+				parse_err("Too long identifier, max allowed length is %d",BUFSIZE-1);
+		}
+		/* The character that ended the name belongs to the next token. */
+		if (c != EOF)
+			ungetc(c,stdin);
+		buf[i] = 0;
+		yylval.str = xstrdup(buf);
+
+		return VAR;
+	}
+
+	if (is_alpha(c)){
+		char buf[KLEN];
+		int n, i = 0;
+
+		do {
+			buf[i++] = c;
+			if (i >= KLEN)
+				parse_err("Keyword too long");
+		} while (is_alpha(c = getchar()));
+		if (c != EOF)
+			ungetc(c,stdin);
+		buf[i] = 0;
+
+		n = (sizeof(k)/sizeof(struct keys));
+		for (i = 0; i < n; i++){
+			if (!strcmp(buf,k[i].keywords))
+				return k[i].keytoks;
+		}
+
+		parse_err("Unknown keyword %s", buf);
+	}
+
+	parse_err("Unknown character %c", c);
+}
-- 
2.39.2