]> mj.ucw.cz Git - umpf.git/commitdiff
lex reloaded
author Anicka Bernathova <anicka@anicka.net>
Thu, 3 Jul 2008 21:01:14 +0000 (23:01 +0200)
committer Anicka Bernathova <anicka@anicka.net>
Thu, 3 Jul 2008 21:01:14 +0000 (23:01 +0200)
Makefile
cond.y
lex.c [new file with mode: 0644]

index a58753a02d4ed5c4d8e8bd536e0328154612ba02..549b9b5b76e4b623f54adacd3c3cf561b6fcb3bb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,18 @@
-all: bison
-       gcc -Wall -O2 -o cond -g -lpcre cond.tab.c
+all: cond
 
-bison:
-       bison -t cond.y
+CC=gcc
+CFLAGS=-Wall -W -O2 -g
+LDLIBS=-lpcre
+
+# Link the parser and lexer objects into the cond binary.  $(CC) is used
+# instead of a hard-coded gcc so that overriding CC affects linking as
+# well as compilation.
+cond: cond.tab.o lex.o
+       $(CC) -o $@ $^ $(LDLIBS)
+
+# No recipe here: presumably built by make's implicit .c -> .o rule,
+# which uses CC and CFLAGS set above.
+cond.tab.o: cond.tab.c
+
+# lex.c includes cond.tab.h; bison -d writes that header in the same run
+# as cond.tab.c, so depending on cond.tab.c makes the header exist first.
+lex.o: lex.c cond.tab.c
+
+# Generate the parser: -d also writes cond.tab.h, -v writes cond.output,
+# -t compiles in parser tracing support.
+cond.tab.c: cond.y
+       bison -dvt cond.y
 
 clean:
-       rm -rf cond.tab.c cond  
+       rm -rf cond.tab.[ch] cond.output cond *.o
diff --git a/cond.y b/cond.y
index d5b293413053b19b2d8d71ac13fdc8a65025369b..6473557660d944e472b17612517f7440092b6a4c 100644 (file)
--- a/cond.y
+++ b/cond.y
@@ -20,8 +20,6 @@ int regex_cmp(char* s, char* r);
 }
 
 %token <str> CONST
-%token <str> REGEX
-%token ERR
 %token <n> NUM
 %token <str> VAR
 %token KW_IF KW_ELSE KW_PIPE KW_MAIL KW_COPY
@@ -47,8 +45,8 @@ line: '\n'
 
 boo:    CONST EQ CONST         { $$ = ! strcmp($1, $3); }
        | CONST NEQ CONST       { $$ = !! strcmp($1, $3); }
-       | CONST RE REGEX        { $$ = regex_cmp($1,$3) >= 0 }
-       | CONST NRE REGEX       { $$ = regex_cmp($1,$3) < 0 }
+       | CONST RE CONST        { $$ = regex_cmp($1,$3) >= 0 }
+       | CONST NRE CONST       { $$ = regex_cmp($1,$3) < 0 }
        | NUM EQ NUM            { $$ = $1 == $3 }
        | NUM NEQ NUM           { $$ = $1 != $3 }
        | NUM GE NUM            { $$ = $1 >= $3 }
@@ -64,11 +62,6 @@ boo:  CONST EQ CONST         { $$ = ! strcmp($1, $3); }
 ;
 %%
 
-#include <ctype.h>
-#include <stdlib.h>
-
-#define BUFSIZE 4096
-
 int 
 regex_cmp(char* s, char* r)
 {
@@ -77,197 +70,17 @@ regex_cmp(char* s, char* r)
        const char* error;
        int ovector[OVECCOUNT];
        
-       brum=pcre_compile(r,0,&error,&erroroffset,NULL);
+       brum = pcre_compile(r,0,&error,&erroroffset,NULL);
        if (!brum)
                return -1;
        
-       int res=pcre_exec(brum,NULL,s,strlen(s),0,0,ovector,OVECCOUNT);
+       int res = pcre_exec(brum,NULL,s,strlen(s),0,0,ovector,OVECCOUNT);
        
        pcre_free(brum);
 
        return res;
 }
 
-char*
-get_string_out(char delim)
-{
-       int last = delim; 
-       int i = 0;
-       char* s;
-       int c;
-
-       if (!(s = malloc(BUFSIZE))){
-               puts("Low memory");
-               exit(0);
-       }
-
-       while ((c = getchar()) != delim || last == '\\'){
-               if (last=='\\' && c != delim)
-                       s[i-1] = c;
-               else {          
-                       s[i] = c;
-                       i++;
-               }
-               last = c;
-               if (i >= BUFSIZE-1)
-                       break;
-       }       
-       s[i] = '\0';
-
-       return s;
-}
-
-int
-safe_unget(char c)
-{
-       if (c==EOF)
-               return 0;
-
-       ungetc(c,stdin);
-       return 1;
-}
-
-int
-yylex(void)
-{
-
-       int c, last;
-       
-       while ((c = getchar ()) == ' ' || c == '\t');
-       
-       if (c == '"'){
-               last = '"';
-               yylval.str = get_string_out('"');
-               return CONST;   
-       }
-
-       if (c == '\''){
-               last = '\'';
-               yylval.str = get_string_out('\'');
-               return CONST;   
-       }
-
-       if (c == '/'){
-               last = '/';
-               yylval.str = get_string_out('/');       
-               return REGEX;   
-       }
-
-       if (isdigit(c)){
-               ungetc(c,stdin);
-               scanf("%d",&yylval.n);
-               return NUM;
-       }
-
-       if (c == '!'){
-               if ((c = getchar ()) == '=')
-                       return NEQ;
-               else if (c == '~')
-                       return NRE;
-               else {
-                       safe_unget(c);
-                       return '!';     
-               }
-       }
-
-       if (c == '<'){
-               if ((c = getchar ()) == '=')
-                       return LE;
-               else {
-                       safe_unget(c);
-                       return '<';
-               }
-       }
-
-       if (c == '>'){
-               if ((c = getchar ()) == '=')
-                       return GE;
-               else {
-                       safe_unget(c);
-                       return '>'; 
-               } 
-       }
-
-       if (c == '='){
-               if ((c = getchar ()) == '=')
-                       return EQ;
-               else {
-                       safe_unget(c);
-                       return '='; 
-               }
-       }
-
-       if (c == '~'){
-               if ((c = getchar ()) == '~')
-                       return RE;
-               else {
-                       safe_unget(c);
-                       return ERR;
-               }
-       }
-
-       if (c == '-'){
-               if ((c = getchar ()) == '>')
-                       return ARROW;
-               else {
-                       safe_unget(c);
-                       return '-';
-               }
-       }
-
-       if (c == '$'){
-               int i=0;
-       
-               if (!(yylval.str=malloc(BUFSIZE))){
-                       puts("Low memory");
-                       exit (0);
-               }
-                       
-               while (isalnum(c = getchar()) || c == '_' || c == '-'){
-                       yylval.str[i]=c;
-                       i++;
-                       if (i >= BUFSIZE)
-                               break;
-               }
-
-               return VAR;
-       }
-
-       if (c == '\n' || c == '+' || c == '*' || c == '/' ||
-               c == '(' || c == ')' || c == '{' || c == '}')
-               return c;
-
-#define KLEN 10
-
-       if (isalpha(c)){
-               char buf[KLEN]; 
-               int i=0;
-
-               ungetc(c,stdin);
-               while (isalpha(c = getchar()) && i<KLEN-1)
-                       buf[i++]=c;
-               buf[i]=0;
-               
-               if (!strcmp(buf,"if"))
-                       return KW_IF;
-               else if (!strcmp(buf,"else"))
-                       return KW_ELSE;
-               else if (!strcmp(buf,"pipe"))
-                       return KW_COPY;
-               else if (!strcmp(buf,"mail"))
-                       return KW_MAIL;
-               else if (!strcmp(buf,"copy"))
-                       return KW_COPY;
-               else
-                       return ERR;
-       }
-       
-       if (c == EOF)
-               return 0;
-       
-       return ERR;
-}
-
 void
 yyerror (char const *s)
 {
diff --git a/lex.c b/lex.c
new file mode 100644 (file)
index 0000000..4ee876d
--- /dev/null
+++ b/lex.c
@@ -0,0 +1,213 @@
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "cond.tab.h"
+
+#define BUFSIZE 4096
+#define KLEN 10 
+
+/* One reserved word of the input language and the token yylex() returns for it. */
+struct keys {
+       char* keywords;                 /* keyword text, compared with strcmp() in yylex() */
+       enum yytokentype keytoks;       /* token code returned when the text matches */
+};
+
+/* Number of '\n' characters yylex() has consumed so far; printed by parse_err(). */
+static int line;
+
+/* Keyword lookup table, searched linearly by yylex(). */
+static struct keys k[] = 
+       {       {"copy", KW_COPY},
+               {"else", KW_ELSE},
+               {"if", KW_IF}, 
+               {"mail", KW_MAIL}, 
+               {"pipe", KW_PIPE}
+       };
+
+/*
+ * Report a fatal error and terminate the program.
+ *
+ * msg is a printf-style format string; the remaining arguments are the
+ * values it refers to.  The formatted message followed by a newline is
+ * written to stderr, then the process exits with status 1.
+ * This function never returns.
+ */
+void __attribute__ ((noreturn))
+die(char* msg, ...)
+{
+        va_list ap;
+
+        va_start(ap, msg);
+        vfprintf(stderr, msg, ap);
+        va_end(ap);
+
+        fputs("\n", stderr);
+        exit(1);
+}
+
+/*
+ * Allocate size bytes with malloc().
+ *
+ * Returns the new block.  When malloc() returns NULL the program is
+ * terminated through die("Low memory"), so callers never see NULL.
+ */
+void*
+xmalloc(size_t size)
+{
+        void* mem = malloc(size);
+
+        if (mem == NULL)
+                die("Low memory");
+        return mem;
+}
+
+/*
+ * Report a fatal error in the input and terminate the program.
+ *
+ * msg is a printf-style format string followed by its arguments.  The
+ * output on stderr is "Line N: ", the formatted message and a newline;
+ * the process then exits with status 1.  This function never returns.
+ *
+ * `line` holds the number of newlines consumed so far, which is zero
+ * while the first line is being read, so one is added to print the
+ * conventional 1-based line number instead of "Line 0".
+ */
+static void __attribute__ ((noreturn))
+parse_err(char* msg, ...)
+{
+        va_list args;
+
+        va_start(args, msg);
+        fprintf(stderr, "Line %d: ", line + 1);
+        vfprintf(stderr, msg, args);
+        fputc('\n', stderr);
+        va_end(args);
+        exit(1);
+}
+
+/*
+ * Return a newly allocated copy of the string s, made with strdup().
+ *
+ * When strdup() returns NULL the program is terminated through
+ * die("Low memory"), so the result is never NULL.
+ */
+static char*
+xstrdup(char* s)
+{
+        char* copy = strdup(s);
+
+        if (copy == NULL)
+                die("Low memory");
+        return copy;
+}
+
+/*
+ * Read the rest of a quoted string from standard input.
+ *
+ * The opening delimiter has already been consumed by the caller; delim
+ * is that character.  Reading stops at the first delimiter that is not
+ * preceded by an unconsumed backslash.
+ *
+ * Backslash handling:
+ *   \X      (X not the delimiter) stores X alone, so \\ is one backslash
+ *           and does not escape the character after it;
+ *   \delim  stores the backslash followed by the delimiter.
+ *           NOTE(review): keeping the backslash here is inherited
+ *           behaviour - confirm it is intended.
+ *
+ * Newlines inside the string are counted in `line`.
+ *
+ * Returns a heap copy of the text made with xstrdup().  Exits through
+ * parse_err() when input ends before the closing delimiter or when the
+ * text is longer than BUFSIZE-1 characters.
+ */
+static char*
+get_string_out(int delim)
+{
+       int escaped = 0;        /* previous character was an unconsumed backslash */
+       int i = 0;
+       int c;
+       char buf[BUFSIZE];
+
+       for (;;){
+               c = getchar();
+               if (c == EOF)
+                       parse_err("Unterminated string");
+               if (c == '\n')
+                       line++;
+
+               if (escaped){
+                       escaped = 0;
+                       if (c != delim){
+                               /* \X becomes X: overwrite the stored backslash */
+                               buf[i-1] = c;
+                               continue;
+                       }
+                       /* escaped delimiter: appended after its backslash below */
+               } else if (c == delim)
+                       break;
+               else if (c == '\\')
+                       escaped = 1;
+
+               /* check before storing so buf[i] = '\0' below always fits */
+               if (i >= BUFSIZE-1)
+                       parse_err("Too long string, max allowed length is %d",BUFSIZE-1);
+               buf[i++] = c;
+       }
+       buf[i] = '\0';
+
+       return xstrdup(buf);
+}
+
+/*
+ * Tell whether c may appear in a variable name: an ASCII letter, an
+ * ASCII digit, '_' or '-'.  Returns 1 if so, 0 otherwise.
+ */
+static int
+is_var_id(int c)
+{
+       if (c == '_' || c == '-')
+               return 1;
+       if ('0' <= c && c <= '9')
+               return 1;
+       if ('a' <= c && c <= 'z')
+               return 1;
+       return 'A' <= c && c <= 'Z';
+}
+
+/*
+ * Tell whether c is an ASCII letter.  Returns 1 if so, 0 otherwise.
+ */
+static int
+is_alpha(int c)
+{
+       int lower = ('a' <= c && c <= 'z');
+       int upper = ('A' <= c && c <= 'Z');
+
+       return lower || upper;
+}
+
+/*
+ * Lexical analyser: reads standard input and returns the next token.
+ *
+ * Returns
+ *   '\n'   for a run of blanks that contained at least one newline
+ *          (each newline is also counted in `line`),
+ *   0      at end of input,
+ *   NEQ, NRE, LE, GE, EQ, RE, ARROW
+ *          for "!=", "!~", "<=", ">=", "==", "~~" and "->",
+ *   the character itself for  ! ( ) - + * / { } < > =,
+ *   CONST  for a string in double or single quotes; yylval.str holds
+ *          the text returned by get_string_out(),
+ *   NUM    for a decimal number; yylval.n holds its value,
+ *   VAR    for '$' followed by name characters; yylval.str holds a
+ *          heap copy of the name without the '$',
+ *   the keytoks value of the matching k[] entry for a keyword.
+ * Any other input is reported through parse_err(), which exits.
+ *
+ * The character that ends a token is always handed back with ungetc()
+ * so it starts the next token, and at most one character is pushed back
+ * at any time, which is all the C standard guarantees.
+ */
+int
+yylex(void)
+{
+       int c, nl = 0;
+
+       while ((c = getchar ()) == ' ' || c == '\t' || c =='\n'){
+               if (c == '\n'){
+                       nl = 1;
+                       line++;
+               }
+       }
+
+       if (nl){
+               /* c already belongs to the next token: keep it for the
+                  following call instead of dropping it */
+               if (c != EOF)
+                       ungetc(c,stdin);
+               return '\n';
+       }
+
+       if (c == EOF)
+               return 0;
+
+#define CC(a,b) (((a)<<8)|(b))
+       /* Look ahead only where a two-character operator can start, so no
+          lookahead is pending when a number is pushed back further down */
+       if (c == '!' || c == '<' || c == '>' || c == '=' || c == '~' || c == '-'){
+               int d = getchar();
+
+               if (d != EOF){
+                       switch (CC(c,d)) {
+                       case CC('!','='): return NEQ;
+                       case CC('!','~'): return NRE;
+                       case CC('<','='): return LE;
+                       case CC('>','='): return GE;
+                       case CC('=','='): return EQ;
+                       case CC('~','~'): return RE;
+                       case CC('-','>'): return ARROW;
+                       }
+                       ungetc(d,stdin);
+               }
+       }
+
+       switch (c) {
+               case '!':
+               case '(':
+               case ')':
+               case '-':
+               case '+':
+               case '*':
+               case '/':
+               case '{':
+               case '}':
+               case '<':
+               case '>':
+               case '=':
+                       return c;
+
+               case '"':
+               case '\'':
+                       yylval.str = get_string_out(c);
+                       return CONST;
+       }
+
+       if (c >= '0' && c <= '9'){
+               /* give the first digit back and let scanf read the whole number */
+               ungetc(c,stdin);
+               if (scanf("%d",&yylval.n) != 1)
+                       parse_err("Invalid number");
+               return NUM;
+       }
+
+       if (c == '$'){
+               int i = 0;
+               char buf[BUFSIZE];
+
+               while (is_var_id(c = getchar())){
+                       /* check before storing so the terminator always fits */
+                       if (i >= BUFSIZE-1)
+                               parse_err("Too long identifier, max allowed length is %d",BUFSIZE-1);
+                       buf[i++] = c;
+               }
+               /* the character that ended the name starts the next token */
+               if (c != EOF)
+                       ungetc(c,stdin);
+               buf[i] = 0;
+               yylval.str = xstrdup(buf);
+
+               return VAR;
+       }
+
+       if (is_alpha(c)){
+               char buf[KLEN];
+               int n, i = 0;
+
+               /* c is the first letter; store it directly rather than
+                  pushing it back and reading it again */
+               do {
+                       if (i >= KLEN-1)
+                               parse_err("Keyword too long");
+                       buf[i++] = c;
+               } while (is_alpha(c = getchar()));
+               /* the character that ended the word starts the next token */
+               if (c != EOF)
+                       ungetc(c,stdin);
+               buf[i] = 0;
+
+               n = (sizeof(k)/sizeof(k[0]));
+               for (i = 0; i < n; i++){
+                       if (!strcmp(buf,k[i].keywords))
+                               return k[i].keytoks;
+               }
+
+               parse_err("Unknown keyword %s", buf);
+       }
+
+       parse_err("Unknown character %c", c);
+}