From 9c57805dc5f9ddbe1f453181a9060558c64e29cd Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sat, 31 Mar 2018 23:29:44 +0200 Subject: [PATCH] More parsing --- Makefile | 6 +- cmds.cc | 49 ++++++ jam.h | 48 ++++-- paperjam.cc | 283 +--------------------------------- parse.cc | 436 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 524 insertions(+), 298 deletions(-) create mode 100644 cmds.cc create mode 100644 parse.cc diff --git a/Makefile b/Makefile index 88cea96..6504c15 100644 --- a/Makefile +++ b/Makefile @@ -1,11 +1,13 @@ -CXXFLAGS=-O2 -Wall -Wextra -Wno-parentheses -std=gnu++11 +CXXFLAGS=-O2 -Wall -Wextra -Wno-parentheses -std=gnu++11 -g all: paperjam -paperjam: paperjam.o pdf-tools.o +paperjam: paperjam.o pdf-tools.o parse.o cmds.o $(LD) -o $@ $^ $(LDLIBS) paperjam: LDLIBS += -lqpdf paperjam: LD=$(CXX) paperjam.o: jam.h pdf-tools.h pdf-tools.o: jam.h pdf-tools.h +parse.o: jam.h pdf-tools.h +cmds.o: jam.h pdf-tools.h diff --git a/cmds.cc b/cmds.cc new file mode 100644 index 0000000..94ee09e --- /dev/null +++ b/cmds.cc @@ -0,0 +1,49 @@ +#include +#include +#include + +#include "jam.h" + +/*** null ***/ + +class null_cmd : public cmd_exec { +}; + +static const arg_def null_args[] = { + { NULL, 0 } +}; + +static cmd_exec *null_ctor(cmd *c UNUSED) +{ + return new null_cmd; +} + +/*** move ***/ + +class move_cmd : public cmd_exec { +public: + double x, y; +}; + +static const arg_def move_args[] = { + { "x", AT_DIMEN | AT_MANDATORY | AT_POSITIONAL }, + { "y", AT_DIMEN | AT_MANDATORY | AT_POSITIONAL }, + { "str", AT_STRING }, + { NULL, 0 } +}; + +static cmd_exec *move_ctor(cmd *c) +{ + move_cmd *m = new move_cmd; + m->x = c->args.at(0)->double_default(0); + m->y = c->args.at(1)->double_default(0); + return m; +} + +/*** Command table ***/ + +const cmd_def cmd_table[] = { + { "move", move_args, 1, move_ctor }, + { "null", null_args, 0, null_ctor }, + { NULL, NULL, 0, NULL } +}; diff --git a/jam.h b/jam.h index f5ff88a..6fa314c 100644 --- a/jam.h +++ b/jam.h @@ -1,25 +1,22 @@ #include #include +using namespace std; + #include "pdf-tools.h" typedef unsigned int uint; #define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) struct pipeline; - -union arg_val { - string *s; - double d; - pipeline *p; -}; +struct cmd; enum arg_type { AT_STRING, AT_DOUBLE, AT_DIMEN, - AT_PIPELINE, // Pipeline has an empty name AT_TYPE_MASK = 0xffff, AT_MANDATORY = 0x10000, AT_POSITIONAL = 0x20000, @@ -30,6 +27,16 @@ struct arg_def { uint type; }; +class arg_val { +public: + virtual bool given() { return false; } + explicit virtual operator double() { abort(); } + explicit virtual operator string() { abort(); } + double double_default(double def) { return given() ? (double) *this : def; } + const string string_default(const string def) { return given() ? (string) *this : def; } + virtual string dump() { return ""; } +}; + struct page_out { }; @@ -39,18 +46,21 @@ struct page { void render(page_out *out, pdf_matrix xform); }; -struct cmd { -}; - -struct cmd_args { - vector arg; - vector arg_given; +struct cmd_exec { }; struct cmd_def { const char *name; const arg_def *arg_defs; - cmd (*constructor)(cmd_args *args); + bool has_pipeline; + cmd_exec *(*constructor)(cmd *cmd); +}; + +struct cmd { + const cmd_def *def; + vector args; + pipeline *pipe; + cmd_exec *exec; }; struct pipeline_selector { @@ -60,9 +70,17 @@ struct pipeline_selector { struct pipeline_branch { vector selectors; - list commands; + list commands; }; struct pipeline { vector branches; }; + +// parse.cc + +void parse(const char *in, list *cmds); + +// cmds.cc + +extern const cmd_def cmd_table[]; diff --git a/paperjam.cc b/paperjam.cc index 6ff37d1..e5648e1 100644 --- a/paperjam.cc +++ b/paperjam.cc @@ -3,286 +3,8 @@ #include #include -using namespace std; - #include "jam.h" -/*** Lexer ***/ - -enum token_type { - TOK_NONE, - TOK_END, - TOK_EQUAL, - TOK_COMMA, - TOK_OPEN_PAREN, - TOK_CLOSE_PAREN, - TOK_OPEN_BRACE, - TOK_CLOSE_BRACE, - TOK_IDENT, - TOK_STRING, - TOK_NUMBER, -}; - -const char *in_pos; -static token_type this_token = TOK_NONE; -static token_type buffered_token = TOK_NONE; -static string token; -static double token_num; - -static void NONRET parse_error(const char *msg, ...); - -static void parse_error(const char *msg, ...) -{ - va_list args; - va_start(args, msg); - fprintf(stderr, "Parse error: "); - vfprintf(stderr, msg, args); - fprintf(stderr, "\n"); - va_end(args); - exit(1); -} - -static token_type get_next_token() -{ - while (*in_pos == ' ' || *in_pos == '\t' || *in_pos == '\r' || *in_pos == '\n') - in_pos++; - - token = ""; - if (!*in_pos) - return TOK_END; - - if (*in_pos >= '0' && *in_pos <= '9' || - *in_pos == '-' && in_pos[1] >= '0' && in_pos[1] <= '9') - { - token += *in_pos++; - while (*in_pos >= '0' && *in_pos <= '9' || *in_pos == '.') - token += *in_pos++; - - size_t end_pos; - token_num = stod(token, &end_pos); - if (end_pos < token.length()) - parse_error("Invalid number %s", token.c_str()); - return TOK_NUMBER; - } - - if (*in_pos >= 'A' && *in_pos <= 'Z' || - *in_pos >= 'a' && *in_pos <= 'z') - { - while (*in_pos >= 'A' && *in_pos <= 'Z' || - *in_pos >= 'a' && *in_pos <= 'z' || - *in_pos >= '0' && *in_pos <= '9') - token += *in_pos++; - return TOK_IDENT; - } - - if (*in_pos == '"') - { - in_pos++; - while (*in_pos != '"') - { - if (!*in_pos) - parse_error("Unterminated string"); - if (*in_pos == '\\') - { - in_pos++; - if (*in_pos == '"') - parse_error("Unrecognized escape sequence \\%c", *in_pos); - } - token += *in_pos++; - } - in_pos++; - return TOK_STRING; - } - - uint c = *in_pos++; - switch (c) - { - case '=': - return TOK_EQUAL; - case ',': - return TOK_COMMA; - case '(': - return TOK_OPEN_PAREN; - case ')': - return TOK_CLOSE_PAREN; - case '{': - return TOK_OPEN_BRACE; - case '}': - return TOK_CLOSE_BRACE; - default: - parse_error("Unrecognized character '%c'", c); - } -} - -static token_type next_token() -{ - this_token = get_next_token(); - return this_token; -} - -static void return_token() -{ - assert(this_token != TOK_NONE); - assert(buffered_token == TOK_NONE); - buffered_token = this_token; - this_token = TOK_NONE; -} - -/*** Parser ***/ - -static const arg_def move_args[] = { - { "x", AT_DIMEN | AT_MANDATORY | AT_POSITIONAL }, - { "y", AT_DIMEN | AT_MANDATORY | AT_POSITIONAL }, - { NULL, 0 } -}; - -static const cmd_def cmd_table[] = { - { "move", move_args, NULL }, - { NULL, NULL, NULL } -}; - -struct unit { - const char *name; - double multiplier; -}; - -#define MM (72/25.4) - -static const unit units[] = { - { "mm", MM }, - { "cm", 10*MM }, - { "dm", 100*MM }, - { "m", 1000*MM }, - { "in", 72 }, - { "pt", 1 }, - { NULL, 0 } -}; - -static double parse_dimen(const arg_def *adef) -{ - token_type t = next_token(); - if (t != TOK_NUMBER) - parse_error("Paremeter %s must be a dimension", adef->name); - double tmp = token_num; - - t = next_token(); - if (t != TOK_IDENT) - parse_error("Paremeter %s must have a unit", adef->name); - for (uint i; units[i].name; i++) - if (token == units[i].name) - return tmp * units[i].multiplier; - parse_error("Unknown unit %s", token.c_str()); -} - -static cmd_args *parse_args(const cmd_def *cdef) -{ - cmd_args *args = new cmd_args; - - const arg_def *adefs = cdef->arg_defs; - uint num_args = 0; - while (adefs[num_args].name) - { - args->arg.push_back(arg_val()); - args->arg_given.push_back(0); - num_args++; - } - - token_type t = next_token(); - if (t != TOK_OPEN_PAREN) - { - return_token(); - return args; - } - - bool saw_named = false; - uint next_pos = 0; - for (;;) - { - t = next_token(); - int argi = 0; - if (t == TOK_IDENT) - { - while (adefs[argi].name && token != adefs[argi].name) - argi++; - if (!adefs[argi].name) - parse_error("Command %s has no parameter %s", cdef->name, token.c_str()); - t = next_token(); - if (t != TOK_EQUAL) - parse_error("Parameter name must be followed by '='"); - saw_named = true; - } - else if (saw_named) - parse_error("Positional parameters must precede named ones"); - else - { - while (next_pos < num_args && !(adefs[next_pos].type & AT_POSITIONAL)) - next_pos++; - if (next_pos >= num_args) - parse_error("Too many positional arguments for command %s", cdef->name); - argi = next_pos++; - } - - const arg_def *adef = &adefs[argi]; - switch (adef->type & AT_TYPE_MASK) - { - case AT_STRING: - t = next_token(); - if (t != TOK_STRING) - parse_error("Paremeter %s must be a string", adef->name); - args->arg[argi].s = token; - break; - case AT_DOUBLE: - t = next_token(); - if (t != TOK_NUMBER) - parse_error("Paremeter %s must be a number", adef->name); - args->arg[argi].d = token_num; - break; - case AT_DIMEN: - args->arg[argi].d = parse_dimen(adef); - break; - default: - abort(); - } - - t = next_token(); - if (t == TOK_CLOSE_PAREN) - break; - if (t != TOK_COMMA) - parse_error("Comma expected after parameter %s", adef->name); - } - - return args; -} - -static cmd *parse_cmd() -{ - const cmd_def *cdef = cmd_table; - while (cdef->name && token != cdef->name) - cdef++; - if (!cdef->name) - parse_error("Unknown command %s", token.c_str()); - - cmd_args *args = parse_args(cdef); -} - -static void parse(list *cmds) -{ - for (;;) - { - token_type t = next_token(); - if (t != TOK_IDENT) - { - return_token(); - return; - } - - cmd *c = parse_cmd(); - cmds->push_back(c); - } -} - -/*** Main ***/ - int main(int argc, char **argv) { if (argc != 4) @@ -291,9 +13,8 @@ int main(int argc, char **argv) return 1; } - list cmds; - in_pos = argv[1]; - parse(&cmds); + list cmds; + parse(argv[1], &cmds); return 0; } diff --git a/parse.cc b/parse.cc new file mode 100644 index 0000000..576795d --- /dev/null +++ b/parse.cc @@ -0,0 +1,436 @@ +#include +#include +#include +#include + +#include "jam.h" + +/*** Lexer ***/ + +enum token_type { + TOK_NONE, + TOK_END, + TOK_EQUAL, + TOK_COMMA, + TOK_COLON, + TOK_DOTDOT, + TOK_OPEN_PAREN, + TOK_CLOSE_PAREN, + TOK_OPEN_BRACE, + TOK_CLOSE_BRACE, + TOK_IDENT, + TOK_STRING, + TOK_NUMBER, +}; + +const char *in_pos; +static token_type this_token = TOK_NONE; +static token_type buffered_token = TOK_NONE; +static string token; +static double token_num; + +static void NONRET parse_error(const char *msg, ...); +static void parse_commands(list *cmds); + +static void parse_error(const char *msg, ...) +{ + va_list args; + va_start(args, msg); + fprintf(stderr, "Parse error: "); + vfprintf(stderr, msg, args); + fprintf(stderr, "\n"); + va_end(args); + exit(1); +} + +static token_type get_next_token() +{ + while (*in_pos == ' ' || *in_pos == '\t' || *in_pos == '\r' || *in_pos == '\n') + in_pos++; + + token = ""; + if (!*in_pos) + return TOK_END; + + if (*in_pos >= '0' && *in_pos <= '9' || + *in_pos == '-' && in_pos[1] >= '0' && in_pos[1] <= '9') + { + token += *in_pos++; + while (*in_pos >= '0' && *in_pos <= '9' || *in_pos == '.' && in_pos[1] != '.') + token += *in_pos++; + + size_t end_pos; + token_num = stod(token, &end_pos); + if (end_pos < token.length()) + parse_error("Invalid number %s", token.c_str()); + return TOK_NUMBER; + } + + if (*in_pos >= 'A' && *in_pos <= 'Z' || + *in_pos >= 'a' && *in_pos <= 'z') + { + while (*in_pos >= 'A' && *in_pos <= 'Z' || + *in_pos >= 'a' && *in_pos <= 'z' || + *in_pos >= '0' && *in_pos <= '9') + token += *in_pos++; + return TOK_IDENT; + } + + if (*in_pos == '"') + { + in_pos++; + while (*in_pos != '"') + { + if (!*in_pos) + parse_error("Unterminated string"); + if (*in_pos == '\\') + { + in_pos++; + if (*in_pos != '"' && *in_pos != '\\') + parse_error("Unrecognized escape sequence \\%c", *in_pos); + } + token += *in_pos++; + } + in_pos++; + return TOK_STRING; + } + + uint c = *in_pos++; + switch (c) + { + case '=': + return TOK_EQUAL; + case ',': + return TOK_COMMA; + case ':': + return TOK_COLON; + case '(': + return TOK_OPEN_PAREN; + case ')': + return TOK_CLOSE_PAREN; + case '{': + return TOK_OPEN_BRACE; + case '}': + return TOK_CLOSE_BRACE; + default: + if (c == '.' && *in_pos == '.') + { + in_pos++; + return TOK_DOTDOT; + } + parse_error("Unrecognized character '%c'", c); + } +} + +static token_type next_token() +{ + if (buffered_token != TOK_NONE) + this_token = buffered_token; + else + this_token = get_next_token(); + buffered_token = TOK_NONE; + return this_token; +} + +static void return_token() +{ + assert(this_token != TOK_NONE); + assert(buffered_token == TOK_NONE); + buffered_token = this_token; + this_token = TOK_NONE; +} + +static token_type peek_token() +{ + next_token(); + return_token(); + return buffered_token; +} + +static bool token_is_int() +{ + return token_num == (double)(int) token_num; +} + +/*** Argument types ***/ + +class arg_double : public arg_val { + double val; +public: + bool given() { return true; } + explicit operator double () { return val; } + arg_double(double x) { val = x; } + string dump() { return to_string(val); } +}; + +class arg_string : public arg_val { + string val; +public: + bool given() { return true; } + explicit operator string () { return val; } + arg_string(string x) { val = x; } + string dump() { return '"' + val + '"'; } +}; + +static arg_val null_arg; + +/*** Parser ***/ + +struct unit { + const char *name; + double multiplier; +}; + +#define MM (72/25.4) + +static const unit units[] = { + { "mm", MM }, + { "cm", 10*MM }, + { "dm", 100*MM }, + { "m", 1000*MM }, + { "in", 72 }, + { "pt", 1 }, + { NULL, 0 } +}; + +static double parse_dimen(const arg_def *adef) +{ + token_type t = next_token(); + if (t != TOK_NUMBER) + parse_error("Parameter %s must be a dimension", adef->name); + double tmp = token_num; + + t = next_token(); + if (t != TOK_IDENT) + parse_error("Parameter %s must have a unit", adef->name); + for (uint i=0; units[i].name; i++) + if (token == units[i].name) + return tmp * units[i].multiplier; + parse_error("Unknown unit %s", token.c_str()); +} + +static void parse_pipeline(cmd *c) +{ + pipeline *pp = new pipeline; + next_token(); + + while (peek_token() != TOK_CLOSE_BRACE) + { + if (pp->branches.size() && next_token() != TOK_COMMA) + parse_error("Comma expected between pipeline branches"); + + pipeline_branch *pb = new pipeline_branch; + pp->branches.push_back(pb); + + for (;;) + { + token_type t = next_token(); + if (t == TOK_CLOSE_BRACE || t == TOK_END) + parse_error("Premature end of pipeline"); + if (t == TOK_COLON) + break; + + if (pb->selectors.size()) + { + if (t != TOK_COMMA) + parse_error("Invalid pipeline selector"); + t = next_token(); + if (t == TOK_CLOSE_BRACE || t == TOK_END) + parse_error("Premature end of pipeline"); + } + + pipeline_selector ps; + if (t != TOK_NUMBER) + parse_error("Pipeline selectors must start with a number"); + if (!token_is_int()) + parse_error("Pipeline selectors must be integers"); + ps.from = (int) token_num; + ps.to = ps.from; + + if (peek_token() == TOK_DOTDOT) + { + next_token(); + t = next_token(); + if (t != TOK_NUMBER) + parse_error("Pipeline selectors must be numbers or ranges"); + if (!token_is_int()) + parse_error("Pipeline selectors must be integers"); + ps.to = (int) token_num; + } + + pb->selectors.push_back(ps); + } + + parse_commands(&pb->commands); + } + + c->pipe = pp; + next_token(); +} + +static void parse_args(cmd *c) +{ + const cmd_def *cdef = c->def; + const arg_def *adefs = cdef->arg_defs; + uint num_args = 0; + while (adefs[num_args].name) + num_args++; + + c->args.resize(num_args, &null_arg); + + token_type t = next_token(); + if (t != TOK_OPEN_PAREN) + { + return_token(); + return; + } + + bool saw_named = false; + uint next_pos = 0; + for (;;) + { + t = next_token(); + uint argi = 0; + if (t == TOK_IDENT) + { + while (adefs[argi].name && token != adefs[argi].name) + argi++; + if (!adefs[argi].name) + parse_error("Command %s has no parameter %s", cdef->name, token.c_str()); + if (c->args.at(argi)->given()) + parse_error("Parameter %s given multiple times", token.c_str()); + t = next_token(); + if (t != TOK_EQUAL) + parse_error("Parameter name must be followed by '='"); + saw_named = true; + } + else if (saw_named) + parse_error("Positional parameters must precede named ones"); + else + { + return_token(); + while (next_pos < num_args && !(adefs[next_pos].type & AT_POSITIONAL)) + next_pos++; + if (next_pos >= num_args) + parse_error("Too many positional arguments for command %s", cdef->name); + argi = next_pos++; + } + + const arg_def *adef = &adefs[argi]; + arg_val *val = NULL; + switch (adef->type & AT_TYPE_MASK) + { + case AT_STRING: + t = next_token(); + if (t != TOK_STRING) + parse_error("Parameter %s must be a string", adef->name); + val = new arg_string(token); + break; + case AT_DOUBLE: + t = next_token(); + if (t != TOK_NUMBER) + parse_error("Parameter %s must be a number", adef->name); + val = new arg_double(token_num); + break; + case AT_DIMEN: + val = new arg_double(parse_dimen(adef)); + break; + default: + abort(); + } + + c->args.at(argi) = val; + + t = next_token(); + if (t == TOK_CLOSE_PAREN) + break; + if (t != TOK_COMMA) + parse_error("Comma expected after parameter %s", adef->name); + } + + for (uint i=0; iargs.at(i)->given()) + parse_error("Command %s is missing a parameter %s", cdef->name, adefs[i].name); +} + +static void debug_cmd(cmd *c, uint indent=0) +{ + printf("%*sCommand %s\n", indent, "", c->def->name); + for (size_t i=0; i < c->args.size(); i++) + { + const arg_def *adef = &c->def->arg_defs[i]; + string dump = c->args.at(i)->dump(); + printf("%*sArg #%d: %s = %s\n", indent+4, "", (int) i, adef->name, dump.c_str()); + } + if (c->pipe) + { + printf("%*sPipeline:\n", indent+4, ""); + for (auto pb: c->pipe->branches) + { + printf("%*sSelector:\n", indent+8, ""); + for (auto ps: pb->selectors) + printf("%*s%d - %d\n", indent+12, "", ps.from, ps.to); + printf("%*sCommands:\n", indent+8, ""); + for (auto cc: pb->commands) + debug_cmd(cc, indent+12); + } + } +} + +static void debug_cmds(list *cmds) +{ + for (auto c: *cmds) + debug_cmd(c); +} + +static cmd *parse_cmd() +{ + const cmd_def *cdef = cmd_table; + while (cdef->name && token != cdef->name) + cdef++; + if (!cdef->name) + parse_error("Unknown command %s", token.c_str()); + + cmd *c = new cmd; + c->def = cdef; + c->pipe = NULL; + + parse_args(c); + + if (peek_token() == TOK_OPEN_BRACE) + { + if (!cdef->has_pipeline) + parse_error("Command %s does not accept a pipeline", cdef->name); + parse_pipeline(c); + } + else if (cdef->has_pipeline) + parse_error("Command %s requires a pipeline", cdef->name); + + return c; +} + +static void parse_commands(list *cmds) +{ + for (;;) + { + token_type t = next_token(); + if (t != TOK_IDENT) + { + return_token(); + return; + } + + cmd *c = parse_cmd(); + cmds->push_back(c); + } +} + +void parse(const char *in, list *cmds) +{ + in_pos = in; + parse_commands(cmds); + if (next_token() != TOK_END) + parse_error("Extra tokens after commands"); + + debug_cmds(cmds); +} -- 2.39.2