From: Martin Mares
Date: Sat, 31 Mar 2018 17:38:58 +0000 (+0200)
Subject: First attempts...
X-Git-Tag: v0.1~49
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=d8523bb2f59981d9aa4c052850b15beec32c15c6;p=paperjam.git

First attempts...
---

d8523bb2f59981d9aa4c052850b15beec32c15c6
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..88cea96
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+CXXFLAGS=-O2 -Wall -Wextra -Wno-parentheses -std=gnu++11
+
+all: paperjam
+
+paperjam: paperjam.o pdf-tools.o
+	$(LD) -o $@ $^ $(LDLIBS)
+paperjam: LDLIBS += -lqpdf
+paperjam: LD=$(CXX)
+
+paperjam.o: jam.h pdf-tools.h
+pdf-tools.o: jam.h pdf-tools.h
diff --git a/jam.h b/jam.h
new file mode 100644
index 0000000..f5ff88a
--- /dev/null
+++ b/jam.h
@@ -0,0 +1,87 @@
+/*
+ *	Data structures describing commands, their arguments and pipelines
+ *
+ *	Expects `using namespace std;' to be in effect in the including file.
+ *
+ *	NOTE(review): header names and template arguments were missing from
+ *	the patch text and are reconstructed from usage; confirm upstream.
+ */
+
+#ifndef _JAM_H
+#define _JAM_H
+
+#include <vector>
+#include <list>
+
+#include "pdf-tools.h"
+
+typedef unsigned int uint;
+
+#define NONRET __attribute__((noreturn))
+
+struct pipeline;
+
+// Value of one argument; parse_args() stores strings in s, numbers and dimensions in d
+union arg_val {
+	string *s;
+	double d;
+	pipeline *p;
+};
+
+// Argument type, OR-ed with the AT_MANDATORY and AT_POSITIONAL flags in arg_def::type
+enum arg_type {
+	AT_STRING,
+	AT_DOUBLE,
+	AT_DIMEN,
+	AT_PIPELINE,			// Pipeline has an empty name
+	AT_TYPE_MASK = 0xffff,
+	AT_MANDATORY = 0x10000,
+	AT_POSITIONAL = 0x20000,
+};
+
+// One argument of a command; tables of these end with an entry whose name is NULL
+struct arg_def {
+	const char *name;
+	uint type;
+};
+
+struct page_out {
+};
+
+struct page {
+	double width;
+	double height;
+	void render(page_out *out, pdf_matrix xform);
+};
+
+struct cmd {
+};
+
+// Parsed arguments of one command, indexed like its arg_defs table
+struct cmd_args {
+	vector<arg_val> arg;
+	vector<bool> arg_given;
+};
+
+// One command; tables of these end with an entry whose name is NULL
+struct cmd_def {
+	const char *name;
+	const arg_def *arg_defs;
+	cmd (*constructor)(cmd_args *args);
+};
+
+struct pipeline_selector {
+	int from;
+	int to;
+};
+
+struct pipeline_branch {
+	vector<pipeline_selector> selectors;
+	list<cmd *> commands;
+};
+
+struct pipeline {
+	vector<pipeline_branch *> branches;
+};
+
+#endif
diff --git a/paperjam.cc b/paperjam.cc
new file mode 100644
index 0000000..6ff37d1
--- /dev/null
+++ b/paperjam.cc
@@ -0,0 +1,329 @@
+#include <cassert>		// NOTE(review): the four header names are reconstructed from usage
+#include <cstdarg>
+#include <cstdlib>
+#include <cstdio>
+
+using namespace std;
+
+#include "jam.h"
+
+/*** Lexer ***/
+
+enum token_type {
+	TOK_NONE,
+	TOK_END,
+	TOK_EQUAL,
+	TOK_COMMA,
+	TOK_OPEN_PAREN,
+	TOK_CLOSE_PAREN,
+	TOK_OPEN_BRACE,
+	TOK_CLOSE_BRACE,
+	TOK_IDENT,
+	TOK_STRING,
+	TOK_NUMBER,
+};
+
+const char *in_pos;				// Next unread character of the command string
+static token_type this_token = TOK_NONE;	// Token most recently handed out by next_token()
+static token_type buffered_token = TOK_NONE;	// Token pushed back by return_token(), if any
+static string token;				// Text of the last identifier, string or number
+static double token_num;			// Value of the last TOK_NUMBER
+
+static void NONRET parse_error(const char *msg, ...) FORMAT_CHECK(printf, 1, 2);
+
+// Report a printf-style parse error on stderr and exit with status 1
+static void parse_error(const char *msg, ...)
+{
+	va_list args;
+	va_start(args, msg);
+	fprintf(stderr, "Parse error: ");
+	vfprintf(stderr, msg, args);
+	fprintf(stderr, "\n");
+	va_end(args);
+	exit(1);
+}
+
+// Lex one token starting at in_pos; `token' and `token_num' receive its text and value
+static token_type get_next_token()
+{
+	while (*in_pos == ' ' || *in_pos == '\t' || *in_pos == '\r' || *in_pos == '\n')
+		in_pos++;
+
+	token = "";
+	if (!*in_pos)
+		return TOK_END;
+
+	if (*in_pos >= '0' && *in_pos <= '9' ||
+	    *in_pos == '-' && in_pos[1] >= '0' && in_pos[1] <= '9')
+	{
+		token += *in_pos++;
+		while (*in_pos >= '0' && *in_pos <= '9' || *in_pos == '.')
+			token += *in_pos++;
+
+		// stod() stops at the first character it cannot use, e.g. a second '.'
+		size_t end_pos;
+		token_num = stod(token, &end_pos);
+		if (end_pos < token.length())
+			parse_error("Invalid number %s", token.c_str());
+		return TOK_NUMBER;
+	}
+
+	if (*in_pos >= 'A' && *in_pos <= 'Z' ||
+	    *in_pos >= 'a' && *in_pos <= 'z')
+	{
+		while (*in_pos >= 'A' && *in_pos <= 'Z' ||
+		       *in_pos >= 'a' && *in_pos <= 'z' ||
+		       *in_pos >= '0' && *in_pos <= '9')
+			token += *in_pos++;
+		return TOK_IDENT;
+	}
+
+	if (*in_pos == '"')
+	{
+		in_pos++;
+		while (*in_pos != '"')
+		{
+			if (!*in_pos)
+				parse_error("Unterminated string");
+			if (*in_pos == '\\')
+			{
+				// Only \" and \\ are escapes; the backslash itself is dropped
+				in_pos++;
+				if (!*in_pos)
+					parse_error("Unterminated string");
+				if (*in_pos != '"' && *in_pos != '\\')
+					parse_error("Unrecognized escape sequence \\%c", *in_pos);
+			}
+			token += *in_pos++;
+		}
+		in_pos++;
+		return TOK_STRING;
+	}
+
+	uint c = *in_pos++;
+	switch (c)
+	{
+	case '=':
+		return TOK_EQUAL;
+	case ',':
+		return TOK_COMMA;
+	case '(':
+		return TOK_OPEN_PAREN;
+	case ')':
+		return TOK_CLOSE_PAREN;
+	case '{':
+		return TOK_OPEN_BRACE;
+	case '}':
+		return TOK_CLOSE_BRACE;
+	default:
+		parse_error("Unrecognized character '%c'", c);
+	}
+}
+
+// Return the next token, preferring one pushed back by return_token()
+static token_type next_token()
+{
+	if (buffered_token != TOK_NONE)
+	{
+		this_token = buffered_token;
+		buffered_token = TOK_NONE;
+	}
+	else
+		this_token = get_next_token();
+	return this_token;
+}
+
+// Push the current token back, so that the next next_token() yields it again
+static void return_token()
+{
+	assert(this_token != TOK_NONE);
+	assert(buffered_token == TOK_NONE);
+	buffered_token = this_token;
+	this_token = TOK_NONE;
+}
+
+/*** Parser ***/
+
+static const arg_def move_args[] = {
+	{ "x",	AT_DIMEN | AT_MANDATORY | AT_POSITIONAL },
+	{ "y",	AT_DIMEN | AT_MANDATORY | AT_POSITIONAL },
+	{ NULL,	0 }
+};
+
+static const cmd_def cmd_table[] = {
+	{ "move",	move_args,	NULL	},
+	{ NULL,		NULL,		NULL	}
+};
+
+struct unit {
+	const char *name;
+	double multiplier;		// Size of the unit in PDF points
+};
+
+static const unit units[] = {
+	{ "mm",		mm },
+	{ "cm",		10*mm },
+	{ "dm",		100*mm },
+	{ "m",		1000*mm },
+	{ "in",		72 },
+	{ "pt",		1 },
+	{ NULL,		0 }
+};
+
+// Parse a number followed by a unit name and return the dimension in points
+static double parse_dimen(const arg_def *adef)
+{
+	token_type t = next_token();
+	if (t != TOK_NUMBER)
+		parse_error("Parameter %s must be a dimension", adef->name);
+	double tmp = token_num;
+
+	t = next_token();
+	if (t != TOK_IDENT)
+		parse_error("Parameter %s must have a unit", adef->name);
+	for (uint i = 0; units[i].name; i++)
+		if (token == units[i].name)
+			return tmp * units[i].multiplier;
+	parse_error("Unknown unit %s", token.c_str());
+}
+
+// Parse the optional parenthesized argument list of a command.
+// Positional arguments come first, followed by name=value pairs.
+static cmd_args *parse_args(const cmd_def *cdef)
+{
+	cmd_args *args = new cmd_args;
+
+	const arg_def *adefs = cdef->arg_defs;
+	uint num_args = 0;
+	while (adefs[num_args].name)
+	{
+		args->arg.push_back(arg_val());
+		args->arg_given.push_back(false);
+		num_args++;
+	}
+
+	token_type t = next_token();
+	if (t != TOK_OPEN_PAREN)
+	{
+		return_token();
+		return args;
+	}
+
+	bool saw_named = false;
+	uint next_pos = 0;
+	for (;;)
+	{
+		t = next_token();
+		int argi = 0;
+		if (t == TOK_IDENT)
+		{
+			while (adefs[argi].name && token != adefs[argi].name)
+				argi++;
+			if (!adefs[argi].name)
+				parse_error("Command %s has no parameter %s", cdef->name, token.c_str());
+			t = next_token();
+			if (t != TOK_EQUAL)
+				parse_error("Parameter name must be followed by '='");
+			saw_named = true;
+		}
+		else if (saw_named)
+			parse_error("Positional parameters must precede named ones");
+		else
+		{
+			while (next_pos < num_args && !(adefs[next_pos].type & AT_POSITIONAL))
+				next_pos++;
+			if (next_pos >= num_args)
+				parse_error("Too many positional arguments for command %s", cdef->name);
+			argi = next_pos++;
+			// The token just read starts the value, let the value parser see it
+			return_token();
+		}
+
+		const arg_def *adef = &adefs[argi];
+		switch (adef->type & AT_TYPE_MASK)
+		{
+		case AT_STRING:
+			t = next_token();
+			if (t != TOK_STRING)
+				parse_error("Parameter %s must be a string", adef->name);
+			args->arg[argi].s = new string(token);
+			break;
+		case AT_DOUBLE:
+			t = next_token();
+			if (t != TOK_NUMBER)
+				parse_error("Parameter %s must be a number", adef->name);
+			args->arg[argi].d = token_num;
+			break;
+		case AT_DIMEN:
+			args->arg[argi].d = parse_dimen(adef);
+			break;
+		default:
+			abort();
+		}
+		args->arg_given[argi] = true;
+
+		t = next_token();
+		if (t == TOK_CLOSE_PAREN)
+			break;
+		if (t != TOK_COMMA)
+			parse_error("Comma expected after parameter %s", adef->name);
+	}
+
+	return args;
+}
+
+// Parse one command whose name is the current identifier token
+static cmd *parse_cmd()
+{
+	const cmd_def *cdef = cmd_table;
+	while (cdef->name && token != cdef->name)
+		cdef++;
+	if (!cdef->name)
+		parse_error("Unknown command %s", token.c_str());
+
+	cmd_args *args = parse_args(cdef);
+
+	// No command has a constructor yet, so fall back to an empty command.
+	// TODO: decide who owns args once constructors exist.
+	cmd *c = new cmd;
+	if (cdef->constructor)
+		*c = cdef->constructor(args);
+	return c;
+}
+
+// Parse a sequence of commands and append them to cmds; stops at the first non-identifier
+static void parse(list<cmd *> *cmds)
+{
+	for (;;)
+	{
+		token_type t = next_token();
+		if (t != TOK_IDENT)
+		{
+			return_token();
+			return;
+		}
+
+		cmd *c = parse_cmd();
+		cmds->push_back(c);
+	}
+}
+
+/*** Main ***/
+
+int main(int argc, char **argv)
+{
+	if (argc != 4)
+	{
+		// NOTE(review): placeholder names are reconstructed, the patch text had lost them
+		fprintf(stderr, "Usage: paperjam <commands> <input> <output>\n");
+		return 1;
+	}
+
+	list<cmd *> cmds;
+	in_pos = argv[1];
+	parse(&cmds);
+	if (next_token() != TOK_END)
+		parse_error("Extra tokens after commands");
+
+	return 0;
+}
diff --git a/pdf-tools.cc b/pdf-tools.cc
new file mode 100644
index 0000000..45f7b56
--- /dev/null
+++ b/pdf-tools.cc
@@ -0,0 +1,213 @@
+/*
+ *	Auxiliary functions for processing PDF files
+ *
+ *	(c) 2018 Martin Mares
+ */
+
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+
+#include <iconv.h>
+
+using namespace std;
+
+#include "pdf-tools.h"
+
+#include <qpdf/QUtil.hh>		// NOTE(review): header names in this file are reconstructed from usage, confirm
+#include <qpdf/Pl_Concatenate.hh>
+
+/*** Messages ***/
+
+int debug_mode;				// Non-zero enables the output of debug()
+
+void debug(const char *msg, ...)	// printf-style message to stderr, only if debug_mode is set
+{
+	if (!debug_mode)
+		return;
+	va_list args;
+	va_start(args, msg);
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+	va_end(args);
+}
+
+void warn(const char *msg, ...)		// printf-style warning to stderr, execution continues
+{
+	va_list args;
+	va_start(args, msg);
+	fprintf(stderr, "WARNING: ");
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+	va_end(args);
+}
+
+void die(const char *msg, ...)		// printf-style error to stderr, then exit with status 1
+{
+	va_list args;
+	va_start(args, msg);
+	fprintf(stderr, "ERROR: ");
+	vfprintf(stderr, msg, args);
+	fputc('\n', stderr);
+	va_end(args);
+	exit(1);
+}
+
+void bad(const char *msg, ...)		// Like die(), but the message is also printed on stdout
+{
+	va_list args;
+	va_start(args, msg);
+	char buf[1024];
+	vsnprintf(buf, sizeof(buf), msg, args);	// Messages longer than the buffer are cut short
+	va_end(args);
+
+	printf("error: %s\n", buf);
+	die("BAD: %s", buf);
+}
+
+/*** Transformation matrices ***/
+
+// Construct string representation of a transformation matrix: six "%.3f" numbers separated by spaces
+string pdf_matrix::to_string() const {
+	string s;
+	for (int i=0; i<6; i++) {
+		if (i)
+			s += " ";
+		char buf[16];			// NB: a longer number is cut short by snprintf()
+		snprintf(buf, sizeof(buf), "%.3f", m[i]);
+		s += buf;
+	}
+	return s;
+}
+
+/*** Bounding boxes ***/
+
+QPDFObjectHandle BBox::to_array() const	// PDF array [x_min y_min x_max y_max]
+{
+	QPDFObjectHandle a = QPDFObjectHandle::newArray();
+	a.appendItem(QPDFObjectHandle::newReal(x_min, 1));
+	a.appendItem(QPDFObjectHandle::newReal(y_min, 1));
+	a.appendItem(QPDFObjectHandle::newReal(x_max, 1));
+	a.appendItem(QPDFObjectHandle::newReal(y_max, 1));
+	return a;
+}
+
+bool BBox::parse(QPDFObjectHandle h)	// Returns false, leaving the box untouched, unless h is an array of 4 numbers
+{
+	if (!h.isArray() || h.getArrayNItems() != 4)
+		return false;
+	double x[4];
+	for (int i=0; i<4; i++) {
+		QPDFObjectHandle item = h.getArrayItem(i);
+		if (!item.isNumber())
+			return false;
+		x[i] = item.getNumericValue();
+	}
+	x_min = x[0];
+	y_min = x[1];
+	x_max = x[2];
+	y_max = x[3];
+	return true;
+}
+
+/*** Unicode strings ***/
+
+// Construct PDF representation of a UTF-8 string; dies if the conversion fails
+QPDFObjectHandle unicode_string(string s)
+{
+	// If it is printable ASCII only, use the string directly
+	bool ascii_only = true;
+	for (char c: s)
+		if (c < 0x20 || c > 0x7e)
+			ascii_only = false;
+	if (ascii_only)
+		return QPDFObjectHandle::newString(s);
+
+	// Use iconv to convert the string to big-endian UTF-16
+	iconv_t conv = iconv_open("UTF-16BE", "UTF-8");
+	if (conv == (iconv_t) -1)
+		die("Cannot initialize iconv: %m");
+
+	char *in_ptr = (char *) s.c_str();	// Work around bad API of iconv()
+	size_t in_len = s.length();		// Not strlen(), which would stop at an embedded NUL
+	size_t out_len = 2*in_len + 2;		// Worst case (including the BOM)
+	vector<char> out_buf(out_len);		// On the heap; a VLA is non-standard and limited by the stack
+	char *out_ptr = out_buf.data();
+	size_t res = iconv(conv, &in_ptr, &in_len, &out_ptr, &out_len);
+	if (res == (size_t) -1)
+		die("iconv failed: %m");
+	if (in_len)
+		die("iconv stopped before the end of input");
+
+	iconv_close(conv);
+
+	// Package UTF-16 in a PDF string, preceded by the byte-order mark FE FF
+	string out;
+	out += 0xfe;
+	out += 0xff;
+	for (char *p = out_buf.data(); p < out_ptr; p++)
+		out += *p;
+	return QPDFObjectHandle::newString(out);
+}
+
+/*** Conversion of pages to XObjects ***/
+
+static BBox get_trim_box(QPDFObjectHandle page)	// First of TrimBox, CropBox, MediaBox present in the page, else A4
+{
+	static const char * const boxes[] = { "/TrimBox", "/CropBox", "/MediaBox", NULL };
+	for (int i=0; boxes[i]; i++)
+		if (page.hasKey(boxes[i]))
+			return BBox(page.getKey(boxes[i]));
+	warn("Page has no trimbox, falling back to A4");
+	return BBox(0, 0, A4_WIDTH, A4_HEIGHT);
+}
+
+/* Conversion of pages to XObjects is inspired by CUPS's pdftopdf filter. This provider concatenates the content streams of a page. */
+class CombineFromContents_Provider : public QPDFObjectHandle::StreamDataProvider {
+	private:
+		vector<QPDFObjectHandle> contents;	// NOTE(review): template arguments in this file are reconstructed, confirm
+	public:
+		CombineFromContents_Provider(const vector<QPDFObjectHandle> &contents) : contents(contents) { }
+		void provideStreamData(int objid UNUSED, int generation UNUSED, Pipeline* pipeline) {
+			Pl_Concatenate concat("concat", pipeline);
+			for (int i=0; i < (int)contents.size(); i++)
+				contents[i].pipeStreamData(&concat, true, false, false);
+			concat.manualFinish();
+		}
+};
+
+QPDFObjectHandle page_to_xobject(QPDF *out, QPDFObjectHandle page)	// Wrap a page as a form XObject stream created in out
+{
+	page.assertPageObject();
+
+	QPDFObjectHandle xo_stream = QPDFObjectHandle::newStream(out);
+	QPDFObjectHandle xo_dict = xo_stream.getDict();
+
+	xo_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
+	xo_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
+	xo_dict.replaceKey("/FormType", QPDFObjectHandle::newInteger(1));
+
+	BBox box = get_trim_box(page);
+	xo_dict.replaceKey("/BBox", box.to_array());
+
+	xo_dict.replaceKey("/Resources", page.getKey("/Resources"));
+	if (page.hasKey("/Group"))
+		xo_dict.replaceKey("/Group", page.getKey("/Group"));
+
+	if (page.hasKey("/UserUnit")) {		// Express the page's user unit as a scaling /Matrix [u 0 0 u 0 0]
+		double u = page.getKey("/UserUnit").getNumericValue();
+		QPDFObjectHandle m = QPDFObjectHandle::newArray();
+		m.appendItem(QPDFObjectHandle::newReal(u, 3));
+		m.appendItem(QPDFObjectHandle::newReal(0, 0));
+		m.appendItem(QPDFObjectHandle::newReal(0, 0));
+		m.appendItem(QPDFObjectHandle::newReal(u, 3));
+		m.appendItem(QPDFObjectHandle::newReal(0, 0));
+		m.appendItem(QPDFObjectHandle::newReal(0, 0));
+		xo_dict.replaceKey("/Matrix", m);
+	}
+
+	vector<QPDFObjectHandle> contents = page.getPageContents();
+	auto ph = PointerHolder<QPDFObjectHandle::StreamDataProvider>(new CombineFromContents_Provider(contents));
+	xo_stream.replaceStreamData(ph, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
+	return xo_stream;
+}
diff --git a/pdf-tools.h b/pdf-tools.h
new file mode 100644
index 0000000..98d4b86
--- /dev/null
+++ b/pdf-tools.h
@@ -0,0 +1,152 @@
+/*
+ *	Auxiliary functions for processing PDF files
+ *
+ *	(c) 2018 Martin Mares
+ */
+
+#ifndef _PDF_TOOLS_H
+#define _PDF_TOOLS_H
+
+#include <cmath>		// NOTE(review): the three header names are reconstructed from usage, confirm
+#include <string>
+
+#include <qpdf/QPDF.hh>
+
+/*** Basic macros and constants ***/
+
+#define UNUSED __attribute__((unused))
+#define FORMAT_CHECK(x,y,z) __attribute__((format(x,y,z)))
+#define NONRET __attribute__((noreturn))
+
+#define A4_WIDTH 595		// Fallback page size used when a page box is missing or invalid
+#define A4_HEIGHT 842
+static const double mm = 72/25.4;	// One millimeter, with 72 units per inch
+
+/*** Messages ***/
+
+void debug(const char *msg, ...) FORMAT_CHECK(printf, 1, 2);
+void warn(const char *msg, ...) FORMAT_CHECK(printf, 1, 2);
+void die(const char *msg, ...) FORMAT_CHECK(printf, 1, 2) NONRET;
+void bad(const char *msg, ...) FORMAT_CHECK(printf, 1, 2) NONRET;
+
+extern int debug_mode;
+
+/*** Transformation matrices ***/
+
+struct pdf_matrix {
+	/*
+	 *  A transformation matrix corresponds to the affine transform
+	 *
+	 *            (a b 0)
+	 *  (x y 1) * (c d 0) = (ax+cy+e bx+dy+f 1)
+	 *            (e f 1)
+	 *
+	 *  We represent the non-trivial coefficients of the matrix by
+	 *  an array {a,b,c,d,e,f}.
+	 */
+	double m[6];
+
+	pdf_matrix() {			// Identity
+		m[0] = 1;
+		m[1] = 0;
+		m[2] = 0;
+		m[3] = 1;
+		m[4] = m[5] = 0;
+	}
+
+	pdf_matrix(double a, double b, double c, double d, double e, double f)
+	{
+		m[0] = a;
+		m[1] = b;
+		m[2] = c;
+		m[3] = d;
+		m[4] = e;
+		m[5] = f;
+	}
+
+	// A*B is a matrix which transforms first by A and then by B
+	pdf_matrix operator *(pdf_matrix y) const
+	{
+		return pdf_matrix(
+			m[0]*y.m[0] + m[1]*y.m[2],
+			m[0]*y.m[1] + m[1]*y.m[3],
+			m[2]*y.m[0] + m[3]*y.m[2],
+			m[2]*y.m[1] + m[3]*y.m[3],
+			m[4]*y.m[0] + m[5]*y.m[2] + y.m[4],
+			m[4]*y.m[1] + m[5]*y.m[3] + y.m[5]
+		);
+	}
+
+	void concat(pdf_matrix y)	// Apply y after the current transform
+	{
+		pdf_matrix t = *this * y;
+		for (int i=0; i<6; i++)
+			m[i] = t.m[i];
+	}
+
+	void shift(double dx, double dy)	// Translate the result of the current transform
+	{
+		m[4] += dx;
+		m[5] += dy;
+	}
+
+	void scale(double s)		// Uniform scaling applied after the current transform
+	{
+		concat(pdf_matrix(s, 0, 0, s, 0, 0));
+	}
+
+	void scale(double sx, double sy)	// Separate scaling factors for x and y
+	{
+		concat(pdf_matrix(sx, 0, 0, sy, 0, 0));
+	}
+
+	void rotate_rad(double angle)	// Rotation by an angle in radians, applied after the current transform
+	{
+		double c = std::cos(angle), s = std::sin(angle);
+		concat(pdf_matrix(c, s, -s, c, 0, 0));
+	}
+
+	void rotate_deg(double angle)	// The same with the angle in degrees
+	{
+		rotate_rad(angle/180. * M_PI);
+	}
+
+	std::string to_string() const;
+};
+
+/*** Bounding boxes ***/
+
+struct BBox {
+	double x_min, x_max, y_min, y_max;
+	BBox() {
+		x_min = y_min = x_max = y_max = 0;
+	}
+	BBox(double xmin, double ymin, double xmax, double ymax) {
+		x_min = xmin, x_max = xmax;
+		y_min = ymin, y_max = ymax;
+	}
+	BBox(double x, double y) {	// Box of the given size with its corner at the origin
+		x_min = 0, x_max = x;
+		y_min = 0, y_max = y;
+	}
+	BBox(QPDFObjectHandle box) {	// From a PDF array; an invalid one yields A4 and a warning
+		if (!parse(box)) {
+			warn("Invalid bounding box, falling back to A4");
+			x_min = 0, x_max = A4_WIDTH;
+			y_min = 0, y_max = A4_HEIGHT;
+		}
+	}
+	QPDFObjectHandle to_array() const;
+	double width() const { return x_max - x_min; }
+	double height() const { return y_max - y_min; }
+private:
+	bool parse(QPDFObjectHandle h);
+};
+
+/*** Miscellaneous ***/
+
+QPDFObjectHandle unicode_string(std::string s);
+QPDFObjectHandle page_to_xobject(QPDF *out, QPDFObjectHandle page);
+
+#endif
+