From 233f87f9c95f14ef4ddd5ac36cb8c3389501f99b Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sun, 1 Apr 2018 11:49:46 +0200 Subject: [PATCH] Rudimentary reading and writing PDFs --- TODO | 2 + cmds.cc | 10 ++--- jam.h | 12 +++-- paperjam.cc | 126 +++++++++++++++++++++++++++++++++++++++++++++++++++- parse.cc | 20 ++++----- 5 files changed, 151 insertions(+), 19 deletions(-) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..5ecb29c --- /dev/null +++ b/TODO @@ -0,0 +1,2 @@ +- Integrate pdf-tools.cc with the rest of the code +- What if an input page specifies /Rotate? diff --git a/cmds.cc b/cmds.cc index 10dd4e0..4f6fd1d 100644 --- a/cmds.cc +++ b/cmds.cc @@ -7,7 +7,7 @@ /*** null ***/ class null_cmd : public cmd_exec { - vector process(vector pages) { return pages; } + vector process(vector &pages) { return pages; } }; static const arg_def null_args[] = { @@ -24,7 +24,7 @@ static cmd_exec *null_ctor(cmd *c UNUSED) class move_cmd : public cmd_exec { public: double x, y; - vector process(vector pages); + vector process(vector &pages); }; class xform_page : public page { @@ -40,7 +40,7 @@ void xform_page::render(page_out *out, pdf_matrix parent_xform) orig_page->render(out, xform * parent_xform); } -vector move_cmd::process(vector pages) +vector move_cmd::process(vector &pages) { vector out; for (auto p: pages) @@ -71,10 +71,10 @@ static cmd_exec *move_ctor(cmd *c) class scale_cmd : public cmd_exec { public: double x_factor, y_factor; - vector process(vector pages); + vector process(vector &pages); }; -vector scale_cmd::process(vector pages) +vector scale_cmd::process(vector &pages) { vector out; for (auto p: pages) diff --git a/jam.h b/jam.h index f5d2ca1..d5db7c5 100644 --- a/jam.h +++ b/jam.h @@ -38,17 +38,23 @@ public: }; struct page_out { + QPDFObjectHandle resources; + QPDFObjectHandle xobjects; + string contents; + int res_cnt; + string new_resource(const string type); }; struct page { + int index; double width; double height; virtual void render(page_out *out UNUSED, pdf_matrix xform UNUSED) { abort(); } - page(double _w, double _h) : width(_w), height(_h) { } + page(double _w=0, double _h=0) : width(_w), height(_h) { } }; struct cmd_exec { - virtual vector process(vector pages UNUSED) { abort(); } + virtual vector process(vector &pages UNUSED) { abort(); } }; struct cmd_def { @@ -81,7 +87,7 @@ struct pipeline { // parse.cc -void parse(const char *in, list *cmds); +void parse(const char *in, list &cmds); // cmds.cc diff --git a/paperjam.cc b/paperjam.cc index e5648e1..bebfb92 100644 --- a/paperjam.cc +++ b/paperjam.cc @@ -5,6 +5,119 @@ #include "jam.h" +#include + +static QPDF in_pdf; +static QPDF out_pdf; + +string page_out::new_resource(const string type) +{ + return "/" + type + to_string(++res_cnt); +} + +class in_page : public page { + QPDFObjectHandle pdf_page; +public: + BBox media_box; + void render(page_out *out, pdf_matrix xform); + in_page(QPDFObjectHandle inpg, int idx); +}; + +in_page::in_page(QPDFObjectHandle inpg, int idx) +{ + pdf_page = inpg; + index = idx; + media_box = BBox(inpg.getKey("/MediaBox")); + width = media_box.width(); + height = media_box.height(); +} + +void in_page::render(page_out *out, pdf_matrix xform) +{ + // Convert page to xobject + QPDFObjectHandle page_copy = page_to_xobject(&out_pdf, out_pdf.copyForeignObject(pdf_page)); + string xobj_res = out->new_resource("XO"); + out->xobjects.replaceKey(xobj_res, out_pdf.makeIndirectObject(page_copy)); + + pdf_matrix m; + m.shift(-media_box.x_min, -media_box.y_min); + m.concat(xform); + + out->contents += "q " + m.to_string() + " cm " + xobj_res + " Do Q"; +} + +static void debug_pages(vector &pages) +{ + if (!debug_mode) + return; + + for (auto pg: pages) + debug("Page #%d: w=%.3f h=%.3f", pg->index, pg->width, pg->height); +} + +static void process(list &cmds, const char *in_name, const char *out_name) +{ + in_pdf.processFile(in_name); + in_pdf.pushInheritedAttributesToPage(); + out_pdf.emptyPDF(); + + vector const &in_pages = in_pdf.getAllPages(); + vector pages; + + QPDFObjectHandle page_copy = out_pdf.copyForeignObject(in_pages[0]); + + int cnt = 0; + for (auto inpg: in_pages) + pages.push_back(new in_page(inpg, ++cnt)); + debug("# Input document"); + debug_pages(pages); + + for (auto c: cmds) + { + debug("# Executing %s", c->def->name); + pages = c->exec->process(pages); + debug_pages(pages); + } + + for (auto pg: pages) + { + page_out out; + out.resources = QPDFObjectHandle::newDictionary(); + // FIXME: What if the source page requires a broader ProcSet? + out.resources.replaceKey("/ProcSet", QPDFObjectHandle::parse("[/PDF /Text]")); + out.xobjects = QPDFObjectHandle::newDictionary(); + out.resources.replaceKey("/XObject", out.xobjects); + pg->render(&out, pdf_matrix()); + + QPDFObjectHandle contents = QPDFObjectHandle::newStream(&out_pdf, out.contents); + + // Create the page object + QPDFObjectHandle out_page = out_pdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); + out_page.replaceKey("/Type", QPDFObjectHandle::newName("/Page")); + out_page.replaceKey("/MediaBox", BBox(pg->width, pg->height).to_array()); + out_page.replaceKey("/Contents", contents); + out_page.replaceKey("/Resources", out.resources); + out_pdf.addPage(out_page, false); + } + + // Produce info dictionary + QPDFObjectHandle trailer = out_pdf.getTrailer(); + QPDFObjectHandle info = trailer.getKey("/Info"); + if (info.isNull()) + { + info = QPDFObjectHandle::newDictionary(); + trailer.replaceKey("/Info", info); + } + else + assert(info.isDictionary()); + // FIXME: More meta-data + info.replaceKey("/Producer", unicode_string("PaperJam")); + + // Write the output file + QPDFWriter writer(out_pdf, out_name); + writer.write(); +} + int main(int argc, char **argv) { if (argc != 4) @@ -13,8 +126,19 @@ int main(int argc, char **argv) return 1; } + debug_mode = 100; + list cmds; - parse(argv[1], &cmds); + parse(argv[1], cmds); + + try + { + process(cmds, argv[2], argv[3]); + } + catch (exception& e) + { + die("%s", e.what()); + } return 0; } diff --git a/parse.cc b/parse.cc index c0b8a35..827cb51 100644 --- a/parse.cc +++ b/parse.cc @@ -30,7 +30,7 @@ static string token; static double token_num; static void NONRET parse_error(const char *msg, ...); -static void parse_commands(list *cmds); +static void parse_commands(list &cmds); static void parse_error(const char *msg, ...) { @@ -261,7 +261,7 @@ static void parse_pipeline(cmd *c) pb->selectors.push_back(ps); } - parse_commands(&pb->commands); + parse_commands(pb->commands); } c->pipe = pp; @@ -377,9 +377,9 @@ static void debug_cmd(cmd *c, uint indent=0) } } -static void debug_cmds(list *cmds) +static void debug_cmds(list &cmds) { - for (auto c: *cmds) + for (auto c: cmds) debug_cmd(c); } @@ -409,7 +409,7 @@ static cmd *parse_cmd() return c; } -static void parse_commands(list *cmds) +static void parse_commands(list &cmds) { for (;;) { @@ -421,24 +421,24 @@ static void parse_commands(list *cmds) } cmd *c = parse_cmd(); - cmds->push_back(c); + cmds.push_back(c); } } -static void instantiate(list *cmds) +static void instantiate(list &cmds) { - for (auto c: *cmds) + for (auto c: cmds) { c->exec = c->def->constructor(c); if (c->pipe) { for (auto pb: c->pipe->branches) - instantiate(&pb->commands); + instantiate(pb->commands); } } } -void parse(const char *in, list *cmds) +void parse(const char *in, list &cmds) { in_pos = in; parse_commands(cmds); -- 2.39.2