2 * PaperJam -- Low-level handling of PDFs
4 * (c) 2018--2022 Martin Mares <mj@ucw.cz>
16 #include <qpdf/QPDFWriter.hh>
21 static void do_recalc_bbox(vector<page *> &pages, const char *in_name);
23 string out_context::new_resource(const string type)
25 return "/" + type + to_string(++res_cnt);
28 class in_page : public page {
29 QPDFObjectHandle pdf_page;
30 QPDFObjectHandle xobject;
33 void render(out_context *out, pdf_matrix xform);
34 void debug_dump() { debug("Input page %d", index); }
35 in_page(QPDFObjectHandle inpg, int idx);
39 in_page::in_page(QPDFObjectHandle inpg, int idx)
42 xobject = QPDFObjectHandle::newNull();
45 media_box = BBox(inpg.getKey("/MediaBox"));
46 width = media_box.width();
47 height = media_box.height();
49 QPDFObjectHandle art_box = inpg.getKey("/ArtBox");
51 art_box = inpg.getKey("/CropBox");
53 image_box = BBox(width, height);
56 image_box = BBox(art_box);
57 image_box.x_min -= media_box.x_min;
58 image_box.x_max -= media_box.x_min;
59 image_box.y_min -= media_box.y_min;
60 image_box.y_max -= media_box.y_min;
// in_page::render -- paint this input page into the output context `out`.
// The page is turned into an XObject inside the output PDF and a reference
// to it is appended to the content stream being built in out->contents.
// NOTE(review): the declaration of `m` and the statement that applies `xform`
// are on lines not visible in this excerpt.
64 void in_page::render(out_context *out, pdf_matrix xform)
66 // Convert page to xobject
// The input page object is copied into the output PDF, converted by
// page_to_xobject() and stored as an indirect object.
68 xobject = out->pdf->makeIndirectObject( page_to_xobject(out->pdf, out->pdf->copyForeignObject(pdf_page)) );
// Register the XObject in the page's XObject dictionary under a fresh "/XO<n>" name.
69 string xobj_res = out->new_resource("XO");
70 out->xobjects.replaceKey(xobj_res, xobject);
// Move the media box origin to (0, 0).
73 m.shift(-media_box.x_min, -media_box.y_min);
// Emit: save graphics state (q), set the matrix (cm), paint the XObject (Do), restore (Q).
76 out->contents += "q " + m.to_string() + " cm " + xobj_res + " Do Q ";
79 int in_page::get_rotate()
81 QPDFObjectHandle rotate = pdf_page.getKey("/Rotate");
84 else if (rotate.isInteger())
86 long long deg = rotate.getIntValue();
87 if (deg < 0 || deg >= 360 || deg % 90)
89 warn("Page #%d: /Rotate must be 0, 90, 180 or 270", index);
97 warn("Page #%d: /Rotate is not an integer", index);
// debug_pages -- write a debugging summary of the given pages.
// NOTE(review): the loop header, the first format argument and the tail of
// the function are on lines not visible in this excerpt.
102 void debug_pages(vector<page *> &pages)
// One line per page: media size as [width height], then the image box
// as [x_min y_min x_max y_max] followed by its [width height].
109 debug("Page #%d: media[%.3f %.3f] image[%.3f %.3f %.3f %.3f][%.3f %.3f]",
111 pg->width, pg->height,
112 pg->image_box.x_min, pg->image_box.y_min, pg->image_box.x_max, pg->image_box.y_max,
113 pg->image_box.width(), pg->image_box.height());
123 static vector<page *> apply_input_xforms(vector<page *> in_pages)
125 vector<page *> out_pages;
127 for (auto pg: in_pages)
129 in_page * in_pg = dynamic_cast<in_page *>(pg);
132 int deg = in_pg->get_rotate();
134 pg = new xform_page(pg, "/Rotate", pdf_rotation_matrix(deg, pg->width, pg->height));
136 out_pages.push_back(pg);
// run_command_list -- run the commands in `cmds` on the list of pages
// and return the resulting list.
// NOTE(review): the loop over `cmds`, the exception handling around the call
// and the return statement are on lines not visible in this excerpt.
142 vector<page *> run_command_list(list<cmd *> &cmds, vector<page *> &pages)
149 debug("# Executing %s", c->def->name);
// The command's executor replaces the page list by its output.
153 pages = c->exec->process(pages);
// A failure is fatal; the message names the command and carries e.what().
157 die("Error in %s: %s", c->def->name, e.what());
166 static void make_info_dict()
168 // Create info dictionary if it did not exist yet
169 QPDFObjectHandle trailer = out_pdf.getTrailer();
170 QPDFObjectHandle info = trailer.getKey("/Info");
173 info = QPDFObjectHandle::newDictionary();
174 trailer.replaceKey("/Info", info);
177 assert(info.isDictionary());
179 info.replaceKey("/Producer", unicode_string("PaperJam"));
181 // Copy entries from the source file's info dictionary
182 QPDFObjectHandle orig_trailer = in_pdf.getTrailer();
183 QPDFObjectHandle orig_info = orig_trailer.getKey("/Info");
184 if (!orig_info.isNull())
186 const string to_copy[] = { "/Title", "/Author", "/Subject", "/Keywords", "/Creator", "/CreationDate" };
187 for (string key: to_copy)
188 info.replaceOrRemoveKey(key, orig_info.getKey(key));
// process -- the main pipeline: read the input PDF, build the list of pages,
// run the commands in `cmds` on it and write the resulting pages to the
// output PDF.
// NOTE(review): several lines of this function (declarations, conditions,
// loop headers and braces) are not visible in this excerpt.
192 void process(list<cmd *> &cmds)
194 debug("### Reading input");
195 in_pdf.processFile(in_name);
// Inherited page attributes are pushed down to the individual page objects.
196 in_pdf.pushInheritedAttributesToPage();
199 vector<QPDFObjectHandle> const &in_pages = in_pdf.getAllPages();
200 vector<page *> pages;
// NOTE(review): in_pages[0] is accessed without a visible check that the
// input has at least one page, and page_copy is not used on any visible line.
202 QPDFObjectHandle page_copy = out_pdf.copyForeignObject(in_pages[0]);
// Wrap every input page in an in_page, passing the incremented counter as its index.
205 for (auto inpg: in_pages)
206 pages.push_back(new in_page(inpg, ++cnt));
// Replace the image boxes by ones computed by Ghostscript
// (NOTE(review): presumably guarded by an option on a line not shown).
209 do_recalc_bbox(pages, in_name);
211 if (!no_auto_transforms)
213 debug("### Applying input transforms");
214 pages = apply_input_xforms(pages);
217 debug("### Running commands");
218 pages = run_command_list(cmds, pages);
220 debug("### Writing output");
227 debug("Page #%d", out_page);
// Start the page with fresh resource, XObject and ExtGState dictionaries,
// then let the page render itself with a default-constructed matrix.
235 out.resources = QPDFObjectHandle::newDictionary();
236 out.resources.replaceKey("/ProcSet", QPDFObjectHandle::parse("[/PDF]"));
237 out.xobjects = QPDFObjectHandle::newDictionary();
238 out.egstates = QPDFObjectHandle::newDictionary();
239 pg->render(&out, pdf_matrix());
// The content accumulated during rendering becomes the page's content stream.
241 QPDFObjectHandle contents = QPDFObjectHandle::newStream(&out_pdf, out.contents);
243 // Create the page object
// NOTE(review): this out_page is a QPDFObjectHandle, while the out_page printed
// with %d above must be an integer -- presumably declared in an outer scope.
244 QPDFObjectHandle out_page = out_pdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
245 out_page.replaceKey("/Type", QPDFObjectHandle::newName("/Page"));
// The media box always starts at (0, 0) and has the size of the page.
246 out_page.replaceKey("/MediaBox", BBox(pg->width, pg->height).to_array());
248 // out_page.replaceKey("/CropBox", pg->image_box.to_array());
249 out_page.replaceKey("/Contents", contents);
// The XObject and ExtGState dictionaries are attached only when non-empty.
250 if (!out.xobjects.getKeys().empty())
251 out.resources.replaceKey("/XObject", out.xobjects);
252 if (!out.egstates.getKeys().empty())
253 out.resources.replaceKey("/ExtGState", out.egstates);
254 out_page.replaceKey("/Resources", out.resources);
255 out_pdf.addPage(out_page, false);
258 // Produce info dictionary
261 // Write the output file
262 QPDFWriter writer(out_pdf, out_name);
267 /*** Re-calculation of bboxes ***/
// gs_bboxes -- run Ghostscript with the bbox device on file `in` and return
// the bounding boxes it reports, one BBox per "%%HiResBoundingBox:" line.
// Every failure (pipe, fork, exec, parsing, non-zero exit status) is fatal.
// NOTE(review): the pipe/fork plumbing and the declarations of the locals
// are on lines not visible in this excerpt.
269 vector<BBox> gs_bboxes(const char *in)
273 die("Cannot create pipe: %m");
277 die("Cannot fork: %m");
// NOTE(review): `in` is passed as a plain argument; a file name starting
// with '-' would presumably be taken as an option by gs -- confirm.
285 execlp("gs", "gs", "-sDEVICE=bbox", "-dSAFER", "-dBATCH", "-dNOPAUSE", "-q", in, NULL);
// Reached only when execlp() itself failed.
286 die("Cannot execute gs: %m");
// Read the output of Ghostscript from the read end of the pipe, line by line.
290 FILE *f = fdopen(pipes[0], "r");
292 die("fdopen failed: %m");
296 while (fgets(line, sizeof(line), f))
298 char *eol = strchr(line, '\n');
300 die("Ghostscript produced too long lines");
// 20 is the length of the "%%HiResBoundingBox: " prefix; four numbers follow it.
303 if (!strncmp(line, "%%HiResBoundingBox: ", 20))
305 double x1, y1, x2, y2;
306 if (sscanf(line+20, "%lf%lf%lf%lf", &x1, &y1, &x2, &y2) != 4)
307 die("Cannot parse Ghostscript output: %s", line);
308 bboxes.push_back(BBox(x1, y1, x2, y2));
// Other lines starting with '%' are dropped; everything else is copied to stderr.
310 else if (line[0] != '%')
311 fprintf(stderr, "%s\n", line);
// Wait for the child; anything other than a normal exit with status 0 is an error.
316 if (waitpid(pid, &stat, 0) < 0)
317 die("wait failed: %m");
318 if (!WIFEXITED(stat) || WEXITSTATUS(stat))
319 die("Ghostscript failed");
324 static void do_recalc_bbox(vector<page *> &pages, const char *in_name)
326 debug("Calling Ghostscript to re-calculate bounding boxes");
327 vector<BBox> bboxes = gs_bboxes(in_name);
328 if (pages.size() != bboxes.size())
329 die("Ghostscript failed to produce the right number of bboxes");
331 for (size_t i=0; i<pages.size(); i++)
332 pages[i]->image_box = bboxes[i];
// xform_page::xform_page -- build a page which shows page `p` transformed
// by the matrix `xf`; `desc` describes the transform.
// NOTE(review): the lines storing the arguments in members, and presumably
// the one applying `xf` to `media`, are not visible in this excerpt.
337 xform_page::xform_page(page *p, const char *desc, pdf_matrix xf)
// Start from a box at the origin with the size of the original page.
344 BBox media(p->width, p->height);
346 width = media.width();
347 height = media.height();
// The image box is the original page's image box transformed by `xf`.
349 image_box = p->image_box;
350 image_box.transform(xf);
353 void xform_page::debug_dump()
355 debug("Transform (%s): [%s]", description, xform.to_string().c_str());
356 orig_page->debug_dump();
359 void xform_page::render(out_context *out, pdf_matrix parent_xform)
361 orig_page->render(out, xform * parent_xform);