2 * Auxiliary functions for processing PDF files
4 * (c) 2018 Martin Mares <mj@ucw.cz>
15 #include "pdf-tools.h"
17 #include <qpdf/QUtil.hh>
18 #include <qpdf/Pl_Concatenate.hh>
// Print a printf-style debugging message to stderr.
//   msg  printf-style format string, followed by its arguments.
// NOTE(review): `args` and `debug_indent` are declared outside the lines visible
// here; any condition guarding the output is likewise not visible.
25 void debug(const char *msg, ...)
// "%*s" pads the empty string to a field width of debug_indent, i.e. it emits
// the current indentation as spaces.
31 fprintf(stderr, "%*s", debug_indent, "");
// Format the message itself using the caller's variadic arguments.
32 vfprintf(stderr, msg, args);
// Print a printf-style warning to stderr, prefixed by "WARNING: ".
//   msg  printf-style format string, followed by its arguments.
// NOTE(review): `args` is set up outside the lines visible here.
37 void warn(const char *msg, ...)
41 fprintf(stderr, "WARNING: ");
// The message follows the prefix on the same line.
42 vfprintf(stderr, msg, args);
// Print a printf-style error message to stderr, prefixed by "ERROR: ".
//   msg  printf-style format string, followed by its arguments.
// NOTE(review): presumably this terminates the program afterwards, but the
// statement doing so is not visible here -- confirm before relying on it.
47 void die(const char *msg, ...)
51 fprintf(stderr, "ERROR: ");
// The message follows the prefix on the same line.
52 vfprintf(stderr, msg, args);
// Report an error given as a printf-style format string and arguments.
// The message is first rendered to a buffer and then printed on a line of its
// own, prefixed by "error: ".
// NOTE(review): `buf`, `args` and whatever happens after the message is printed
// are outside the lines visible here.
58 void bad(const char *msg, ...)
// vsnprintf() never writes more than sizeof(buf) bytes, so an overlong message
// is truncated instead of overflowing the buffer.
63 vsnprintf(buf, sizeof(buf), msg, args);
// Unlike warn() and die(), this message goes to stdout, not stderr.
66 printf("error: %s\n", buf);
70 /*** Transformation matrices ***/
72 // Construct string representation of a transformation matrix
// Returns a string built from the six coefficients m[0] .. m[5].
// NOTE(review): how the formatted coefficients are joined together is not
// visible in the lines shown here.
73 string pdf_matrix::to_string() {
75 for (int i=0; i<6; i++) {
// "%.3f" renders the coefficient in fixed-point notation with three digits
// after the decimal point; snprintf() bounds the output to sizeof(buf).
79 snprintf(buf, sizeof(buf), "%.3f", m[i]);
85 /*** Bounding boxes ***/
// Convert the bounding box to a PDF array object holding four reals in the
// order x_min, y_min, x_max, y_max.
// NOTE(review): presumably the array `a` is what gets returned; the return
// statement is not visible here.
87 QPDFObjectHandle BBox::to_array()
89 QPDFObjectHandle a = QPDFObjectHandle::newArray();
// The second argument of newReal() is the number of decimal places, so every
// coordinate is written with a single digit after the decimal point.
// NOTE(review): one decimal place rounds coordinates to 0.1 unit -- confirm
// that this precision is intended.
90 a.appendItem(QPDFObjectHandle::newReal(x_min, 1));
91 a.appendItem(QPDFObjectHandle::newReal(y_min, 1));
92 a.appendItem(QPDFObjectHandle::newReal(x_max, 1));
93 a.appendItem(QPDFObjectHandle::newReal(y_max, 1));
// Try to read a bounding box from the PDF object `h`.
// NOTE(review): presumably returns false on malformed input and true on
// success; the return statements and the final use of x[] are not visible here.
97 bool BBox::parse(QPDFObjectHandle h)
// Only an array with exactly four items is acceptable.
99 if (!h.isArray() || h.getArrayNItems() != 4)
102 for (int i=0; i<4; i++) {
103 QPDFObjectHandle item = h.getArrayItem(i);
// Every item has to be numeric.
104 if (!item.isNumber())
// Collect the coordinate in x[i]; x is declared outside the visible lines.
106 x[i] = item.getNumericValue();
115 /*** Unicode strings ***/
117 // Construct PDF representation of a UTF-8 string
// Takes a string `s` and returns a PDF string object. A string consisting only
// of characters in the range 0x20 .. 0x7e is stored verbatim; anything else is
// run through iconv from UTF-8 to big-endian UTF-16 first.
// Failures of iconv are reported via die().
118 QPDFObjectHandle unicode_string(string s)
120 // If it is ASCII only, use the string directly
121 bool ascii_only = true;
// Anything outside 0x20 .. 0x7e disqualifies the string.
// NOTE(review): the loop header declaring `c` is not visible here.
123 if (c < 0x20 || c > 0x7e)
126 return QPDFObjectHandle::newString(s);
128 // Use iconv to convert the string to big-endian UTF-16
129 iconv_t conv = iconv_open("UTF-16BE", "UTF-8");
130 if (conv == (iconv_t) -1)
// "%m" is a glibc printf extension which expands to strerror(errno).
131 die("Cannot initialize iconv: %m");
133 char *in_ptr = (char *) s.c_str(); // Work around bad API of iconv()
// NOTE(review): strlen() stops at the first NUL byte, so a string with an
// embedded NUL would be converted only up to that point -- confirm that such
// input cannot occur.
134 size_t in_len = strlen(in_ptr);
135 size_t out_len = 2*in_len + 2; // Worst case (including the BOM)
// NOTE(review): an array whose size is known only at run time is a
// variable-length array, which is a compiler extension in C++; it lives on the
// stack, so very long inputs deserve a second look.
136 char out_buf[out_len];
137 char *out_ptr = out_buf;
// iconv() advances in_ptr/out_ptr and decreases in_len/out_len as it goes.
138 size_t res = iconv(conv, &in_ptr, &in_len, &out_ptr, &out_len);
139 if (res == (size_t) -1)
140 die("iconv failed: %m");
// NOTE(review): the condition guarding this call is not visible here;
// presumably it checks for unconsumed input.
142 die("iconv stopped before the end of input");
146 // Package UTF-16 in a PDF string
// Walk over the bytes actually produced: out_ptr points just past the last
// byte written by iconv().
// NOTE(review): the construction of `out` and the loop body are not visible.
150 for (char *p = out_buf; p < out_ptr; p++)
152 return QPDFObjectHandle::newString(out);
155 /*** Conversion of pages to XObjects ***/
157 static BBox get_trim_box(QPDFObjectHandle page)
159 static const char * const boxes[] = { "/TrimBox", "/CropBox", "/MediaBox", NULL };
160 for (int i=0; boxes[i]; i++)
161 if (page.hasKey(boxes[i]))
162 return BBox(page.getKey(boxes[i]));
163 warn("Page has no trimbox, falling back to A4");
164 return BBox(0, 0, A4_WIDTH, A4_HEIGHT);
167 /* Conversion of pages to XObjects is inspired by CUPS's pdftopdf filter. */
168 class CombineFromContents_Provider : public QPDFObjectHandle::StreamDataProvider {
170 vector<QPDFObjectHandle> contents;
172 CombineFromContents_Provider(const vector<QPDFObjectHandle> &contents) : contents(contents) { }
173 void provideStreamData(int objid UNUSED, int generation UNUSED, Pipeline* pipeline) {
174 Pl_Concatenate concat("concat", pipeline);
175 for (int i=0; i < (int)contents.size(); i++)
176 contents[i].pipeStreamData(&concat, true, false, false);
177 concat.manualFinish();
// Convert a page to a form XObject.
//   out   the QPDF in which the new stream object is created
//   page  the page to convert; it must be a page object
// The XObject gets the page's box as /BBox, the page's /Resources and /Group,
// a scaling /Matrix if the page sets /UserUnit, and the concatenation of the
// page's content streams as its data.
// NOTE(review): the end of this function (presumably returning xo_stream) is
// not visible in the lines shown here.
181 QPDFObjectHandle page_to_xobject(QPDF *out, QPDFObjectHandle page)
183 page.assertPageObject();
// Create an empty stream in the output document; its dictionary is filled in below.
185 QPDFObjectHandle xo_stream = QPDFObjectHandle::newStream(out);
186 QPDFObjectHandle xo_dict = xo_stream.getDict();
// Dictionary entries identifying the stream as a form XObject
188 xo_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
189 xo_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
190 xo_dict.replaceKey("/FormType", QPDFObjectHandle::newInteger(1));
// The bounding box is chosen by get_trim_box()
192 BBox box = get_trim_box(page);
193 xo_dict.replaceKey("/BBox", box.to_array());
// NOTE(review): /Resources is copied without checking hasKey(), unlike /Group
// below. Only the page's own dictionary is consulted, so confirm that
// /Resources is always present there (e.g. not just inherited from a parent
// node of the page tree).
195 xo_dict.replaceKey("/Resources", page.getKey("/Resources"));
196 if (page.hasKey("/Group"))
197 xo_dict.replaceKey("/Group", page.getKey("/Group"));
// A page with /UserUnit u gets the matrix [u 0 0 u 0 0], i.e. scaling by u in
// both directions with no translation. u is written with 3 decimal places,
// the zeroes with none.
199 if (page.hasKey("/UserUnit")) {
200 double u = page.getKey("/UserUnit").getNumericValue();
201 QPDFObjectHandle m = QPDFObjectHandle::newArray();
202 m.appendItem(QPDFObjectHandle::newReal(u, 3));
203 m.appendItem(QPDFObjectHandle::newReal(0, 0));
204 m.appendItem(QPDFObjectHandle::newReal(0, 0));
205 m.appendItem(QPDFObjectHandle::newReal(u, 3));
206 m.appendItem(QPDFObjectHandle::newReal(0, 0));
207 m.appendItem(QPDFObjectHandle::newReal(0, 0));
208 xo_dict.replaceKey("/Matrix", m);
// The data of the XObject are supplied by CombineFromContents_Provider, which
// is given all content streams of the page.
211 vector<QPDFObjectHandle> contents = page.getPageContents();
212 auto ph = PointerHolder<QPDFObjectHandle::StreamDataProvider>(new CombineFromContents_Provider(contents));
// Null is passed for both the filter and the decode parameters of the new data
213 xo_stream.replaceStreamData(ph, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());