2 * Auxiliary functions for processing PDF files
4 * (c) 2018 Martin Mares <mj@ucw.cz>
14 #include <qpdf/QUtil.hh>
15 #include <qpdf/Pl_Concatenate.hh>
17 /*** Transformation matrices ***/
19 // Construct string representation of a transformation matrix
// Serializes the six entries m[0]..m[5] in index order, formatting each one
// with pdf_coord() and requesting 6 fractional digits.
20 string pdf_matrix::to_string() {
22 for (int i=0; i<6; i++) {
// Append the textual form of the i-th entry to the accumulated string s.
25 s += pdf_coord(m[i], 6);
30 /*** Bounding boxes ***/
// Builds a fresh QPDF array object holding the box coordinates in the order
// x_min, y_min, x_max, y_max.
32 QPDFObjectHandle BBox::to_array()
34 QPDFObjectHandle a = QPDFObjectHandle::newArray();
// Each coordinate is added as a real; the second argument (1) is handed to
// QPDFObjectHandle::newReal as its decimal-places parameter.
35 a.appendItem(QPDFObjectHandle::newReal(x_min, 1));
36 a.appendItem(QPDFObjectHandle::newReal(y_min, 1));
37 a.appendItem(QPDFObjectHandle::newReal(x_max, 1));
38 a.appendItem(QPDFObjectHandle::newReal(y_max, 1));
// Produces a space-separated string starting with x_min, y_min and width(),
// each formatted by pdf_coord() with its default number of digits.
// NOTE(review): a fourth term presumably follows width() (likely the height) --
// confirm against the complete expression.
42 string BBox::to_rect()
45 pdf_coord(x_min) + " " +
46 pdf_coord(y_min) + " " +
47 pdf_coord(width()) + " " +
// Tries to read a bounding box from a PDF object and reports success as a bool.
51 bool BBox::parse(QPDFObjectHandle h)
// Only an array with exactly four items is acceptable.
// NOTE(review): the rejection path presumably returns false -- confirm.
53 if (!h.isArray() || h.getArrayNItems() != 4)
// Collect the four items as numeric values into the temporary array x.
56 for (int i=0; i<4; i++) {
57 QPDFObjectHandle item = h.getArrayItem(i);
// NOTE(review): presumably preceded by a check that the item is numeric -- confirm.
60 x[i] = item.getNumericValue();
// Constructs a box from a PDF object. The visible statements are the fallback
// path: a warning is issued and the box is set to (0, 0)-(A4_WIDTH, A4_HEIGHT).
// NOTE(review): the fallback is presumably taken when parse(box) fails -- confirm.
69 BBox::BBox(QPDFObjectHandle box) {
71 warn("Invalid bounding box, falling back to A4");
72 x_min = 0, x_max = A4_WIDTH;
73 y_min = 0, y_max = A4_HEIGHT;
// Transforms this box in place by passing both of its corners through m.
77 void BBox::transform(pdf_matrix &m)
// apply() receives pointers, so it updates the coordinates directly:
// first the (x_min, y_min) corner, then the (x_max, y_max) corner.
79 m.apply(&x_min, &y_min);
80 m.apply(&x_max, &y_max);
// NOTE(review): a matrix that mirrors or rotates can leave min > max after
// these calls; any re-normalisation would have to follow here -- confirm.
// Takes a matrix by reference and returns a BBox by value.
// NOTE(review): presumably returns a copy of this box with m applied, leaving
// *this unchanged -- confirm against the body.
87 BBox BBox::transformed(pdf_matrix &m)
// Grows this box in place so that it also covers the box `with`:
// the minima take the smaller value, the maxima the larger one.
94 void BBox::join(BBox &with)
96 x_min = min(x_min, with.x_min);
97 x_max = max(x_max, with.x_max);
98 y_min = min(y_min, with.y_min);
99 y_max = max(y_max, with.y_max);
// File-local helper taking a value x and two bounds named min and max.
// NOTE(review): presumably returns x limited to the range [min, max] -- confirm
// against the body.
102 static double clamp(double x, double min, double max)
// Restricts this box in place to the extent of `with`: every coordinate is
// passed through clamp() together with the bounds of `with` on the same axis.
111 void BBox::intersect(BBox &with)
// Horizontal edges are bounded by with.x_min .. with.x_max.
113 x_min = clamp(x_min, with.x_min, with.x_max);
114 x_max = clamp(x_max, with.x_min, with.x_max);
// Vertical edges are bounded by with.y_min .. with.y_max.
115 y_min = clamp(y_min, with.y_min, with.y_max);
116 y_max = clamp(y_max, with.y_min, with.y_max);
// Takes a single amount `by` and returns nothing, so it can only act on this
// box itself.
// NOTE(review): presumably moves every edge outwards by `by` -- confirm.
119 void BBox::enlarge(double by)
// Takes a single amount `by` and returns a BBox by value.
// NOTE(review): presumably returns an enlarged copy and leaves *this
// unchanged -- confirm against the body.
127 BBox BBox::enlarged(double by)
134 /*** Unicode strings ***/
136 // Construct PDF representation of a UTF-8 string
// Wraps the string s in a QPDF string object. One return path passes s
// through unchanged; the other converts it from UTF-8 to UTF-16BE with iconv
// and builds the QPDF string from the converted bytes.
137 QPDFObjectHandle unicode_string(string s)
139 // If it is ASCII only, use the string directly
140 bool ascii_only = true;
// Characters outside the printable range 0x20..0x7e are singled out here.
// NOTE(review): presumably this clears ascii_only -- confirm.
142 if (c < 0x20 || c > 0x7e)
145 return QPDFObjectHandle::newString(s);
147 // Use iconv to convert the string to big-endian UTF-16
148 iconv_t conv = iconv_open("UTF-16BE", "UTF-8");
// iconv_open() signals failure by returning (iconv_t) -1.
149 if (conv == (iconv_t) -1)
150 die("Cannot initialize iconv: %m");
152 char *in_ptr = (char *) s.c_str(); // Work around bad API of iconv()
// NOTE(review): strlen() stops at the first NUL byte, so a string with an
// embedded NUL would be converted only up to that point -- confirm that such
// input cannot occur.
153 size_t in_len = strlen(in_ptr);
154 size_t out_len = 2*in_len + 2; // Worst case (including the BOM)
// NOTE(review): out_len is a run-time value, so this is a variable-length
// array -- a compiler extension in C++ that lives on the stack.
155 char out_buf[out_len];
156 char *out_ptr = out_buf;
// iconv() advances in_ptr/out_ptr and decreases in_len/out_len as it goes.
157 size_t res = iconv(conv, &in_ptr, &in_len, &out_ptr, &out_len);
158 if (res == (size_t) -1)
159 die("iconv failed: %m");
// NOTE(review): presumably guarded by a check that input bytes remain
// (in_len != 0) -- confirm.
161 die("iconv stopped before the end of input");
165 // Package UTF-16 in a PDF string
// Walk over exactly the bytes iconv produced: from out_buf up to out_ptr.
169 for (char *p = out_buf; p < out_ptr; p++)
171 return QPDFObjectHandle::newString(out);
174 /*** Conversion of pages to XObjects ***/
// Picks the box that describes the given page: the first key present among
// /TrimBox, /CropBox and /MediaBox (tried in that order) is converted to a BBox.
// If the page has none of them, a warning is issued and an A4-sized box
// anchored at the origin is returned.
176 static BBox get_trim_box(QPDFObjectHandle page)
// NULL-terminated list of candidate keys, in order of preference.
178 static const char * const boxes[] = { "/TrimBox", "/CropBox", "/MediaBox", NULL };
179 for (int i=0; boxes[i]; i++)
180 if (page.hasKey(boxes[i]))
181 return BBox(page.getKey(boxes[i]));
182 warn("Page has no trimbox, falling back to A4");
183 return BBox(0, 0, A4_WIDTH, A4_HEIGHT);
186 /* Conversion of pages to XObjects is inspired by CUPS's pdftopdf filter. */
// Stream data provider that supplies the data of several content streams,
// written one after another into a single pipeline.
187 class CombineFromContents_Provider : public QPDFObjectHandle::StreamDataProvider {
// The streams whose data will be supplied (the provider keeps its own copy
// of the vector of handles).
189 vector<QPDFObjectHandle> contents;
191 CombineFromContents_Provider(const vector<QPDFObjectHandle> &contents) : contents(contents) { }
// Provider callback: the object id and generation are not used; all output
// goes to `pipeline`.
192 void provideStreamData(int objid UNUSED, int generation UNUSED, Pipeline* pipeline) {
// Every stream is piped through one Pl_Concatenate stage placed in front of
// the target pipeline.
193 Pl_Concatenate concat("concat", pipeline);
194 for (int i=0; i < (int)contents.size(); i++)
// NOTE(review): the flags (true, false, false) presumably mean
// filter / normalize / compress as in the older QPDF overload -- confirm
// against the QPDF version in use.
195 contents[i].pipeStreamData(&concat, true, false, false);
// Explicitly finish the concatenation stage once all streams were piped.
196 concat.manualFinish();
// Creates a new stream object in the QPDF `out` and fills it in as a form
// XObject that carries the box, resources, optional group, optional scaling
// and the combined content of the given page.
// NOTE(review): presumably returns the new stream (xo_stream) -- confirm.
200 QPDFObjectHandle page_to_xobject(QPDF *out, QPDFObjectHandle page)
// Fails unless `page` really is a page object.
202 page.assertPageObject();
204 QPDFObjectHandle xo_stream = QPDFObjectHandle::newStream(out);
205 QPDFObjectHandle xo_dict = xo_stream.getDict();
// Mark the stream dictionary as a form XObject of form type 1.
207 xo_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
208 xo_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
209 xo_dict.replaceKey("/FormType", QPDFObjectHandle::newInteger(1));
// The form's bounding box is the page box chosen by get_trim_box().
211 BBox box = get_trim_box(page);
212 xo_dict.replaceKey("/BBox", box.to_array());
// NOTE(review): /Resources is read from the page dictionary itself; resources
// inherited from a parent node of the page tree would presumably be missed --
// confirm whether inherited attributes were pushed down to pages beforehand.
214 xo_dict.replaceKey("/Resources", page.getKey("/Resources"));
// The page's /Group entry is carried over only when the page has one.
215 if (page.hasKey("/Group"))
216 xo_dict.replaceKey("/Group", page.getKey("/Group"));
// A page with /UserUnit gets a /Matrix of the form [u 0 0 u 0 0], i.e. both
// axes are scaled by the same factor u.
218 if (page.hasKey("/UserUnit")) {
219 double u = page.getKey("/UserUnit").getNumericValue();
220 QPDFObjectHandle m = QPDFObjectHandle::newArray();
221 m.appendItem(QPDFObjectHandle::newReal(u, 3));
222 m.appendItem(QPDFObjectHandle::newReal(0, 0));
223 m.appendItem(QPDFObjectHandle::newReal(0, 0));
224 m.appendItem(QPDFObjectHandle::newReal(u, 3));
225 m.appendItem(QPDFObjectHandle::newReal(0, 0));
226 m.appendItem(QPDFObjectHandle::newReal(0, 0));
227 xo_dict.replaceKey("/Matrix", m);
// The stream data comes from a provider that concatenates all content streams
// of the page; null objects are passed for the filter and decode parameters.
230 vector<QPDFObjectHandle> contents = page.getPageContents();
231 auto ph = PointerHolder<QPDFObjectHandle::StreamDataProvider>(new CombineFromContents_Provider(contents));
232 xo_stream.replaceStreamData(ph, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
236 /*** Formatting of coordinates ***/
// Formats the number x in fixed-point notation with `digits` fractional
// digits; the two tests that follow look at trailing '0' characters and at a
// trailing '.' of the formatted text.
// NOTE(review): presumably both tests strip the matched character so the
// result carries no redundant zeros or decimal point -- confirm.
238 string pdf_coord(double x, uint digits)
// NOTE(review): the precision taken by "%.*f" is expected to be an int, while
// `digits` is unsigned -- harmless for small values, but worth a cast.
241 snprintf(buf, sizeof(buf), "%.*f", digits, x);
// NOTE(review): with digits == 0 the text has no decimal point, so if this
// loop removes zeros it would also eat zeros of the integer part (e.g. "100")
// -- confirm that callers never pass 0.
243 while (n > 0 && buf[n-1] == '0')
245 if (n > 0 && buf[n-1] == '.')