termux-packages/packages/texlive-bin/pdftoepdf-poppler0.83.0.cc

1112 lines
36 KiB
C++

/*
Copyright 1996-2017 Han The Thanh, <thanh@pdftex.org>
This file is part of pdfTeX.
pdfTeX is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
pdfTeX is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
This is based on the patch texlive-poppler-0.59.patch <2017-09-19> at
https://git.archlinux.org/svntogit/packages.git/plain/texlive-bin/trunk
by Arch Linux. A few modifications were made to avoid a crash for
some kinds of PDF images, such as figure_missing.pdf in gnuplot.
Poppler must be version 0.83.0 or newer.
POPPLER_VERSION must be defined.
*/
/* Do this early in order to avoid a conflict between
MINGW32 <rpcndr.h> defining 'boolean' as 'unsigned char' and
<kpathsea/types.h> defining Pascal's boolean as 'int'.
*/
#include <w2c/config.h>
#include <kpathsea/lib.h>
#include <stdlib.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#ifdef POPPLER_VERSION
#include <dirent.h>
#include <poppler-config.h>
#include <goo/GooString.h>
#include <goo/gmem.h>
#include <goo/gfile.h>
#define GString GooString
#else
#error POPPLER_VERSION should be defined.
#endif
#include <assert.h>
#include "Object.h"
#include "Stream.h"
#include "Array.h"
#include "Dict.h"
#include "XRef.h"
#include "Catalog.h"
#include "Link.h"
#include "Page.h"
#include "GfxFont.h"
#include "PDFDoc.h"
#include "GlobalParams.h"
#include "Error.h"
// This file is mostly C and not very much C++; it's just used to interface
// the functions of xpdf, which are written in C++.
extern "C" {
#include <pdftexdir/ptexmac.h>
#include <pdftexdir/pdftex-common.h>
// These functions from pdftex.web get declared in pdftexcoerce.h in the
// usual web2c way, but we cannot include that file here because C++
// does not allow it.
extern int getpdfsuppresswarningpagegroup(void);
extern integer getpdfsuppressptexinfo(void);
extern integer zround(double);
}
// The prefix "PTEX" for the PDF keys is special to pdfTeX;
// this has been registered with Adobe by Hans Hagen.
// Prefix prepended to the pdfTeX-specific keys that write_epdf() emits
// (FileName, PageNumber, InfoDict).
#define pdfkeyprefix "PTEX"
// Bit masks tested against the value returned by getpdfsuppressptexinfo().
// write_epdf() omits the corresponding PTEX entry when the bit is set.
// MASK_SUPPRESS_PTEX_FULLBANNER is not referenced in this file.
#define MASK_SUPPRESS_PTEX_FULLBANNER 0x01
#define MASK_SUPPRESS_PTEX_FILENAME 0x02
#define MASK_SUPPRESS_PTEX_PAGENUMBER 0x04
#define MASK_SUPPRESS_PTEX_INFODICT 0x08
// When copying the Resources of the selected page, all objects are copied
// recursively top-down. Indirect objects however are not fetched during
// copying, but get a new object number from pdfTeX and are then
// appended to a linked list. Duplicates are checked and removed from the
// list of indirect objects during appending.
// Kind of an indirect object queued in the InObj list; writeRefs() uses it
// to decide how (and whether) the object is written.
enum InObjType {
objFont, // font dictionary; written through copyFontDict()
objFontDesc, // font descriptor; skipped by writeRefs()
objOther // anything else; written through copyObject()
};
// One node of the singly linked list of indirect objects that have been
// referenced while copying and still have to be (or already were) written.
// Nodes are created by addInObj() and freed by delete_document().
struct InObj {
Ref ref; // ref in original PDF
InObjType type; // object type
InObj *next; // next entry in list of indirect objects
int num; // new object number in output PDF
fd_entry *fd; // pointer to /FontDescriptor object structure
int enc_objnum; // Encoding for objFont
int written; // has it been written to output PDF?
};
// One node of the list of encodings that writeEncodings() must emit.
// Nodes are pushed by addEncoding() and released by writeEncodings().
struct UsedEncoding {
int enc_objnum; // object number reserved for the encoding
GfxFont *font; // font whose glyph names define the encoding
UsedEncoding *next; // next entry in the list
};
// Indirect-object list of the document currently being written; write_epdf()
// loads it from and stores it back to the PdfDocument record.
static InObj *inObjList;
// Encodings collected during the current write_epdf() call.
static UsedEncoding *encodingList;
// Set once read_pdf_info() has created the poppler GlobalParams.
static bool isInit = false;
// --------------------------------------------------------------------
// Maintain list of open embedded PDF files
// --------------------------------------------------------------------
// Record for one embedded PDF file; records are chained in pdfDocuments.
struct PdfDocument {
char *file_name; // copy of the file name (xstrdup), freed in delete_document()
PDFDoc *doc; // poppler document object, owned by this record
XRef *xref; // xref of doc; set at the end of read_pdf_info()
InObj *inObjList; // indirect objects collected so far for this document
int occurences; // number of references to the document; the doc can be
// deleted when this is negative
PdfDocument *next; // next record in the list
};
// Head of the list of open embedded PDF documents.
static PdfDocument *pdfDocuments = 0;
// xref of the document currently being processed.
static XRef *xref = 0;
// Looks up the PdfDocument record whose file name equals file_name.
// If one exists, its xref becomes the current xref, its reference count is
// incremented and it is returned.
// Otherwise a new record is created (reference count 0, xref cleared), the
// file is opened with poppler, and the record is pushed onto the front of
// the pdfDocuments list. Fails via pdftex_fail() when the document cannot
// be read or may not be printed.
static PdfDocument *find_add_document(char *file_name)
{
    PdfDocument *entry;

    for (entry = pdfDocuments; entry != 0; entry = entry->next) {
        if (strcmp(entry->file_name, file_name) == 0)
            break;
    }
    if (entry != 0) {
        // known document: make its xref current and count the new use
        xref = entry->xref;
        entry->occurences += 1;
        return entry;
    }

    entry = new PdfDocument;
    entry->file_name = xstrdup(file_name);
    xref = 0;
    entry->xref = 0;
    entry->occurences = 0;
    // PDFDoc takes ownership of the GString holding the name
    entry->doc = new PDFDoc(new GString(entry->file_name));
    if (!(entry->doc->isOk() && entry->doc->okToPrint())) {
        pdftex_fail("xpdf: reading PDF image failed");
    }
    entry->inObjList = 0;

    // link in at the head of the list
    entry->next = pdfDocuments;
    pdfDocuments = entry;
    return entry;
}
// Removes pdf_doc from the pdfDocuments list and releases everything it
// owns: its InObj list, the poppler document and the file name copy.
// Does nothing when pdf_doc is not in the list. The global xref is set to
// the document's xref before the poppler document is deleted.
static void delete_document(PdfDocument * pdf_doc)
{
    PdfDocument **link;

    // find the pointer that refers to pdf_doc
    for (link = &pdfDocuments; *link != 0; link = &(*link)->next) {
        if (*link == pdf_doc)
            break;
    }
    if (*link == 0)
        return;                 // should not happen
    *link = pdf_doc->next;      // unlink

    // free the list of indirect objects
    InObj *cur = pdf_doc->inObjList;
    while (cur != 0) {
        InObj *following = cur->next;
        delete cur;
        cur = following;
    }

    xref = pdf_doc->xref;
    delete pdf_doc->doc;
    xfree(pdf_doc->file_name);
    delete pdf_doc;
}
// --------------------------------------------------------------------
// Pushes a new entry for gfont onto the front of encodingList, reserves a
// fresh object number for its encoding and returns that number.
static int addEncoding(GfxFont * gfont)
{
    UsedEncoding *entry = new UsedEncoding;

    entry->next = encodingList;
    encodingList = entry;
    entry->font = gfont;
    entry->enc_objnum = pdfnewobjnum();
    return entry->enc_objnum;
}
// Convenience wrappers around addInObj(), one per InObjType.
// addFont registers a font dictionary together with its font descriptor
// structure and the object number of its encoding.
#define addFont(ref, fd, enc_objnum) \
addInObj(objFont, ref, fd, enc_objnum)
// addFontDesc is only used to avoid writing the original FontDescriptor
// from the PDF file.
#define addFontDesc(ref, fd) \
addInObj(objFontDesc, ref, fd, 0)
// addOther registers any other indirect object (no fd, no encoding).
#define addOther(ref) \
addInObj(objOther, ref, 0, 0)
// Registers the indirect object `ref` of the given type in inObjList and
// returns its object number in the output PDF.
// If an entry with the same num/gen already exists, its number is returned
// and nothing is added. Otherwise a new, not-yet-written entry is appended
// to the END of the list; the number comes from get_fd_objnum(fd) for
// objFontDesc and from pdfnewobjnum() for every other type.
// fd and e are stored as the entry's font descriptor and encoding number.
// Fails via pdftex_fail() when ref.num is 0.
static int addInObj(InObjType type, Ref ref, fd_entry * fd, int e)
{
    if (ref.num == 0)
        pdftex_fail("PDF inclusion: invalid reference");

    // look for a duplicate, remembering the last node on the way
    InObj *tail = 0;
    for (InObj *cur = inObjList; cur != 0; cur = cur->next) {
        if (cur->ref.num == ref.num && cur->ref.gen == ref.gen)
            return cur->num;
        tail = cur;
    }

    InObj *node = new InObj;
    node->ref = ref;
    node->type = type;
    node->next = 0;
    node->fd = fd;
    node->enc_objnum = e;
    node->written = 0;

    // it is important to add new objects at the end of the list,
    // because new objects are being added while the list is being
    // written out.
    if (tail == 0)
        inObjList = node;
    else
        tail->next = node;

    if (type == objFontDesc)
        node->num = get_fd_objnum(fd);
    else
        node->num = pdfnewobjnum();
    return node->num;
}
#if 0 /* unused */
// Disabled code: looks up the output object number already assigned to
// `ref` in inObjList and fails when the object has not been registered.
static int getNewObjectNumber(Ref ref)
{
InObj *p;
if (inObjList == 0) {
pdftex_fail("No objects copied yet");
} else {
for (p = inObjList; p != 0; p = p->next) {
if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
return p->num;
}
}
pdftex_fail("Object not yet copied: %i %i", ref.num, ref.gen);
}
#ifdef _MSC_VER
/* Never reached, but without __attribute__((noreturn)) for pdftex_fail()
MSVC 5.0 requires an int return value. */
return -60000;
#endif
}
#endif
// Forward declaration: copyDictEntry() calls copyObject(), which is defined
// further down and itself reaches copyDictEntry() through copyDict().
static void copyObject(Object *);
// Writes the PDF name "/<s>" to the output.
// ASCII letters, ASCII digits and the characters '_', '.', '-', '+' are
// written as they are; every other byte is written as "#XX" (two uppercase
// hex digits).
// The classification uses explicit ASCII ranges on an unsigned char value
// rather than <ctype.h> functions: passing a negative plain char to
// isdigit()/isupper()/islower() is undefined behaviour, and their result
// depends on the current locale.
static void copyName(char *s)
{
    pdf_puts("/");
    for (; *s != 0; s++) {
        const unsigned char c = (unsigned char) *s;
        const bool plain = (c >= '0' && c <= '9')
            || (c >= 'A' && c <= 'Z')
            || (c >= 'a' && c <= 'z')
            || c == '_' || c == '.' || c == '-' || c == '+';
        if (plain)
            pdfout(c);
        else
            pdf_printf("#%.2X", (unsigned int) c);
    }
}
// Writes entry number i of the dictionary obj as "/Key value\n".
// The value is taken without resolving references (dictGetValNF), so
// indirect values stay indirect in the output.
static void copyDictEntry(Object * obj, int i)
{
    copyName((char *) obj->dictGetKey(i));
    pdf_puts(" ");
    Object value = obj->dictGetValNF(i).copy();
    copyObject(&value);
    pdf_puts("\n");
}
// Writes all entries of the dictionary obj, one per line, without the
// surrounding "<<" / ">>". Fails via pdftex_fail() if obj is not a dict.
static void copyDict(Object * obj)
{
    if (!obj->isDict()) {
        pdftex_fail("PDF inclusion: invalid dict type <%s>",
                    obj->getTypeName());
    }
    const int count = obj->dictGetLength();
    for (int idx = 0; idx < count; idx++)
        copyDictEntry(obj, idx);
}
// Writes the font dictionary obj as "<< ... >>", replacing its
// /FontDescriptor, /BaseFont and /Encoding entries by references to the
// objects pdfTeX generates for r (r->fd and r->enc_objnum).
// All other entries are copied unchanged.
// Fails via pdftex_fail() if obj is not a dict; r must be of type objFont.
//
// The three replaced keys are matched exactly. A prefix comparison would
// also drop unrelated keys that merely start with one of these names.
static void copyFontDict(Object * obj, InObj * r)
{
    if (!obj->isDict())
        pdftex_fail("PDF inclusion: invalid dict type <%s>",
                    obj->getTypeName());
    pdf_puts("<<\n");
    assert(r->type == objFont); // FontDescriptor is in fd_tree
    for (int i = 0, l = obj->dictGetLength(); i < l; ++i) {
        const char *key = obj->dictGetKey(i);
        if (strcmp(key, "FontDescriptor") == 0
            || strcmp(key, "BaseFont") == 0
            || strcmp(key, "Encoding") == 0)
            continue;           // skip original values
        copyDictEntry(obj, i);
    }
    // write new FontDescriptor, BaseFont, and Encoding
    pdf_printf("/FontDescriptor %d 0 R\n", get_fd_objnum(r->fd));
    pdf_printf("/BaseFont %d 0 R\n", get_fn_objnum(r->fd));
    pdf_printf("/Encoding %d 0 R\n", r->enc_objnum);
    pdf_puts(">>");
}
// Resets str and copies every byte it delivers to the output.
// pdflastbyte is set to the last byte copied, or 0 for an empty stream.
static void copyStream(Stream * str)
{
    int last = 0;

    str->reset();
    for (int ch = str->getChar(); ch != EOF; ch = str->getChar()) {
        pdfout(ch);
        last = ch;
    }
    pdflastbyte = last;
}
// Writes "/ProcSet [ /Name1 /Name2 ... ]\n" from the array obj.
// Fails via pdftex_fail() if obj is not an array or if any element is not
// a name.
static void copyProcSet(Object * obj)
{
    if (!obj->isArray()) {
        pdftex_fail("PDF inclusion: invalid ProcSet array type <%s>",
                    obj->getTypeName());
    }
    pdf_puts("/ProcSet [ ");
    const int count = obj->arrayGetLength();
    for (int idx = 0; idx < count; idx++) {
        Object entry = obj->arrayGetNF(idx).copy();
        if (!entry.isName()) {
            pdftex_fail("PDF inclusion: invalid ProcSet entry type <%s>",
                        entry.getTypeName());
        }
        copyName((char *) entry.getName());
        pdf_puts(" ");
    }
    pdf_puts("]\n");
}
// When true, fonts embedded as Type1C (/FontFile3 with /Subtype /Type1C)
// are also treated as replaceable.
#define REPLACE_TYPE1C true
// Tells whether the font described by the descriptor dict fontdesc carries
// an embedded font program this file knows how to replace:
// a /FontFile stream, or (if REPLACE_TYPE1C) a /FontFile3 stream whose
// /Subtype is the name Type1C. Returns false if fontdesc is not a dict.
static bool embeddableFont(Object * fontdesc)
{
    if (!fontdesc->isDict())
        return false;

    Object fontfile = fontdesc->dictLookup("FontFile");
    if (fontfile.isStream())
        return true;
    if (!REPLACE_TYPE1C)
        return false;

    fontfile = fontdesc->dictLookup("FontFile3");
    if (!fontfile.isStream())
        return false;
    Object ffsubtype = fontfile.streamGetDict()->lookup("Subtype");
    if (!ffsubtype.isName())
        return false;
    return strcmp(ffsubtype.getName(), "Type1C") == 0;
}
// Writes one entry "/tag N 0 R" of a /Font resource dictionary for the
// indirect font object fontRef.
// - If fontRef is already in inObjList, the number assigned earlier is reused.
// - If the font qualifies for replacement (see the condition below), a font
//   descriptor structure is created, the original FontDescriptor is
//   registered via addFontDesc(), and the font is registered via addFont()
//   with a new encoding from addEncoding().
// - Otherwise the reference is copied through copyObject().
// The order of the addFontDesc/addEncoding/addFont calls determines the
// order in which object numbers are requested, so it is left untouched.
static void copyFont(char *tag, Object * fontRef)
{
Object fontdict, subtype, basefont, fontdescRef, fontdesc, charset,
stemV;
GfxFont *gfont;
fd_entry *fd;
fm_entry *fontmap;
// Check whether the font has already been embedded before analysing it.
InObj *p;
Ref ref = fontRef->getRef();
for (p = inObjList; p; p = p->next) {
if (p->ref.num == ref.num && p->ref.gen == ref.gen) {
copyName(tag);
pdf_printf(" %d 0 R ", p->num);
return;
}
}
// Only handle included Type1 (and Type1C) fonts; anything else will be copied.
// Type1C fonts are replaced by Type1 fonts, if REPLACE_TYPE1C is true.
fontdict = fontRef->fetch(xref);
fontdesc = Object(objNull);
if (fontdict.isDict()) {
subtype = fontdict.dictLookup("Subtype");
basefont = fontdict.dictLookup("BaseFont");
// NF lookup: keep the reference itself so it can be registered below
fontdescRef = fontdict.dictLookupNF("FontDescriptor").copy();
if (fontdescRef.isRef()) {
fontdesc = fontdescRef.fetch(xref);
}
}
// Replacement requires: not disabled by fixedinclusioncopyfont, a /Type1
// font with a name as BaseFont, an indirect FontDescriptor that is a dict
// with an embeddable font file, and a font map entry for the BaseFont.
if (!fixedinclusioncopyfont && fontdict.isDict()
&& subtype.isName()
&& !strcmp(subtype.getName(), "Type1")
&& basefont.isName()
&& fontdescRef.isRef()
&& fontdesc.isDict()
&& embeddableFont(&fontdesc)
&& (fontmap = lookup_fontmap((char *)basefont.getName())) != NULL) {
// round /StemV value, since the PDF input is a float
// (see Font Descriptors in PDF reference), but we only store an
// integer, since we don't want to change the struct.
// NOTE(review): stemV is read with getNum() without a prior type check;
// confirm how poppler reacts when /StemV is missing or not a number.
stemV = fontdesc.dictLookup("StemV");
fd = epdf_create_fontdescriptor(fontmap, zround(stemV.getNum()));
charset = fontdesc.dictLookup("CharSet");
// a string /CharSet on a subsettable font marks individual glyphs;
// in every other case the whole font is embedded
if (!charset.isNull() &&
charset.isString() && is_subsetable(fontmap))
epdf_mark_glyphs(fd, (char *)charset.getString()->c_str());
else
embed_whole_font(fd);
addFontDesc(fontdescRef.getRef(), fd);
copyName(tag);
gfont = GfxFont::makeFont(xref, tag, fontRef->getRef(),
fontdict.getDict());
// addEncoding() is an argument of addFont(), so the encoding's object
// number is requested before the font's own number
pdf_printf(" %d 0 R ", addFont(fontRef->getRef(), fd,
addEncoding(gfont)));
} else {
copyName(tag);
pdf_puts(" ");
copyObject(fontRef);
}
}
// Writes the "/Font << ... >>" resource dictionary from the dict obj.
// Entries that are references go through copyFont(); entries that are
// direct dictionaries are copied inline (some programs generate PDFs with
// embedded font objects). Any other entry type, or a non-dict obj, is a
// fatal error.
static void copyFontResources(Object * obj)
{
    if (!obj->isDict()) {
        pdftex_fail("PDF inclusion: invalid font resources dict type <%s>",
                    obj->getTypeName());
    }
    pdf_puts("/Font << ");
    const int count = obj->dictGetLength();
    for (int idx = 0; idx < count; idx++) {
        Object font = obj->dictGetValNF(idx).copy();
        if (font.isRef()) {
            copyFont((char *) obj->dictGetKey(idx), &font);
            continue;
        }
        if (font.isDict()) {
            copyName((char *) obj->dictGetKey(idx));
            pdf_puts(" ");
            copyObject(&font);
            continue;
        }
        pdftex_fail("PDF inclusion: invalid font in reference type <%s>",
                    font.getTypeName());
    }
    pdf_puts(">>\n");
}
// Copies one entry of the Resources dictionary that is neither /Font nor
// /ProcSet (write_epdf handles those) as "/key value".
// A "Subtype" entry must be a name and every other entry must be a dict;
// anything else is reported with a warning and left out.
static void copyOtherResources(Object * obj, char *key)
{
    const bool isSubtype = (strcmp("Subtype", key) == 0);

    if (isSubtype && !obj->isName()) {
        pdftex_warn("PDF inclusion: Subtype in Resources dict is not a name"
                    " (key '%s', type <%s>); ignored.",
                    key, obj->getTypeName());
        return;
    }
    if (!isSubtype && !obj->isDict()) {
        //FIXME: Write the message only to the log file
        pdftex_warn("PDF inclusion: invalid other resource which is no dict"
                    " (key '%s', type <%s>); ignored.",
                    key, obj->getTypeName());
        return;
    }
    copyName(key);
    pdf_puts(" ");
    copyObject(obj);
}
// Converts a double to its decimal string form for PDF output; very small
// and very large numbers are NOT converted to scientific notation.
// The value is rounded to 6 fractional digits, trailing zeros are dropped,
// and magnitudes below 0.5E-6 yield "0".
// n must be a number or real conforming to the implementation limits
// of PDF as specified in appendix C.1 of the PDF Ref; in particular its
// integer part must fit into an int.
// The result lives in a static buffer that the next call overwrites.
static char *convertNumToPDF(double n)
{
    static const int precision = 6;
    static const int fact = (int) 1E6;      // must be 10^precision
    static const double epsilon = 0.5E-6;   // 2epsilon must be 10^-precision
    static char buf[64];
    char *out = buf;

    // very small values collapse to plain zero
    if (fabs(n) < epsilon) {
        *out++ = '0';
        *out = '\0';
        return buf;
    }

    // sign, then continue with the magnitude
    if (n < 0) {
        *out++ = '-';
        n = -n;
    }
    n += epsilon;               // for rounding

    // integer part (sprintf also terminates the string for us)
    const int whole = (int) floor(n);
    n -= whole;
    out += sprintf(out, "%d", whole);

    // fractional part, at most 'precision' digits
    int frac = (int) floor(n * fact);
    if (frac != 0) {
        *out++ = '.';
        // lay down all digits back to front, zero padded
        for (int pos = precision - 1; pos >= 0; pos--) {
            out[pos] = (char) ('0' + frac % 10);
            frac /= 10;
        }
        // cut the string in front of the trailing zeros
        int len = precision;
        while (len > 0 && out[len - 1] == '0')
            len--;
        out[len] = '\0';
    }
    return buf;
}
// Writes obj to the output in PDF syntax, recursing into arrays, dicts and
// streams.
// - Strings without embedded NUL bytes are written as literal strings with
//   '(', ')' and '\' escaped and bytes below 0x20 or above 0x7F in octal;
//   strings containing NUL bytes are written as hex strings.
// - Streams are copied undecoded, together with their dictionary.
// - References are not followed: the target is registered via addOther()
//   and the new object number is written as "N 0 R".
// Fails via pdftex_fail() for a reference with object number 0 and for
// object types that cannot be copied.
static void copyObject(Object * obj)
{
    if (obj->isBool()) {
        pdf_printf("%s", obj->getBool()? "true" : "false");
        return;
    }
    if (obj->isInt()) {
        pdf_printf("%i", obj->getInt());
        return;
    }
    if (obj->isReal()) {
        pdf_printf("%s", convertNumToPDF(obj->getReal()));
        return;
    }
    if (obj->isNum()) {
        pdf_printf("%s", convertNumToPDF(obj->getNum()));
        return;
    }
    if (obj->isString()) {
        GString *str = (GooString *) obj->getString();
        char *text = (char *) str->c_str();
        const int len = str->getLength();
        if (strlen(text) == (unsigned int) len) {
            // no embedded NUL: literal string
            pdf_puts("(");
            for (; *text != 0; text++) {
                const int ch = (unsigned char) *text;
                if (ch == '(' || ch == ')' || ch == '\\')
                    pdf_printf("\\%c", ch);
                else if (ch < 0x20 || ch > 0x7F)
                    pdf_printf("\\%03o", ch);
                else
                    pdfout(ch);
            }
            pdf_puts(")");
        } else {
            // embedded NUL bytes: hex string
            pdf_puts("<");
            for (int idx = 0; idx < len; idx++) {
                const int ch = str->getChar(idx) & 0xFF;
                pdf_printf("%.2x", ch);
            }
            pdf_puts(">");
        }
        return;
    }
    if (obj->isName()) {
        copyName((char *) obj->getName());
        return;
    }
    if (obj->isNull()) {
        pdf_puts("null");
        return;
    }
    if (obj->isArray()) {
        pdf_puts("[");
        const int count = obj->arrayGetLength();
        for (int idx = 0; idx < count; idx++) {
            Object element = obj->arrayGetNF(idx).copy();
            // names begin with '/', so only other elements get a separator
            if (!element.isName())
                pdf_puts(" ");
            copyObject(&element);
        }
        pdf_puts("]");
        return;
    }
    if (obj->isDict()) {
        pdf_puts("<<\n");
        copyDict(obj);
        pdf_puts(">>");
        return;
    }
    if (obj->isStream()) {
        pdf_puts("<<\n");
        copyDict(obj->getStream()->getDictObject());
        pdf_puts(">>\n");
        pdf_puts("stream\n");
        copyStream(obj->getStream()->getUndecodedStream());
        pdf_puts("\nendstream");
        return;
    }
    if (obj->isRef()) {
        const Ref target = obj->getRef();
        if (target.num == 0) {
            pdftex_fail
                ("PDF inclusion: reference to invalid object"
                 " (is the included pdf broken?)");
        } else
            pdf_printf("%d 0 R", addOther(target));
        return;
    }
    pdftex_fail("PDF inclusion: type <%s> cannot be copied",
                obj->getTypeName());
}
static void writeRefs()
{
InObj *r;
for (r = inObjList; r != 0; r = r->next) {
if (!r->written) {
r->written = 1;
Object obj1 = xref->fetch(r->ref.num, r->ref.gen);
if (r->type == objFont) {
assert(!obj1.isStream());
pdfbeginobj(r->num, 2); // \pdfobjcompresslevel = 2 is for this
copyFontDict(&obj1, r);
pdf_puts("\n");
pdfendobj();
} else if (r->type != objFontDesc) { // /FontDescriptor is written via write_fontdescriptor()
if (obj1.isStream())
pdfbeginobj(r->num, 0);
else
pdfbeginobj(r->num, 2); // \pdfobjcompresslevel = 2 is for this
copyObject(&obj1);
pdf_puts("\n");
pdfendobj();
}
}
}
}
static void writeEncodings()
{
UsedEncoding *r, *n;
char *glyphNames[256], *s;
int i;
for (r = encodingList; r != 0; r = r->next) {
for (i = 0; i < 256; i++) {
if (r->font->isCIDFont()) {
pdftex_fail
("PDF inclusion: CID fonts are not supported"
" (try to disable font replacement to fix this)");
}
if ((s = (char *)((Gfx8BitFont *) r->font)->getCharName(i)) != 0)
glyphNames[i] = s;
else
glyphNames[i] = notdef;
}
epdf_write_enc(glyphNames, r->enc_objnum);
}
for (r = encodingList; r != 0; r = n) {
n = r->next;
#ifdef POPPLER_VERSION
r->font->decRefCnt();
#else
#error POPPLER_VERSION should be defined.
#endif
delete r;
}
}
// Returns the box of `page` selected by pagebox_spec (media, crop, bleed,
// trim or art box). Any other value is a fatal error.
static const PDFRectangle *get_pagebox(Page * page, int pagebox_spec)
{
    if (pagebox_spec == pdfboxspecmedia)
        return page->getMediaBox();
    if (pagebox_spec == pdfboxspeccrop)
        return page->getCropBox();
    if (pagebox_spec == pdfboxspecbleed)
        return page->getBleedBox();
    if (pagebox_spec == pdfboxspectrim)
        return page->getTrimBox();
    if (pagebox_spec == pdfboxspecart)
        return page->getArtBox();

    pdftex_fail("PDF inclusion: unknown value of pagebox spec (%i)",
                (int) pagebox_spec);
    return page->getMediaBox(); // to make the compiler happy
}
// Reads various information about the PDF and sets it up for later inclusion.
// This will fail if the PDF version of the PDF is higher than
// minor_pdf_version_wanted or page_name is given and can not be found.
// It makes no sense to give page_name _and_ page_num.
// Returns the page number.
int
read_pdf_info(char *image_name, char *page_name, int page_num,
int pagebox_spec, int minor_pdf_version_wanted,
int pdf_inclusion_errorlevel)
{
PdfDocument *pdf_doc;
Page *page;
const PDFRectangle *pagebox;
#ifdef POPPLER_VERSION
int pdf_major_version_found, pdf_minor_version_found;
#else
#error POPPLER_VERSION should be defined.
#endif
// initialize
if (!isInit) {
globalParams.reset(new GlobalParams());
globalParams->setErrQuiet(false);
isInit = true;
}
// open PDF file
pdf_doc = find_add_document(image_name);
epdf_doc = (void *) pdf_doc;
// check PDF version
// this works only for PDF 1.x -- but since any versions of PDF newer
// than 1.x will not be backwards compatible to PDF 1.x, pdfTeX will
// then have to changed drastically anyway.
#ifdef POPPLER_VERSION
pdf_major_version_found = pdf_doc->doc->getPDFMajorVersion();
pdf_minor_version_found = pdf_doc->doc->getPDFMinorVersion();
if ((pdf_major_version_found > 1)
|| (pdf_minor_version_found > minor_pdf_version_wanted)) {
const char *msg =
"PDF inclusion: found PDF version <%d.%d>, but at most version <1.%d> allowed";
if (pdf_inclusion_errorlevel > 0) {
pdftex_fail(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
} else if (pdf_inclusion_errorlevel < 0) {
; /* do nothing */
} else { /* = 0, give warning */
pdftex_warn(msg, pdf_major_version_found, pdf_minor_version_found, minor_pdf_version_wanted);
}
}
#else
#error POPPLER_VERSION should be defined.
#endif
epdf_num_pages = pdf_doc->doc->getCatalog()->getNumPages();
if (page_name) {
// get page by name
GString name(page_name);
LinkDest *link = pdf_doc->doc->findDest(&name);
if (link == 0 || !link->isOk())
pdftex_fail("PDF inclusion: invalid destination <%s>", page_name);
Ref ref = link->getPageRef();
page_num = pdf_doc->doc->getCatalog()->findPage(ref);
if (page_num == 0)
pdftex_fail("PDF inclusion: destination is not a page <%s>",
page_name);
delete link;
} else {
// get page by number
if (page_num <= 0 || page_num > epdf_num_pages)
pdftex_fail("PDF inclusion: required page does not exist <%i>",
epdf_num_pages);
}
// get the required page
page = pdf_doc->doc->getCatalog()->getPage(page_num);
// get the pagebox (media, crop...) to use.
pagebox = get_pagebox(page, pagebox_spec);
if (pagebox->x2 > pagebox->x1) {
epdf_orig_x = pagebox->x1;
epdf_width = pagebox->x2 - pagebox->x1;
} else {
epdf_orig_x = pagebox->x2;
epdf_width = pagebox->x1 - pagebox->x2;
}
if (pagebox->y2 > pagebox->y1) {
epdf_orig_y = pagebox->y1;
epdf_height = pagebox->y2 - pagebox->y1;
} else {
epdf_orig_y = pagebox->y2;
epdf_height = pagebox->y1 - pagebox->y2;
}
// get page rotation
epdf_rotate = page->getRotate() % 360;
if (epdf_rotate < 0)
epdf_rotate += 360;
// page group
if (page->getGroup() != NULL)
epdf_has_page_group = 1; // only flag that page group is present;
// the actual object number will be
// generated in pdftex.web
else
epdf_has_page_group = 0; // no page group present
pdf_doc->xref = pdf_doc->doc->getXRef();
return page_num;
}
// Writes the current epdf_doc as a form XObject.
// Here the included PDF is copied, so most errors that can happen during PDF
// inclusion will arise here.
//
// Steps, in output order:
//   1. form XObject header and the optional PTEX.* information keys,
//   2. /Matrix (only for pages rotated by 90, 180 or 270 degrees) and /BBox,
//   3. selected entries of the page dictionary and the page /Group,
//   4. the /Resources dictionary,
//   5. the page contents as the stream of the XObject,
//   6. all indirect objects collected on the way, the encodings, and, if
//      needed, the separate /Group object.
// The document's reference count is decremented on entry; the (possibly
// grown) list of indirect objects is saved back into the PdfDocument record
// on exit.
//
// /Matrix and /BBox are formatted into a fixed-size buffer. The coordinates
// come from the included file, so the formatting is bounded with snprintf()
// and a value too long for the buffer is reported as a fatal error instead
// of overrunning the buffer.
void write_epdf(void)
{
    Page *page;
    Ref *pageRef;
    Dict *pageDict;
    Object contents, obj1, obj2, pageObj, dictObj;
    Object groupDict;
    bool writeSepGroup = false;
    Object info;
    char *key;
    char s[256];
    int i, l;
    int len;
    int rotate;
    double scale[6] = { 0, 0, 0, 0, 0, 0 };
    bool writematrix = false;
    int suppress_ptex_info = getpdfsuppressptexinfo();
    static const char *pageDictKeys[] = {
        "LastModified",
        "Metadata",
        "PieceInfo",
        "SeparationInfo",
//      "Group",
//      "Resources",
        NULL
    };

    PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
    (pdf_doc->occurences)--;
    xref = pdf_doc->xref;
    inObjList = pdf_doc->inObjList;
    encodingList = 0;
    page = pdf_doc->doc->getCatalog()->getPage(epdf_selected_page);
    pageRef = pdf_doc->doc->getCatalog()->getPageRef(epdf_selected_page);
    pageObj = xref->fetch(pageRef->num, pageRef->gen);
    pageDict = pageObj.getDict();
    rotate = page->getRotate();
    const PDFRectangle *pagebox;

    // write the Page header
    pdf_puts("/Type /XObject\n");
    pdf_puts("/Subtype /Form\n");
    pdf_puts("/FormType 1\n");

    // write additional information
    if ((suppress_ptex_info & MASK_SUPPRESS_PTEX_FILENAME) == 0) {
        pdf_printf("/%s.FileName (%s)\n", pdfkeyprefix,
                   convertStringToPDFString(pdf_doc->file_name,
                                            strlen(pdf_doc->file_name)));
    }
    if ((suppress_ptex_info & MASK_SUPPRESS_PTEX_PAGENUMBER) == 0) {
        pdf_printf("/%s.PageNumber %i\n", pdfkeyprefix, (int) epdf_selected_page);
    }
    if ((suppress_ptex_info & MASK_SUPPRESS_PTEX_INFODICT) == 0) {
        info = pdf_doc->doc->getDocInfoNF().copy();
        if (info.isRef()) {
            // the info dict must be indirect (PDF Ref p. 61)
            pdf_printf("/%s.InfoDict ", pdfkeyprefix);
            pdf_printf("%d 0 R\n", addOther(info.getRef()));
        }
    }

    // get the pagebox (media, crop...) to use.
    pagebox = get_pagebox(page, epdf_page_box);

    // handle page rotation
    if (rotate != 0) {
        if (rotate % 90 == 0) {
            // this handles only the simple case: multiple of 90s but these
            // are the only values allowed according to the reference
            // (v1.3, p. 78).
            // the image is rotated around its center.
            // the /Rotate key is clockwise while the matrix is
            // counterclockwise :-%
            tex_printf(", page is rotated %d degrees", rotate);
            switch (rotate) {
            case 90:
                scale[1] = -1;
                scale[2] = 1;
                scale[4] = pagebox->x1 - pagebox->y1;
                scale[5] = pagebox->y1 + pagebox->x2;
                writematrix = true;
                break;
            case 180:
                scale[0] = scale[3] = -1;
                scale[4] = pagebox->x1 + pagebox->x2;
                scale[5] = pagebox->y1 + pagebox->y2;
                writematrix = true;
                break;          // width and height are exchanged
            case 270:
                scale[1] = 1;
                scale[2] = -1;
                scale[4] = pagebox->x1 + pagebox->y2;
                scale[5] = pagebox->y1 - pagebox->x1;
                writematrix = true;
                break;
            }
            if (writematrix) {  // The matrix is only written if the image is rotated.
                len = snprintf(s, sizeof(s),
                               "/Matrix [%.8f %.8f %.8f %.8f %.8f %.8f]\n",
                               scale[0],
                               scale[1], scale[2], scale[3], scale[4], scale[5]);
                if (len < 0 || (size_t) len >= sizeof(s))
                    pdftex_fail("PDF inclusion: page box values too large for /Matrix");
                pdf_puts(stripzeros(s));
            }
        }
    }

    len = snprintf(s, sizeof(s), "/BBox [%.8f %.8f %.8f %.8f]\n",
                   pagebox->x1, pagebox->y1, pagebox->x2, pagebox->y2);
    if (len < 0 || (size_t) len >= sizeof(s))
        pdftex_fail("PDF inclusion: page box values too large for /BBox");
    pdf_puts(stripzeros(s));

    // Metadata validity check (as a stream it must be indirect)
    dictObj = pageDict->lookupNF("Metadata").copy();
    if (!dictObj.isNull() && !dictObj.isRef())
        pdftex_warn("PDF inclusion: /Metadata must be indirect object");

    // copy selected items in Page dictionary except Resources & Group
    for (i = 0; pageDictKeys[i] != NULL; i++) {
        dictObj = pageDict->lookupNF(pageDictKeys[i]).copy();
        if (!dictObj.isNull()) {
            pdf_newline();
            pdf_printf("/%s ", pageDictKeys[i]);
            copyObject(&dictObj); // preserves indirection
        }
    }

    // handle page group
    dictObj = pageDict->lookupNF("Group").copy();
    if (!dictObj.isNull()) {
        if (pdfpagegroupval == 0) {
            // another pdf with page group was included earlier on the
            // same page; copy the Group entry as is.  See manual for
            // info on why this is a warning.
            if (getpdfsuppresswarningpagegroup() == 0) {
                pdftex_warn
    ("PDF inclusion: multiple pdfs with page group included in a single page");
            }
            pdf_newline();
            pdf_puts("/Group ");
            copyObject(&dictObj);
        } else {
            // write Group dict as a separate object, since the Page dict also refers to it
            dictObj = pageDict->lookup("Group");
            if (!dictObj.isDict())
                pdftex_fail("PDF inclusion: /Group dict missing");
            writeSepGroup = true;
/*
This part is only a single line
            groupDict = Object(page->getGroup());
in the original patch. In this case, however, pdftex crashes at
"delete pdf_doc->doc" in "delete_document()" for inclusion of some
kind of pdf images, for example, figure_missing.pdf in gnuplot.
A change
            groupDict = Object(page->getGroup()).copy();
does not improve the situation.
The changes below seem to work fine.
*/
// begin modification
            groupDict = pageDict->lookup("Group");
            const Dict& dic1 = page->getGroup();
            const Dict& dic2 = groupDict.getDict();
            // replace dic2 in groupDict with dic1
            l = dic2.getLength();
            for (i = 0; i < l; i++) {
                groupDict.dictRemove(dic2.getKey(i));
            }
            l = dic1.getLength();
            for (i = 0; i < l; i++) {
                groupDict.dictAdd((const char *)copyString(dic1.getKey(i)),
                                  dic1.getValNF(i).copy());
            }
// end modification
            pdf_printf("/Group %ld 0 R\n", (long)pdfpagegroupval);
        }
    }

    // write the Resources dictionary
    if (page->getResourceDict() == NULL) {
        // Resources can be missing (files without them have been spotted
        // in the wild); in which case the /Resouces of the /Page will be used.
        // "This practice is not recommended".
        pdftex_warn
            ("PDF inclusion: /Resources missing. 'This practice is not recommended' (PDF Ref)");
    } else {
        Object *resources = page->getResourceDictObject();
        if (!resources->isDict())
            pdftex_fail("PDF inclusion: invalid resources dict type <%s>",
                        resources->getTypeName());
        pdf_newline();
        pdf_puts("/Resources <<\n");
        for (i = 0, l = resources->dictGetLength(); i < l; ++i) {
            obj2 = resources->dictGetVal(i);
            key = (char *)resources->dictGetKey(i);
            if (strcmp("Font", key) == 0)
                copyFontResources(&obj2);
            else if (strcmp("ProcSet", key) == 0)
                copyProcSet(&obj2);
            else
                copyOtherResources(&obj2, (char *)key);
        }
        pdf_puts(">>\n");
    }

    // write the page contents
    contents = page->getContents();
    if (contents.isStream()) {
        // Variant A: get stream and recompress under control
        // of \pdfcompresslevel
        //
        // pdfbeginstream();
        // copyStream(contents->getStream());
        // pdfendstream();

        // Variant B: copy stream without recompressing
        //
        obj1 = contents.streamGetDict()->lookup("F");
        if (!obj1.isNull()) {
            pdftex_fail("PDF inclusion: Unsupported external stream");
        }
        obj1 = contents.streamGetDict()->lookup("Length");
        assert(!obj1.isNull());
        pdf_puts("/Length ");
        copyObject(&obj1);
        pdf_puts("\n");
        obj1 = contents.streamGetDict()->lookup("Filter");
        if (!obj1.isNull()) {
            pdf_puts("/Filter ");
            copyObject(&obj1);
            pdf_puts("\n");
            obj1 = contents.streamGetDict()->lookup("DecodeParms");
            if (!obj1.isNull()) {
                pdf_puts("/DecodeParms ");
                copyObject(&obj1);
                pdf_puts("\n");
            }
        }
        pdf_puts(">>\nstream\n");
        copyStream(contents.getStream()->getUndecodedStream());
        pdfendstream();
    } else if (contents.isArray()) {
        // several content streams: concatenate them into one new stream
        pdfbeginstream();
        for (i = 0, l = contents.arrayGetLength(); i < l; ++i) {
            Object contentsobj = contents.arrayGet(i);
            copyStream(contentsobj.getStream());
            if (i < l - 1)
                pdf_newline();  // add a newline after each stream except the last
        }
        pdfendstream();
    } else {                    // the contents are optional, but we need to include an empty stream
        pdfbeginstream();
        pdfendstream();
    }

    // write out all indirect objects
    writeRefs();

    // write out all used encodings (and delete list)
    writeEncodings();

    // write the Group dict if needed
    if (writeSepGroup) {
        pdfbeginobj(pdfpagegroupval, 2);
        copyObject(&groupDict);
        pdf_puts("\n");
        pdfendobj();
        pdfpagegroupval = 0;    // only the 1st included pdf on a page gets its
                                // Group included in the Page dict
    }

    // save object list, xref
    pdf_doc->inObjList = inObjList;
    pdf_doc->xref = xref;
}
// Called when an image has been written and it's resources in image_tab are
// freed and it's not referenced anymore.
void epdf_delete()
{
PdfDocument *pdf_doc = (PdfDocument *) epdf_doc;
xref = pdf_doc->xref;
if (pdf_doc->occurences < 0) {
delete_document(pdf_doc);
}
}
// Called when PDF embedding system is finalized.
// Deallocates all remaining PdfDocuments; does nothing if the embedding
// system was never initialised.
void epdf_check_mem()
{
    if (!isInit)
        return;
    // delete_document() unlinks the record it frees, so the list head
    // advances with every call
    while (pdfDocuments != 0)
        delete_document(pdfDocuments);
}