popplerload hacking

metadata, relative filenames
This commit is contained in:
John Cupitt 2016-02-08 14:41:57 +00:00
parent 69c2fe00f7
commit be574be539
3 changed files with 71 additions and 16 deletions

13
TODO
View File

@ -1,10 +1,17 @@
- popplerload URI generation needs fixing, it currently does not support
relative paths
- popplerload generates ARGB? need to do our own byteswap - popplerload generates ARGB? need to do our own byteswap
- add tests for popplerload, check docs, update c++ - add tests for popplerload, check docs, update c++
- add load PDF from buffer
- add file type sniffing .. "%PDF" should be enough
- metadata: xres/yres
- trim page edges? we often have black now
- does poppler support other formats? SVG / PS / EPS
- new vips_reduce: - new vips_reduce:
affine affine

View File

@ -25,7 +25,6 @@ TARGET_DIR=$(HTML_DIR)/$(DOC_MODULE)
SETUP_FILES = \ SETUP_FILES = \
$(content_files) \ $(content_files) \
$(expand_content_files) \
$(DOC_MAIN_SGML_FILE) \ $(DOC_MAIN_SGML_FILE) \
$(DOC_MODULE)-sections.txt \ $(DOC_MODULE)-sections.txt \
$(DOC_MODULE)-overrides.txt $(DOC_MODULE)-overrides.txt
@ -87,7 +86,7 @@ GTK_DOC_V_SETUP_0=@echo " DOC Preparing build";
setup-build.stamp: setup-build.stamp:
-$(GTK_DOC_V_SETUP)if test "$(abs_srcdir)" != "$(abs_builddir)" ; then \ -$(GTK_DOC_V_SETUP)if test "$(abs_srcdir)" != "$(abs_builddir)" ; then \
files=`echo $(SETUP_FILES) $(DOC_MODULE).types`; \ files=`echo $(SETUP_FILES) $(expand_content_files) $(DOC_MODULE).types`; \
if test "x$$files" != "x" ; then \ if test "x$$files" != "x" ; then \
for file in $$files ; do \ for file in $$files ; do \
destdir=`dirname $(abs_builddir)/$$file`; \ destdir=`dirname $(abs_builddir)/$$file`; \
@ -119,7 +118,7 @@ scan-build.stamp: setup-build.stamp $(HFILE_GLOB) $(CFILE_GLOB)
$(GTK_DOC_V_INTROSPECT)if grep -l '^..*$$' $(DOC_MODULE).types > /dev/null 2>&1 ; then \ $(GTK_DOC_V_INTROSPECT)if grep -l '^..*$$' $(DOC_MODULE).types > /dev/null 2>&1 ; then \
scanobj_options=""; \ scanobj_options=""; \
gtkdoc-scangobj 2>&1 --help | grep >/dev/null "\-\-verbose"; \ gtkdoc-scangobj 2>&1 --help | grep >/dev/null "\-\-verbose"; \
if test "$$?" = "0"; then \ if test "$(?)" = "0"; then \
if test "x$(V)" = "x1"; then \ if test "x$(V)" = "x1"; then \
scanobj_options="--verbose"; \ scanobj_options="--verbose"; \
fi; \ fi; \
@ -163,17 +162,17 @@ GTK_DOC_V_XREF=$(GTK_DOC_V_XREF_$(V))
GTK_DOC_V_XREF_=$(GTK_DOC_V_XREF_$(AM_DEFAULT_VERBOSITY)) GTK_DOC_V_XREF_=$(GTK_DOC_V_XREF_$(AM_DEFAULT_VERBOSITY))
GTK_DOC_V_XREF_0=@echo " DOC Fixing cross-references"; GTK_DOC_V_XREF_0=@echo " DOC Fixing cross-references";
html-build.stamp: sgml.stamp $(DOC_MAIN_SGML_FILE) $(content_files) $(expand_content_files) html-build.stamp: sgml.stamp $(DOC_MAIN_SGML_FILE) $(content_files)
$(GTK_DOC_V_HTML)rm -rf html && mkdir html && \ $(GTK_DOC_V_HTML)rm -rf html && mkdir html && \
mkhtml_options=""; \ mkhtml_options=""; \
gtkdoc-mkhtml 2>&1 --help | grep >/dev/null "\-\-verbose"; \ gtkdoc-mkhtml 2>&1 --help | grep >/dev/null "\-\-verbose"; \
if test "$$?" = "0"; then \ if test "$(?)" = "0"; then \
if test "x$(V)" = "x1"; then \ if test "x$(V)" = "x1"; then \
mkhtml_options="$$mkhtml_options --verbose"; \ mkhtml_options="$$mkhtml_options --verbose"; \
fi; \ fi; \
fi; \ fi; \
gtkdoc-mkhtml 2>&1 --help | grep >/dev/null "\-\-path"; \ gtkdoc-mkhtml 2>&1 --help | grep >/dev/null "\-\-path"; \
if test "$$?" = "0"; then \ if test "$(?)" = "0"; then \
mkhtml_options="$$mkhtml_options --path=\"$(abs_srcdir)\""; \ mkhtml_options="$$mkhtml_options --path=\"$(abs_srcdir)\""; \
fi; \ fi; \
cd html && gtkdoc-mkhtml $$mkhtml_options $(MKHTML_OPTIONS) $(DOC_MODULE) ../$(DOC_MAIN_SGML_FILE) cd html && gtkdoc-mkhtml $$mkhtml_options $(MKHTML_OPTIONS) $(DOC_MODULE) ../$(DOC_MAIN_SGML_FILE)
@ -195,11 +194,11 @@ GTK_DOC_V_PDF=$(GTK_DOC_V_PDF_$(V))
GTK_DOC_V_PDF_=$(GTK_DOC_V_PDF_$(AM_DEFAULT_VERBOSITY)) GTK_DOC_V_PDF_=$(GTK_DOC_V_PDF_$(AM_DEFAULT_VERBOSITY))
GTK_DOC_V_PDF_0=@echo " DOC Building PDF"; GTK_DOC_V_PDF_0=@echo " DOC Building PDF";
pdf-build.stamp: sgml.stamp $(DOC_MAIN_SGML_FILE) $(content_files) $(expand_content_files) pdf-build.stamp: sgml.stamp $(DOC_MAIN_SGML_FILE) $(content_files)
$(GTK_DOC_V_PDF)rm -f $(DOC_MODULE).pdf && \ $(GTK_DOC_V_PDF)rm -f $(DOC_MODULE).pdf && \
mkpdf_options=""; \ mkpdf_options=""; \
gtkdoc-mkpdf 2>&1 --help | grep >/dev/null "\-\-verbose"; \ gtkdoc-mkpdf 2>&1 --help | grep >/dev/null "\-\-verbose"; \
if test "$$?" = "0"; then \ if test "$(?)" = "0"; then \
if test "x$(V)" = "x1"; then \ if test "x$(V)" = "x1"; then \
mkpdf_options="$$mkpdf_options --verbose"; \ mkpdf_options="$$mkpdf_options --verbose"; \
fi; \ fi; \
@ -224,15 +223,12 @@ clean-local:
@if echo $(SCAN_OPTIONS) | grep -q "\-\-rebuild-types" ; then \ @if echo $(SCAN_OPTIONS) | grep -q "\-\-rebuild-types" ; then \
rm -f $(DOC_MODULE).types; \ rm -f $(DOC_MODULE).types; \
fi fi
@if echo $(SCAN_OPTIONS) | grep -q "\-\-rebuild-sections" ; then \
rm -f $(DOC_MODULE)-sections.txt; \
fi
distclean-local: distclean-local:
@rm -rf xml html $(REPORT_FILES) $(DOC_MODULE).pdf \ @rm -rf xml html $(REPORT_FILES) $(DOC_MODULE).pdf \
$(DOC_MODULE)-decl-list.txt $(DOC_MODULE)-decl.txt $(DOC_MODULE)-decl-list.txt $(DOC_MODULE)-decl.txt
@if test "$(abs_srcdir)" != "$(abs_builddir)" ; then \ @if test "$(abs_srcdir)" != "$(abs_builddir)" ; then \
rm -f $(SETUP_FILES) $(DOC_MODULE).types; \ rm -f $(SETUP_FILES) $(expand_content_files) $(DOC_MODULE).types; \
fi fi
maintainer-clean-local: maintainer-clean-local:

View File

@ -45,6 +45,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <errno.h>
#include <vips/vips.h> #include <vips/vips.h>
#include <vips/buf.h> #include <vips/buf.h>
@ -110,15 +111,38 @@ vips_foreign_load_poppler_get_flags( VipsForeignLoad *load )
return( VIPS_FOREIGN_PARTIAL ); return( VIPS_FOREIGN_PARTIAL );
} }
/* One string-valued metadata item we extract from the PDF document and
 * attach to the output image.
 *
 * @poppler_fetch: getter called with the open document. A non-NULL result
 * is attached to the image and then released with g_free() by the caller.
 * @field: name of the image metadata field the string is stored under.
 * Entries are initialised from string literals and the name is only ever
 * read, so it is declared const.
 */
typedef struct _VipsForeignLoadPopperMetadata {
	char *(*poppler_fetch)( PopplerDocument *doc );
	const char *field;
} VipsForeignLoadPopperMetadata;
/* Table driving string metadata extraction: each entry pairs a poppler
 * document getter with the name of the image field its result is stored
 * under. To expose another string property, add a row here.
 */
static VipsForeignLoadPopperMetadata vips_foreign_load_poppler_metadata[] = {
{ poppler_document_get_title, "poppler-title" },
{ poppler_document_get_author, "poppler-author" },
{ poppler_document_get_subject, "poppler-subject" },
{ poppler_document_get_keywords, "poppler-keywords" },
{ poppler_document_get_creator, "poppler-creator" },
{ poppler_document_get_producer, "poppler-producer" },
{ poppler_document_get_metadata, "poppler-metadata" },
};
/* Entry count for the table above (via VIPS_NUMBER); used as the loop bound
 * when the fields are attached to the image.
 */
static int n_metadata = VIPS_NUMBER( vips_foreign_load_poppler_metadata );
static void static void
vips_foreign_load_poppler_parse( VipsForeignLoadPoppler *poppler, vips_foreign_load_poppler_parse( VipsForeignLoadPoppler *poppler,
VipsImage *out ) VipsImage *out )
{ {
PopplerRectangle crop_box;
double width; double width;
double height; double height;
int i;
char *str;
poppler_page_get_size( poppler->page, &width, &height ); poppler_page_get_size( poppler->page, &width, &height );
poppler_page_get_crop_box( poppler->page, &crop_box );
vips_image_init_fields( out, vips_image_init_fields( out,
width * poppler->scale, height * poppler->scale, width * poppler->scale, height * poppler->scale,
4, VIPS_FORMAT_UCHAR, 4, VIPS_FORMAT_UCHAR,
@ -129,6 +153,21 @@ vips_foreign_load_poppler_parse( VipsForeignLoadPoppler *poppler,
/* We render to a linecache, so fat strips work well. /* We render to a linecache, so fat strips work well.
*/ */
vips_image_pipelinev( out, VIPS_DEMAND_STYLE_FATSTRIP, NULL ); vips_image_pipelinev( out, VIPS_DEMAND_STYLE_FATSTRIP, NULL );
/* Extract and attach metadata.
*/
vips_image_set_int( out, "poppler-n_pages",
poppler_document_get_n_pages( poppler->doc ) );
for( i = 0; i < n_metadata; i++ ) {
VipsForeignLoadPopperMetadata *metadata =
&vips_foreign_load_poppler_metadata[i];
if( (str = metadata->poppler_fetch( poppler->doc )) ) {
vips_image_set_string( out, metadata->field, str );
g_free( str );
}
}
} }
static int static int
@ -137,11 +176,24 @@ vips_foreign_load_poppler_header( VipsForeignLoad *load )
VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( load ); VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( load );
VipsForeignLoadPoppler *poppler = (VipsForeignLoadPoppler *) load; VipsForeignLoadPoppler *poppler = (VipsForeignLoadPoppler *) load;
char *path;
GError *error = NULL; GError *error = NULL;
poppler->scale = poppler->dpi / 72.0; poppler->scale = poppler->dpi / 72.0;
poppler->uri = g_strdup_printf( "file://%s", poppler->filename ); /* We need an absolute path for a URI.
*/
if( !(path = realpath( poppler->filename, NULL )) ) {
vips_error_system( errno, class->nickname,
"%s", _( "unable to form filename" ) );
return( -1 );
}
if( !(poppler->uri = g_filename_to_uri( path, NULL, &error )) ) {
free( path );
vips_g_error( &error );
return( -1 );
}
free( path );
if( !(poppler->doc = poppler_document_new_from_file( if( !(poppler->doc = poppler_document_new_from_file(
poppler->uri, NULL, &error )) ) { poppler->uri, NULL, &error )) ) {