From 50288c5f18d2285dadf9853d6cf7a6a9ceb8abfc Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Fri, 16 Oct 2020 16:57:13 +0100 Subject: [PATCH] add pdfium load from source --- ChangeLog | 1 + libvips/foreign/foreign.c | 3 +- libvips/foreign/pdfiumload.c | 239 ++++++++++++++++++++++++++++------- libvips/foreign/pdfload.c | 3 +- 4 files changed, 195 insertions(+), 51 deletions(-) diff --git a/ChangeLog b/ChangeLog index 319e793d..a9bcfa29 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,7 @@ - have a lock just for pdfium [DarthSim] - better GraphicsMagick image write [bfriesen] - get pdfium load building again [Projkt-James] +- add _source load support for pdfium 6/9/20 started 8.10.2 - update magicksave/load profile handling [kelilevi] diff --git a/libvips/foreign/foreign.c b/libvips/foreign/foreign.c index 37e92768..2e6b29d2 100644 --- a/libvips/foreign/foreign.c +++ b/libvips/foreign/foreign.c @@ -2174,7 +2174,7 @@ vips_foreign_operation_init( void ) vips_foreign_save_rad_target_get_type(); #endif /*HAVE_RADIANCE*/ -#if defined(HAVE_POPPLER) +#ifdef HAVE_POPPLER vips_foreign_load_pdf_file_get_type(); vips_foreign_load_pdf_buffer_get_type(); vips_foreign_load_pdf_source_get_type(); @@ -2183,6 +2183,7 @@ vips_foreign_operation_init( void ) #ifdef HAVE_PDFIUM vips_foreign_load_pdf_file_get_type(); vips_foreign_load_pdf_buffer_get_type(); + vips_foreign_load_pdf_source_get_type(); #endif /*HAVE_PDFIUM*/ #ifdef HAVE_RSVG diff --git a/libvips/foreign/pdfiumload.c b/libvips/foreign/pdfiumload.c index cbc34221..d0b46463 100644 --- a/libvips/foreign/pdfiumload.c +++ b/libvips/foreign/pdfiumload.c @@ -10,6 +10,8 @@ * - add locks, since pdfium is not threadsafe in any way * 13/10/20 * - have a lock just for pdfium [DarthSim] + * - update for current pdfium + * - add _source input */ /* @@ -39,6 +41,13 @@ */ +/* TODO + * + * - speed up BRGA -> RGBA conversion + * - what about filename encodings? + * - need to test on Windows + */ + /* How to build against PDFium: * * Download the prebuilt binary from: @@ -70,14 +79,6 @@ EOF * */ -/* TODO - * - * - needs the reopen-after-minimise system that pdfload has, but we'll need - * to be able to actually build and test this before we can do that - * - what about filename encodings - * - need to test on Windows - */ - /* #define DEBUG */ @@ -106,6 +107,10 @@ EOF typedef struct _VipsForeignLoadPdf { VipsForeignLoad parent_object; + /* Set by subclasses. + */ + VipsSource *source; + /* Load this page. */ int page_no; @@ -126,6 +131,7 @@ typedef struct _VipsForeignLoadPdf { */ VipsArrayDouble *background; + FPDF_FILEACCESS file_access; FPDF_DOCUMENT doc; FPDF_PAGE page; int current_page; @@ -212,18 +218,82 @@ vips_pdfium_init_cb( void *dummy ) return( NULL ); } +/* This is the m_GetBlock function for FPDF_FILEACCESS. + */ +static gboolean +vips_pdfium_GetBlock( void *param, + unsigned long position, unsigned char *pBuf, unsigned long size ) +{ + VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) param; + + /* PDFium guarantees these. + */ + g_assert( size > 0 ); + g_assert( position >= 0 ); + g_assert( position + size <= pdf->file_access.m_FileLen ); + + if( vips_source_seek( pdf->source, position, SEEK_SET ) < 0 ) + return( FALSE ); + + while( size > 0 ) { + size_t n_read; + + if( (n_read = vips_source_read( pdf->source, pBuf, size )) < 0 ) + return( FALSE ); + pBuf += n_read; + size -= n_read; + } + + return( TRUE ); +} + static int vips_foreign_load_pdf_build( VipsObject *object ) { static GOnce once = G_ONCE_INIT; VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) object; + VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( pdf ); + + gint64 length; VIPS_ONCE( &once, vips_pdfium_init_cb, NULL ); if( !vips_object_argument_isset( object, "scale" ) ) pdf->scale = pdf->dpi / 72.0; + /* pdfium must know the file length, unfortunately. + */ + if( pdf->source ) { + if( (length = vips_source_length( pdf->source )) <= 0 ) + return( -1 ); + if( length > 1 << 30 ) { + vips_error( class->nickname, + _( "%s: too large for pdfium" ), + vips_connection_nick( + VIPS_CONNECTION( pdf->source ) ) ); + return( -1 ); + } + pdf->file_access.m_FileLen = length; + pdf->file_access.m_GetBlock = vips_pdfium_GetBlock; + pdf->file_access.m_Param = pdf; + + g_mutex_lock( vips_pdfium_mutex ); + + if( !(pdf->doc = FPDF_LoadCustomDocument( &pdf->file_access, + NULL )) ) { + g_mutex_unlock( vips_pdfium_mutex ); + vips_pdfium_error(); + vips_error( "pdfload", + _( "%s: unable to load" ), + vips_connection_nick( + VIPS_CONNECTION( pdf->source ) ) ); + return( -1 ); + } + + g_mutex_unlock( vips_pdfium_mutex ); + } + if( VIPS_OBJECT_CLASS( vips_foreign_load_pdf_parent_class )-> build( object ) ) return( -1 ); @@ -313,6 +383,8 @@ vips_foreign_load_pdf_set_image( VipsForeignLoadPdf *pdf, VipsImage *out ) vips_image_set_int( out, "pdf-n_pages", pdf->n_pages ); vips_image_set_int( out, VIPS_META_N_PAGES, pdf->n_pages ); + g_mutex_lock( vips_pdfium_mutex ); + for( i = 0; i < n_metadata; i++ ) { VipsForeignLoadPdfMetadata *metadata = &vips_foreign_load_pdf_metadata[i]; @@ -320,9 +392,7 @@ vips_foreign_load_pdf_set_image( VipsForeignLoadPdf *pdf, VipsImage *out ) char text[1024]; int len; - g_mutex_lock( vips_pdfium_mutex ); len = FPDF_GetMetaText( pdf->doc, metadata->tag, text, 1024 ); - g_mutex_unlock( vips_pdfium_mutex ); if( len > 0 ) { char *str; @@ -337,6 +407,8 @@ vips_foreign_load_pdf_set_image( VipsForeignLoadPdf *pdf, VipsImage *out ) } } + g_mutex_unlock( vips_pdfium_mutex ); + /* We need pixels/mm for vips. */ res = pdf->dpi / 25.4; @@ -435,10 +507,7 @@ vips_foreign_load_pdf_header( VipsForeignLoad *load ) static void vips_foreign_load_pdf_minimise( VipsObject *object, VipsForeignLoadPdf *pdf ) { - /* In seq mode, we can shut down the input at the end of computation. - */ - if( VIPS_FOREIGN_LOAD( pdf )->access == VIPS_ACCESS_SEQUENTIAL ) - vips_foreign_load_pdf_close( pdf ); + vips_source_minimise( pdf->source ); } static int @@ -502,7 +571,8 @@ vips_foreign_load_pdf_generate( VipsRegion *or, /* PDFium writes BRGA, we must swap. * - * FIXME ... this is slow. + * FIXME ... this is slow, try using vips__cairo2rgba()? Do we need to + * unpremultiply as well? */ for( y = 0; y < r->height; y++ ) { VipsPel *p; @@ -583,12 +653,13 @@ vips_foreign_load_pdf_class_init( VipsForeignLoadPdfClass *class ) gobject_class->get_property = vips_object_get_property; object_class->nickname = "pdfload_base"; - object_class->description = _( "load PDF with libpoppler" ); + object_class->description = _( "load PDF with PDFium" ); object_class->build = vips_foreign_load_pdf_build; load_class->get_flags_filename = vips_foreign_load_pdf_get_flags_filename; load_class->get_flags = vips_foreign_load_pdf_get_flags; + load_class->header = vips_foreign_load_pdf_header; load_class->load = vips_foreign_load_pdf_load; VIPS_ARG_INT( class, "page", 10, @@ -655,24 +726,12 @@ G_DEFINE_TYPE( VipsForeignLoadPdfFile, vips_foreign_load_pdf_file, static int vips_foreign_load_pdf_file_header( VipsForeignLoad *load ) { - VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) load; VipsForeignLoadPdfFile *file = (VipsForeignLoadPdfFile *) load; - g_mutex_lock( vips_pdfium_mutex ); - - if( !(pdf->doc = FPDF_LoadDocument( file->filename, NULL )) ) { - g_mutex_unlock( vips_pdfium_mutex ); - vips_pdfium_error(); - vips_error( "pdfload", - _( "unable to load \"%s\"" ), file->filename ); - return( -1 ); - } - - g_mutex_unlock( vips_pdfium_mutex ); - VIPS_SETSTR( load->out->filename, file->filename ); - return( vips_foreign_load_pdf_header( load ) ); + return( VIPS_FOREIGN_LOAD_CLASS( + vips_foreign_load_pdf_file_parent_class )->header( load ) ); } static const char *vips_foreign_pdf_suffs[] = { @@ -680,6 +739,24 @@ static const char *vips_foreign_pdf_suffs[] = { NULL }; +static int +vips_foreign_load_pdf_file_build( VipsObject *object ) +{ + VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) object; + VipsForeignLoadPdfFile *file = (VipsForeignLoadPdfFile *) pdf; + +#ifdef DEBUG + printf( "vips_foreign_load_pdf_file_build: %s\n", file->filename ); +#endif /*DEBUG*/ + + if( file->filename && + !(pdf->source = vips_source_new_from_file( file->filename )) ) + return( -1 ); + + return( VIPS_OBJECT_CLASS( vips_foreign_load_pdf_file_parent_class )-> + build( object ) ); +} + static void vips_foreign_load_pdf_file_class_init( VipsForeignLoadPdfFileClass *class ) @@ -693,6 +770,8 @@ vips_foreign_load_pdf_file_class_init( gobject_class->get_property = vips_object_get_property; object_class->nickname = "pdfload"; + object_class->description = _( "load PDF from file" ); + object_class->build = vips_foreign_load_pdf_file_build; foreign_class->suffs = vips_foreign_pdf_suffs; @@ -728,26 +807,19 @@ G_DEFINE_TYPE( VipsForeignLoadPdfBuffer, vips_foreign_load_pdf_buffer, vips_foreign_load_pdf_get_type() ); static int -vips_foreign_load_pdf_buffer_header( VipsForeignLoad *load ) +vips_foreign_load_pdf_buffer_build( VipsObject *object ) { - VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) load; - VipsForeignLoadPdfBuffer *buffer = - (VipsForeignLoadPdfBuffer *) load; + VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) object; + VipsForeignLoadPdfBuffer *buffer = (VipsForeignLoadPdfBuffer *) pdf; - g_mutex_lock( vips_pdfium_mutex ); + if( buffer->buf && + !(pdf->source = vips_source_new_from_memory( + VIPS_AREA( buffer->buf )->data, + VIPS_AREA( buffer->buf )->length )) ) + return( -1 ); - if( !(pdf->doc = FPDF_LoadMemDocument( buffer->buf->data, - buffer->buf->length, NULL )) ) { - g_mutex_unlock( vips_pdfium_mutex ); - vips_pdfium_error(); - vips_error( "pdfload", - "%s", _( "unable to load from buffer" ) ); - return( -1 ); - } - - g_mutex_unlock( vips_pdfium_mutex ); - - return( vips_foreign_load_pdf_header( load ) ); + return( VIPS_OBJECT_CLASS( vips_foreign_load_pdf_buffer_parent_class )-> + build( object ) ); } static void @@ -762,9 +834,10 @@ vips_foreign_load_pdf_buffer_class_init( gobject_class->get_property = vips_object_get_property; object_class->nickname = "pdfload_buffer"; + object_class->description = _( "load PDF from buffer" ); + object_class->build = vips_foreign_load_pdf_buffer_build; load_class->is_a_buffer = vips_foreign_load_pdf_is_a_buffer; - load_class->header = vips_foreign_load_pdf_buffer_header; VIPS_ARG_BOXED( class, "buffer", 1, _( "Buffer" ), @@ -780,4 +853,74 @@ vips_foreign_load_pdf_buffer_init( VipsForeignLoadPdfBuffer *buffer ) { } +typedef struct _VipsForeignLoadPdfSource { + VipsForeignLoadPdf parent_object; + + VipsSource *source; + +} VipsForeignLoadPdfSource; + +typedef VipsForeignLoadPdfClass VipsForeignLoadPdfSourceClass; + +G_DEFINE_TYPE( VipsForeignLoadPdfSource, vips_foreign_load_pdf_source, + vips_foreign_load_pdf_get_type() ); + +static int +vips_foreign_load_pdf_source_build( VipsObject *object ) +{ + VipsForeignLoadPdf *pdf = (VipsForeignLoadPdf *) object; + VipsForeignLoadPdfSource *source = (VipsForeignLoadPdfSource *) pdf; + + if( source->source ) { + pdf->source = source->source; + g_object_ref( pdf->source ); + } + + return( VIPS_OBJECT_CLASS( vips_foreign_load_pdf_source_parent_class )-> + build( object ) ); +} + +static gboolean +vips_foreign_load_pdf_source_is_a_source( VipsSource *source ) +{ + const unsigned char *p; + + return( (p = vips_source_sniff( source, 4 )) && + p[0] == '%' && + p[1] == 'P' && + p[2] == 'D' && + p[3] == 'F' ); +} + +static void +vips_foreign_load_pdf_source_class_init( + VipsForeignLoadPdfSourceClass *class ) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS( class ); + VipsObjectClass *object_class = (VipsObjectClass *) class; + VipsForeignLoadClass *load_class = (VipsForeignLoadClass *) class; + + gobject_class->set_property = vips_object_set_property; + gobject_class->get_property = vips_object_get_property; + + object_class->nickname = "pdfload_source"; + object_class->description = _( "load PDF from source" ); + object_class->build = vips_foreign_load_pdf_source_build; + + load_class->is_a_source = vips_foreign_load_pdf_source_is_a_source; + + VIPS_ARG_OBJECT( class, "source", 1, + _( "Source" ), + _( "Source to load from" ), + VIPS_ARGUMENT_REQUIRED_INPUT, + G_STRUCT_OFFSET( VipsForeignLoadPdfSource, source ), + VIPS_TYPE_SOURCE ); + +} + +static void +vips_foreign_load_pdf_source_init( VipsForeignLoadPdfSource *source ) +{ +} + #endif /*HAVE_PDFIUM*/ diff --git a/libvips/foreign/pdfload.c b/libvips/foreign/pdfload.c index 7729a8b0..831ce2e3 100644 --- a/libvips/foreign/pdfload.c +++ b/libvips/foreign/pdfload.c @@ -220,8 +220,7 @@ vips_foreign_load_pdf_get_page( VipsForeignLoadPdf *pdf, int page_no ) printf( "vips_foreign_load_pdf_get_page: %d\n", page_no ); #endif /*DEBUG*/ - if( !(pdf->page = poppler_document_get_page( pdf->doc, - page_no )) ) { + if( !(pdf->page = poppler_document_get_page( pdf->doc, page_no )) ) { vips_error( class->nickname, _( "unable to load page %d" ), page_no ); return( -1 );