From 134ce0560ca605e42d3c5a60e472a07a5affad5e Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Sat, 25 Feb 2017 13:07:43 +0000 Subject: [PATCH] use expat for xml read we were using libxml for xml load, use expat instead, we get it for free with glib --- ChangeLog | 2 + libvips/include/vips/header.h | 8 - libvips/iofuncs/vips.c | 356 +++++++++++++++++----------------- 3 files changed, 182 insertions(+), 184 deletions(-) diff --git a/ChangeLog b/ChangeLog index a5f61e89..01e118fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -34,6 +34,8 @@ - vips_region_shrink() knows about alpha, helps dzsave and tiffsave - vips_sequential() no longer stalls ahead threads, instead we constrain sinks more tightly ... this makes seq more versatile and more reliable +- use expat, not libxml, for XML load ... removes a required dependency, since + we get expat as part of glib 8/12/16 started 8.4.5 - allow libgsf-1.14.26 to help centos, thanks tdiprima diff --git a/libvips/include/vips/header.h b/libvips/include/vips/header.h index 1ae81011..98229c7f 100644 --- a/libvips/include/vips/header.h +++ b/libvips/include/vips/header.h @@ -77,14 +77,6 @@ extern "C" { */ #define VIPS_META_ICC_NAME "icc-profile-data" -/** - * VIPS_META_XML: - * - * The original XML that was used to code the metadata after reading a VIPS - * format file. - */ -#define VIPS_META_XML "xml-header" - /** * VIPS_META_IMAGEDESCRIPTION: * diff --git a/libvips/iofuncs/vips.c b/libvips/iofuncs/vips.c index 90b76225..7c5342d9 100644 --- a/libvips/iofuncs/vips.c +++ b/libvips/iofuncs/vips.c @@ -17,6 +17,8 @@ * image.c ... this file now just does read / write to disc * 28/3/11 * - moved to vips_ namespace + * 25/2/17 + * - use expat for xml read */ /* @@ -435,9 +437,9 @@ vips__read_extension_block( VipsImage *im, int *size ) psize = image_pixel_length( im ); g_assert( im->file_length > 0 ); - if( im->file_length - psize > 10 * 1024 * 1024 ) { + if( im->file_length - psize > 100 * 1024 * 1024 ) { vips_error( "VipsImage", - "%s", _( "more than a 10 megabytes of XML? " + "%s", _( "more than 100 megabytes of XML? " "sufferin' succotash!" ) ); return( NULL ); } @@ -457,74 +459,108 @@ vips__read_extension_block( VipsImage *im, int *size ) return( buf ); } -/* Read everything after the pixels into memory. - - FIXME ... why can't we use xmlParserInputBufferCreateFd and parse - directly from the fd rather than having to read the stupid thing into - memory - - the libxml API docs are impossible to decipher - - */ -static xmlDoc * -read_xml( VipsImage *im ) +static int +parser_read_fd( XML_Parser parser, int fd ) { - void *buf; - int size; - xmlDoc *doc; - xmlNode *node; + const int chunk_size = 1024; - if( !(buf = vips__read_extension_block( im, &size )) ) - return( NULL ); - if( !(doc = xmlParseMemory( buf, size )) ) { - vips_free( buf ); - return( NULL ); - } - vips_free( buf ); - if( !(node = xmlDocGetRootElement( doc )) || - !node->nsDef || - !vips_isprefix( NAMESPACE, (char *) node->nsDef->href ) ) { - vips_error( "VipsImage", - "%s", _( "incorrect namespace in XML" ) ); - xmlFreeDoc( doc ); - return( NULL ); - } + ssize_t bytes_read; + + do { + void *buf; + + if( !(buf = XML_GetBuffer( parser, chunk_size )) ) { + vips_error( "VipsImage", + "%s", _( "unable to allocate read buffer" ) ); + return( -1 ); + } + bytes_read = read( fd, buf, chunk_size ); + if( bytes_read == (ssize_t) -1 ) { + vips_error( "VipsImage", + "%s", _( "read error while fetching XML" ) ); + return( -1 ); + } + + if( !XML_ParseBuffer( parser, bytes_read, bytes_read == 0 ) ) { + vips_error( "VipsImage", + "%s", _( "XML parse error" ) ); + return( -1 ); + } + } while( bytes_read > 0 ); + + return( 0 ); +} + +#define MAX_PARSE_ATTR (256) + +/* What we track during expat parse. + */ +typedef struct _VipsExpatParse { + VipsImage *image; + + /* TRUE for in header section. + */ + gboolean header; + + /* For the current node, the type and name. + */ + XML_Char type[MAX_PARSE_ATTR]; + XML_Char name[MAX_PARSE_ATTR]; + + /* Accumulate data here. + */ + char *data; + size_t current_size; + + gboolean error; +} VipsExpatParse; + +static void +parser_element_start_handler( void *user_data, + const XML_Char *name, const XML_Char **atts ) +{ + VipsExpatParse *vep = (VipsExpatParse *) user_data; + const XML_Char **p; #ifdef DEBUG - printf( "read_xml: namespace == %s\n", node->nsDef->href ); + printf( "parser_element_start: %s\n", name ); + for( p = atts; *p; p += 2 ) + printf( "%s = %s\n", p[0], p[1] ); #endif /*DEBUG*/ - return( doc ); + if( strcmp( name, "field" ) == 0 ) { + for( p = atts; *p; p += 2 ) { + if( strcmp( p[0], "name" ) == 0 ) + vips_strncpy( vep->name, p[1], MAX_PARSE_ATTR ); + if( strcmp( p[0], "type" ) == 0 ) + vips_strncpy( vep->type, p[1], MAX_PARSE_ATTR ); + } + + vep->current_size = 0; + } + else if( strcmp( name, "header" ) == 0 ) + vep->header = TRUE; + else if( strcmp( name, "meta" ) == 0 ) + vep->header = FALSE; + else if( strcmp( name, "root" ) == 0 ) { + for( p = atts; *p; p += 2 ) + if( strcmp( p[0], "xmlns" ) == 0 && + !vips_isprefix( NAMESPACE, p[1] ) ) { + vips_error( "VipsImage", "%s", + _( "incorrect namespace in XML" ) ); + vep->error = TRUE; + } + } } -/* Find the first child node with a name. - */ -static xmlNode * -get_node( xmlNode *base, const char *name ) +static void +parser_append( VipsExpatParse *vep, const XML_Char *data, int len ) { - xmlNode *i; + size_t new_size = vep->current_size + len; - for( i = base->children; i; i = i->next ) - if( strcmp( (char *) i->name, name ) == 0 ) - return( i ); - - return( NULL ); -} - -/* Read a string property to a buffer. TRUE for success. - */ -static int -get_sprop( xmlNode *xnode, const char *name, char *buf, int sz ) -{ - char *value = (char *) xmlGetProp( xnode, (xmlChar *) name ); - - if( !value ) - return( 0 ); - - vips_strncpy( buf, value, sz ); - VIPS_FREEF( xmlFree, value ); - - return( 1 ); + vep->data = g_realloc( vep->data, new_size ); + memcpy( vep->data + vep->current_size, data, len ); + vep->current_size = new_size; } /* Chop history into lines, add each one as a refstring. @@ -556,119 +592,76 @@ set_history( VipsImage *im, char *history ) im->history_list = g_slist_reverse( history_list ); } -/* Load header fields. - */ static int -rebuild_header_builtin( VipsImage *im, xmlNode *i ) +set_meta( VipsImage *image, GType gtype, const char *name, const char *data ) { - char name[256]; + GValue save_value = { 0 }; + GValue value = { 0 }; - if( get_sprop( i, "name", name, 256 ) ) { - if( strcmp( name, "Hist" ) == 0 ) { - char *history; + g_value_init( &save_value, VIPS_TYPE_SAVE_STRING ); + vips_value_set_save_string( &save_value, data ); - /* Have to take (another) copy, since we need to free - * with xmlFree(). + g_value_init( &value, gtype ); + if( !g_value_transform( &save_value, &value ) ) { + g_value_unset( &save_value ); + vips_error( "VipsImage", "%s", + _( "error transforming from save format" ) ); + return( -1 ); + } + + vips_image_set( image, name, &value ); + g_value_unset( &save_value ); + g_value_unset( &value ); + + return( 0 ); +} + +static void +parser_element_end_handler( void *user_data, const XML_Char *name ) +{ + VipsExpatParse *vep = (VipsExpatParse *) user_data; + +#ifdef DEBUG + printf( "parser_element_end_handler: %s\n", name ); +#endif /*DEBUG*/ + + if( strcmp( name, "field" ) == 0 ) { + parser_append( vep, "", 1 ); + +#ifdef DEBUG + printf( "parser_element_end_handler: %zd bytes\n", + vep->current_size ); +#endif /*DEBUG*/ + + if( vep->header ) { + if( strcmp( name, "Hist" ) == 0 ) + set_history( vep->image, vep->data ); + } + else { + GType gtype = g_type_from_name( vep->type ); + + /* Can we convert from VIPS_SAVE_STRING to type? */ - history = (char *) xmlNodeGetContent( i ); - set_history( im, history ); - xmlFree( history ); + if( gtype && + g_value_type_transformable( + VIPS_TYPE_SAVE_STRING, gtype ) && + set_meta( vep->image, + gtype, vep->name, vep->data ) ) + vep->error = TRUE; } } - - return( 0 ); } -/* Load meta fields. - */ -static int -rebuild_header_meta( VipsImage *im, xmlNode *i ) +static void +parser_data_handler( void *user_data, const XML_Char *data, int len ) { - char name[256]; - char type[256]; + VipsExpatParse *vep = (VipsExpatParse *) user_data; - if( get_sprop( i, "name", name, 256 ) && - get_sprop( i, "type", type, 256 ) ) { - GType gtype = g_type_from_name( type ); +#ifdef DEBUG + printf( "parser_data_handler: %d bytes\n", len ); +#endif /*DEBUG*/ - /* Can we convert from VIPS_SAVE_STRING to type? - */ - if( gtype && - g_value_type_transformable( - VIPS_TYPE_SAVE_STRING, gtype ) ) { - char *content; - GValue save_value = { 0 }; - GValue value = { 0 }; - - content = (char *) xmlNodeGetContent( i ); - g_value_init( &save_value, VIPS_TYPE_SAVE_STRING ); - vips_value_set_save_string( &save_value, content ); - xmlFree( content ); - - g_value_init( &value, gtype ); - if( !g_value_transform( &save_value, &value ) ) { - g_value_unset( &save_value ); - vips_error( "VipsImage", - "%s", _( "error transforming from " - "save format" ) ); - return( -1 ); - } - vips_image_set( im, name, &value ); - g_value_unset( &save_value ); - g_value_unset( &value ); - } - } - - return( 0 ); -} - -static xmlDoc * -get_xml( VipsImage *im ) -{ - if( vips_image_get_typeof( im, VIPS_META_XML ) ) { - xmlDoc *doc; - - if( vips_image_get_area( im, VIPS_META_XML, (void *) &doc ) ) - return( NULL ); - - return( doc ); - } - - return( NULL ); -} - -/* Rebuild header fields that depend on stuff saved in xml. - */ -static int -rebuild_header( VipsImage *im ) -{ - xmlDoc *doc; - - if( (doc = get_xml( im )) ) { - xmlNode *root; - xmlNode *block; - - if( !(root = xmlDocGetRootElement( doc )) ) - return( -1 ); - if( (block = get_node( root, "header" )) ) { - xmlNode *i; - - for( i = block->children; i; i = i->next ) - if( strcmp( (char *) i->name, "field" ) == 0 ) - if( rebuild_header_builtin( im, i ) ) - return( -1 ); - } - if( (block = get_node( root, "meta" )) ) { - xmlNode *i; - - for( i = block->children; i; i = i->next ) - if( strcmp( (char *) i->name, "field" ) == 0 ) - if( rebuild_header_meta( im, i ) ) - return( -1 ); - } - } - - return( 0 ); + parser_append( vep, data, len ); } /* Called at the end of vips open ... get any XML after the pixel data @@ -677,24 +670,35 @@ rebuild_header( VipsImage *im ) static int readhist( VipsImage *im ) { - /* Junk any old xml meta. - */ - if( vips_image_get_typeof( im, VIPS_META_XML ) ) - vips_image_set_area( im, VIPS_META_XML, NULL, NULL ); + XML_Parser parser; + VipsExpatParse vep; - if( vips__has_extension_block( im ) ) { - xmlDoc *doc; - - if( !(doc = read_xml( im )) ) - return( -1 ); - vips_image_set_area( im, VIPS_META_XML, - (VipsCallbackFn) xmlFreeDoc, doc ); - } - - if( rebuild_header( im ) ) + if( vips__seek( im->fd, image_pixel_length( im ) ) ) return( -1 ); - return( 0 ); + parser = XML_ParserCreate( "UTF-8" ); + + vep.image = im; + vep.data = NULL; + vep.current_size = 0; + vep.error = FALSE; + XML_SetUserData( parser, &vep ); + + XML_SetElementHandler( parser, + parser_element_start_handler, parser_element_end_handler ); + XML_SetCharacterDataHandler( parser, parser_data_handler ); + + if( parser_read_fd( parser, im->fd ) || + vep.error ) { + g_free( vep.data ); + XML_ParserFree( parser ); + return( -1 ); + } + + g_free( vep.data ); + XML_ParserFree( parser ); + + return( 0 ); } #define MAX_STRSIZE (32768) /* Max size of text for stack strings */