use expat for xml read

we were using libxml for xml load, use expat instead, we get it for free
with glib
This commit is contained in:
John Cupitt 2017-02-25 13:07:43 +00:00
parent dc2b567ee2
commit 134ce0560c
3 changed files with 182 additions and 184 deletions

View File

@ -34,6 +34,8 @@
- vips_region_shrink() knows about alpha, helps dzsave and tiffsave
- vips_sequential() no longer stalls ahead threads, instead we constrain sinks
more tightly ... this makes seq more versatile and more reliable
- use expat, not libxml, for XML load ... removes a required dependency, since
we get expat as part of glib
8/12/16 started 8.4.5
- allow libgsf-1.14.26 to help centos, thanks tdiprima

View File

@ -77,14 +77,6 @@ extern "C" {
*/
#define VIPS_META_ICC_NAME "icc-profile-data"
/**
* VIPS_META_XML:
*
* The original XML that was used to code the metadata after reading a VIPS
* format file.
*/
#define VIPS_META_XML "xml-header"
/**
* VIPS_META_IMAGEDESCRIPTION:
*

View File

@ -17,6 +17,8 @@
* image.c ... this file now just does read / write to disc
* 28/3/11
* - moved to vips_ namespace
* 25/2/17
* - use expat for xml read
*/
/*
@ -435,9 +437,9 @@ vips__read_extension_block( VipsImage *im, int *size )
psize = image_pixel_length( im );
g_assert( im->file_length > 0 );
if( im->file_length - psize > 10 * 1024 * 1024 ) {
if( im->file_length - psize > 100 * 1024 * 1024 ) {
vips_error( "VipsImage",
"%s", _( "more than a 10 megabytes of XML? "
"%s", _( "more than 100 megabytes of XML? "
"sufferin' succotash!" ) );
return( NULL );
}
@ -457,74 +459,108 @@ vips__read_extension_block( VipsImage *im, int *size )
return( buf );
}
/* Read everything after the pixels into memory.
FIXME ... why can't we use xmlParserInputBufferCreateFd and parse
directly from the fd rather than having to read the stupid thing into
memory
the libxml API docs are impossible to decipher
*/
static xmlDoc *
read_xml( VipsImage *im )
static int
parser_read_fd( XML_Parser parser, int fd )
{
void *buf;
int size;
xmlDoc *doc;
xmlNode *node;
const int chunk_size = 1024;
if( !(buf = vips__read_extension_block( im, &size )) )
return( NULL );
if( !(doc = xmlParseMemory( buf, size )) ) {
vips_free( buf );
return( NULL );
}
vips_free( buf );
if( !(node = xmlDocGetRootElement( doc )) ||
!node->nsDef ||
!vips_isprefix( NAMESPACE, (char *) node->nsDef->href ) ) {
vips_error( "VipsImage",
"%s", _( "incorrect namespace in XML" ) );
xmlFreeDoc( doc );
return( NULL );
}
ssize_t bytes_read;
do {
void *buf;
if( !(buf = XML_GetBuffer( parser, chunk_size )) ) {
vips_error( "VipsImage",
"%s", _( "unable to allocate read buffer" ) );
return( -1 );
}
bytes_read = read( fd, buf, chunk_size );
if( bytes_read == (ssize_t) -1 ) {
vips_error( "VipsImage",
"%s", _( "read error while fetching XML" ) );
return( -1 );
}
if( !XML_ParseBuffer( parser, bytes_read, bytes_read == 0 ) ) {
vips_error( "VipsImage",
"%s", _( "XML parse error" ) );
return( -1 );
}
} while( bytes_read > 0 );
return( 0 );
}
#define MAX_PARSE_ATTR (256)
/* What we track during expat parse.
*/
typedef struct _VipsExpatParse {
VipsImage *image;
/* TRUE for in header section.
*/
gboolean header;
/* For the current node, the type and name.
*/
XML_Char type[MAX_PARSE_ATTR];
XML_Char name[MAX_PARSE_ATTR];
/* Accumulate data here.
*/
char *data;
size_t current_size;
gboolean error;
} VipsExpatParse;
static void
parser_element_start_handler( void *user_data,
const XML_Char *name, const XML_Char **atts )
{
VipsExpatParse *vep = (VipsExpatParse *) user_data;
const XML_Char **p;
#ifdef DEBUG
printf( "read_xml: namespace == %s\n", node->nsDef->href );
printf( "parser_element_start: %s\n", name );
for( p = atts; *p; p += 2 )
printf( "%s = %s\n", p[0], p[1] );
#endif /*DEBUG*/
return( doc );
if( strcmp( name, "field" ) == 0 ) {
for( p = atts; *p; p += 2 ) {
if( strcmp( p[0], "name" ) == 0 )
vips_strncpy( vep->name, p[1], MAX_PARSE_ATTR );
if( strcmp( p[0], "type" ) == 0 )
vips_strncpy( vep->type, p[1], MAX_PARSE_ATTR );
}
vep->current_size = 0;
}
else if( strcmp( name, "header" ) == 0 )
vep->header = TRUE;
else if( strcmp( name, "meta" ) == 0 )
vep->header = FALSE;
else if( strcmp( name, "root" ) == 0 ) {
for( p = atts; *p; p += 2 )
if( strcmp( p[0], "xmlns" ) == 0 &&
!vips_isprefix( NAMESPACE, p[1] ) ) {
vips_error( "VipsImage", "%s",
_( "incorrect namespace in XML" ) );
vep->error = TRUE;
}
}
}
/* Find the first child node with a name.
*/
static xmlNode *
get_node( xmlNode *base, const char *name )
static void
parser_append( VipsExpatParse *vep, const XML_Char *data, int len )
{
xmlNode *i;
size_t new_size = vep->current_size + len;
for( i = base->children; i; i = i->next )
if( strcmp( (char *) i->name, name ) == 0 )
return( i );
return( NULL );
}
/* Read a string property to a buffer. TRUE for success.
*/
static int
get_sprop( xmlNode *xnode, const char *name, char *buf, int sz )
{
char *value = (char *) xmlGetProp( xnode, (xmlChar *) name );
if( !value )
return( 0 );
vips_strncpy( buf, value, sz );
VIPS_FREEF( xmlFree, value );
return( 1 );
vep->data = g_realloc( vep->data, new_size );
memcpy( vep->data + vep->current_size, data, len );
vep->current_size = new_size;
}
/* Chop history into lines, add each one as a refstring.
@ -556,119 +592,76 @@ set_history( VipsImage *im, char *history )
im->history_list = g_slist_reverse( history_list );
}
/* Load header fields.
*/
static int
rebuild_header_builtin( VipsImage *im, xmlNode *i )
set_meta( VipsImage *image, GType gtype, const char *name, const char *data )
{
char name[256];
GValue save_value = { 0 };
GValue value = { 0 };
if( get_sprop( i, "name", name, 256 ) ) {
if( strcmp( name, "Hist" ) == 0 ) {
char *history;
g_value_init( &save_value, VIPS_TYPE_SAVE_STRING );
vips_value_set_save_string( &save_value, data );
/* Have to take (another) copy, since we need to free
* with xmlFree().
g_value_init( &value, gtype );
if( !g_value_transform( &save_value, &value ) ) {
g_value_unset( &save_value );
vips_error( "VipsImage", "%s",
_( "error transforming from save format" ) );
return( -1 );
}
vips_image_set( image, name, &value );
g_value_unset( &save_value );
g_value_unset( &value );
return( 0 );
}
static void
parser_element_end_handler( void *user_data, const XML_Char *name )
{
VipsExpatParse *vep = (VipsExpatParse *) user_data;
#ifdef DEBUG
printf( "parser_element_end_handler: %s\n", name );
#endif /*DEBUG*/
if( strcmp( name, "field" ) == 0 ) {
parser_append( vep, "", 1 );
#ifdef DEBUG
printf( "parser_element_end_handler: %zd bytes\n",
vep->current_size );
#endif /*DEBUG*/
if( vep->header ) {
if( strcmp( name, "Hist" ) == 0 )
set_history( vep->image, vep->data );
}
else {
GType gtype = g_type_from_name( vep->type );
/* Can we convert from VIPS_SAVE_STRING to type?
*/
history = (char *) xmlNodeGetContent( i );
set_history( im, history );
xmlFree( history );
if( gtype &&
g_value_type_transformable(
VIPS_TYPE_SAVE_STRING, gtype ) &&
set_meta( vep->image,
gtype, vep->name, vep->data ) )
vep->error = TRUE;
}
}
return( 0 );
}
/* Load meta fields.
*/
static int
rebuild_header_meta( VipsImage *im, xmlNode *i )
static void
parser_data_handler( void *user_data, const XML_Char *data, int len )
{
char name[256];
char type[256];
VipsExpatParse *vep = (VipsExpatParse *) user_data;
if( get_sprop( i, "name", name, 256 ) &&
get_sprop( i, "type", type, 256 ) ) {
GType gtype = g_type_from_name( type );
#ifdef DEBUG
printf( "parser_data_handler: %d bytes\n", len );
#endif /*DEBUG*/
/* Can we convert from VIPS_SAVE_STRING to type?
*/
if( gtype &&
g_value_type_transformable(
VIPS_TYPE_SAVE_STRING, gtype ) ) {
char *content;
GValue save_value = { 0 };
GValue value = { 0 };
content = (char *) xmlNodeGetContent( i );
g_value_init( &save_value, VIPS_TYPE_SAVE_STRING );
vips_value_set_save_string( &save_value, content );
xmlFree( content );
g_value_init( &value, gtype );
if( !g_value_transform( &save_value, &value ) ) {
g_value_unset( &save_value );
vips_error( "VipsImage",
"%s", _( "error transforming from "
"save format" ) );
return( -1 );
}
vips_image_set( im, name, &value );
g_value_unset( &save_value );
g_value_unset( &value );
}
}
return( 0 );
}
static xmlDoc *
get_xml( VipsImage *im )
{
if( vips_image_get_typeof( im, VIPS_META_XML ) ) {
xmlDoc *doc;
if( vips_image_get_area( im, VIPS_META_XML, (void *) &doc ) )
return( NULL );
return( doc );
}
return( NULL );
}
/* Rebuild header fields that depend on stuff saved in xml.
*/
static int
rebuild_header( VipsImage *im )
{
xmlDoc *doc;
if( (doc = get_xml( im )) ) {
xmlNode *root;
xmlNode *block;
if( !(root = xmlDocGetRootElement( doc )) )
return( -1 );
if( (block = get_node( root, "header" )) ) {
xmlNode *i;
for( i = block->children; i; i = i->next )
if( strcmp( (char *) i->name, "field" ) == 0 )
if( rebuild_header_builtin( im, i ) )
return( -1 );
}
if( (block = get_node( root, "meta" )) ) {
xmlNode *i;
for( i = block->children; i; i = i->next )
if( strcmp( (char *) i->name, "field" ) == 0 )
if( rebuild_header_meta( im, i ) )
return( -1 );
}
}
return( 0 );
parser_append( vep, data, len );
}
/* Called at the end of vips open ... get any XML after the pixel data
@ -677,24 +670,35 @@ rebuild_header( VipsImage *im )
static int
readhist( VipsImage *im )
{
/* Junk any old xml meta.
*/
if( vips_image_get_typeof( im, VIPS_META_XML ) )
vips_image_set_area( im, VIPS_META_XML, NULL, NULL );
XML_Parser parser;
VipsExpatParse vep;
if( vips__has_extension_block( im ) ) {
xmlDoc *doc;
if( !(doc = read_xml( im )) )
return( -1 );
vips_image_set_area( im, VIPS_META_XML,
(VipsCallbackFn) xmlFreeDoc, doc );
}
if( rebuild_header( im ) )
if( vips__seek( im->fd, image_pixel_length( im ) ) )
return( -1 );
return( 0 );
parser = XML_ParserCreate( "UTF-8" );
vep.image = im;
vep.data = NULL;
vep.current_size = 0;
vep.error = FALSE;
XML_SetUserData( parser, &vep );
XML_SetElementHandler( parser,
parser_element_start_handler, parser_element_end_handler );
XML_SetCharacterDataHandler( parser, parser_data_handler );
if( parser_read_fd( parser, im->fd ) ||
vep.error ) {
g_free( vep.data );
XML_ParserFree( parser );
return( -1 );
}
g_free( vep.data );
XML_ParserFree( parser );
return( 0 );
}
#define MAX_STRSIZE (32768) /* Max size of text for stack strings */