libvips/libvips/foreign/csvload.c

767 lines
18 KiB
C

/* load csv from a file
*
* 5/12/11
* - from csvload.c
* 21/2/20
* - rewrite for new source API
*/
/*
This file is part of VIPS.
VIPS is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
/*
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
*/
/*
#define DEBUG
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <glib/gi18n-lib.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <vips/vips.h>
#include <vips/buf.h>
#include <vips/internal.h>
#include "pforeign.h"
/* The largest item we can read. It only needs to be big enough for a double.
*
* This is the size in bytes of the item buffer in the loader struct. One byte
* is kept back for the \0 terminator; characters of an item beyond that are
* read from the input but not stored.
*/
#define MAX_ITEM_SIZE (256)
/* Base class state for the csv loaders: where we read from, the load
* options, and the scratch buffers used during parsing.
*/
typedef struct _VipsForeignLoadCsv {
VipsForeignLoad parent_object;
/* Set by subclasses.
*/
VipsSource *source;
/* Buffered source.
*/
VipsSbuf *sbuf;
/* Load options.
*
* @skip is the number of lines to discard at the start of the input.
* @lines is the number of lines to read, with -1 meaning scan to the end
* of the input. @whitespace and @separator are the sets of characters
* treated as whitespace and as field separators.
*/
int skip;
int lines;
const char *whitespace;
const char *separator;
/* Current position in file for error messages.
*/
int lineno;
int colno;
/* Our whitespace and separator strings turned into LUTs.
*
* Indexed by character code: an entry is 1 if that character is in the
* set. The '\n' entries are always cleared when the maps are built.
*/
char whitemap[256];
char sepmap[256];
/* Fetch items into this buffer. It just needs to be large enough for
* a double.
*/
char item[MAX_ITEM_SIZE];
/* A line of pixels.
*
* One double per image column. Allocated when the header is read and
* freed on dispose.
*/
double *linebuf;
} VipsForeignLoadCsv;
/* We add no class members, so the parent class struct is reused as-is.
*/
typedef VipsForeignLoadClass VipsForeignLoadCsvClass;
/* Abstract type: concrete subclasses set @source before chaining up to
* build.
*/
G_DEFINE_ABSTRACT_TYPE( VipsForeignLoadCsv, vips_foreign_load_csv,
VIPS_TYPE_FOREIGN_LOAD );
/* GObject dispose handler: release the source, the buffered reader and the
 * line buffer held by this loader, then chain up to the parent class.
 */
static void
vips_foreign_load_csv_dispose( GObject *gobject )
{
	VipsForeignLoadCsv *self = (VipsForeignLoadCsv *) gobject;
	GObjectClass *parent =
		G_OBJECT_CLASS( vips_foreign_load_csv_parent_class );

	VIPS_UNREF( self->source );
	VIPS_UNREF( self->sbuf );
	VIPS_FREE( self->linebuf );

	parent->dispose( gobject );
}
/* Build the base csv loader.
 *
 * Wraps the source set by the subclass in a buffered reader, turns the
 * @whitespace and @separator option strings into lookup tables, then chains
 * up to the parent build.
 *
 * Returns: 0 on success, -1 on error.
 */
static int
vips_foreign_load_csv_build( VipsObject *object )
{
	VipsForeignLoadCsv *csv = (VipsForeignLoadCsv *) object;

	const char *p;

	if( !(csv->sbuf = vips_sbuf_new_from_source( csv->source )) )
		return( -1 );

	/* Make our char maps.
	 */
	memset( csv->whitemap, 0, sizeof( csv->whitemap ) );
	memset( csv->sepmap, 0, sizeof( csv->sepmap ) );

	/* Index through unsigned char: plain char may be signed, and a byte
	 * of 0x80 or above would then become a negative index and write
	 * outside the map. A NULL option string is treated as an empty set.
	 */
	for( p = csv->whitespace; p && *p; p++ )
		csv->whitemap[(unsigned char) *p] = 1;
	for( p = csv->separator; p && *p; p++ )
		csv->sepmap[(unsigned char) *p] = 1;

	/* \n must not be in the maps or we'll get very confused.
	 */
	csv->sepmap[(int) '\n'] = 0;
	csv->whitemap[(int) '\n'] = 0;

	if( VIPS_OBJECT_CLASS( vips_foreign_load_csv_parent_class )->
		build( object ) )
		return( -1 );

	return( 0 );
}
/* Loader flags for an open csv loader: none are set.
 */
static VipsForeignFlags
vips_foreign_load_csv_get_flags( VipsForeignLoad *load )
{
	const VipsForeignFlags no_flags = 0;

	return( no_flags );
}
/* Consume whitespace characters up to the next character that is not in the
 * whitespace set. That character is pushed back, so the next read sees it
 * again.
 *
 * Result: the character we stopped on -- !white, \n, or EOF.
 */
static int
vips_foreign_load_csv_skip_white( VipsForeignLoadCsv *csv )
{
	int c;

	for(;;) {
		c = VIPS_SBUF_GETC( csv->sbuf );

		/* Stop on end of input, end of line, or anything that is
		 * not whitespace.
		 */
		if( c == EOF ||
			c == '\n' ||
			!csv->whitemap[c] )
			break;
	}

	VIPS_SBUF_UNGETC( csv->sbuf );

	return( c );
}
/* Called just after an opening double-quote has been read. Consume
 * characters up to and including the matching closing quote.
 *
 * A backslash swallows the character after it, so \" does not close the
 * string. A string that is still open at the end of the line stops there:
 * the \n is pushed back and we never run on into the next line.
 *
 * Result: the character we stopped on -- ", \n, or EOF.
 */
static int
vips_foreign_load_csv_skip_quoted( VipsForeignLoadCsv *csv )
{
	int c;

	for(;;) {
		c = VIPS_SBUF_GETC( csv->sbuf );

		/* An unescaped quote closes the string.
		 */
		if( c == '"' )
			break;

		/* Backslash: take the next character too, whatever it is.
		 */
		if( c == '\\' )
			c = VIPS_SBUF_GETC( csv->sbuf );

		if( c == EOF ||
			c == '\n' )
			break;
	}

	/* Leave the newline for the caller to see.
	 */
	if( c == '\n' )
		VIPS_SBUF_UNGETC( csv->sbuf );

	return( c );
}
/* Fetch the next item (not whitespace, separator or \n), as a string. The
 * returned string is valid until the next call to fetch item. NULL for EOF.
 *
 * At most MAX_ITEM_SIZE - 1 characters are stored; the rest of an over-long
 * item is read and discarded. The character that ends the item (whitespace,
 * separator or \n) is pushed back for the caller.
 */
static const char *
vips_foreign_load_csv_fetch_item( VipsForeignLoadCsv *csv )
{
	int write_point;
	int space_remaining;
	int ch;

	/* -1 so there's space for the \0 terminator.
	 */
	space_remaining = MAX_ITEM_SIZE - 1;
	write_point = 0;

	while( (ch = VIPS_SBUF_GETC( csv->sbuf )) != -1 &&
		ch != '\n' &&
		!csv->whitemap[ch] &&
		!csv->sepmap[ch] &&
		space_remaining > 0 ) {
		csv->item[write_point] = ch;
		write_point += 1;
		space_remaining -= 1;
	}
	csv->item[write_point] = '\0';

	/* If we hit EOF immediately, return EOF.
	 */
	if( ch == -1 &&
		write_point == 0 )
		return( NULL );

	/* If we filled the item buffer without seeing the end of the item,
	 * read up to the item end.
	 */
	while( ch != -1 &&
		ch != '\n' &&
		!csv->whitemap[ch] &&
		!csv->sepmap[ch] )
		ch = VIPS_SBUF_GETC( csv->sbuf );

	/* We've (probably) read the end of item character. Push it back.
	 *
	 * Test for EOF first: ch is -1 there, and using it to index the
	 * maps would read outside them.
	 */
	if( ch != -1 &&
		(ch == '\n' ||
		 csv->whitemap[ch] ||
		 csv->sepmap[ch]) )
		VIPS_SBUF_UNGETC( csv->sbuf );

	return( csv->item );
}
/* Parse one element from the input and leave its numeric value in @out.
 *
 * The grammar is:
 *
 * element :
 *	whitespace* item whitespace* [EOF|EOL|separator]
 *
 * item :
 *	double |
 *	"anything" |
 *	empty
 *
 * Quoted text may contain separators and whitespace, and may contain "
 * escaped with \. Quoted and empty items leave @out at 0. Text that does
 * not parse as a number only produces a warning.
 *
 * A trailing separator is consumed; a trailing \n is left in the input.
 *
 * Result: the character that ended the element -- sep, \n, or EOF.
 */
static int
vips_foreign_load_csv_read_double( VipsForeignLoadCsv *csv, double *out )
{
	int next;

	/* Every path except a successful strtod wants a zero here.
	 */
	*out = 0;

	next = vips_foreign_load_csv_skip_white( csv );
	if( next == EOF ||
		next == '\n' )
		return( next );

	if( next == '"' ) {
		/* Step over the opening quote, then skip the string body.
		 */
		(void) VIPS_SBUF_GETC( csv->sbuf );
		(void) vips_foreign_load_csv_skip_quoted( csv );
	}
	else if( !csv->sepmap[next] ) {
		const char *text;

		text = vips_foreign_load_csv_fetch_item( csv );
		if( text == NULL )
			return( EOF );

		/* Only a warning, since (for example) exported
		 * spreadsheets will often have text or date fields.
		 */
		if( vips_strtod( text, out ) != 0 )
			g_warning( _( "bad number, line %d, column %d" ),
				csv->lineno, csv->colno );
	}

	next = vips_foreign_load_csv_skip_white( csv );

	/* A separator ends this element, so step over it. EOF and \n are
	 * returned without consuming anything.
	 */
	if( next != EOF &&
		next != '\n' &&
		csv->sepmap[next] )
		(void) VIPS_SBUF_GETC( csv->sbuf );

	return( next );
}
/* Work out the image dimensions and set up load->out.
 *
 * The width is the number of elements on the first line after the skipped
 * lines. The height is @lines, or, when @lines is -1, the number of lines
 * from there to the end of the input. The line buffer is allocated here.
 *
 * Returns: 0 on success, -1 on error.
 */
static int
vips_foreign_load_csv_header( VipsForeignLoad *load )
{
	VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( load );
	VipsForeignLoadCsv *csv = (VipsForeignLoadCsv *) load;

	int n_skipped;
	int n_columns;
	int n_rows;
	int last;
	double unused;

	/* Start again from the beginning of the source.
	 */
	vips_sbuf_unbuffer( csv->sbuf );
	if( vips_source_rewind( csv->source ) )
		return( -1 );

	/* Discard the leading lines we were asked to skip.
	 */
	n_skipped = 0;
	while( n_skipped < csv->skip ) {
		if( !vips_sbuf_get_line( csv->sbuf ) ) {
			vips_error( class->nickname,
				"%s", _( "unexpected end of file" ) );
			return( -1 );
		}

		n_skipped += 1;
	}

	/* Count the elements on the first line: that's our width.
	 */
	csv->lineno = csv->skip + 1;
	csv->colno = 0;
	for(;;) {
		csv->colno += 1;
		last = vips_foreign_load_csv_read_double( csv, &unused );

		if( last == '\n' ||
			last == EOF )
			break;
	}
	n_columns = csv->colno;

	csv->linebuf = VIPS_ARRAY( NULL, n_columns, double );
	if( !csv->linebuf )
		return( -1 );

	/* With @lines at -1, the height is however many lines remain.
	 */
	if( csv->lines != -1 )
		n_rows = csv->lines;
	else {
		n_rows = 0;
		while( vips_sbuf_get_line( csv->sbuf ) )
			n_rows += 1;
	}

	vips_image_init_fields( load->out,
		n_columns, n_rows, 1,
		VIPS_FORMAT_DOUBLE,
		VIPS_CODING_NONE, VIPS_INTERPRETATION_B_W, 1.0, 1.0 );
	if( vips_image_pipelinev( load->out,
		VIPS_DEMAND_STYLE_THINSTRIP, NULL ) )
		return( -1 );

	VIPS_SETSTR( load->out->filename,
		vips_connection_filename( VIPS_CONNECTION( csv->source ) ) );

	return( 0 );
}
/* Read the pixel data into load->real.
*
* Rewinds the source, skips the leading lines again, then parses
* Xsize elements for each of Ysize rows and writes each row to the image.
* The dimensions are taken from load->out.
*
* Returns: 0 on success, -1 on error.
*/
static int
vips_foreign_load_csv_load( VipsForeignLoad *load )
{
VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( load );
VipsForeignLoadCsv *csv = (VipsForeignLoadCsv *) load;
int i;
int x, y;
int ch;
/* Rewind.
*/
vips_sbuf_unbuffer( csv->sbuf );
if( vips_source_rewind( csv->source ) )
return( -1 );
/* Skip the first few lines.
*/
for( i = 0; i < csv->skip; i++ )
if( !vips_sbuf_get_line( csv->sbuf ) ) {
vips_error( class->nickname,
"%s", _( "unexpected end of file" ) );
return( -1 );
}
/* The real image has the same size and format as the header image.
*/
vips_image_init_fields( load->real,
load->out->Xsize, load->out->Ysize, 1,
VIPS_FORMAT_DOUBLE,
VIPS_CODING_NONE, VIPS_INTERPRETATION_B_W, 1.0, 1.0 );
if( vips_image_pipelinev( load->real,
VIPS_DEMAND_STYLE_THINSTRIP, NULL ) )
return( -1 );
csv->lineno = csv->skip + 1;
for( y = 0; y < load->real->Ysize; y++ ) {
csv->colno = 0;
/* Not needed, but stops a used-before-set compiler warning.
*/
ch = EOF;
/* Some lines may be shorter.
*/
memset( csv->linebuf, 0, load->real->Xsize * sizeof( double ) );
for( x = 0; x < load->real->Xsize; x++ ) {
double value;
csv->colno += 1;
ch = vips_foreign_load_csv_read_double( csv, &value );
/* Running out of input is only fatal if the caller asked us to
* fail on truncation; otherwise carry on with value as set by
* the reader.
*/
if( ch == EOF &&
load->fail_on >= VIPS_FAIL_ON_TRUNCATED ) {
vips_error( class->nickname,
"%s", _( "unexpected end of file" ) );
return( -1 );
}
/* The line ended before the last column. The newline is not
* consumed by the reader, so when we don't fail, the remaining
* columns of this row hit this test again.
*/
if( ch == '\n' &&
x != load->real->Xsize - 1 ) {
vips_error( class->nickname,
_( "line %d has only %d columns" ),
csv->lineno, csv->colno );
/* Unequal length lines, but no EOF.
*/
if( load->fail_on >= VIPS_FAIL_ON_ERROR )
return( -1 );
}
csv->linebuf[x] = value;
}
/* Step over the line separator.
*
* NOTE(review): if the row ended on a separator rather than a newline
* (a line with more than Xsize elements), nothing here skips the rest
* of that line -- confirm that is intended.
*/
if( ch == '\n' ) {
(void) VIPS_SBUF_GETC( csv->sbuf );
csv->lineno += 1;
}
if( vips_image_write_line( load->real, y,
(VipsPel *) csv->linebuf ) )
return( -1 );
}
return( 0 );
}
/* Class setup for the abstract csv loader: install the GObject and vips
 * virtual methods and declare the four load options shared by all csv
 * loaders.
 */
static void
vips_foreign_load_csv_class_init( VipsForeignLoadCsvClass *class )
{
	GObjectClass *gobject_class = G_OBJECT_CLASS( class );
	VipsObjectClass *object_class = (VipsObjectClass *) class;
	VipsOperationClass *operation_class = VIPS_OPERATION_CLASS( class );
	VipsForeignLoadClass *load_class = (VipsForeignLoadClass *) class;

	gobject_class->dispose = vips_foreign_load_csv_dispose;
	gobject_class->set_property = vips_object_set_property;
	gobject_class->get_property = vips_object_get_property;

	object_class->nickname = "csvload_base";
	object_class->description = _( "load csv" );
	object_class->build = vips_foreign_load_csv_build;

	/* This is fuzzed, but you're unlikely to want to use it on
	 * untrusted files.
	 */
	operation_class->flags |= VIPS_OPERATION_UNTRUSTED;

	load_class->get_flags = vips_foreign_load_csv_get_flags;
	load_class->header = vips_foreign_load_csv_header;
	load_class->load = vips_foreign_load_csv_load;

	VIPS_ARG_INT( class, "skip", 20,
		_( "Skip" ),
		_( "Skip this many lines at the start of the file" ),
		VIPS_ARGUMENT_OPTIONAL_INPUT,
		G_STRUCT_OFFSET( VipsForeignLoadCsv, skip ),
		0, 10000000, 0 );

	/* The declared default is -1 (read to end of file), matching the
	 * value the instance init function sets.
	 */
	VIPS_ARG_INT( class, "lines", 21,
		_( "Lines" ),
		_( "Read this many lines from the file" ),
		VIPS_ARGUMENT_OPTIONAL_INPUT,
		G_STRUCT_OFFSET( VipsForeignLoadCsv, lines ),
		-1, 10000000, -1 );

	VIPS_ARG_STRING( class, "whitespace", 22,
		_( "Whitespace" ),
		_( "Set of whitespace characters" ),
		VIPS_ARGUMENT_OPTIONAL_INPUT,
		G_STRUCT_OFFSET( VipsForeignLoadCsv, whitespace ),
		" " );

	VIPS_ARG_STRING( class, "separator", 23,
		_( "Separator" ),
		_( "Set of separator characters" ),
		VIPS_ARGUMENT_OPTIONAL_INPUT,
		G_STRUCT_OFFSET( VipsForeignLoadCsv, separator ),
		";,\t" );
}
/* Instance defaults: separators are ; , and tab, whitespace is a single
 * space, and @lines is -1 so the whole input is read.
 */
static void
vips_foreign_load_csv_init( VipsForeignLoadCsv *csv )
{
	csv->separator = g_strdup( ";,\t" );
	csv->whitespace = g_strdup( " " );
	csv->lines = -1;
}
/* The csv loader that reads from a named file.
*/
typedef struct _VipsForeignLoadCsvFile {
VipsForeignLoadCsv parent_object;
/* Filename for load.
*/
char *filename;
} VipsForeignLoadCsvFile;
/* No class members of our own, so reuse the base csv class struct.
*/
typedef VipsForeignLoadCsvClass VipsForeignLoadCsvFileClass;
G_DEFINE_TYPE( VipsForeignLoadCsvFile, vips_foreign_load_csv_file,
vips_foreign_load_csv_get_type() );
/* Loader flags for a csv file, given only its name: none are set and the
 * file is not examined.
 */
static VipsForeignFlags
vips_foreign_load_csv_file_get_flags_filename( const char *filename )
{
	const VipsForeignFlags no_flags = 0;

	return( no_flags );
}
/* Build the file loader: open a source on @filename, if one has been set,
 * and hand it to the base class, then chain up.
 *
 * Returns: 0 on success, -1 on error.
 */
static int
vips_foreign_load_csv_file_build( VipsObject *object )
{
	VipsForeignLoadCsvFile *self = (VipsForeignLoadCsvFile *) object;
	VipsForeignLoadCsv *base = (VipsForeignLoadCsv *) object;

	if( self->filename &&
		!(base->source =
			vips_source_new_from_file( self->filename )) )
		return( -1 );

	return( VIPS_OBJECT_CLASS( vips_foreign_load_csv_file_parent_class )->
		build( object ) ? -1 : 0 );
}
/* Filename suffixes handled by the csv file loader, NULL-terminated.
 */
static const char *vips_foreign_load_csv_suffs[] = { ".csv", NULL };
/* Class setup for the file loader: register it as "csvload" for .csv
 * files and declare the required @filename argument.
 */
static void
vips_foreign_load_csv_file_class_init( VipsForeignLoadCsvFileClass *class )
{
	GObjectClass *gobject = G_OBJECT_CLASS( class );
	VipsObjectClass *vobject = (VipsObjectClass *) class;
	VipsForeignClass *foreign = (VipsForeignClass *) class;
	VipsForeignLoadClass *loader = (VipsForeignLoadClass *) class;

	/* The property accessors must be in place before any argument is
	 * installed below.
	 */
	gobject->get_property = vips_object_get_property;
	gobject->set_property = vips_object_set_property;

	vobject->build = vips_foreign_load_csv_file_build;
	vobject->nickname = "csvload";

	loader->get_flags_filename =
		vips_foreign_load_csv_file_get_flags_filename;

	foreign->suffs = vips_foreign_load_csv_suffs;

	VIPS_ARG_STRING( class, "filename", 1,
		_( "Filename" ),
		_( "Filename to load from" ),
		VIPS_ARGUMENT_REQUIRED_INPUT,
		G_STRUCT_OFFSET( VipsForeignLoadCsvFile, filename ),
		NULL );
}
/* Instance init for the file loader. Nothing to set up here: @filename
* is left for the caller to supply.
*/
static void
vips_foreign_load_csv_file_init( VipsForeignLoadCsvFile *file )
{
}
/* The csv loader that reads from a caller-supplied source.
*/
typedef struct _VipsForeignLoadCsvSource {
VipsForeignLoadCsv parent_object;
/* The source passed in as the "source" argument. The build function
* takes a new reference on it for the base class.
*/
VipsSource *source;
} VipsForeignLoadCsvSource;
/* No class members of our own, so reuse the base csv class struct.
*/
typedef VipsForeignLoadCsvClass VipsForeignLoadCsvSourceClass;
G_DEFINE_TYPE( VipsForeignLoadCsvSource, vips_foreign_load_csv_source,
vips_foreign_load_csv_get_type() );
/* Build the source loader: pass the caller's source, if set, on to the base
 * class with a reference of its own, then chain up.
 *
 * Returns: 0 on success, -1 on error.
 */
static int
vips_foreign_load_csv_source_build( VipsObject *object )
{
	VipsForeignLoadCsvSource *self = (VipsForeignLoadCsvSource *) object;
	VipsForeignLoadCsv *base = (VipsForeignLoadCsv *) object;

	if( self->source ) {
		/* The base class drops this reference on dispose.
		 */
		g_object_ref( self->source );
		base->source = self->source;
	}

	return( VIPS_OBJECT_CLASS( vips_foreign_load_csv_source_parent_class )->
		build( object ) ? -1 : 0 );
}
/* Sniff test for csv data in a source. This always says no.
 *
 * Detecting CSV files automatically is tricky. Define this method to
 * prevent a warning, but users will need to run the csv loader
 * explicitly.
 */
static gboolean
vips_foreign_load_csv_source_is_a_source( VipsSource *source )
{
	const gboolean detected = FALSE;

	return( detected );
}
static void
vips_foreign_load_csv_source_class_init( VipsForeignLoadCsvFileClass *class )
{
GObjectClass *gobject_class = G_OBJECT_CLASS( class );
VipsObjectClass *object_class = (VipsObjectClass *) class;
VipsOperationClass *operation_class = VIPS_OPERATION_CLASS( class );
VipsForeignLoadClass *load_class = (VipsForeignLoadClass *) class;
gobject_class->set_property = vips_object_set_property;
gobject_class->get_property = vips_object_get_property;
object_class->nickname = "csvload_source";
object_class->build = vips_foreign_load_csv_source_build;
operation_class->flags |= VIPS_OPERATION_NOCACHE;
load_class->is_a_source = vips_foreign_load_csv_source_is_a_source;
VIPS_ARG_OBJECT( class, "source", 1,
_( "Source" ),
_( "Source to load from" ),
VIPS_ARGUMENT_REQUIRED_INPUT,
G_STRUCT_OFFSET( VipsForeignLoadCsvSource, source ),
VIPS_TYPE_SOURCE );
}
/* Instance init for the source loader. Nothing to set up here: @source
* is left for the caller to supply.
*/
static void
vips_foreign_load_csv_source_init( VipsForeignLoadCsvSource *source )
{
}
/**
 * vips_csvload:
 * @filename: file to load
 * @out: (out): output image
 * @...: %NULL-terminated list of optional named arguments
 *
 * Optional arguments:
 *
 * * @skip: skip this many lines at start of file
 * * @lines: read this many lines from file
 * * @whitespace: set of whitespace characters
 * * @separator: set of separator characters
 * * @fail_on: #VipsFailOn, types of read error to fail on
 *
 * Load a CSV (comma-separated values) file. The output image is always 1
 * band (monochrome), #VIPS_FORMAT_DOUBLE. Use vips_bandfold() to turn
 * RGBRGBRGB mono images into colour images.
 *
 * Items in lines can be either floating point numbers in the C locale, or
 * strings enclosed in double-quotes ("), or empty.
 * You can use a backslash (\) within the quotes to escape special characters,
 * such as quote marks.
 *
 * @skip sets the number of lines to skip at the start of the file.
 * Default zero.
 *
 * @lines sets the number of lines to read from the file. Default -1,
 * meaning read all lines to end of file.
 *
 * @whitespace sets the skippable whitespace characters.
 * Default <emphasis>space</emphasis>.
 * Whitespace characters are always run together.
 *
 * @separator sets the characters that separate fields.
 * Default ;,<emphasis>tab</emphasis>. Separators are never run together.
 *
 * Use @fail_on to set the type of error that will cause load to fail. By
 * default, loaders are permissive, that is, #VIPS_FAIL_ON_NONE.
 *
 * See also: vips_image_new_from_file(), vips_bandfold().
 *
 * Returns: 0 on success, -1 on error.
 */
int
vips_csvload( const char *filename, VipsImage **out, ... )
{
	va_list args;
	int status;

	/* Hand the optional named arguments on to the "csvload" operation.
	 */
	va_start( args, out );
	status = vips_call_split( "csvload", args, filename, out );
	va_end( args );

	return( status );
}
/**
 * vips_csvload_source:
 * @source: source to load
 * @out: (out): output image
 * @...: %NULL-terminated list of optional named arguments
 *
 * Optional arguments:
 *
 * * @skip: skip this many lines at start of file
 * * @lines: read this many lines from file
 * * @whitespace: set of whitespace characters
 * * @separator: set of separator characters
 * * @fail_on: #VipsFailOn, types of read error to fail on
 *
 * Exactly as vips_csvload(), but read from a source.
 *
 * See also: vips_csvload().
 *
 * Returns: 0 on success, -1 on error.
 */
int
vips_csvload_source( VipsSource *source, VipsImage **out, ... )
{
	va_list args;
	int status;

	/* Hand the optional named arguments on to the "csvload_source"
	 * operation.
	 */
	va_start( args, out );
	status = vips_call_split( "csvload_source", args, source, out );
	va_end( args );

	return( status );
}