libvips/libsrc/format/im_csv2vips.c

317 lines
6.6 KiB
C
Raw Normal View History

2007-08-29 18:23:50 +02:00
/* Read a csv file.
*
* 19/12/05 JC
* - hacked from ppm reader
* 11/9/06
* - now distingushes whitespace and separators, so we can have blank
* fields
* 20/9/06
* - oop, unquoted trailing columns could get missed
* 17/5/07
* - added im_csv2vips_header()
*/
/*
This file is part of VIPS.
VIPS is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <vips/intl.h>
#include <ctype.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <vips/vips.h>
#ifdef WITH_DMALLOC
#include <dmalloc.h>
#endif /*WITH_DMALLOC*/
static int
skip_line( FILE *fp )
{
int ch;
while( (ch = fgetc( fp )) != '\n' && ch != EOF )
;
return( ch );
}
static int
skip_white( FILE *fp, const char whitemap[256] )
{
int ch;
do {
ch = fgetc( fp );
} while (ch != EOF && ch != '\n' && whitemap[ch] );
ungetc( ch, fp );
return( ch );
}
static int
skip_to_sep( FILE *fp, const char sepmap[256] )
{
int ch;
do {
ch = fgetc( fp );
} while (ch != EOF && ch != '\n' && !sepmap[ch] );
ungetc( ch, fp );
return( ch );
}
/* Read a single item. Syntax is:
*
* item : whitespace* double? whitespace* [EOF|EOL|separator]
*
* Return the char that caused failure on fail (EOF or \n).
*/
static int
read_double( FILE *fp, const char whitemap[256], const char sepmap[256],
int lineno, int colno, double *out )
{
int ch;
/* The fscanf() may change this ... but all other cases need a zero.
*/
*out = 0;
ch = skip_white( fp, whitemap );
if( ch == EOF || ch == '\n' )
return( ch );
if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) {
/* Only a warning, since (for example) exported spreadsheets
* will often have text or date fields.
*/
im_warn( "im_csv2vips",
_( "error parsing number, line %d, column %d" ),
lineno, colno );
/* Step over the bad data to the next separator.
*/
ch = skip_to_sep( fp, sepmap );
}
/* Don't need to check result, we have read a field successfully.
*/
ch = skip_white( fp, whitemap );
/* If it's a separator, we have to step over it.
*/
if( ch != EOF && sepmap[ch] )
(void) fgetc( fp );
return( 0 );
}
static int
read_csv( FILE *fp, IMAGE *out,
int start_skip,
const char *whitespace, const char *separator,
int lines )
{
int i;
char whitemap[256];
char sepmap[256];
const char *p;
fpos_t pos;
int columns;
int ch;
double d;
double *buf;
int y;
/* Make our char maps.
*/
for( i = 0; i < 256; i++ ) {
whitemap[i] = 0;
sepmap[i] = 0;
}
for( p = whitespace; *p; p++ )
whitemap[(int) *p] = 1;
for( p = separator; *p; p++ )
sepmap[(int) *p] = 1;
/* Skip first few lines.
*/
for( i = 0; i < start_skip; i++ )
if( skip_line( fp ) == EOF ) {
im_error( "im_csv2vips",
_( "end of file while skipping start" ) );
return( -1 );
}
/* Parse the first line to get number of columns. Only bother checking
* fgetpos() the first time we use it: assume it's working after this.
*/
if( fgetpos( fp, &pos ) ) {
im_error_system( errno, "im_csv2vips", _( "unable to seek" ) );
return( -1 );
}
for( columns = 0; (ch = read_double( fp, whitemap, sepmap,
start_skip + 1, columns + 1, &d )) == 0; columns++ )
;
fsetpos( fp, &pos );
if( columns == 0 ) {
im_error( "im_csv2vips", _( "empty line" ) );
return( -1 );
}
if( ch == -2 )
/* Failed to parse a number.
*/
return( -1 );
/* If lines is -1, we have to parse the whole file to get the
* number of lines out.
*/
if( lines == -1 ) {
fgetpos( fp, &pos );
for( lines = 0; skip_line( fp ) != EOF; lines++ )
;
fsetpos( fp, &pos );
}
im_initdesc( out, columns, lines, 1,
IM_BBITS_DOUBLE, IM_BANDFMT_DOUBLE,
IM_CODING_NONE, IM_TYPE_B_W, 1.0, 1.0, 0, 0 );
if( im_outcheck( out ) || im_setupout( out ) ||
!(buf = IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( out ), double )) )
return( -1 );
for( y = 0; y < lines; y++ ) {
int x;
for( x = 0; x < columns; x++ ) {
ch = read_double( fp, whitemap, sepmap,
y + start_skip + 1, x + 1, &d );
if( ch == EOF ) {
im_error( "im_csv2vips",
_( "unexpected end of file" ) );
return( -1 );
}
else if( ch == '\n' ) {
im_error( "im_csv2vips",
_( "unexpected end of line" ) );
return( -1 );
}
else if( ch )
/* Parse error.
*/
return( -1 );
buf[x] = d;
}
if( im_writeline( y, out, (PEL *) buf ) )
return( -1 );
/* Skip over the '\n' to the next line.
*/
skip_line( fp );
}
return( 0 );
}
int
im_csv2vips( const char *filename, IMAGE *out )
{
/* Read options.
*/
int start_skip = 0;
char *whitespace = " \"";
char *separator = ";,\t";
int lines = -1;
char name[FILENAME_MAX];
char mode[FILENAME_MAX];
char *p, *q, *r;
FILE *fp;
/* Parse mode string.
*/
im_filename_split( filename, name, mode );
p = &mode[0];
while( (q = im_getnextoption( &p )) ) {
if( im_isprefix( "ski", q ) && (r = im_getsuboption( q )) )
start_skip = atoi( r );
else if( im_isprefix( "whi", q ) && (r = im_getsuboption( q )) )
whitespace = r;
else if( im_isprefix( "sep", q ) && (r = im_getsuboption( q )) )
separator = r;
else if( im_isprefix( "lin", q ) && (r = im_getsuboption( q )) )
lines = atoi( r );
}
if( !(fp = fopen( name, "r" )) ) {
im_error( "im_csv2vips",
_( "unable to open \"%s\"" ), name );
return( -1 );
}
if( read_csv( fp, out, start_skip, whitespace, separator, lines ) ) {
fclose( fp );
return( -1 );
}
fclose( fp );
return( 0 );
}
/* We can't just read the header of a CSV. Instead, we read to a temp image,
* then copy just the header to the output.
*/
int
im_csv2vips_header( const char *filename, IMAGE *out )
{
IMAGE *t;
if( !(t = im_open( "im_csv2vips_header", "p" )) )
return( -1 );
if( im_csv2vips( filename, t ) ||
im_cp_desc( out, t ) ) {
im_close( t );
return( -1 );
}
im_close( t );
return( 0 );
}