/* Read a csv file. * * 19/12/05 JC * - hacked from ppm reader * 11/9/06 * - now distingushes whitespace and separators, so we can have blank * fields * 20/9/06 * - oop, unquoted trailing columns could get missed * 17/5/07 * - added im_csv2vips_header() */ /* This file is part of VIPS. VIPS is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk */ #ifdef HAVE_CONFIG_H #include #endif /*HAVE_CONFIG_H*/ #include #include #include #include #include #include #include #include #ifdef WITH_DMALLOC #include #endif /*WITH_DMALLOC*/ static int skip_line( FILE *fp ) { int ch; while( (ch = fgetc( fp )) != '\n' && ch != EOF ) ; return( ch ); } static int skip_white( FILE *fp, const char whitemap[256] ) { int ch; do { ch = fgetc( fp ); } while (ch != EOF && ch != '\n' && whitemap[ch] ); ungetc( ch, fp ); return( ch ); } static int skip_to_sep( FILE *fp, const char sepmap[256] ) { int ch; do { ch = fgetc( fp ); } while (ch != EOF && ch != '\n' && !sepmap[ch] ); ungetc( ch, fp ); return( ch ); } /* Read a single item. Syntax is: * * item : whitespace* double? whitespace* [EOF|EOL|separator] * * Return the char that caused failure on fail (EOF or \n). */ static int read_double( FILE *fp, const char whitemap[256], const char sepmap[256], int lineno, int colno, double *out ) { int ch; /* The fscanf() may change this ... but all other cases need a zero. */ *out = 0; ch = skip_white( fp, whitemap ); if( ch == EOF || ch == '\n' ) return( ch ); if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) { /* Only a warning, since (for example) exported spreadsheets * will often have text or date fields. */ im_warn( "im_csv2vips", _( "error parsing number, line %d, column %d" ), lineno, colno ); /* Step over the bad data to the next separator. */ ch = skip_to_sep( fp, sepmap ); } /* Don't need to check result, we have read a field successfully. */ ch = skip_white( fp, whitemap ); /* If it's a separator, we have to step over it. */ if( ch != EOF && sepmap[ch] ) (void) fgetc( fp ); return( 0 ); } static int read_csv( FILE *fp, IMAGE *out, int start_skip, const char *whitespace, const char *separator, int lines ) { int i; char whitemap[256]; char sepmap[256]; const char *p; fpos_t pos; int columns; int ch; double d; double *buf; int y; /* Make our char maps. */ for( i = 0; i < 256; i++ ) { whitemap[i] = 0; sepmap[i] = 0; } for( p = whitespace; *p; p++ ) whitemap[(int) *p] = 1; for( p = separator; *p; p++ ) sepmap[(int) *p] = 1; /* Skip first few lines. */ for( i = 0; i < start_skip; i++ ) if( skip_line( fp ) == EOF ) { im_error( "im_csv2vips", _( "end of file while skipping start" ) ); return( -1 ); } /* Parse the first line to get number of columns. Only bother checking * fgetpos() the first time we use it: assume it's working after this. */ if( fgetpos( fp, &pos ) ) { im_error_system( errno, "im_csv2vips", _( "unable to seek" ) ); return( -1 ); } for( columns = 0; (ch = read_double( fp, whitemap, sepmap, start_skip + 1, columns + 1, &d )) == 0; columns++ ) ; fsetpos( fp, &pos ); if( columns == 0 ) { im_error( "im_csv2vips", _( "empty line" ) ); return( -1 ); } if( ch == -2 ) /* Failed to parse a number. */ return( -1 ); /* If lines is -1, we have to parse the whole file to get the * number of lines out. */ if( lines == -1 ) { fgetpos( fp, &pos ); for( lines = 0; skip_line( fp ) != EOF; lines++ ) ; fsetpos( fp, &pos ); } im_initdesc( out, columns, lines, 1, IM_BBITS_DOUBLE, IM_BANDFMT_DOUBLE, IM_CODING_NONE, IM_TYPE_B_W, 1.0, 1.0, 0, 0 ); if( im_outcheck( out ) || im_setupout( out ) || !(buf = IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( out ), double )) ) return( -1 ); for( y = 0; y < lines; y++ ) { int x; for( x = 0; x < columns; x++ ) { ch = read_double( fp, whitemap, sepmap, y + start_skip + 1, x + 1, &d ); if( ch == EOF ) { im_error( "im_csv2vips", _( "unexpected end of file" ) ); return( -1 ); } else if( ch == '\n' ) { im_error( "im_csv2vips", _( "unexpected end of line" ) ); return( -1 ); } else if( ch ) /* Parse error. */ return( -1 ); buf[x] = d; } if( im_writeline( y, out, (PEL *) buf ) ) return( -1 ); /* Skip over the '\n' to the next line. */ skip_line( fp ); } return( 0 ); } int im_csv2vips( const char *filename, IMAGE *out ) { /* Read options. */ int start_skip = 0; char *whitespace = " \""; char *separator = ";,\t"; int lines = -1; char name[FILENAME_MAX]; char mode[FILENAME_MAX]; char *p, *q, *r; FILE *fp; /* Parse mode string. */ im_filename_split( filename, name, mode ); p = &mode[0]; while( (q = im_getnextoption( &p )) ) { if( im_isprefix( "ski", q ) && (r = im_getsuboption( q )) ) start_skip = atoi( r ); else if( im_isprefix( "whi", q ) && (r = im_getsuboption( q )) ) whitespace = r; else if( im_isprefix( "sep", q ) && (r = im_getsuboption( q )) ) separator = r; else if( im_isprefix( "lin", q ) && (r = im_getsuboption( q )) ) lines = atoi( r ); } if( !(fp = fopen( name, "r" )) ) { im_error( "im_csv2vips", _( "unable to open \"%s\"" ), name ); return( -1 ); } if( read_csv( fp, out, start_skip, whitespace, separator, lines ) ) { fclose( fp ); return( -1 ); } fclose( fp ); return( 0 ); } /* We can't just read the header of a CSV. Instead, we read to a temp image, * then copy just the header to the output. */ static int csv2vips_header( const char *filename, IMAGE *out ) { IMAGE *t; if( !(t = im_open( "im_csv2vips_header", "p" )) ) return( -1 ); if( im_csv2vips( filename, t ) || im_cp_desc( out, t ) ) { im_close( t ); return( -1 ); } im_close( t ); return( 0 ); } static const char *csv_suffs[] = { ".csv", NULL }; void im__csv_register( void ) { im_format_register( "csv", /* internal name */ N_( "CSV" ), /* i18n'd visible name */ csv_suffs, /* Allowed suffixes */ NULL, /* is_a */ csv2vips_header, /* Load header only */ im_csv2vips, /* Load */ im_vips2csv, /* Save */ NULL /* Flags */ ); }