better CSV parsing

This commit is contained in:
John Cupitt 2011-10-15 12:48:30 +01:00
parent d25e94d0d2
commit 173afe2169
2 changed files with 15 additions and 18 deletions

5
TODO
View File

@@ -1,8 +1,3 @@
- improve CSV parsing a bit more, allow quotes on any field not just strings
- vips_object_set_argument_from_string() needs more arg types - vips_object_set_argument_from_string() needs more arg types
must be some way to make this more automatic must be some way to make this more automatic

View File

@@ -12,7 +12,7 @@
* 4/2/10 * 4/2/10
* - gtkdoc * - gtkdoc
* 1/3/10 * 1/3/10
* - allow lines that end with EOF rather than \n * - allow lines that end with EOF
* 23/9/11 * 23/9/11
* - allow quoted strings, including escaped quotes * - allow quoted strings, including escaped quotes
*/ */
@@ -102,15 +102,13 @@ skip_to_quote( FILE *fp )
do { do {
ch = fgetc( fp ); ch = fgetc( fp );
/* We let people escape " in strings. /* Ignore \" in strings.
*/ */
if( ch == '\\' ) { if( ch == '\\' )
ch = fgetc( fp ); ch = fgetc( fp );
else if( ch == '"' )
if( ch != EOF && ch != '\n' ) break;
ch = fgetc( fp ); } while( ch != EOF && ch != '\n' );
}
} while (ch != EOF && ch != '\n' && ch != '"' );
ungetc( ch, fp ); ungetc( ch, fp );
@@ -164,7 +162,8 @@ read_double( FILE *fp, const char whitemap[256], const char sepmap[256],
ch = skip_to_quote( fp ); ch = skip_to_quote( fp );
ch = fgetc( fp ); ch = fgetc( fp );
} }
else if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) { else if( !sepmap[ch] &&
fscanf( fp, "%lf", out ) != 1 ) {
/* Only a warning, since (for example) exported spreadsheets /* Only a warning, since (for example) exported spreadsheets
* will often have text or date fields. * will often have text or date fields.
*/ */
@@ -264,7 +263,8 @@ read_csv( FILE *fp, IMAGE *out,
IM_BBITS_DOUBLE, IM_BANDFMT_DOUBLE, IM_BBITS_DOUBLE, IM_BANDFMT_DOUBLE,
IM_CODING_NONE, IM_TYPE_B_W, 1.0, 1.0, 0, 0 ); IM_CODING_NONE, IM_TYPE_B_W, 1.0, 1.0, 0, 0 );
if( im_outcheck( out ) || im_setupout( out ) || if( im_outcheck( out ) ||
im_setupout( out ) ||
!(buf = IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( out ), double )) ) !(buf = IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( out ), double )) )
return( -1 ); return( -1 );
@@ -309,10 +309,12 @@ read_csv( FILE *fp, IMAGE *out,
* @out: image to write to * @out: image to write to
* *
* Load a CSV (comma-separated values) file. The output image is always 1 * Load a CSV (comma-separated values) file. The output image is always 1
* band (monochrome), %IM_BANDFMT_DOUBLE. * band (monochrome), %VIPS_FORMAT_DOUBLE.
* *
* Items in lines can be either floats, or strings enclosed in double-quotes. * Items in lines can be either floating point numbers in the C locale, or
* You can use a backslash (\) within the quotes to escape special characters. * strings enclosed in double-quotes ("), or empty.
* You can use a backslash (\) within the quotes to escape special characters,
* such as quote marks.
* *
* The reader is deliberately rather fussy: it will fail if there are any * The reader is deliberately rather fussy: it will fail if there are any
* short lines, or if the file is too short. It will ignore lines that are * short lines, or if the file is too short. It will ignore lines that are