csv read knows about quoted strings
you can "enclose strings in \", and" it shouldn't get confused
This commit is contained in:
parent
ae8faf6597
commit
2003b5b524
@ -15,6 +15,7 @@
|
||||
- vips_tracked_malloc() tracks allocation size and can report total mem usage
|
||||
- cache limits, drop, init, flush plus command-line controls
|
||||
- remove dmalloc support, was never used and valgrind is better
|
||||
- im_csv2vips() allows quoted strings, including escaped quotes
|
||||
|
||||
10/8/11 started 7.26.3
|
||||
- don't use G_VALUE_COLLECT_INIT(), many platforms do not have a glib this
|
||||
|
9
TODO
9
TODO
@ -5,16 +5,11 @@
|
||||
|
||||
|
||||
|
||||
- im_csv2vips() gets confused by quotes and commas, eg.
|
||||
|
||||
NP_001121179.1,"serine proteinase inhibitor, clade A, member ",ITPNLAEFAFSLYR,0.95588235294118,0.96176470588235, ...
|
||||
|
||||
|
||||
|
||||
|
||||
- add vips_init_argv() which processes argc/argv for you? handy for tiny
|
||||
progs, perhaps
|
||||
|
||||
- add vips_shutdown()? unload plugins, drop cache etc.
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -13,6 +13,8 @@
|
||||
* - gtkdoc
|
||||
* 1/3/10
|
||||
* - allow lines that end with EOF rather than \n
|
||||
* 23/9/11
|
||||
* - allow quoted strings, including escaped quotes
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -93,6 +95,29 @@ skip_white( FILE *fp, const char whitemap[256] )
|
||||
return( ch );
|
||||
}
|
||||
|
||||
/* Skip forward through the body of a quoted string.
 *
 * Call this with the stream positioned just after the opening quote. 
 * Characters are consumed until the closing ", a newline or EOF is seen. 
 * A backslash escapes the character that follows it, so \" does not end 
 * the string and \\ is a literal backslash which does not escape whatever 
 * comes next.
 *
 * The character that stopped the scan is pushed back onto the stream (a
 * pushback of EOF is a no-op) and is also returned, so the result is one 
 * of '"', '\n' or EOF.
 */
static int
skip_to_quote( FILE *fp )
{
	int ch;

	for(;;) {
		ch = fgetc( fp );

		if( ch == '\\' ) {
			/* We let people escape " in strings. Swallow exactly
			 * one escaped character, then go round again so that
			 * a following backslash starts a new escape rather 
			 * than being treated as plain text.
			 */
			ch = fgetc( fp );

			/* An escape can't hide the end of the line or the
			 * end of the file.
			 */
			if( ch == EOF || ch == '\n' )
				break;

			continue;
		}

		if( ch == EOF || ch == '\n' || ch == '"' )
			break;
	}

	ungetc( ch, fp );

	return( ch );
}
|
||||
|
||||
static int
|
||||
skip_to_sep( FILE *fp, const char sepmap[256] )
|
||||
{
|
||||
@ -109,7 +134,15 @@ skip_to_sep( FILE *fp, const char sepmap[256] )
|
||||
|
||||
/* Read a single item. Syntax is:
|
||||
*
|
||||
* item : whitespace* double? whitespace* [EOF|EOL|separator]
|
||||
* element :
|
||||
* whitespace* item whitespace* [EOF|EOL|separator]
|
||||
*
|
||||
* item :
|
||||
* double |
|
||||
* "anything" |
|
||||
* empty
|
||||
*
|
||||
* the anything in quotes can contain " escaped with \
|
||||
*
|
||||
* Return the char that caused failure on fail (EOF or \n).
|
||||
*/
|
||||
@ -127,7 +160,12 @@ read_double( FILE *fp, const char whitemap[256], const char sepmap[256],
|
||||
if( ch == EOF || ch == '\n' )
|
||||
return( ch );
|
||||
|
||||
if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) {
|
||||
if( ch == '"' ) {
|
||||
(void) fgetc( fp );
|
||||
ch = skip_to_quote( fp );
|
||||
ch = fgetc( fp );
|
||||
}
|
||||
else if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) {
|
||||
/* Only a warning, since (for example) exported spreadsheets
|
||||
* will often have text or date fields.
|
||||
*/
|
||||
@ -273,6 +311,10 @@ read_csv( FILE *fp, IMAGE *out,
|
||||
*
|
||||
* Load a CSV (comma-separated values) file. The output image is always 1
|
||||
* band (monochrome), %IM_BANDFMT_DOUBLE.
|
||||
*
|
||||
* Items in lines can be either floats, or strings enclosed in double-quotes.
|
||||
* You can use a backslash (\) within the quotes to escape special characters.
|
||||
*
|
||||
* The reader is deliberately rather fussy: it will fail if there are any
|
||||
* short lines, or if the file is too short. It will ignore lines that are
|
||||
* too long.
|
||||
@ -297,8 +339,7 @@ read_csv( FILE *fp, IMAGE *out,
|
||||
* <listitem>
|
||||
* <para>
|
||||
* <emphasis>whi:whitespace-characters</emphasis>
|
||||
* The skippable whitespace characters. Default <emphasis>space</emphasis> and
|
||||
* double quotes (").
|
||||
* The skippable whitespace characters. Default <emphasis>space</emphasis>.
|
||||
* Whitespace characters are always run together.
|
||||
* </para>
|
||||
* </listitem>
|
||||
@ -330,7 +371,7 @@ im_csv2vips( const char *filename, IMAGE *out )
|
||||
/* Read options.
|
||||
*/
|
||||
int start_skip = 0;
|
||||
char *whitespace = " \"";
|
||||
char *whitespace = " ";
|
||||
char *separator = ";,\t";
|
||||
int lines = -1;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user