csv read knows about quoted strings
you can "enclose strings in \", and" it shouldn't get confused
This commit is contained in:
parent
ae8faf6597
commit
2003b5b524
@ -15,6 +15,7 @@
|
|||||||
- vips_tracked_malloc() tracks allocation size and can report total mem usage
|
- vips_tracked_malloc() tracks allocation size and can report total mem usage
|
||||||
- cache limits, drop, init, flush plus command-line controls
|
- cache limits, drop, init, flush plus command-line controls
|
||||||
- remove dmalloc support, was never used and valgrind is better
|
- remove dmalloc support, was never used and valgrind is better
|
||||||
|
- im_csv2vips() allows quoted strings, including escaped quotes
|
||||||
|
|
||||||
10/8/11 started 7.26.3
|
10/8/11 started 7.26.3
|
||||||
- don't use G_VALUE_COLLECT_INIT(), many platforms do not have a glib this
|
- don't use G_VALUE_COLLECT_INIT(), many platforms do not have a glib this
|
||||||
|
9
TODO
9
TODO
@ -5,16 +5,11 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
- im_csv2vips() gets confused by quotes and commas, eg.
|
|
||||||
|
|
||||||
NP_001121179.1,"serine proteinase inhibitor, clade A, member ",ITPNLAEFAFSLYR,0.95588235294118,0.96176470588235, ...
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
- add vips_init_argv() which processes argc/argv for you? handy for tiny
|
- add vips_init_argv() which processes argc/argv for you? handy for tiny
|
||||||
progs, perhaps
|
progs, perhaps
|
||||||
|
|
||||||
|
- add vips_shutdown()? unload plugins, drop cache etc.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
* - gtkdoc
|
* - gtkdoc
|
||||||
* 1/3/10
|
* 1/3/10
|
||||||
* - allow lines that end with EOF rather than \n
|
* - allow lines that end with EOF rather than \n
|
||||||
|
* 23/9/11
|
||||||
|
* - allow quoted strings, including escaped quotes
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -93,6 +95,29 @@ skip_white( FILE *fp, const char whitemap[256] )
|
|||||||
return( ch );
|
return( ch );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Skip forward to the closing quote of a quoted string.
 *
 * Call this with the stream positioned just after the opening ". 
 * Characters are consumed until an unescaped ", a newline or EOF is seen. 
 * A backslash escapes the single character that follows it, so \" does 
 * not end the string and \\ is a literal backslash. 
 *
 * The terminating character is pushed back onto the stream, so the next 
 * fgetc() will return it again.
 *
 * Returns the terminating character: '"' on success, or '\n' or EOF if
 * the string was not closed before the end of the line or file.
 */
static int
skip_to_quote( FILE *fp )
{
	int ch;

	do {
		ch = fgetc( fp );

		if( ch == '\\' ) 
			/* Swallow exactly one escaped character. It must not
			 * be tested as a closing quote, and the character 
			 * after it has to go through the escape test again,
			 * or runs like \"\" are mis-parsed. An escaped 
			 * newline or EOF still ends the scan via the loop
			 * condition.
			 */
			ch = fgetc( fp );
		else if( ch == '"' )
			/* Unescaped quote: end of string.
			 */
			break;
	} while( ch != EOF && ch != '\n' );

	/* ungetc( EOF ) is defined to be a no-op, so this is safe for all 
	 * three terminators.
	 */
	ungetc( ch, fp );

	return( ch );
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
skip_to_sep( FILE *fp, const char sepmap[256] )
|
skip_to_sep( FILE *fp, const char sepmap[256] )
|
||||||
{
|
{
|
||||||
@ -109,7 +134,15 @@ skip_to_sep( FILE *fp, const char sepmap[256] )
|
|||||||
|
|
||||||
/* Read a single item. Syntax is:
|
/* Read a single item. Syntax is:
|
||||||
*
|
*
|
||||||
* item : whitespace* double? whitespace* [EOF|EOL|separator]
|
* element :
|
||||||
|
* whitespace* item whitespace* [EOF|EOL|separator]
|
||||||
|
*
|
||||||
|
* item :
|
||||||
|
* double |
|
||||||
|
* "anything" |
|
||||||
|
* empty
|
||||||
|
*
|
||||||
|
* the anything in quotes can contain " escaped with \
|
||||||
*
|
*
|
||||||
* Return the char that caused failure on fail (EOF or \n).
|
* Return the char that caused failure on fail (EOF or \n).
|
||||||
*/
|
*/
|
||||||
@ -127,7 +160,12 @@ read_double( FILE *fp, const char whitemap[256], const char sepmap[256],
|
|||||||
if( ch == EOF || ch == '\n' )
|
if( ch == EOF || ch == '\n' )
|
||||||
return( ch );
|
return( ch );
|
||||||
|
|
||||||
if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) {
|
if( ch == '"' ) {
|
||||||
|
(void) fgetc( fp );
|
||||||
|
ch = skip_to_quote( fp );
|
||||||
|
ch = fgetc( fp );
|
||||||
|
}
|
||||||
|
else if( !sepmap[ch] && fscanf( fp, "%lf", out ) != 1 ) {
|
||||||
/* Only a warning, since (for example) exported spreadsheets
|
/* Only a warning, since (for example) exported spreadsheets
|
||||||
* will often have text or date fields.
|
* will often have text or date fields.
|
||||||
*/
|
*/
|
||||||
@ -273,6 +311,10 @@ read_csv( FILE *fp, IMAGE *out,
|
|||||||
*
|
*
|
||||||
* Load a CSV (comma-separated values) file. The output image is always 1
|
* Load a CSV (comma-separated values) file. The output image is always 1
|
||||||
* band (monochrome), %IM_BANDFMT_DOUBLE.
|
* band (monochrome), %IM_BANDFMT_DOUBLE.
|
||||||
|
*
|
||||||
|
* Items in lines can be either floats, or strings enclosed in double-quotes.
|
||||||
|
* You can use a backslash (\) within the quotes to escape special characters.
|
||||||
|
*
|
||||||
* The reader is deliberately rather fussy: it will fail if there are any
|
* The reader is deliberately rather fussy: it will fail if there are any
|
||||||
* short lines, or if the file is too short. It will ignore lines that are
|
* short lines, or if the file is too short. It will ignore lines that are
|
||||||
* too long.
|
* too long.
|
||||||
@ -297,8 +339,7 @@ read_csv( FILE *fp, IMAGE *out,
|
|||||||
* <listitem>
|
* <listitem>
|
||||||
* <para>
|
* <para>
|
||||||
* <emphasis>whi:whitespace-characters</emphasis>
|
* <emphasis>whi:whitespace-characters</emphasis>
|
||||||
* The skippable whitespace characters. Default <emphasis>space</emphasis> and
|
* The skippable whitespace characters. Default <emphasis>space</emphasis>.
|
||||||
* double quotes (").
|
|
||||||
* Whitespace characters are always run together.
|
* Whitespace characters are always run together.
|
||||||
* </para>
|
* </para>
|
||||||
* </listitem>
|
* </listitem>
|
||||||
@ -330,7 +371,7 @@ im_csv2vips( const char *filename, IMAGE *out )
|
|||||||
/* Read options.
|
/* Read options.
|
||||||
*/
|
*/
|
||||||
int start_skip = 0;
|
int start_skip = 0;
|
||||||
char *whitespace = " \"";
|
char *whitespace = " ";
|
||||||
char *separator = ";,\t";
|
char *separator = ";,\t";
|
||||||
int lines = -1;
|
int lines = -1;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user