335 lines
7.0 KiB
C
335 lines
7.0 KiB
C
/* Read a csv file.
|
|
*
|
|
* 19/12/05 JC
|
|
* - hacked from ppm reader
|
|
* 11/9/06
|
|
* - now distinguishes whitespace and separators, so we can have blank
|
|
* fields
|
|
* 20/9/06
|
|
* - oops, unquoted trailing columns could get missed
|
|
* 17/5/07
|
|
* - added im_csv2vips_header()
|
|
*/
|
|
|
|
/*
|
|
|
|
This file is part of VIPS.
|
|
|
|
VIPS is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
|
|
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif /*HAVE_CONFIG_H*/
|
|
#include <vips/intl.h>
|
|
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
#include <errno.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <vips/vips.h>
|
|
|
|
#ifdef WITH_DMALLOC
|
|
#include <dmalloc.h>
|
|
#endif /*WITH_DMALLOC*/
|
|
|
|
/* Discard characters from fp up to and including the next '\n'.
 *
 * Returns the character that ended the scan: '\n' when a line terminator
 * was consumed, or EOF when the stream ran out first.
 */
static int
skip_line( FILE *fp )
{
	for( ;; ) {
		int c = fgetc( fp );

		if( c == '\n' || c == EOF )
			return( c );
	}
}
|
|
|
|
/* Step over characters flagged in whitemap, stopping at the first character
 * that is not flagged, at '\n', or at EOF.
 *
 * The stopping character is pushed back onto the stream, so the next read
 * sees it again, and is also returned to the caller.
 */
static int
skip_white( FILE *fp, const char whitemap[256] )
{
	int c;

	for( c = fgetc( fp );
		c != EOF && c != '\n' && whitemap[c];
		c = fgetc( fp ) )
		;

	/* ungetc() of EOF fails and leaves the stream unchanged, so this is
	 * safe to call unconditionally.
	 */
	ungetc( c, fp );

	return( c );
}
|
|
|
|
/* Step over characters until one flagged in sepmap is found, or until '\n'
 * or EOF.
 *
 * The stopping character is pushed back onto the stream, so the next read
 * sees it again, and is also returned to the caller.
 */
static int
skip_to_sep( FILE *fp, const char sepmap[256] )
{
	int c;

	for( c = fgetc( fp );
		c != EOF && c != '\n' && !sepmap[c];
		c = fgetc( fp ) )
		;

	/* ungetc() of EOF fails and leaves the stream unchanged, so this is
	 * safe to call unconditionally.
	 */
	ungetc( c, fp );

	return( c );
}
|
|
|
|
/* Read one field from the stream. The grammar is:
 *
 *	item : whitespace* double? whitespace* [EOF|EOL|separator]
 *
 * *out is always written: it holds the parsed value, or 0 when the field is
 * blank or could not be parsed as a number. An unparseable field produces a
 * warning quoting lineno and colno and is skipped up to the next separator.
 *
 * Returns 0 when a field was consumed (a trailing separator is stepped
 * over). If the line or file ends before any field content is seen, returns
 * the character responsible ('\n' or EOF) and leaves it unread.
 */
static int
read_double( FILE *fp, const char whitemap[256], const char sepmap[256],
	int lineno, int colno, double *out )
{
	int next;

	/* fscanf() overwrites this on a successful parse; every other path
	 * needs the field to read as zero.
	 */
	*out = 0;

	next = skip_white( fp, whitemap );
	if( next == EOF || next == '\n' )
		return( next );

	/* A separator straight away means a blank field: nothing to parse.
	 */
	if( !sepmap[next] ) {
		if( fscanf( fp, "%lf", out ) != 1 ) {
			/* Only a warning, since (for example) exported
			 * spreadsheets will often have text or date fields.
			 */
			im_warn( "im_csv2vips",
				_( "error parsing number, line %d, column %d" ),
				lineno, colno );

			/* Step over the bad data to the next separator.
			 */
			(void) skip_to_sep( fp, sepmap );
		}
	}

	/* The field has been read; all that is left is to find out what
	 * terminates it.
	 */
	next = skip_white( fp, whitemap );

	/* A separator has to be consumed here; EOL and EOF are left for the
	 * next call to report.
	 */
	if( next != EOF && sepmap[next] )
		(void) fgetc( fp );

	return( 0 );
}
|
|
|
|
static int
|
|
read_csv( FILE *fp, IMAGE *out,
|
|
int start_skip,
|
|
const char *whitespace, const char *separator,
|
|
int lines )
|
|
{
|
|
int i;
|
|
char whitemap[256];
|
|
char sepmap[256];
|
|
const char *p;
|
|
fpos_t pos;
|
|
int columns;
|
|
int ch;
|
|
double d;
|
|
double *buf;
|
|
int y;
|
|
|
|
/* Make our char maps.
|
|
*/
|
|
for( i = 0; i < 256; i++ ) {
|
|
whitemap[i] = 0;
|
|
sepmap[i] = 0;
|
|
}
|
|
for( p = whitespace; *p; p++ )
|
|
whitemap[(int) *p] = 1;
|
|
for( p = separator; *p; p++ )
|
|
sepmap[(int) *p] = 1;
|
|
|
|
/* Skip first few lines.
|
|
*/
|
|
for( i = 0; i < start_skip; i++ )
|
|
if( skip_line( fp ) == EOF ) {
|
|
im_error( "im_csv2vips",
|
|
"%s", _( "end of file while skipping start" ) );
|
|
return( -1 );
|
|
}
|
|
|
|
/* Parse the first line to get number of columns. Only bother checking
|
|
* fgetpos() the first time we use it: assume it's working after this.
|
|
*/
|
|
if( fgetpos( fp, &pos ) ) {
|
|
im_error_system( errno, "im_csv2vips",
|
|
"%s", _( "unable to seek" ) );
|
|
return( -1 );
|
|
}
|
|
for( columns = 0; (ch = read_double( fp, whitemap, sepmap,
|
|
start_skip + 1, columns + 1, &d )) == 0; columns++ )
|
|
;
|
|
fsetpos( fp, &pos );
|
|
|
|
if( columns == 0 ) {
|
|
im_error( "im_csv2vips", "%s", _( "empty line" ) );
|
|
return( -1 );
|
|
}
|
|
if( ch == -2 )
|
|
/* Failed to parse a number.
|
|
*/
|
|
return( -1 );
|
|
|
|
/* If lines is -1, we have to parse the whole file to get the
|
|
* number of lines out.
|
|
*/
|
|
if( lines == -1 ) {
|
|
fgetpos( fp, &pos );
|
|
for( lines = 0; skip_line( fp ) != EOF; lines++ )
|
|
;
|
|
fsetpos( fp, &pos );
|
|
}
|
|
|
|
im_initdesc( out, columns, lines, 1,
|
|
IM_BBITS_DOUBLE, IM_BANDFMT_DOUBLE,
|
|
IM_CODING_NONE, IM_TYPE_B_W, 1.0, 1.0, 0, 0 );
|
|
|
|
if( im_outcheck( out ) || im_setupout( out ) ||
|
|
!(buf = IM_ARRAY( out, IM_IMAGE_N_ELEMENTS( out ), double )) )
|
|
return( -1 );
|
|
|
|
for( y = 0; y < lines; y++ ) {
|
|
int x;
|
|
|
|
for( x = 0; x < columns; x++ ) {
|
|
ch = read_double( fp, whitemap, sepmap,
|
|
y + start_skip + 1, x + 1, &d );
|
|
if( ch == EOF ) {
|
|
im_error( "im_csv2vips",
|
|
"%s", _( "unexpected end of file" ) );
|
|
return( -1 );
|
|
}
|
|
else if( ch == '\n' ) {
|
|
im_error( "im_csv2vips",
|
|
"%s", _( "unexpected end of line" ) );
|
|
return( -1 );
|
|
}
|
|
else if( ch )
|
|
/* Parse error.
|
|
*/
|
|
return( -1 );
|
|
|
|
buf[x] = d;
|
|
}
|
|
|
|
if( im_writeline( y, out, (PEL *) buf ) )
|
|
return( -1 );
|
|
|
|
/* Skip over the '\n' to the next line.
|
|
*/
|
|
skip_line( fp );
|
|
}
|
|
|
|
return( 0 );
|
|
}
|
|
|
|
int
|
|
im_csv2vips( const char *filename, IMAGE *out )
|
|
{
|
|
/* Read options.
|
|
*/
|
|
int start_skip = 0;
|
|
char *whitespace = " \"";
|
|
char *separator = ";,\t";
|
|
int lines = -1;
|
|
|
|
char name[FILENAME_MAX];
|
|
char mode[FILENAME_MAX];
|
|
char *p, *q, *r;
|
|
FILE *fp;
|
|
|
|
/* Parse mode string.
|
|
*/
|
|
im_filename_split( filename, name, mode );
|
|
p = &mode[0];
|
|
while( (q = im_getnextoption( &p )) ) {
|
|
if( im_isprefix( "ski", q ) && (r = im_getsuboption( q )) )
|
|
start_skip = atoi( r );
|
|
else if( im_isprefix( "whi", q ) && (r = im_getsuboption( q )) )
|
|
whitespace = r;
|
|
else if( im_isprefix( "sep", q ) && (r = im_getsuboption( q )) )
|
|
separator = r;
|
|
else if( im_isprefix( "lin", q ) && (r = im_getsuboption( q )) )
|
|
lines = atoi( r );
|
|
}
|
|
|
|
if( !(fp = fopen( name, "r" )) ) {
|
|
im_error( "im_csv2vips",
|
|
_( "unable to open \"%s\"" ), name );
|
|
return( -1 );
|
|
}
|
|
|
|
if( read_csv( fp, out, start_skip, whitespace, separator, lines ) ) {
|
|
fclose( fp );
|
|
return( -1 );
|
|
}
|
|
fclose( fp );
|
|
|
|
return( 0 );
|
|
}
|
|
|
|
/* We can't just read the header of a CSV. Instead, we read to a temp image,
|
|
* then copy just the header to the output.
|
|
*/
|
|
static int
|
|
csv2vips_header( const char *filename, IMAGE *out )
|
|
{
|
|
IMAGE *t;
|
|
|
|
if( !(t = im_open( "im_csv2vips_header", "p" )) )
|
|
return( -1 );
|
|
if( im_csv2vips( filename, t ) ||
|
|
im_cp_desc( out, t ) ) {
|
|
im_close( t );
|
|
return( -1 );
|
|
}
|
|
im_close( t );
|
|
|
|
return( 0 );
|
|
}
|
|
|
|
static const char *csv_suffs[] = { ".csv", NULL };
|
|
|
|
void
|
|
im__csv_register( void )
|
|
{
|
|
im_format_register(
|
|
"csv", /* internal name */
|
|
_( "CSV" ), /* i18n'd visible name */
|
|
csv_suffs, /* Allowed suffixes */
|
|
NULL, /* is_a */
|
|
csv2vips_header, /* Load header only */
|
|
im_csv2vips, /* Load */
|
|
im_vips2csv, /* Save */
|
|
NULL /* Flags */
|
|
);
|
|
}
|