libvips/libvips/iofuncs/sbuf.c

538 lines
12 KiB
C

/* Buffered input from a source.
*
* J.Cupitt, 18/11/19
*/
/*
This file is part of VIPS.
VIPS is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
*/
/*
These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk
*/
/*
#define VIPS_DEBUG
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /*HAVE_CONFIG_H*/
#include <glib/gi18n-lib.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /*HAVE_UNISTD_H*/
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <vips/vips.h>
#include <vips/internal.h>
#include <vips/debug.h>
/**
* SECTION: sbuf
* @short_description: buffered read from a source
* @stability: Stable
* @see_also: <link linkend="libvips-foreign">foreign</link>
* @include: vips/vips.h
* @title: VipsSbuf
*
* #VipsSbuf wraps up a #VipsSource and provides a set of calls for
* text-oriented buffered reading. You can fetch lines of text, skip
* whitespace, and so on.
*
* It is useful for implementing things like CSV readers, for example.
*/
G_DEFINE_TYPE( VipsSbuf, vips_sbuf, VIPS_TYPE_OBJECT );
static void
vips_sbuf_class_init( VipsSbufClass *class )
{
VipsObjectClass *object_class = VIPS_OBJECT_CLASS( class );
GObjectClass *gobject_class = G_OBJECT_CLASS( class );
gobject_class->set_property = vips_object_set_property;
gobject_class->get_property = vips_object_get_property;
object_class->nickname = "sbuf";
object_class->description = _( "buffered source" );
VIPS_ARG_OBJECT( class, "input", 1,
_( "Input" ),
_( "Source to load from" ),
VIPS_ARGUMENT_REQUIRED_INPUT,
G_STRUCT_OFFSET( VipsSbuf, source ),
VIPS_TYPE_SOURCE );
}
static void
vips_sbuf_init( VipsSbuf *sbuf )
{
sbuf->read_point = 0;
sbuf->chars_in_buffer = 0;
sbuf->input_buffer[0] = '\0';
}
/**
* vips_sbuf_new_from_source:
* @source: source to operate on
*
* Create a VipsSbuf wrapping a source.
*
* Returns: a new #VipsSbuf
*/
VipsSbuf *
vips_sbuf_new_from_source( VipsSource *source )
{
VipsSbuf *sbuf;
g_assert( source );
sbuf = VIPS_SBUF( g_object_new( VIPS_TYPE_SBUF,
"input", source,
NULL ) );
if( vips_object_build( VIPS_OBJECT( sbuf ) ) ) {
VIPS_UNREF( sbuf );
return( NULL );
}
return( sbuf );
}
/**
* vips_sbuf_unbuffer:
* @sbuf: source to operate on
*
* Discard the input buffer and reset the read point. You must call this
* before using read or seek on the underlying #VipsSource class.
*/
void
vips_sbuf_unbuffer( VipsSbuf *sbuf )
{
/* We'd read ahead a little way -- seek backwards by that amount.
*/
vips_source_seek( sbuf->source,
sbuf->read_point - sbuf->chars_in_buffer, SEEK_CUR );
sbuf->read_point = 0;
sbuf->chars_in_buffer = 0;
}
/* Returns -1 on error, 0 on EOF, otherwise bytes read.
*/
static gint64
vips_sbuf_refill( VipsSbuf *sbuf )
{
gint64 bytes_read;
VIPS_DEBUG_MSG( "vips_sbuf_refill:\n" );
/* We should not discard any unread bytes.
*/
g_assert( sbuf->read_point == sbuf->chars_in_buffer );
bytes_read = vips_source_read( sbuf->source,
sbuf->input_buffer, VIPS_SBUF_BUFFER_SIZE );
if( bytes_read == -1 )
return( -1 );
sbuf->read_point = 0;
sbuf->chars_in_buffer = bytes_read;
/* Always add a null byte so we can use strchr() etc. on lines. This is
* safe because input_buffer is VIPS_SBUF_BUFFER_SIZE + 1 bytes.
*/
sbuf->input_buffer[bytes_read] = '\0';
return( bytes_read );
}
/**
* vips_sbuf_getc:
* @sbuf: source to operate on
*
* Fetch the next character from the source.
*
* If you can, use the macro VIPS_SBUF_GETC() instead for speed.
*
* Returns: the next char from @sbuf, -1 on read error or EOF.
*/
int
vips_sbuf_getc( VipsSbuf *sbuf )
{
if( sbuf->read_point == sbuf->chars_in_buffer &&
vips_sbuf_refill( sbuf ) <= 0 )
return( -1 );
g_assert( sbuf->read_point < sbuf->chars_in_buffer );
return( sbuf->input_buffer[sbuf->read_point++] );
}
/**
* VIPS_SBUF_GETC:
* @sbuf: source to operate on
*
* Fetch the next character from the source.
*
* Returns: the next char from @sbuf, -1 on read error or EOF.
*/
/**
* vips_sbuf_ungetc:
* @sbuf: source to operate on
*
* The opposite of vips_sbuf_getc(): undo the previous getc.
*
* unget more than one character is undefined. Unget at the start of the file
* does nothing.
*
* If you can, use the macro VIPS_SBUF_UNGETC() instead for speed.
*/
void
vips_sbuf_ungetc( VipsSbuf *sbuf )
{
if( sbuf->read_point > 0 )
sbuf->read_point -= 1;
}
/**
* VIPS_SBUF_UNGETC:
* @sbuf: source to operate on
*
* The opposite of vips_sbuf_getc(): undo the previous getc.
*
* unget more than one character is undefined. Unget at the start of the file
* does nothing.
*/
/**
* vips_sbuf_require:
* @sbuf: source to operate on
* @require: make sure we have at least this many chars available
*
* Make sure there are at least @require bytes of readahead available.
*
* Returns: 0 on success, -1 on error or EOF.
*/
int
vips_sbuf_require( VipsSbuf *sbuf, int require )
{
g_assert( require < VIPS_SBUF_BUFFER_SIZE );
g_assert( sbuf->chars_in_buffer >= 0 );
g_assert( sbuf->chars_in_buffer <= VIPS_SBUF_BUFFER_SIZE );
g_assert( sbuf->read_point >= 0 );
g_assert( sbuf->read_point <= sbuf->chars_in_buffer );
VIPS_DEBUG_MSG( "vips_sbuf_require: %d\n", require );
if( sbuf->read_point + require > sbuf->chars_in_buffer ) {
/* Areas can overlap, so we must memmove().
*/
memmove( sbuf->input_buffer,
sbuf->input_buffer + sbuf->read_point,
sbuf->chars_in_buffer - sbuf->read_point );
sbuf->chars_in_buffer -= sbuf->read_point;
sbuf->read_point = 0;
while( require > sbuf->chars_in_buffer ) {
unsigned char *to = sbuf->input_buffer +
sbuf->chars_in_buffer;
int space_available =
VIPS_SBUF_BUFFER_SIZE -
sbuf->chars_in_buffer;
gint64 bytes_read;
if( (bytes_read = vips_source_read( sbuf->source,
to, space_available )) < 0 )
return( -1 );
if( bytes_read == 0 ) {
vips_error(
vips_connection_nick( VIPS_CONNECTION(
sbuf->source ) ),
"%s", _( "end of file" ) );
return( -1 );
}
to[bytes_read] = '\0';
sbuf->chars_in_buffer += bytes_read;
}
}
return( 0 );
}
/**
* VIPS_SBUF_REQUIRE:
* @sbuf: source to operate on
* @require: need this many characters
*
* Make sure at least @require characters are available for
* VIPS_SBUF_PEEK() and VIPS_SBUF_FETCH().
*
* Returns: 0 on success, -1 on read error or EOF.
*/
/**
* VIPS_SBUF_PEEK:
* @sbuf: source to operate on
*
* After a successful VIPS_SBUF_REQUIRE(), you can index this to get
* require characters of input.
*
* Returns: a pointer to the next requre characters of input.
*/
/**
* VIPS_SBUF_FETCH:
* @sbuf: source to operate on
*
* After a successful VIPS_SBUF_REQUIRE(), you can use this require times
* to fetch characters of input.
*
* Returns: the next input character.
*/
/**
* vips_sbuf_get_line:
* @sbuf: source to operate on
*
* Fetch the next line of text from @sbuf and return it. The end of
* line character (or characters, for DOS files) are removed, and the string
* is terminated with a null (`\0` character).
*
* Returns NULL on end of file or read error.
*
* If the line is longer than some arbitrary (but large) limit, it is
* truncated. If you need to be able to read very long lines, use the
* slower vips_sbuf_get_line_copy().
*
* The return value is owned by @sbuf and must not be freed. It
* is valid until the next get call to @sbuf.
*
* Returns: the next line of text, or NULL on EOF or read error.
*/
const char *
vips_sbuf_get_line( VipsSbuf *sbuf )
{
int write_point;
int space_remaining;
int ch;
VIPS_DEBUG_MSG( "vips_sbuf_get_line:\n" );
write_point = 0;
space_remaining = VIPS_SBUF_BUFFER_SIZE;
while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
ch != '\n' &&
space_remaining > 0 ) {
sbuf->line[write_point] = ch;
write_point += 1;
space_remaining -= 1;
}
sbuf->line[write_point] = '\0';
/* If we hit EOF immediately, return EOF.
*/
if( ch == -1 &&
write_point == 0 )
return( NULL );
/* If the final char in the buffer is \r, this is probably a DOS file
* and we should remove that too.
*
* There's a chance this could incorrectly remove \r in very long
* lines, but ignore this.
*/
if( write_point > 0 &&
sbuf->line[write_point - 1] == '\r' )
sbuf->line[write_point - 1] = '\0';
/* If we filled the output line without seeing \n, keep going to the
* next \n.
*/
if( ch != '\n' &&
space_remaining == 0 ) {
while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
ch != '\n' )
;
}
VIPS_DEBUG_MSG( " %s\n", sbuf->line );
return( (const char *) sbuf->line );
}
/**
* vips_sbuf_get_line_copy:
* @sbuf: source to operate on
*
* Fetch the next line of text from @sbuf and return it. The end of
* line character (or characters, for DOS files) are removed, and the string
* is terminated with a null (`\0` character).
*
* The return result must be freed with g_free().
*
* This is slower than vips_sbuf_get_line(), but can work with lines of
* any length.
*
* Returns: the next line of text, or NULL on EOF or read error.
*/
char *
vips_sbuf_get_line_copy( VipsSbuf *sbuf )
{
static const unsigned char null = '\0';
VIPS_DEBUG_MSG( "vips_sbuf_get_line_copy:\n" );
GByteArray *buffer;
int ch;
char *result;
buffer = g_byte_array_new();
while( (ch = VIPS_SBUF_GETC( sbuf )) != -1 &&
ch != '\n' ) {
unsigned char c = ch;
g_byte_array_append( buffer, &c, 1 );
}
/* Immediate EOF.
*/
if( ch == -1 &&
buffer->len == 0 ) {
VIPS_FREEF( g_byte_array_unref, buffer );
return( NULL );
}
/* If the character before the \n was \r, this is probably a DOS file
* and we should remove the \r.
*/
if( ch == '\n' &&
buffer->len > 0 &&
buffer->data[buffer->len - 1] == '\r' )
g_byte_array_set_size( buffer, buffer->len - 1 );
g_byte_array_append( buffer, &null, 1 );
result = (char *) g_byte_array_free( buffer, FALSE );
VIPS_DEBUG_MSG( " %s\n", result );
return( result );
}
/**
* vips_sbuf_get_non_whitespace:
* @sbuf: source to operate on
*
* Fetch the next chunk of non-whitespace text from the source, and
* null-terminate it.
*
* After this, the next getc will be the first char of the next block of
* whitespace (or EOF).
*
* If the first getc is whitespace, stop instantly and return the empty
* string.
*
* If the item is longer than some arbitrary (but large) limit, it is
* truncated.
*
* The return value is owned by @sbuf and must not be freed. It
* is valid until the next get call to @sbuf.
*
* Returns: the next block of non-whitespace, or NULL on EOF or read error.
*/
const char *
vips_sbuf_get_non_whitespace( VipsSbuf *sbuf )
{
int ch;
int i;
for( i = 0; i < VIPS_SBUF_BUFFER_SIZE &&
!isspace( ch = VIPS_SBUF_GETC( sbuf ) ) &&
ch != EOF; i++ )
sbuf->line[i] = ch;
sbuf->line[i] = '\0';
/* If we stopped before seeing any whitespace, skip to the end of the
* block of non-whitespace.
*/
if( !isspace( ch ) )
while( !isspace( ch = VIPS_SBUF_GETC( sbuf ) ) &&
ch != EOF )
;
/* If we finally stopped on whitespace, step back one so the next get
* will be whitespace (or EOF).
*/
if( isspace( ch ) )
VIPS_SBUF_UNGETC( sbuf );
return( (const char *) sbuf->line );
}
/**
* vips_sbuf_skip_whitespace:
* @sbuf: source to operate on
*
* After this, the next getc will be the first char of the next block of
* non-whitespace (or EOF).
*
* Also skip comments, ie. from any '#' character to the end of the line.
*
* Returns: 0 on success, or -1 on EOF.
*/
int
vips_sbuf_skip_whitespace( VipsSbuf *sbuf )
{
int ch;
do {
ch = VIPS_SBUF_GETC( sbuf );
/* # skip comments too.
*/
if( ch == '#' ) {
/* Probably EOF.
*/
if( !vips_sbuf_get_line( sbuf ) )
return( -1 );
ch = VIPS_SBUF_GETC( sbuf );
}
} while( isspace( ch ) );
VIPS_SBUF_UNGETC( sbuf );
return( 0 );
}