almost there

This commit is contained in:
John Cupitt 2016-06-30 14:59:20 +01:00
parent e45c5a587e
commit df81a6fe24
6 changed files with 129 additions and 74 deletions

View File

@ -27,7 +27,9 @@
- better quality for vips_resize() with linear/cubic kernels
- pyvips8 can create new metadata
- better upsizing with vips_resize()
- added vips_convf() ... im_conv_f() rewritten as a class
- added vips_convf(), vips_convi() ... im_conv*() functions rewritten as
classes
- new fixed-point vector path for convi is up to 2x faster
18/5/16 started 8.3.2
- more robust vips image reading

30
TODO
View File

@ -1,3 +1,33 @@
- test new clip stuff?
- test_hist_ismonotonic is failing
im = Vips.Image.identity()
self.assertTrue(im.hist_ismonotonic())
john@mm-jcupitt5 ~/Desktop/pics $ more mono.mat
2 1
-1 1
john@mm-jcupitt5 ~/Desktop/pics $ vips identity x.v
john@mm-jcupitt5 ~/Desktop/pics $ vips convi x.v x2.v mono.mat --vips-info
--vips-novector
info: convi: using C path
john@mm-jcupitt5 ~/Desktop/pics $ vips max x2.v
1.000000
john@mm-jcupitt5 ~/Desktop/pics $ vips convi x.v x2.v mono.mat --vips-info
info: convi: using vector path
john@mm-jcupitt5 ~/Desktop/pics $ vips max x2.v
0.000000
should be 1.0 (0 for first pixel, 1 thereafter), instead it's 0 everywhere
- test_countlines is failing
im = Vips.Image.black(100, 100)
im = im.draw_line(255, 0, 50, 100, 50)
n_lines = im.countlines(Vips.Direction.HORIZONTAL)
self.assertEqual(n_lines, 1)
- add more webp tests to py suite
- try moving some more of the CLI tests to py

View File

@ -66,12 +66,9 @@ vips_conv_build( VipsObject *object )
VipsObjectClass *class = VIPS_OBJECT_GET_CLASS( object );
VipsConvolution *convolution = (VipsConvolution *) object;
VipsConv *conv = (VipsConv *) object;
VipsImage **t = (VipsImage **)
vips_object_local_array( object, 4 );
VipsImage **t = (VipsImage **) vips_object_local_array( object, 4 );
VipsImage *in;
INTMASK *imsk;
DOUBLEMASK *dmsk;
if( VIPS_OBJECT_CLASS( vips_conv_parent_class )->build( object ) )
return( -1 );
@ -85,13 +82,6 @@ vips_conv_build( VipsObject *object )
vips_matrixprint( convolution->M, NULL );
*/
if( !(imsk = im_vips2imask( convolution->M, class->nickname )) ||
!im_local_imask( convolution->out, imsk ) )
return( -1 );
if( !(dmsk = im_vips2mask( convolution->M, class->nickname )) ||
!im_local_dmask( convolution->out, dmsk ) )
return( -1 );
/* Unpack for processing.
*/
if( vips_image_decode( in, &t[0] ) )
@ -106,14 +96,23 @@ vips_conv_build( VipsObject *object )
break;
case VIPS_PRECISION_INTEGER:
if( im_conv( in, convolution->out, imsk ) )
if( vips_convi( in, &t[1], convolution->M, NULL ) ||
vips_image_write( t[1], convolution->out ) )
return( -1 );
break;
case VIPS_PRECISION_APPROXIMATE:
{
DOUBLEMASK *dmsk;
if( !(dmsk = im_vips2mask( convolution->M, class->nickname )) ||
!im_local_dmask( convolution->out, dmsk ) )
return( -1 );
if( im_aconv( in, convolution->out, dmsk,
conv->layers, conv->cluster ) )
return( -1 );
}
break;
default:

View File

@ -70,7 +70,8 @@
* 26/1/16 Lovell Fuller
* - remove Duff for a 25% speedup
* 23/6/16
* - redone as a class
* - rewritten as a class
* - new fixed-point vector path, up to 2x faster
*/
/*
@ -101,9 +102,9 @@
*/
/*
*/
#define DEBUG_PIXELS
#define DEBUG
*/
#define DEBUG_COMPILE
#ifdef HAVE_CONFIG_H
@ -137,7 +138,6 @@ typedef struct {
int last; /* The index of the last mask coff we use */
int r; /* Set previous result in this var */
int d2; /* Write new temp result here */
/* The code we generate for this section of the mask.
*/
@ -166,6 +166,11 @@ typedef struct {
*/
int n_pass;
Pass pass[MAX_PASS];
/* Code for the final clip back to 8 bits.
*/
int r;
VipsVector *vector;
} VipsConvi;
typedef VipsConvolutionClass VipsConviClass;
@ -265,6 +270,7 @@ vips_convi_compile_free( VipsConvi *convi )
for( i = 0; i < convi->n_pass; i++ )
VIPS_FREEF( vips_vector_free, convi->pass[i].vector );
convi->n_pass = 0;
VIPS_FREEF( vips_vector_free, convi->vector );
}
#define TEMP( N, S ) vips_vector_temporary( v, (char *) N, S )
@ -286,7 +292,6 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
{
VipsConvolution *convolution = (VipsConvolution *) convi;
VipsImage *M = convolution->M;
int offset = VIPS_RINT( vips_image_get_offset( M ) );
VipsVector *v;
int i;
@ -295,12 +300,7 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
printf( "starting pass %d\n", pass->first );
#endif /*DEBUG_COMPILE*/
pass->vector = v = vips_vector_new( "convi", 1 );
/* We have two destinations: the final output image (8-bit) and the
* intermediate buffer if this is not the final pass (16-bit).
*/
pass->d2 = vips_vector_destination( v, "d2", 2 );
pass->vector = v = vips_vector_new( "convi", 2 );
/* "r" is the array of sums from the previous pass (if any).
*/
@ -358,11 +358,13 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
* become a signed 16-bit value. We know only the bottom 8 bits
* of the image and coefficient are interesting, so we can take
* the bottom half of a 16x16->32 multiply.
*
* We accumulate the signed 16-bit result in sum.
*/
CONST( coeff, convi->fixed[i], 2 );
ASM3( "mullw", "value", "value", coeff );
/* We accumulate the signed 16-bit result in sum. Saturated
* add.
*/
ASM3( "addssw", "sum", "sum", "value" );
if( vips_vector_full( v ) )
@ -371,37 +373,9 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
pass->last = i;
/* If this is the end of the mask, we write the 8-bit result to the
* image, otherwise write the 16-bit intermediate to our temp buffer.
/* And write to our intermediate buffer.
*/
if( pass->last >= convi->n_point - 1 ) {
char c32[256];
char c6[256];
char c0[256];
char c255[256];
char off[256];
CONST( c32, 32, 2 );
ASM3( "addw", "sum", "sum", c32 );
CONST( c6, 6, 2 );
ASM3( "shrsw", "sum", "sum", c6 );
CONST( off, offset, 2 );
ASM3( "subw", "sum", "sum", off );
/* You'd think "convsuswb", convert signed 16-bit to unsigned
* 8-bit with saturation, would be quicker, but it's a lot
* slower.
*/
CONST( c0, 0, 2 );
ASM3( "maxsw", "sum", c0, "sum" );
CONST( c255, 255, 2 );
ASM3( "minsw", "sum", c255, "sum" );
ASM2( "convwb", "d1", "sum" );
}
else
ASM2( "copyw", "d2", "sum" );
ASM2( "copyw", "d1", "sum" );
if( !vips_vector_compile( v ) )
return( -1 );
@ -414,6 +388,59 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
return( 0 );
}
/* Generate code for the final 16->8 conversion.
 *
 * Builds the program stored in convi->vector. It reads the signed 16-bit
 * sums from the source array "r", scales them back down from fixed point,
 * applies the mask offset, clamps to 0 - 255 and writes the result to "d1"
 * as bytes.
 *
 * The index of the "r" source is saved in convi->r so the caller can bind
 * an array to it before running the program.
 *
 * 0 for success, -1 on error.
 */
static int
vips_convi_compile_clip( VipsConvi *convi )
{
	VipsConvolution *convolution = (VipsConvolution *) convi;
	VipsImage *M = convolution->M;
	int offset = VIPS_RINT( vips_image_get_offset( M ) );

	VipsVector *v;
	char c32[256];
	char c6[256];
	char c0[256];
	char c255[256];
	char off[256];

	/* NOTE(review): the second argument looks like the size in bytes of
	 * the destination element (1 here, for the 8-bit output) -- confirm
	 * against vips_vector_new().
	 */
	convi->vector = v = vips_vector_new( "convi", 1 );

	/* "r" is the array of sums we clip down.
	 */
	convi->r = vips_vector_source_name( v, "r", 2 );

	/* Working temporary: holds the sum while it is scaled, offset and
	 * clamped.
	 */
	TEMP( "value", 2 );

	/* Round to nearest: add half of (1 << 6) before shifting the six
	 * fractional bits away.
	 */
	CONST( c32, 32, 2 );
	ASM3( "addw", "value", "r", c32 );
	CONST( c6, 6, 2 );
	ASM3( "shrsw", "value", "value", c6 );

	/* A convolution result is sum / scale + offset, so the mask offset
	 * must be added, not subtracted. Subtracting gives the wrong sign
	 * for any mask with a non-zero offset.
	 */
	CONST( off, offset, 2 );
	ASM3( "addw", "value", "value", off );

	/* You'd think "convsuswb" (convert signed 16-bit to unsigned
	 * 8-bit with saturation) would be quicker, but it's a lot
	 * slower.
	 */
	CONST( c0, 0, 2 );
	ASM3( "maxsw", "value", c0, "value" );
	CONST( c255, 255, 2 );
	ASM3( "minsw", "value", c255, "value" );

	/* value is now known to be in 0 - 255, so a plain narrowing
	 * conversion is safe.
	 */
	ASM2( "convwb", "d1", "value" );

	if( !vips_vector_compile( v ) )
		return( -1 );

	return( 0 );
}
static int
vips_convi_compile( VipsConvi *convi, VipsImage *in )
{
@ -432,7 +459,6 @@ vips_convi_compile( VipsConvi *convi, VipsImage *in )
pass->first = i;
pass->r = -1;
pass->d2 = -1;
if( vips_convi_compile_section( convi, in, pass ) )
return( -1 );
@ -442,6 +468,9 @@ vips_convi_compile( VipsConvi *convi, VipsImage *in )
break;
}
if( vips_convi_compile_clip( convi ) )
return( -1 );
return( 0 );
}
@ -461,6 +490,7 @@ vips_convi_generate_vector( VipsRegion *or,
VipsRect s;
int i, y;
VipsExecutor executor[MAX_PASS];
VipsExecutor clip;
#ifdef DEBUG_PIXELS
printf( "vips_convi_generate_vector: generating %d x %d at %d x %d\n",
@ -479,6 +509,7 @@ vips_convi_generate_vector( VipsRegion *or,
for( i = 0; i < convi->n_pass; i++ )
vips_executor_set_program( &executor[i],
convi->pass[i].vector, ne );
vips_executor_set_program( &clip, convi->vector, ne );
VIPS_GATE_START( "vips_convi_generate_vector: work" );
@ -494,13 +525,15 @@ vips_convi_generate_vector( VipsRegion *or,
ir, r->left, r->top + y );
vips_executor_set_array( &executor[i],
pass->r, seq->t1 );
vips_executor_set_array( &executor[i],
pass->d2, seq->t2 );
vips_executor_set_destination( &executor[i], q );
vips_executor_set_destination( &executor[i], seq->t2 );
vips_executor_run( &executor[i] );
VIPS_SWAP( signed short *, seq->t1, seq->t2 );
}
vips_executor_set_array( &clip, convi->r, seq->t1 );
vips_executor_set_destination( &clip, q );
vips_executor_run( &clip );
}
VIPS_GATE_STOP( "vips_convi_generate_vector: work" );

View File

@ -1057,24 +1057,15 @@ im_conv_raw( IMAGE *in, IMAGE *out, INTMASK *mask )
if( conv->n_pass ) {
generate = convvec_gen;
#ifdef DEBUG
printf( "im_conv_raw: using vector path\n" );
#endif /*DEBUG*/
vips_info( "im_conv_raw", "using vec path" );
}
else if( mask->xsize == 3 && mask->ysize == 3 ) {
generate = conv3x3_gen;
#ifdef DEBUG
printf( "im_conv_raw: using 3x3 path\n" );
#endif /*DEBUG*/
vips_info( "im_conv_raw", "using 3x3 path" );
}
else {
generate = conv_gen;
#ifdef DEBUG
printf( "im_conv_raw: using general path\n" );
#endif /*DEBUG*/
vips_info( "im_conv_raw", "using C path" );
}
if( im_demand_hint( out, IM_SMALLTILE, in, NULL ) ||

View File

@ -31,8 +31,8 @@
*/
#ifndef IM_CONVOLUTION_H
#define IM_CONVOLUTION_H
#ifndef VIPS_CONVOLUTION_H
#define VIPS_CONVOLUTION_H
#ifdef __cplusplus
extern "C" {
@ -69,4 +69,4 @@ int vips_fastcor( VipsImage *in, VipsImage *ref, VipsImage **out, ... )
}
#endif /*__cplusplus*/
#endif /*IM_CONVOLUTION_H*/
#endif /*VIPS_CONVOLUTION_H*/