diff --git a/libvips/arithmetic/im_add.c b/libvips/arithmetic/im_add.c index facfc7a5..773886ea 100644 --- a/libvips/arithmetic/im_add.c +++ b/libvips/arithmetic/im_add.c @@ -345,7 +345,6 @@ im__init_programs( VipsVector *vectors[IM_BANDFMT_LAST], int isize = im__sizeof_bandfmt[fmt]; int osize = im__sizeof_bandfmt[format_table[fmt]]; - char source[256]; VipsVector *v; /* float and double are not handled (well) by ORC. @@ -358,7 +357,7 @@ im__init_programs( VipsVector *vectors[IM_BANDFMT_LAST], v = vectors[fmt] = vips_vector_new_ds( "binary arith", osize, isize ); - vips_vector_source( v, source, 2, isize ); + vips_vector_source_name( v, "s2", isize ); vips_vector_temporary( v, "t1", osize ); vips_vector_temporary( v, "t2", osize ); diff --git a/libvips/convolution/im_conv.c b/libvips/convolution/im_conv.c index 86603bf5..bdfb6f00 100644 --- a/libvips/convolution/im_conv.c +++ b/libvips/convolution/im_conv.c @@ -205,7 +205,7 @@ conv_evalend( Conv *conv ) } #define TEMP( N, S ) vips_vector_temporary( v, N, S ) -#define SRC( N, P, S ) vips_vector_source( v, N, P, S ) +#define SCANLINE( N, P, S ) vips_vector_source_scanline( v, N, P, S ) #define CONST( N, V, S ) vips_vector_constant( v, N, V, S ) #define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B ) #define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C ) @@ -278,9 +278,9 @@ conv_compile_convolution_u8s16( Conv *conv ) */ continue; - /* The source. s1 is the first scanline in the mask. + /* The source. sl0 is the first scanline in the mask. */ - SRC( source, y + 1, 1 ); + SCANLINE( source, y, 1 ); /* The offset, only for non-first-columns though. */ @@ -868,8 +868,8 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b ) /* Link the combiner to the intermediate buffer. */ - vips_executor_set_array( &convolve, "d1", seq->sum ); - vips_executor_set_array( &clip, "s1", seq->sum ); + vips_executor_set_destination( &convolve, seq->sum ); + vips_executor_set_array( &clip, conv->clip->s[0], seq->sum ); for( y = 0; y < r->height; y++ ) { #ifdef DEBUG_PIXELS @@ -886,14 +886,15 @@ convvec_gen( REGION *or, void *vseq, void *a, void *b ) } #endif /*DEBUG_PIXELS*/ - vips_executor_set_source( &convolve, ir, r->left, r->top + y ); + vips_executor_set_scanline( &convolve, + ir, r->left, r->top + y ); vips_executor_run( &convolve ); #ifdef DEBUG_PIXELS printf( "before clip: %3d\n", *((signed short *) seq->sum) ); #endif /*DEBUG_PIXELS*/ - vips_executor_set_array( &clip, "d1", + vips_executor_set_destination( &clip, IM_REGION_ADDR( or, r->left, r->top + y ) ); vips_executor_run( &clip ); diff --git a/libvips/include/vips/vector.h b/libvips/include/vips/vector.h index 6a47cd82..aea17509 100644 --- a/libvips/include/vips/vector.h +++ b/libvips/include/vips/vector.h @@ -41,6 +41,8 @@ extern "C" { #endif /*__cplusplus*/ +#define VIPS_VECTOR_SOURCE_MAX (10) + /* An Orc program. */ typedef struct { @@ -51,17 +53,22 @@ typedef struct { /* How many resources we've used so far in this codegen. */ int n_temp; + int n_scanline; int n_source; int n_destination; int n_constant; int n_parameter; int n_instruction; - /* The sources this program needs. "s1"'s var is always in s[0], - * others may skip lines. + /* The scanline sources, and for each variable, the associated line. + * "sl0" onwards. */ - int s[10]; - int line[10]; + int sl[VIPS_VECTOR_SOURCE_MAX]; + int line[VIPS_VECTOR_SOURCE_MAX]; + + /* Non-scanline sources, "s1" etc. s[0] is the var for "s1". + */ + int s[VIPS_VECTOR_SOURCE_MAX]; /* The destination var. */ @@ -101,8 +108,9 @@ VipsVector *vips_vector_new_ds( const char *name, int size1, int size2 ); void vips_vector_constant( VipsVector *vector, char *name, int value, int size ); -int vips_vector_source_name( VipsVector *vector, char *name, int size ); -void vips_vector_source( VipsVector *vector, char *name, int number, int size ); +void vips_vector_source_name( VipsVector *vector, char *name, int size ); +void vips_vector_source_scanline( VipsVector *vector, + char *name, int line, int size ); void vips_vector_temporary( VipsVector *vector, char *name, int size ); void vips_vector_asm2( VipsVector *vector, const char *op, const char *a, const char *b ); @@ -116,7 +124,7 @@ void vips_vector_print( VipsVector *vector ); void vips_executor_set_program( VipsExecutor *executor, VipsVector *vector, int n ); -void vips_executor_set_source( VipsExecutor *executor, +void vips_executor_set_scanline( VipsExecutor *executor, REGION *ir, int x, int y ); void vips_executor_set_destination( VipsExecutor *executor, void *value ); void vips_executor_set_array( VipsExecutor *executor, int var, void *value ); diff --git a/libvips/iofuncs/vector.c b/libvips/iofuncs/vector.c index 5ca8085f..ab3298a1 100644 --- a/libvips/iofuncs/vector.c +++ b/libvips/iofuncs/vector.c @@ -108,30 +108,35 @@ VipsVector * vips_vector_new_ds( const char *name, int size1, int size2 ) { VipsVector *vector; + int i; if( !(vector = IM_NEW( NULL, VipsVector )) ) return( NULL ); vector->name = name; vector->n_temp = 0; + vector->n_scanline = 0; vector->n_source = 0; vector->n_destination = 0; vector->n_constant = 0; vector->n_parameter = 0; vector->n_instruction = 0; + + for( i = 0; i < VIPS_VECTOR_SOURCE_MAX; i++ ) { + vector->s[i] = -1; + vector->sl[i] = -1; + } + + vector->d1 = -1; + vector->compiled = FALSE; #ifdef HAVE_ORC vector->program = orc_program_new_ds( size1, size2 ); -{ - int var; /* We always make s1 / d1 */ - var = orc_program_find_var_by_name( vector->program, "s1" ); - vector->var[0] = var; - vector->line[0] = 0; + vector->s[0] = orc_program_find_var_by_name( vector->program, "s1" ); vector->d1 = orc_program_find_var_by_name( vector->program, "d1" ); -} #endif /*HAVE_ORC*/ vector->n_source += 1; vector->n_destination += 1; @@ -207,21 +212,26 @@ vips_vector_source_name( VipsVector *vector, char *name, int size ) #ifdef HAVE_ORC g_assert( orc_program_find_var_by_name( vector->program, name ) == -1 ); - var = orc_program_add_source( vector->program, size, name ); - vector->var[vector->n_source] = var; + vector->s[vector->n_source] = + orc_program_add_source( vector->program, size, name ); vector->n_source += 1; -#else /*!HAVE_ORC*/ +#endif /*HAVE_ORC*/ } void -vips_vector_source( VipsVector *vector, char *name, int line, int size ) +vips_vector_source_scanline( VipsVector *vector, + char *name, int line, int size ) { #ifdef HAVE_ORC - im_snprintf( name, 256, "s%d", line ); + im_snprintf( name, 256, "sl%d", line ); if( orc_program_find_var_by_name( vector->program, name ) == -1 ) { - vips_vector_source_name( vector, name, size ); - vector->line[n_source - 1] = line - 1; + int var; + + var = orc_program_add_source( vector->program, size, name ); + vector->sl[vector->n_scanline] = var; + vector->line[vector->n_scanline] = line; + vector->n_scanline += 1; } #endif /*HAVE_ORC*/ } @@ -230,6 +240,8 @@ void vips_vector_temporary( VipsVector *vector, char *name, int size ) { #ifdef HAVE_ORC + g_assert( orc_program_find_var_by_name( vector->program, name ) == -1 ); + orc_program_add_temporary( vector->program, size, name ); vector->n_temp += 1; #endif /*HAVE_ORC*/ @@ -244,7 +256,7 @@ vips_vector_full( VipsVector *vector ) */ if( vector->n_constant > 16 - 2 ) return( TRUE ); - if( vector->n_source > 8 - 1 ) + if( vector->n_source + vector->n_scanline > 8 - 1 ) return( TRUE ); if( vector->n_instruction > 50 ) return( TRUE ); @@ -283,10 +295,13 @@ vips_vector_print( VipsVector *vector ) printf( "successfully compiled\n" ); else printf( "not compiled successfully\n" ); + printf( " n_scanline = %d\n", vector->n_scanline ); + for( i = 0; i < vector->n_scanline; i++ ) + printf( " var %d = line %d\n", + vector->sl[i], vector->line[i] ); printf( " n_source = %d\n", vector->n_source ); for( i = 0; i < vector->n_source; i++ ) - printf( " var %d = line %d\n", - vector->var[i], vector->line[i] ); + printf( " var %d\n", vector->s[i] ); printf( " n_parameter = %d\n", vector->n_parameter ); printf( " n_destination = %d\n", vector->n_destination ); printf( " n_constant = %d\n", vector->n_constant ); @@ -306,7 +321,7 @@ vips_executor_set_program( VipsExecutor *executor, VipsVector *vector, int n ) } void -vips_executor_set_source( VipsExecutor *executor, REGION *ir, int x, int y ) +vips_executor_set_scanline( VipsExecutor *executor, REGION *ir, int x, int y ) { #ifdef HAVE_ORC VipsVector *vector = executor->vector; @@ -315,9 +330,9 @@ vips_executor_set_source( VipsExecutor *executor, REGION *ir, int x, int y ) int i; - for( i = 0; i < vector->n_source; i++ ) + for( i = 0; i < vector->n_scanline; i++ ) orc_executor_set_array( &executor->executor, - vector->var[i], base + vector->line[i] * lsk ); + vector->sl[i], base + vector->line[i] * lsk ); #endif /*HAVE_ORC*/ } @@ -335,9 +350,6 @@ void vips_executor_set_array( VipsExecutor *executor, int var, void *value ) { #ifdef HAVE_ORC - VipsVector *vector = executor->vector; - OrcProgram *program = vector->program; - if( var != -1 ) orc_executor_set_array( &executor->executor, var, value ); #endif /*HAVE_ORC*/ diff --git a/libvips/morphology/morphology.c b/libvips/morphology/morphology.c index e0619abe..4e32ef46 100644 --- a/libvips/morphology/morphology.c +++ b/libvips/morphology/morphology.c @@ -45,8 +45,8 @@ */ /* -#define DEBUG */ +#define DEBUG #ifdef HAVE_CONFIG_H #include @@ -85,10 +85,6 @@ typedef struct { /* The code we generate for this section of this mask. */ VipsVector *vector; - - /* The variable number for r, if we set it (or -1). - */ - int r; } Pass; /* Our parameters. @@ -125,7 +121,7 @@ morph_close( Morph *morph ) } #define TEMP( N, S ) vips_vector_temporary( v, N, S ) -#define SRC( N, P, S ) vips_vector_source( v, N, P, S ) +#define SCANLINE( N, P, S ) vips_vector_source_scanline( v, N, P, S ) #define CONST( N, V, S ) vips_vector_constant( v, N, V, S ) #define ASM2( OP, A, B ) vips_vector_asm2( v, OP, A, B ) #define ASM3( OP, A, B, C ) vips_vector_asm3( v, OP, A, B, C ) @@ -158,10 +154,7 @@ pass_compile_section( Morph *morph, int first, int *last ) pass = &morph->pass[morph->n_pass]; morph->n_pass += 1; pass->first = first; - pass->r = -1; - /* Start with a single source scanline, we add more as we need them. - */ pass->vector = v = vips_vector_new_ds( "morph", 1, 1 ); /* The value we fetch from the image, the accumulated sum. @@ -183,9 +176,9 @@ pass_compile_section( Morph *morph, int first, int *last ) ASM2( "copyb", "sum", one ); } else { - /* "r" is the result of the previous pass. + /* "r" is the result of the previous pass. var in s[1]. */ - pass->r = vips_vector_source_name( v, "r", 1 ); + vips_vector_source_name( v, "r", 1 ); ASM2( "loadb", "sum", "r" ); } @@ -198,9 +191,9 @@ pass_compile_section( Morph *morph, int first, int *last ) if( mask->coeff[i] == 128 ) continue; - /* The source. s1 is the first scanline in the mask. + /* The source. sl0 is the first scanline in the mask. */ - SRC( source, y + 1, 1 ); + SCANLINE( source, y, 1 ); /* The offset, only for non-first-columns though. */ @@ -268,7 +261,7 @@ pass_compile( Morph *morph ) /* Skip any don't-care coefficients at the start of the mask * region. */ - for( ; mask->coeff[i] == 128 && i < n_mask; i++ ) + for( ; i < n_mask && mask->coeff[i] == 128; i++ ) ; if( i == n_mask ) break; @@ -677,10 +670,10 @@ morph_vector_gen( REGION *or, void *vseq, void *a, void *b ) else d = seq->t2; - vips_executor_set_source( &executor[j], + vips_executor_set_scanline( &executor[j], ir, r->left, r->top + y ); - vips_executor_set_array( &executor[j].executor, - morph->pass[j].r, seq->t1 ); + vips_executor_set_array( &executor[j], + morph->pass[j].vector->s[1], seq->t1 ); vips_executor_set_destination( &executor[j], d ); vips_executor_run( &executor[j] );