convi/reducev: initialize the sum with the addition (#3052)

* reducev: initialize the sum with the addition

Avoids an Orc opcode.

* convi: initialize the sum with the addition

Avoids an Orc opcode.

* vector: add comments to magic numbers
This commit is contained in:
Kleis Auke Wolthuizen 2022-09-17 15:18:15 +02:00 committed by GitHub
parent d874010d09
commit 6d43755bfa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 18 deletions

View File

@ -328,10 +328,10 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
* of the previous pass.
*/
if( pass->first == 0 ) {
char c0[256];
char rnd[256];
CONST( c0, 0, 2 );
ASM2( "loadpw", "sum", c0 );
CONST( rnd, 1 << (convi->exp - 1), 2 );
ASM2( "loadpw", "sum", rnd );
}
else
ASM2( "loadw", "sum", "r" );
@ -418,7 +418,6 @@ vips_convi_compile_clip( VipsConvi *convi )
int offset = VIPS_RINT( vips_image_get_offset( M ) );
VipsVector *v;
char rnd[256];
char exp[256];
char off[256];
@ -432,10 +431,8 @@ vips_convi_compile_clip( VipsConvi *convi )
*/
TEMP( "value", 2 );
CONST( rnd, 1 << (convi->exp - 1), 2 );
ASM3( "addw", "value", "r", rnd );
CONST( exp, convi->exp, 2 );
ASM3( "shrsw", "value", "value", exp );
ASM3( "shrsw", "value", "r", exp );
CONST( off, offset, 2 );
ASM3( "addw", "value", "value", off );

View File

@ -394,24 +394,24 @@ vips_vector_full( VipsVector *vector )
/* We may need up to 2 constants plus one source per
* coefficient, so stop if we're sure we don't have enough.
*/
if( vector->n_constant + 2 > 8 )
if( vector->n_constant + 2 > 8 /*ORC_MAX_CONST_VARS*/ )
return( TRUE );
/* You can have 8 sources, and d1 counts as one of them, so +1
* there.
*/
if( vector->n_source + vector->n_scanline + 1 > 7 )
if( vector->n_source + vector->n_scanline + 1 > 7 /*ORC_MAX_SRC_VARS - 1*/ )
return( TRUE );
/* Need to leave some space, so 1 spare.
*/
if( vector->n_parameter > 7 )
if( vector->n_parameter > 7 /*ORC_MAX_PARAM_VARS - 1*/ )
return( TRUE );
/* After signalling full, some operations will add up to 4 more
* instructions as they finish up. Leave a margin.
*/
if( vector->n_instruction + 10 > 50 )
if( vector->n_instruction + 10 > 50 /*ORC_N_INSNS / 2*/ )
return( TRUE );
return( FALSE );

View File

@ -211,10 +211,10 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
* of the previous pass.
*/
if( first ) {
char c0[256];
char c32[256];
CONST( c0, 0, 2 );
ASM2( "loadpw", "sum", c0 );
CONST( c32, 32, 2 );
ASM2( "loadpw", "sum", c32 );
}
else
ASM2( "loadw", "sum", "r" );
@ -242,7 +242,8 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
* of the image and coefficient are interesting, so we can take
* the bottom bits of a 16x16->32 multiply.
*
* We accumulate the signed 16-bit result in sum.
* We accumulate the signed 16-bit result in sum. Saturated
* add.
*/
ASM2( "convubw", "value", source );
ASM3( "mullw", "value", "value", coeff );
@ -266,11 +267,8 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
* image, otherwise write the 16-bit intermediate to our temp buffer.
*/
if( pass->last >= reducev->n_point - 1 ) {
char c32[256];
char c6[256];
CONST( c32, 32, 2 );
ASM3( "addw", "sum", "sum", c32 );
CONST( c6, 6, 2 );
ASM3( "shrsw", "sum", "sum", c6 );