convi/reducev: initialize the sum with the addition (#3052)
* reducev: initialize the sum with the addition. Avoids an Orc opcode.
* convi: initialize the sum with the addition. Avoids an Orc opcode.
* vector: add comments to magic numbers
This commit is contained in:
parent
d874010d09
commit
6d43755bfa
@ -328,10 +328,10 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
|
|||||||
* of the previous pass.
|
* of the previous pass.
|
||||||
*/
|
*/
|
||||||
if( pass->first == 0 ) {
|
if( pass->first == 0 ) {
|
||||||
char c0[256];
|
char rnd[256];
|
||||||
|
|
||||||
CONST( c0, 0, 2 );
|
CONST( rnd, 1 << (convi->exp - 1), 2 );
|
||||||
ASM2( "loadpw", "sum", c0 );
|
ASM2( "loadpw", "sum", rnd );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ASM2( "loadw", "sum", "r" );
|
ASM2( "loadw", "sum", "r" );
|
||||||
@ -418,7 +418,6 @@ vips_convi_compile_clip( VipsConvi *convi )
|
|||||||
int offset = VIPS_RINT( vips_image_get_offset( M ) );
|
int offset = VIPS_RINT( vips_image_get_offset( M ) );
|
||||||
|
|
||||||
VipsVector *v;
|
VipsVector *v;
|
||||||
char rnd[256];
|
|
||||||
char exp[256];
|
char exp[256];
|
||||||
char off[256];
|
char off[256];
|
||||||
|
|
||||||
@ -432,10 +431,8 @@ vips_convi_compile_clip( VipsConvi *convi )
|
|||||||
*/
|
*/
|
||||||
TEMP( "value", 2 );
|
TEMP( "value", 2 );
|
||||||
|
|
||||||
CONST( rnd, 1 << (convi->exp - 1), 2 );
|
|
||||||
ASM3( "addw", "value", "r", rnd );
|
|
||||||
CONST( exp, convi->exp, 2 );
|
CONST( exp, convi->exp, 2 );
|
||||||
ASM3( "shrsw", "value", "value", exp );
|
ASM3( "shrsw", "value", "r", exp );
|
||||||
|
|
||||||
CONST( off, offset, 2 );
|
CONST( off, offset, 2 );
|
||||||
ASM3( "addw", "value", "value", off );
|
ASM3( "addw", "value", "value", off );
|
||||||
|
@ -394,24 +394,24 @@ vips_vector_full( VipsVector *vector )
|
|||||||
/* We can need a max of 2 constants plus one source per
|
/* We can need a max of 2 constants plus one source per
|
||||||
* coefficient, so stop if we're sure we don't have enough.
|
* coefficient, so stop if we're sure we don't have enough.
|
||||||
*/
|
*/
|
||||||
if( vector->n_constant + 2 > 8 )
|
if( vector->n_constant + 2 > 8 /*ORC_MAX_CONST_VARS*/ )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
|
|
||||||
/* You can have 8 source, and d1 counts as one of them, so +1
|
/* You can have 8 source, and d1 counts as one of them, so +1
|
||||||
* there.
|
* there.
|
||||||
*/
|
*/
|
||||||
if( vector->n_source + vector->n_scanline + 1 > 7 )
|
if( vector->n_source + vector->n_scanline + 1 > 7 /*ORC_MAX_SRC_VARS - 1*/ )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
|
|
||||||
/* Need to leave some space, so 1 spare.
|
/* Need to leave some space, so 1 spare.
|
||||||
*/
|
*/
|
||||||
if( vector->n_parameter > 7 )
|
if( vector->n_parameter > 7 /*ORC_MAX_PARAM_VARS - 1*/ )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
|
|
||||||
/* After signalling full, some operations will add up to 4 more
|
/* After signalling full, some operations will add up to 4 more
|
||||||
* instructions as they finish up. Leave a margin.
|
* instructions as they finish up. Leave a margin.
|
||||||
*/
|
*/
|
||||||
if( vector->n_instruction + 10 > 50 )
|
if( vector->n_instruction + 10 > 50 /*ORC_N_INSNS / 2*/ )
|
||||||
return( TRUE );
|
return( TRUE );
|
||||||
|
|
||||||
return( FALSE );
|
return( FALSE );
|
||||||
|
@ -211,10 +211,10 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
|
|||||||
* of the previous pass.
|
* of the previous pass.
|
||||||
*/
|
*/
|
||||||
if( first ) {
|
if( first ) {
|
||||||
char c0[256];
|
char c32[256];
|
||||||
|
|
||||||
CONST( c0, 0, 2 );
|
CONST( c32, 32, 2 );
|
||||||
ASM2( "loadpw", "sum", c0 );
|
ASM2( "loadpw", "sum", c32 );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ASM2( "loadw", "sum", "r" );
|
ASM2( "loadw", "sum", "r" );
|
||||||
@ -242,7 +242,8 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
|
|||||||
* of the image and coefficient are interesting, so we can take
|
* of the image and coefficient are interesting, so we can take
|
||||||
* the bottom bits of a 16x16->32 multiply.
|
* the bottom bits of a 16x16->32 multiply.
|
||||||
*
|
*
|
||||||
* We accumulate the signed 16-bit result in sum.
|
* We accumulate the signed 16-bit result in sum. Saturated
|
||||||
|
* add.
|
||||||
*/
|
*/
|
||||||
ASM2( "convubw", "value", source );
|
ASM2( "convubw", "value", source );
|
||||||
ASM3( "mullw", "value", "value", coeff );
|
ASM3( "mullw", "value", "value", coeff );
|
||||||
@ -266,11 +267,8 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
|
|||||||
* image, otherwise write the 16-bit intermediate to our temp buffer.
|
* image, otherwise write the 16-bit intermediate to our temp buffer.
|
||||||
*/
|
*/
|
||||||
if( pass->last >= reducev->n_point - 1 ) {
|
if( pass->last >= reducev->n_point - 1 ) {
|
||||||
char c32[256];
|
|
||||||
char c6[256];
|
char c6[256];
|
||||||
|
|
||||||
CONST( c32, 32, 2 );
|
|
||||||
ASM3( "addw", "sum", "sum", c32 );
|
|
||||||
CONST( c6, 6, 2 );
|
CONST( c6, 6, 2 );
|
||||||
ASM3( "shrsw", "sum", "sum", c6 );
|
ASM3( "shrsw", "sum", "sum", c6 );
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user