convi/reducev: use convsuswb in Orc path (#3053)

* reducev: use convsuswb in Orc path

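Saves a few instructions: a single convsuswb (convert signed 16-bit to unsigned 8-bit with saturation) replaces the maxsw/minsw clamp to 0..255 followed by convwb.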

* convi: use convsuswb in Orc path

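Saves a few instructions: a single convsuswb (convert signed 16-bit to unsigned 8-bit with saturation) replaces the maxsw/minsw clamp to 0..255 followed by convwb.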
This commit is contained in:
Kleis Auke Wolthuizen 2022-09-17 13:26:00 +02:00 committed by GitHub
parent 3b33d912c4
commit d874010d09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 24 deletions

View File

@@ -420,8 +420,6 @@ vips_convi_compile_clip( VipsConvi *convi )
VipsVector *v;
char rnd[256];
char exp[256];
char c0[256];
char c255[256];
char off[256];
convi->vector = v = vips_vector_new( "convi", 1 );
@@ -442,16 +440,7 @@ vips_convi_compile_clip( VipsConvi *convi )
CONST( off, offset, 2 );
ASM3( "addw", "value", "value", off );
/* You'd think "convsuswb" (convert signed 16-bit to unsigned
* 8-bit with saturation) would be quicker, but it's a lot
* slower.
*/
CONST( c0, 0, 2 );
ASM3( "maxsw", "value", c0, "value" );
CONST( c255, 255, 2 );
ASM3( "minsw", "value", c255, "value" );
ASM2( "convwb", "d1", "value" );
ASM2( "convsuswb", "d1", "value" );
if( !vips_vector_compile( v ) )
return( -1 );

View File

@@ -268,24 +268,13 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
if( pass->last >= reducev->n_point - 1 ) {
char c32[256];
char c6[256];
char c0[256];
char c255[256];
CONST( c32, 32, 2 );
ASM3( "addw", "sum", "sum", c32 );
CONST( c6, 6, 2 );
ASM3( "shrsw", "sum", "sum", c6 );
/* You'd think "convsuswb", convert signed 16-bit to unsigned
* 8-bit with saturation, would be quicker, but it's a lot
* slower.
*/
CONST( c0, 0, 2 );
ASM3( "maxsw", "sum", c0, "sum" );
CONST( c255, 255, 2 );
ASM3( "minsw", "sum", c255, "sum" );
ASM2( "convwb", "d1", "sum" );
ASM2( "convsuswb", "d1", "sum" );
}
else
ASM2( "copyw", "d2", "sum" );