convi/reducev: use convsuswb in Orc path (#3053)
* reducev: use convsuswb in Orc path. Saves a few instructions.
* convi: use convsuswb in Orc path. Saves a few instructions.
This commit is contained in:
parent
3b33d912c4
commit
d874010d09
@@ -420,8 +420,6 @@ vips_convi_compile_clip( VipsConvi *convi )
|
|||||||
VipsVector *v;
|
VipsVector *v;
|
||||||
char rnd[256];
|
char rnd[256];
|
||||||
char exp[256];
|
char exp[256];
|
||||||
char c0[256];
|
|
||||||
char c255[256];
|
|
||||||
char off[256];
|
char off[256];
|
||||||
|
|
||||||
convi->vector = v = vips_vector_new( "convi", 1 );
|
convi->vector = v = vips_vector_new( "convi", 1 );
|
||||||
@@ -442,16 +440,7 @@ vips_convi_compile_clip( VipsConvi *convi )
|
|||||||
CONST( off, offset, 2 );
|
CONST( off, offset, 2 );
|
||||||
ASM3( "addw", "value", "value", off );
|
ASM3( "addw", "value", "value", off );
|
||||||
|
|
||||||
/* You'd think "convsuswb" (convert signed 16-bit to unsigned
|
ASM2( "convsuswb", "d1", "value" );
|
||||||
* 8-bit with saturation) would be quicker, but it's a lot
|
|
||||||
* slower.
|
|
||||||
*/
|
|
||||||
CONST( c0, 0, 2 );
|
|
||||||
ASM3( "maxsw", "value", c0, "value" );
|
|
||||||
CONST( c255, 255, 2 );
|
|
||||||
ASM3( "minsw", "value", c255, "value" );
|
|
||||||
|
|
||||||
ASM2( "convwb", "d1", "value" );
|
|
||||||
|
|
||||||
if( !vips_vector_compile( v ) )
|
if( !vips_vector_compile( v ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
@@ -268,24 +268,13 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first )
|
|||||||
if( pass->last >= reducev->n_point - 1 ) {
|
if( pass->last >= reducev->n_point - 1 ) {
|
||||||
char c32[256];
|
char c32[256];
|
||||||
char c6[256];
|
char c6[256];
|
||||||
char c0[256];
|
|
||||||
char c255[256];
|
|
||||||
|
|
||||||
CONST( c32, 32, 2 );
|
CONST( c32, 32, 2 );
|
||||||
ASM3( "addw", "sum", "sum", c32 );
|
ASM3( "addw", "sum", "sum", c32 );
|
||||||
CONST( c6, 6, 2 );
|
CONST( c6, 6, 2 );
|
||||||
ASM3( "shrsw", "sum", "sum", c6 );
|
ASM3( "shrsw", "sum", "sum", c6 );
|
||||||
|
|
||||||
/* You'd think "convsuswb", convert signed 16-bit to unsigned
|
ASM2( "convsuswb", "d1", "sum" );
|
||||||
* 8-bit with saturation, would be quicker, but it's a lot
|
|
||||||
* slower.
|
|
||||||
*/
|
|
||||||
CONST( c0, 0, 2 );
|
|
||||||
ASM3( "maxsw", "sum", c0, "sum" );
|
|
||||||
CONST( c255, 255, 2 );
|
|
||||||
ASM3( "minsw", "sum", c255, "sum" );
|
|
||||||
|
|
||||||
ASM2( "convwb", "d1", "sum" );
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ASM2( "copyw", "d2", "sum" );
|
ASM2( "copyw", "d2", "sum" );
|
||||||
|
Loading…
Reference in New Issue
Block a user