From d874010d09c985faf6b22a585f9fe13b232fc90a Mon Sep 17 00:00:00 2001 From: Kleis Auke Wolthuizen Date: Sat, 17 Sep 2022 13:26:00 +0200 Subject: [PATCH] convi/reducev: use convsuswb in Orc path (#3053) * reducev: use convsuswb in Orc path Saves a few instructions. * convi: use convsuswb in Orc path Saves a few instructions. --- libvips/convolution/convi.c | 13 +------------ libvips/resample/reducev.cpp | 13 +------------ 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/libvips/convolution/convi.c b/libvips/convolution/convi.c index f4eb9c55..eef38f48 100644 --- a/libvips/convolution/convi.c +++ b/libvips/convolution/convi.c @@ -420,8 +420,6 @@ vips_convi_compile_clip( VipsConvi *convi ) VipsVector *v; char rnd[256]; char exp[256]; - char c0[256]; - char c255[256]; char off[256]; convi->vector = v = vips_vector_new( "convi", 1 ); @@ -442,16 +440,7 @@ vips_convi_compile_clip( VipsConvi *convi ) CONST( off, offset, 2 ); ASM3( "addw", "value", "value", off ); - /* You'd think "convsuswb" (convert signed 16-bit to unsigned - * 8-bit with saturation) would be quicker, but it's a lot - * slower. - */ - CONST( c0, 0, 2 ); - ASM3( "maxsw", "value", c0, "value" ); - CONST( c255, 255, 2 ); - ASM3( "minsw", "value", c255, "value" ); - - ASM2( "convwb", "d1", "value" ); + ASM2( "convsuswb", "d1", "value" ); if( !vips_vector_compile( v ) ) return( -1 ); diff --git a/libvips/resample/reducev.cpp b/libvips/resample/reducev.cpp index f201f6fd..2b43c7a8 100644 --- a/libvips/resample/reducev.cpp +++ b/libvips/resample/reducev.cpp @@ -268,24 +268,13 @@ vips_reducev_compile_section( VipsReducev *reducev, Pass *pass, gboolean first ) if( pass->last >= reducev->n_point - 1 ) { char c32[256]; char c6[256]; - char c0[256]; - char c255[256]; CONST( c32, 32, 2 ); ASM3( "addw", "sum", "sum", c32 ); CONST( c6, 6, 2 ); ASM3( "shrsw", "sum", "sum", c6 ); - /* You'd think "convsuswb", convert signed 16-bit to unsigned - * 8-bit with saturation, would be quicker, but it's a lot - * slower. - */ - CONST( c0, 0, 2 ); - ASM3( "maxsw", "sum", c0, "sum" ); - CONST( c255, 255, 2 ); - ASM3( "minsw", "sum", c255, "sum" ); - - ASM2( "convwb", "d1", "sum" ); + ASM2( "convsuswb", "d1", "sum" ); } else ASM2( "copyw", "d2", "sum" );