From c20eb9dca0aedb9f7a5a148a3c953bcba05279e0 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Tue, 15 Mar 2016 17:34:31 +0000 Subject: [PATCH] remove old unrolled case for reduce not used any more --- TODO | 6 ++++- libvips/resample/reduceh.cpp | 52 +++--------------------------------- libvips/resample/reducev.cpp | 48 +++------------------------------ 3 files changed, 13 insertions(+), 93 deletions(-) diff --git a/TODO b/TODO index f55b57ba..992bc548 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,11 @@ - - try orc version of reducev? and shrinkv? maybe shrinkh? + valgrind --tool=callgrind vipsthumbnail wtc.jpg -s 2000 --vips-info + self called + 54% 256 reducev_gen + 25% 256 reduceh_gen + 18% loads various jpeg decode - try SEQ_UNBUFFERED on jpg source, get out of order error? diff --git a/libvips/resample/reduceh.cpp b/libvips/resample/reduceh.cpp index a32412c5..5a2cd54d 100644 --- a/libvips/resample/reduceh.cpp +++ b/libvips/resample/reduceh.cpp @@ -179,46 +179,6 @@ reduceh_unsigned_int_tab( VipsReduceh *reduceh, } } -/* A 6-point interpolation on uint8 is the most common case ... unroll that. - * - * The inner loop here won't vectorise, but our inner loop doesn't run for - * long enough for vectorisation to be useful :-( gcc says it needs about an - * 11-point kernel for the vector version to be worthwhile. - */ -static void inline -reduceh_unsigned_uint8_6tab( VipsPel *out, const VipsPel *in, - const int bands, const int *cx ) -{ - const int b1 = bands; - const int b2 = b1 + b1; - const int b3 = b1 + b2; - const int b4 = b2 + b2; - const int b5 = b1 + b4; - - const int c0 = cx[0]; - const int c1 = cx[1]; - const int c2 = cx[2]; - const int c3 = cx[3]; - const int c4 = cx[4]; - const int c5 = cx[5]; - - for( int z = 0; z < bands; z++ ) { - int cubich = unsigned_fixed_round( - c0 * in[0] + - c1 * in[b1] + - c2 * in[b2] + - c3 * in[b3] + - c4 * in[b4] + - c5 * in[b5] ); - - cubich = VIPS_CLIP( 0, cubich, 255 ); - - out[z] = cubich; - - in += 1; - } -} - template static void inline reduceh_signed_int_tab( VipsReduceh *reduceh, @@ -376,14 +336,10 @@ vips_reduceh_gen( VipsRegion *out_region, void *seq, switch( in->BandFmt ) { case VIPS_FORMAT_UCHAR: - if( reduceh->n_points == 6 ) - reduceh_unsigned_uint8_6tab( - q, p, bands, cxi ); - else - reduceh_unsigned_int_tab - ( - reduceh, - q, p, bands, cxi ); + reduceh_unsigned_int_tab + ( + reduceh, + q, p, bands, cxi ); break; case VIPS_FORMAT_CHAR: diff --git a/libvips/resample/reducev.cpp b/libvips/resample/reducev.cpp index 9fd2f263..cdfd8a92 100644 --- a/libvips/resample/reducev.cpp +++ b/libvips/resample/reducev.cpp @@ -108,42 +108,6 @@ reducev_unsigned_int_tab( VipsReducev *reducev, } } -/* An unrolled version of ^^ for the most common case. - */ -static void inline -reducev_unsigned_uint8_6tab( VipsPel *out, const VipsPel *in, - const int ne, const int lskip, const int *cy ) -{ - const int l1 = lskip; - const int l2 = l1 + l1; - const int l3 = l1 + l2; - const int l4 = l2 + l2; - const int l5 = l4 + l1; - - const int c0 = cy[0]; - const int c1 = cy[1]; - const int c2 = cy[2]; - const int c3 = cy[3]; - const int c4 = cy[4]; - const int c5 = cy[5]; - - for( int z = 0; z < ne; z++ ) { - int sum = unsigned_fixed_round( - c0 * in[0] + - c1 * in[l1] + - c2 * in[l2] + - c3 * in[l3] + - c4 * in[l4] + - c5 * in[l5] ); - - sum = VIPS_CLIP( 0, sum, 255 ); - - out[z] = sum; - - in += 1; - } -} - template static void inline reducev_signed_int_tab( VipsReducev *reducev, @@ -290,14 +254,10 @@ vips_reducev_gen( VipsRegion *out_region, void *seq, switch( in->BandFmt ) { case VIPS_FORMAT_UCHAR: - if( reducev->n_points == 6 ) - reducev_unsigned_uint8_6tab( - q, p, ne, lskip, cyi ); - else - reducev_unsigned_int_tab - ( - reducev, - q, p, ne, lskip, cyi ); + reducev_unsigned_int_tab + ( + reducev, + q, p, ne, lskip, cyi ); break; case VIPS_FORMAT_CHAR: