From a7d889df06b10ad4bf921eb980bd12064466ef85 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Sat, 23 Jan 2016 09:50:57 +0000 Subject: [PATCH] use builtin floor / ceil get rid of FAST_PSEUDO_FLOOR, add VIPS_FLOOR and VIPS_CEIL, use them everywhere see https://github.com/jcupitt/libvips/pull/372 --- ChangeLog | 1 + libvips/arithmetic/remainder.c | 2 +- libvips/arithmetic/round.c | 4 ++-- libvips/colour/UCS2LCh.c | 6 +++--- libvips/conversion/cast.c | 4 ++-- libvips/convolution/im_aconv.c | 4 ++-- libvips/foreign/csv.c | 4 ++-- libvips/histogram/hist_plot.c | 2 +- libvips/include/vips/util.h | 9 ++++++--- libvips/resample/affine.c | 4 ++-- libvips/resample/presample.h | 29 ----------------------------- libvips/resample/resize.c | 6 ++++-- libvips/resample/shrink2.c | 8 ++++---- 13 files changed, 30 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 11a75e0a..cb62cfbd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,7 @@ - tune vips_shrinkh(), 30% faster [Lovell Fuller] - remove SEQ hint from vips_subsample(), fixes cli performance [erdmann] - fix double free on attach ICC profile from file in tiff write [erdmann] +- add VIPS_FLOOR()/VIPS_CEIL(), much faster [Lovell Fuller] 1/1/16 started 8.2.1 - add a compat stub [Benjamin Gilbert] diff --git a/libvips/arithmetic/remainder.c b/libvips/arithmetic/remainder.c index b208d621..3d3c6d41 100644 --- a/libvips/arithmetic/remainder.c +++ b/libvips/arithmetic/remainder.c @@ -111,7 +111,7 @@ vips_remainder_build( VipsObject *object ) double a = p1[x]; \ double b = p2[x]; \ \ - q[x] = b ? a - b * floor (a / b) : -1; \ + q[x] = b ? a - b * VIPS_FLOOR (a / b) : -1; \ } \ } diff --git a/libvips/arithmetic/round.c b/libvips/arithmetic/round.c index 4e97aaba..2aac000d 100644 --- a/libvips/arithmetic/round.c +++ b/libvips/arithmetic/round.c @@ -121,8 +121,8 @@ vips_round_buffer( VipsArithmetic *arithmetic, switch( round->round ) { case VIPS_OPERATION_ROUND_RINT: SWITCH( VIPS_RINT ); break; - case VIPS_OPERATION_ROUND_CEIL: SWITCH( ceil ); break; - case VIPS_OPERATION_ROUND_FLOOR: SWITCH( floor ); break; + case VIPS_OPERATION_ROUND_CEIL: SWITCH( VIPS_CEIL ); break; + case VIPS_OPERATION_ROUND_FLOOR: SWITCH( VIPS_FLOOR ); break; default: g_assert( 0 ); diff --git a/libvips/colour/UCS2LCh.c b/libvips/colour/UCS2LCh.c index 2d807e35..26adccef 100644 --- a/libvips/colour/UCS2LCh.c +++ b/libvips/colour/UCS2LCh.c @@ -150,7 +150,7 @@ vips_col_Lcmc2L( float Lcmc ) { int known; - known = floor( Lcmc * 10.0 ); + known = VIPS_FLOOR( Lcmc * 10.0 ); known = VIPS_CLIP( 0, known, 999 ); return( LI[known] + @@ -172,7 +172,7 @@ vips_col_Ccmc2C( float Ccmc ) { int known; - known = floor( Ccmc * 10.0 ); + known = VIPS_FLOOR( Ccmc * 10.0 ); known = VIPS_CLIP( 0, known, 2999 ); return( CI[known] + @@ -201,7 +201,7 @@ vips_col_Chcmc2h( float C, float hcmc ) r = (int) ((C + 1.0) / 2.0); r = VIPS_CLIP( 0, r, 99 ); - known = floor( hcmc ); + known = VIPS_FLOOR( hcmc ); known = VIPS_CLIP( 0, known, 359 ); return( hI[r][known] + diff --git a/libvips/conversion/cast.c b/libvips/conversion/cast.c index 0fae1aae..d724bfb8 100644 --- a/libvips/conversion/cast.c +++ b/libvips/conversion/cast.c @@ -246,7 +246,7 @@ vips_cast_start( VipsImage *out, void *a, void *b ) OTYPE * restrict q = (OTYPE *) out; \ \ for( x = 0; x < sz; x++ ) { \ - ITYPE v = floor( p[x] ); \ + ITYPE v = VIPS_FLOOR( p[x] ); \ \ VIPS_CLIP( v, seq ); \ \ @@ -261,7 +261,7 @@ vips_cast_start( VipsImage *out, void *a, void *b ) OTYPE * restrict q = (OTYPE *) out; \ \ for( x = 0; x < sz; x++ ) { \ - ITYPE v = floor( p[0] ); \ + ITYPE v = VIPS_FLOOR( p[0] ); \ p += 2; \ \ VIPS_CLIP( v, seq ); \ diff --git a/libvips/convolution/im_aconv.c b/libvips/convolution/im_aconv.c index ad3d9ac5..b35463d4 100644 --- a/libvips/convolution/im_aconv.c +++ b/libvips/convolution/im_aconv.c @@ -316,9 +316,9 @@ boxes_break( Boxes *boxes ) * fixed n-lines which includes any negative parts. */ depth = (max - min) / boxes->n_layers; - layers_above = ceil( max / depth ); + layers_above = VIPS_CEIL( max / depth ); depth = max / layers_above; - layers_below = floor( min / depth ); + layers_below = VIPS_FLOOR( min / depth ); boxes->n_layers = layers_above - layers_below; diff --git a/libvips/foreign/csv.c b/libvips/foreign/csv.c index fa3fa101..6236e71e 100644 --- a/libvips/foreign/csv.c +++ b/libvips/foreign/csv.c @@ -541,8 +541,8 @@ vips__matrix_header( char *whitemap, FILE *fp, vips_error( "mask2vips", "%s", _( "no width / height" ) ); return( -1 ); } - if( floor( header[0] ) != header[0] || - floor( header[1] ) != header[1] ) { + if( VIPS_FLOOR( header[0] ) != header[0] || + VIPS_FLOOR( header[1] ) != header[1] ) { vips_error( "mask2vips", "%s", _( "width / height not int" ) ); return( -1 ); } diff --git a/libvips/histogram/hist_plot.c b/libvips/histogram/hist_plot.c index 9afde6ac..2d3c00f1 100644 --- a/libvips/histogram/hist_plot.c +++ b/libvips/histogram/hist_plot.c @@ -237,7 +237,7 @@ plot( IMAGE *in, IMAGE *out ) if( in->BandFmt == IM_BANDFMT_UCHAR ) tsize = 256; else - tsize = ceil( max ); + tsize = VIPS_CEIL( max ); /* Make sure we don't make a zero height image. */ diff --git a/libvips/include/vips/util.h b/libvips/include/vips/util.h index 0362189f..d67b89a0 100644 --- a/libvips/include/vips/util.h +++ b/libvips/include/vips/util.h @@ -94,9 +94,6 @@ G_STMT_START { \ } \ } G_STMT_END -/* Round a float to the nearest integer. Much faster than rint(). - */ -#define VIPS_RINT( R ) ((int) ((R) > 0 ? ((R) + 0.5) : ((R) - 0.5))) /* Various integer range clips. Record over/under flows. */ @@ -164,9 +161,15 @@ G_STMT_START { \ #if defined(__clang__) || (__GNUC__ >= 4) #define VIPS_ISNAN( V ) __builtin_isnan( V ) #define VIPS_ISINF( V ) __builtin_isinf( V ) +#define VIPS_FLOOR( V ) __builtin_floor( V ) +#define VIPS_CEIL( V ) __builtin_ceil( V ) +#define VIPS_RINT( V ) __builtin_rint( V ) #else #define VIPS_ISNAN( V ) isnan( V ) #define VIPS_ISINF( V ) isinf( V ) +#define VIPS_FLOOR( V ) floor( V ) +#define VIPS_CEIL( V ) ceil( V ) +#define VIPS_RINT( R ) ((int) ((R) > 0 ? ((R) + 0.5) : ((R) - 0.5))) #endif /* Not all platforms have PATH_MAX (eg. Hurd) and we don't need a platform one diff --git a/libvips/resample/affine.c b/libvips/resample/affine.c index c74bd363..766e014c 100644 --- a/libvips/resample/affine.c +++ b/libvips/resample/affine.c @@ -330,8 +330,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop ) for( x = le; x < ri; x++ ) { int fx, fy; - fx = FAST_PSEUDO_FLOOR( ix ); - fy = FAST_PSEUDO_FLOOR( iy ); + fx = VIPS_FLOOR( ix ); + fy = VIPS_FLOOR( iy ); /* Clip against iarea. */ diff --git a/libvips/resample/presample.h b/libvips/resample/presample.h index 2e4d9c6a..fc678930 100644 --- a/libvips/resample/presample.h +++ b/libvips/resample/presample.h @@ -50,35 +50,6 @@ extern "C" { (G_TYPE_INSTANCE_GET_CLASS( (obj), \ VIPS_TYPE_RESAMPLE, VipsResampleClass )) -/* - * __builtin_floor is the fastest available floor function, if available. - * - * FAST_PSEUDO_FLOOR is a floor and floorf replacement which has been - * found to be faster on several linux boxes than the library - * version. It returns the floor of its argument unless the argument - * is a negative integer, in which case it returns one less than the - * floor. For example: - * - * FAST_PSEUDO_FLOOR(0.5) = 0 - * - * FAST_PSEUDO_FLOOR(0.) = 0 - * - * FAST_PSEUDO_FLOOR(-.5) = -1 - * - * as expected, but - * - * FAST_PSEUDO_FLOOR(-1.) = -2 - * - * The locations of the discontinuities of FAST_PSEUDO_FLOOR are the - * same as floor and floorf; it is just that at negative integers the - * function is discontinuous on the right instead of the left. - */ -#if defined(__clang__) || (__GNUC__ >= 4) -#define FAST_PSEUDO_FLOOR(x) __builtin_floor( x ) -#else -#define FAST_PSEUDO_FLOOR(x) ( (int)(x) - ( (x) < 0. ) ) -#endif - typedef struct _VipsResample { VipsOperation parent_instance; diff --git a/libvips/resample/resize.c b/libvips/resample/resize.c index 419fa7a2..b9bf8e86 100644 --- a/libvips/resample/resize.c +++ b/libvips/resample/resize.c @@ -123,8 +123,10 @@ vips_resize_build( VipsObject *object ) /* If the factor is > 1.0, we need to zoom rather than shrink. * Just set the int part to 1 in this case. */ - int_hshrink = resize->scale > 1.0 ? 1 : floor( 1.0 / resize->scale ); - int_vshrink = resize->vscale > 1.0 ? 1 : floor( 1.0 / resize->vscale ); + int_hshrink = resize->scale > 1.0 ? + 1 : VIPS_FLOOR( 1.0 / resize->scale ); + int_vshrink = resize->vscale > 1.0 ? + 1 : VIPS_FLOOR( 1.0 / resize->vscale ); /* We want to shrink by less for interpolators with larger windows. */ diff --git a/libvips/resample/shrink2.c b/libvips/resample/shrink2.c index 2dda8e9f..c6e70c40 100644 --- a/libvips/resample/shrink2.c +++ b/libvips/resample/shrink2.c @@ -284,8 +284,8 @@ vips_shrink2_gen( VipsRegion *or, void *vseq, void *a, void *b, gboolean *stop ) s.left = r->left * shrink->xshrink; s.top = (r->top + y) * shrink->yshrink; - s.width = ceil( r->width * shrink->xshrink ); - s.height = ceil( height * shrink->yshrink ); + s.width = VIPS_CEIL( r->width * shrink->xshrink ); + s.height = VIPS_CEIL( height * shrink->yshrink ); #ifdef DEBUG printf( "shrink_gen: requesting %d x %d at %d x %d\n", s.width, s.height, s.left, s.top ); @@ -319,8 +319,8 @@ vips_shrink2_build( VipsObject *object ) if( VIPS_OBJECT_CLASS( vips_shrink2_parent_class )->build( object ) ) return( -1 ); - shrink->mw = ceil( shrink->xshrink ); - shrink->mh = ceil( shrink->yshrink ); + shrink->mw = VIPS_CEIL( shrink->xshrink ); + shrink->mh = VIPS_CEIL( shrink->yshrink ); shrink->np = shrink->mw * shrink->mh; in = resample->in;