use builtin floor / ceil

get rid of FAST_PSEUDO_FLOOR; add VIPS_FLOOR and VIPS_CEIL and use them
everywhere

see https://github.com/jcupitt/libvips/pull/372
This commit is contained in:
John Cupitt 2016-01-23 09:50:57 +00:00
parent e025a04d68
commit a7d889df06
13 changed files with 30 additions and 53 deletions

View File

@ -8,6 +8,7 @@
- tune vips_shrinkh(), 30% faster [Lovell Fuller] - tune vips_shrinkh(), 30% faster [Lovell Fuller]
- remove SEQ hint from vips_subsample(), fixes cli performance [erdmann] - remove SEQ hint from vips_subsample(), fixes cli performance [erdmann]
- fix double free on attach ICC profile from file in tiff write [erdmann] - fix double free on attach ICC profile from file in tiff write [erdmann]
- add VIPS_FLOOR()/VIPS_CEIL(), much faster [Lovell Fuller]
1/1/16 started 8.2.1 1/1/16 started 8.2.1
- add a compat stub [Benjamin Gilbert] - add a compat stub [Benjamin Gilbert]

View File

@ -111,7 +111,7 @@ vips_remainder_build( VipsObject *object )
double a = p1[x]; \ double a = p1[x]; \
double b = p2[x]; \ double b = p2[x]; \
\ \
q[x] = b ? a - b * floor (a / b) : -1; \ q[x] = b ? a - b * VIPS_FLOOR (a / b) : -1; \
} \ } \
} }

View File

@ -121,8 +121,8 @@ vips_round_buffer( VipsArithmetic *arithmetic,
switch( round->round ) { switch( round->round ) {
case VIPS_OPERATION_ROUND_RINT: SWITCH( VIPS_RINT ); break; case VIPS_OPERATION_ROUND_RINT: SWITCH( VIPS_RINT ); break;
case VIPS_OPERATION_ROUND_CEIL: SWITCH( ceil ); break; case VIPS_OPERATION_ROUND_CEIL: SWITCH( VIPS_CEIL ); break;
case VIPS_OPERATION_ROUND_FLOOR: SWITCH( floor ); break; case VIPS_OPERATION_ROUND_FLOOR: SWITCH( VIPS_FLOOR ); break;
default: default:
g_assert( 0 ); g_assert( 0 );

View File

@ -150,7 +150,7 @@ vips_col_Lcmc2L( float Lcmc )
{ {
int known; int known;
known = floor( Lcmc * 10.0 ); known = VIPS_FLOOR( Lcmc * 10.0 );
known = VIPS_CLIP( 0, known, 999 ); known = VIPS_CLIP( 0, known, 999 );
return( LI[known] + return( LI[known] +
@ -172,7 +172,7 @@ vips_col_Ccmc2C( float Ccmc )
{ {
int known; int known;
known = floor( Ccmc * 10.0 ); known = VIPS_FLOOR( Ccmc * 10.0 );
known = VIPS_CLIP( 0, known, 2999 ); known = VIPS_CLIP( 0, known, 2999 );
return( CI[known] + return( CI[known] +
@ -201,7 +201,7 @@ vips_col_Chcmc2h( float C, float hcmc )
r = (int) ((C + 1.0) / 2.0); r = (int) ((C + 1.0) / 2.0);
r = VIPS_CLIP( 0, r, 99 ); r = VIPS_CLIP( 0, r, 99 );
known = floor( hcmc ); known = VIPS_FLOOR( hcmc );
known = VIPS_CLIP( 0, known, 359 ); known = VIPS_CLIP( 0, known, 359 );
return( hI[r][known] + return( hI[r][known] +

View File

@ -246,7 +246,7 @@ vips_cast_start( VipsImage *out, void *a, void *b )
OTYPE * restrict q = (OTYPE *) out; \ OTYPE * restrict q = (OTYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
ITYPE v = floor( p[x] ); \ ITYPE v = VIPS_FLOOR( p[x] ); \
\ \
VIPS_CLIP( v, seq ); \ VIPS_CLIP( v, seq ); \
\ \
@ -261,7 +261,7 @@ vips_cast_start( VipsImage *out, void *a, void *b )
OTYPE * restrict q = (OTYPE *) out; \ OTYPE * restrict q = (OTYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
ITYPE v = floor( p[0] ); \ ITYPE v = VIPS_FLOOR( p[0] ); \
p += 2; \ p += 2; \
\ \
VIPS_CLIP( v, seq ); \ VIPS_CLIP( v, seq ); \

View File

@ -316,9 +316,9 @@ boxes_break( Boxes *boxes )
* fixed n-lines which includes any negative parts. * fixed n-lines which includes any negative parts.
*/ */
depth = (max - min) / boxes->n_layers; depth = (max - min) / boxes->n_layers;
layers_above = ceil( max / depth ); layers_above = VIPS_CEIL( max / depth );
depth = max / layers_above; depth = max / layers_above;
layers_below = floor( min / depth ); layers_below = VIPS_FLOOR( min / depth );
boxes->n_layers = layers_above - layers_below; boxes->n_layers = layers_above - layers_below;

View File

@ -541,8 +541,8 @@ vips__matrix_header( char *whitemap, FILE *fp,
vips_error( "mask2vips", "%s", _( "no width / height" ) ); vips_error( "mask2vips", "%s", _( "no width / height" ) );
return( -1 ); return( -1 );
} }
if( floor( header[0] ) != header[0] || if( VIPS_FLOOR( header[0] ) != header[0] ||
floor( header[1] ) != header[1] ) { VIPS_FLOOR( header[1] ) != header[1] ) {
vips_error( "mask2vips", "%s", _( "width / height not int" ) ); vips_error( "mask2vips", "%s", _( "width / height not int" ) );
return( -1 ); return( -1 );
} }

View File

@ -237,7 +237,7 @@ plot( IMAGE *in, IMAGE *out )
if( in->BandFmt == IM_BANDFMT_UCHAR ) if( in->BandFmt == IM_BANDFMT_UCHAR )
tsize = 256; tsize = 256;
else else
tsize = ceil( max ); tsize = VIPS_CEIL( max );
/* Make sure we don't make a zero height image. /* Make sure we don't make a zero height image.
*/ */

View File

@ -94,9 +94,6 @@ G_STMT_START { \
} \ } \
} G_STMT_END } G_STMT_END
/* Round a float to the nearest integer. Much faster than rint().
*/
#define VIPS_RINT( R ) ((int) ((R) > 0 ? ((R) + 0.5) : ((R) - 0.5)))
/* Various integer range clips. Record over/under flows. /* Various integer range clips. Record over/under flows.
*/ */
@ -164,9 +161,15 @@ G_STMT_START { \
#if defined(__clang__) || (__GNUC__ >= 4) #if defined(__clang__) || (__GNUC__ >= 4)
#define VIPS_ISNAN( V ) __builtin_isnan( V ) #define VIPS_ISNAN( V ) __builtin_isnan( V )
#define VIPS_ISINF( V ) __builtin_isinf( V ) #define VIPS_ISINF( V ) __builtin_isinf( V )
#define VIPS_FLOOR( V ) __builtin_floor( V )
#define VIPS_CEIL( V ) __builtin_ceil( V )
#define VIPS_RINT( V ) __builtin_rint( V )
#else #else
#define VIPS_ISNAN( V ) isnan( V ) #define VIPS_ISNAN( V ) isnan( V )
#define VIPS_ISINF( V ) isinf( V ) #define VIPS_ISINF( V ) isinf( V )
#define VIPS_FLOOR( V ) floor( V )
#define VIPS_CEIL( V ) ceil( V )
#define VIPS_RINT( R ) ((int) ((R) > 0 ? ((R) + 0.5) : ((R) - 0.5)))
#endif #endif
/* Not all platforms have PATH_MAX (eg. Hurd) and we don't need a platform one /* Not all platforms have PATH_MAX (eg. Hurd) and we don't need a platform one

View File

@ -330,8 +330,8 @@ vips_affine_gen( VipsRegion *or, void *seq, void *a, void *b, gboolean *stop )
for( x = le; x < ri; x++ ) { for( x = le; x < ri; x++ ) {
int fx, fy; int fx, fy;
fx = FAST_PSEUDO_FLOOR( ix ); fx = VIPS_FLOOR( ix );
fy = FAST_PSEUDO_FLOOR( iy ); fy = VIPS_FLOOR( iy );
/* Clip against iarea. /* Clip against iarea.
*/ */

View File

@ -50,35 +50,6 @@ extern "C" {
(G_TYPE_INSTANCE_GET_CLASS( (obj), \ (G_TYPE_INSTANCE_GET_CLASS( (obj), \
VIPS_TYPE_RESAMPLE, VipsResampleClass )) VIPS_TYPE_RESAMPLE, VipsResampleClass ))
/*
* __builtin_floor is the fastest available floor function, if available.
*
* FAST_PSEUDO_FLOOR is a floor and floorf replacement which has been
* found to be faster on several linux boxes than the library
* version. It returns the floor of its argument unless the argument
* is a negative integer, in which case it returns one less than the
* floor. For example:
*
* FAST_PSEUDO_FLOOR(0.5) = 0
*
* FAST_PSEUDO_FLOOR(0.) = 0
*
* FAST_PSEUDO_FLOOR(-.5) = -1
*
* as expected, but
*
* FAST_PSEUDO_FLOOR(-1.) = -2
*
* The locations of the discontinuities of FAST_PSEUDO_FLOOR are the
* same as floor and floorf; it is just that at negative integers the
* function is discontinuous on the right instead of the left.
*/
#if defined(__clang__) || (__GNUC__ >= 4)
#define FAST_PSEUDO_FLOOR(x) __builtin_floor( x )
#else
#define FAST_PSEUDO_FLOOR(x) ( (int)(x) - ( (x) < 0. ) )
#endif
typedef struct _VipsResample { typedef struct _VipsResample {
VipsOperation parent_instance; VipsOperation parent_instance;

View File

@ -123,8 +123,10 @@ vips_resize_build( VipsObject *object )
/* If the factor is > 1.0, we need to zoom rather than shrink. /* If the factor is > 1.0, we need to zoom rather than shrink.
* Just set the int part to 1 in this case. * Just set the int part to 1 in this case.
*/ */
int_hshrink = resize->scale > 1.0 ? 1 : floor( 1.0 / resize->scale ); int_hshrink = resize->scale > 1.0 ?
int_vshrink = resize->vscale > 1.0 ? 1 : floor( 1.0 / resize->vscale ); 1 : VIPS_FLOOR( 1.0 / resize->scale );
int_vshrink = resize->vscale > 1.0 ?
1 : VIPS_FLOOR( 1.0 / resize->vscale );
/* We want to shrink by less for interpolators with larger windows. /* We want to shrink by less for interpolators with larger windows.
*/ */

View File

@ -284,8 +284,8 @@ vips_shrink2_gen( VipsRegion *or, void *vseq, void *a, void *b, gboolean *stop )
s.left = r->left * shrink->xshrink; s.left = r->left * shrink->xshrink;
s.top = (r->top + y) * shrink->yshrink; s.top = (r->top + y) * shrink->yshrink;
s.width = ceil( r->width * shrink->xshrink ); s.width = VIPS_CEIL( r->width * shrink->xshrink );
s.height = ceil( height * shrink->yshrink ); s.height = VIPS_CEIL( height * shrink->yshrink );
#ifdef DEBUG #ifdef DEBUG
printf( "shrink_gen: requesting %d x %d at %d x %d\n", printf( "shrink_gen: requesting %d x %d at %d x %d\n",
s.width, s.height, s.left, s.top ); s.width, s.height, s.left, s.top );
@ -319,8 +319,8 @@ vips_shrink2_build( VipsObject *object )
if( VIPS_OBJECT_CLASS( vips_shrink2_parent_class )->build( object ) ) if( VIPS_OBJECT_CLASS( vips_shrink2_parent_class )->build( object ) )
return( -1 ); return( -1 );
shrink->mw = ceil( shrink->xshrink ); shrink->mw = VIPS_CEIL( shrink->xshrink );
shrink->mh = ceil( shrink->yshrink ); shrink->mh = VIPS_CEIL( shrink->yshrink );
shrink->np = shrink->mw * shrink->mh; shrink->np = shrink->mw * shrink->mh;
in = resample->in; in = resample->in;