add VIPS_FABS/MAX/MIN/CLIP

macros which use the gcc __builtin_fabs() etc. functions when they can
This commit is contained in:
John Cupitt 2016-01-26 12:26:21 +00:00
parent baf5e860e3
commit 4d18300560
25 changed files with 147 additions and 115 deletions

View File

@ -11,6 +11,8 @@
- add VIPS_FLOOR()/VIPS_CEIL(), much faster [Lovell Fuller]
- use g_assert_not_reached();
- better vips-from-C docs
- add VIPS_FMIN() / VIPS_FMAX() / VIPS_FABS() / VIPS_FCLIP() ... use builtins
when available
1/1/16 started 8.2.1
- add a compat stub [Benjamin Gilbert]

View File

@ -112,7 +112,7 @@ Clang dynamic analysis:
$ FLAGS="$FLAGS -fno-omit-frame-pointer -fno-optimize-sibling-calls"
$ CC=clang CXX=clang++ LD=clang \
CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" LDFLAGS=-fsanitize=address \
./configure --prefix=/home/john/vips --disable-introspection
./configure --prefix=/home/john/vips
$ FLAGS="-O1 -g -fsanitize=thread"
$ FLAGS="$FLAGS -fPIC -pie"
@ -120,14 +120,14 @@ Clang dynamic analysis:
$ CC=clang CXX=clang++ LD=clang \
CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" \
LDFLAGS="-fsanitize=thread -fPIC -pie" \
./configure --prefix=/home/john/vips --disable-introspection
./configure --prefix=/home/john/vips
Build with the GCC auto-vectorizer and diagnostics (or just -O3):
$ FLAGS="-O2 -msse4.2 -ffast-math"
$ FLAGS="$FLAGS -ftree-vectorize -fdump-tree-vect-details"
$ CFLAGS="$FLAGS" CXXFLAGS="$FLAGS" \
./configure --prefix=/home/john/vips --disable-introspection
./configure --prefix=/home/john/vips
Static analysis with:

2
TODO
View File

@ -1,3 +1,5 @@
- look for FCLIP / FABS use ... VIPS_ABS() on float type
- write a shrinker that does two 1D shrinks, vertical and horizontal, with
bicubic interpolation

View File

@ -112,7 +112,7 @@ vips_abs_build( VipsObject *object )
int x; \
\
for( x = 0; x < sz; x++ ) \
q[x] = fabs( p[x] ); \
q[x] = VIPS_FABS( p[x] ); \
}
/* Complex abs operation: calculate modulus.
@ -141,8 +141,8 @@ vips_abs_build( VipsObject *object )
for( x = 0; x < sz; x++ ) { \
double rp = p[0]; \
double ip = p[1]; \
double abs_rp = fabs( rp ); \
double abs_ip = fabs( ip ); \
double abs_rp = VIPS_FABS( rp ); \
double abs_ip = VIPS_FABS( ip ); \
\
if( abs_rp > abs_ip ) { \
double temp = ip / rp; \

View File

@ -121,7 +121,7 @@ vips_deviate_build( VipsObject *object )
s2 = deviate->sum2;
g_object_set( object,
"out", sqrt( fabs( s2 - (s * s / vals) ) / (vals - 1) ),
"out", sqrt( VIPS_FABS( s2 - (s * s / vals) ) / (vals - 1) ),
NULL );
return( 0 );

View File

@ -131,7 +131,7 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
q[0] = 0.0; \
q[1] = 0.0; \
} \
else if( fabs( right[0] ) > fabs( right[1] ) ) { \
else if( VIPS_FABS( right[0] ) > VIPS_FABS( right[1] ) ) { \
double a = right[1] / right[0]; \
double b = right[0] + right[1] * a; \
\

View File

@ -241,7 +241,7 @@ vips_linear_build( VipsObject *object )
for( x = 0; x < sz; x++ ) { \
float t = a1 * p[x] + b1; \
\
q[x] = VIPS_CLIP( 0, t, 255 ); \
q[x] = VIPS_FCLIP( 0, t, 255 ); \
} \
}
@ -255,7 +255,7 @@ vips_linear_build( VipsObject *object )
for( k = 0; k < nb; k++, i++ ) { \
double t = a[k] * p[i] + b[k]; \
\
q[i] = VIPS_CLIP( 0, t, 255 ); \
q[i] = VIPS_FCLIP( 0, t, 255 ); \
} \
}
@ -278,7 +278,7 @@ vips_linear_build( VipsObject *object )
for( k = 0; k < nb; k++, i++ ) { \
double t = a[k] * p[0] + b[k]; \
\
q[i] = VIPS_CLIP( 0, t, 255 ); \
q[i] = VIPS_FCLIP( 0, t, 255 ); \
p += 2; \
} \
}

View File

@ -163,7 +163,8 @@ vips_measure_build( VipsObject *object )
* averages near zero (can get these if use
* measure on IM_TYPE_LAB images).
*/
if( dev * 5 > fabs( avg ) && fabs( avg ) > 3 )
if( dev * 5 > VIPS_FABS( avg ) &&
VIPS_FABS( avg ) > 3 )
vips_warn( class->nickname,
_( "patch %d x %d, band %d: "
"avg = %g, sdev = %g" ),

View File

@ -159,12 +159,12 @@ vips_stats_build( VipsObject *object )
double *row = VIPS_MATRIX( stats->out, 0, y );
row[COL_AVG] = row[COL_SUM] / pels;
row[COL_SD] = sqrt( fabs( row[COL_SUM2] -
row[COL_SD] = sqrt( VIPS_FABS( row[COL_SUM2] -
(row[COL_SUM] * row[COL_SUM] / pels) ) / (pels - 1) );
}
row0[COL_AVG] = row0[COL_SUM] / vals;
row0[COL_SD] = sqrt( fabs( row0[COL_SUM2] -
row0[COL_SD] = sqrt( VIPS_FABS( row0[COL_SUM2] -
(row0[COL_SUM] * row0[COL_SUM] / vals) ) / (vals - 1) );
return( 0 );

View File

@ -57,7 +57,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
for( i = 0; i < m; i++ ) { \
double v = p[VIPS_MIN( n - 1, i )]; \
\
tq[i] = (TYPE) VIPS_CLIP( N, v, X ); \
tq[i] = (TYPE) VIPS_FCLIP( N, v, X ); \
} \
}

View File

@ -52,43 +52,50 @@ typedef VipsColourCodeClass VipsHSV2sRGBClass;
G_DEFINE_TYPE( VipsHSV2sRGB, vips_HSV2sRGB, VIPS_TYPE_COLOUR_CODE );
static void vips_HSV2sRGB_line(VipsColour *colour, VipsPel *out, VipsPel **in,
int width) {
static void
vips_HSV2sRGB_line( VipsColour *colour, VipsPel *out, VipsPel **in, int width )
{
unsigned char *p = (unsigned char *) in[0];
unsigned char *q = (unsigned char *) out;
int i;
float c, x, m;
for (i = 0; i < width; i++) {
for( i = 0; i < width; i++ ) {
float c, x, m;
c = p[2] * p[1] / 255.0f;
x = c * (1 - fabs(fmod(p[0] / SIXTH_OF_CHAR, 2) - 1));
c = p[2] * p[1] / 255.0;
x = c * (1 - VIPS_FABS( fmod( p[0] / SIXTH_OF_CHAR, 2 ) - 1 ));
m = p[2] - c;
if (p[0] < SIXTH_OF_CHAR) {
q[0]= (c+m);
q[1]= (x+m);
q[2]= (0+m);
} else if (p[0] < 2*SIXTH_OF_CHAR) {
q[0]= (x+m);
q[1]= (c+m);
q[2]= (0+m);
} else if (p[0] < 3*SIXTH_OF_CHAR) {
q[0]= (0+m);
q[1]= (c+m);
q[2]= (x+m);
} else if (p[0] < 4*SIXTH_OF_CHAR) {
q[0]= (0+m);
q[1]= (x+m);
q[2]= (c+m);
} else if (p[0] < 5*SIXTH_OF_CHAR) {
q[0]= (x+m);
q[1]= (0+m);
q[2]= (c+m);
} else {
q[0]= (c+m);
q[1]= (0+m);
q[2]= (x+m);
if( p[0] < SIXTH_OF_CHAR ) {
q[0] = c + m;
q[1] = x + m;
q[2] = 0 + m;
}
else if( p[0] < 2 * SIXTH_OF_CHAR ) {
q[0] = x + m;
q[1] = c + m;
q[2] = 0 + m;
}
else if( p[0] < 3 * SIXTH_OF_CHAR ) {
q[0] = 0 + m;
q[1] = c + m;
q[2] = x + m;
}
else if( p[0] < 4 * SIXTH_OF_CHAR ) {
q[0] = 0 + m;
q[1] = x + m;
q[2] = c + m;
}
else if( p[0] < 5 * SIXTH_OF_CHAR ) {
q[0] = x + m;
q[1] = 0 + m;
q[2] = c + m;
}
else {
q[0] = c + m;
q[1] = 0 + m;
q[2] = x + m;
}
p += 3;

View File

@ -165,7 +165,7 @@ vips_col_Ch2hcmc( float C, float h )
}
P = cos( VIPS_RAD( k7 * h + k8 ) );
D = k4 + k5 * P * pow( fabs( P ), k6 );
D = k4 + k5 * P * pow( VIPS_FABS( P ), k6 );
g = C * C * C * C;
f = sqrt( g / (g + 1900.0) );
hcmc = h + D * f;

View File

@ -88,12 +88,13 @@ vips_Lab2LabQ_line( VipsColour *colour, VipsPel *out, VipsPel **in, int width )
float * restrict p = (float *) in[0];
VipsPel * restrict q = out;
float fval;
int lsbs;
int intv;
int i;
for( i = 0; i < width; i++) {
for( i = 0; i < width; i++ ) {
float fval;
int lsbs;
int intv;
/* Scale L up to 10 bits. Add 0.5 rather than call VIPS_RINT
* for speed. This will not round negatives correctly! But
* this does not matter, since L is >0. L*=100.0 -> 1023.
@ -101,19 +102,19 @@ vips_Lab2LabQ_line( VipsColour *colour, VipsPel *out, VipsPel **in, int width )
intv = 10.23 * p[0] + 0.5; /* scale L up to 10 bits */
intv = VIPS_CLIP( 0, intv, 1023 );
lsbs = (intv & 0x3) << 6; /* 00000011 -> 11000000 */
q[0] = (intv >> 2); /* drop bot 2 bits and store */
q[0] = intv >> 2; /* drop bot 2 bits and store */
fval = 8.0 * p[1]; /* do a */
intv = VIPS_RINT( fval );
intv = VIPS_CLIP( -1024, intv, 1023 );
lsbs |= (intv & 0x7) << 3; /* 00000111 -> 00111000 */
q[1] = (intv >> 3); /* drop bot 3 bits & store */
q[1] = intv >> 3; /* drop bot 3 bits & store */
fval = 8.0 * p[2]; /* do b */
intv = VIPS_RINT( fval );
intv = VIPS_CLIP( -1024, intv, 1023 );
lsbs |= (intv & 0x7);
q[2] = (intv >> 3);
q[2] = intv >> 3;
q[3] = lsbs; /* store lsb band */

View File

@ -69,10 +69,11 @@ vips_LabS2LabQ_line( VipsColour *colour, VipsPel *out, VipsPel **in, int width )
unsigned char *q = (unsigned char *) out;
int i;
int l, a, b;
unsigned char ext;
for( i = 0; i < width; i++ ) {
int l, a, b;
unsigned char ext;
/* Get LAB, rounding to 10, 11, 11.
*/
l = p[0] + 16;

View File

@ -133,15 +133,15 @@ vips_XYZ2Lab_line( VipsColour *colour, VipsPel *out, VipsPel **in, int width )
nZ = QUANT_ELEMENTS * p[2] / XYZ2Lab->Z0;
p += 3;
i = VIPS_CLIP( 0, (int) nX, QUANT_ELEMENTS - 2 );
i = VIPS_FCLIP( 0, nX, QUANT_ELEMENTS - 2 );
f = nX - i;
cbx = cbrt_table[i] + f * (cbrt_table[i + 1] - cbrt_table[i]);
i = VIPS_CLIP( 0, (int) nY, QUANT_ELEMENTS - 2 );
i = VIPS_FCLIP( 0, nY, QUANT_ELEMENTS - 2 );
f = nY - i;
cby = cbrt_table[i] + f * (cbrt_table[i + 1] - cbrt_table[i]);
i = VIPS_CLIP( 0, (int) nZ, QUANT_ELEMENTS - 2 );
i = VIPS_FCLIP( 0, nZ, QUANT_ELEMENTS - 2 );
f = nZ - i;
cbz = cbrt_table[i] + f * (cbrt_table[i + 1] - cbrt_table[i]);

View File

@ -132,9 +132,9 @@ vips_col_dE00( float L1, float a1, float b1,
*/
double Ldb = (L1d + L2d) / 2;
double Cdb = (C1d + C2d) / 2;
double hdb = fabs( h1d - h2d ) < 180 ?
double hdb = VIPS_FABS( h1d - h2d ) < 180 ?
(h1d + h2d) / 2 :
fabs( h1d + h2d - 360 ) / 2;
VIPS_FABS( h1d + h2d - 360 ) / 2;
/* dtheta, RC
*/
@ -161,7 +161,7 @@ vips_col_dE00( float L1, float a1, float b1,
/* hue difference ... careful!
*/
double dhd = fabs( h1d - h2d ) < 180 ?
double dhd = VIPS_FABS( h1d - h2d ) < 180 ?
h1d - h2d :
360 - (h1d - h2d);

View File

@ -52,61 +52,71 @@ vips_sRGB2HSV_line( VipsColour *colour, VipsPel *out, VipsPel **in, int width )
{
unsigned char *p = (unsigned char *) in[0];
unsigned char *q = (unsigned char *) out;
int i;
unsigned char c_max,c_min,delta;
float wrap_around_hue, secondary_diff;
for( i = 0; i < width; i++ ) {
unsigned char c_max;
unsigned char c_min;
float secondary_diff;
float wrap_around_hue;
if (p[1] < p[2]) {
if (p[2] < p[0]) {
// Center red (at top)
if( p[1] < p[2] ) {
if( p[2] < p[0] ) {
/* Center red (at top).
*/
c_max = p[0];
c_min = p[1];
secondary_diff = p[1] - p[2];
wrap_around_hue = 255.0f;
} else {
// Center blue
c_max = p[2];
c_min = VIPS_MIN(p[1], p[0]);
secondary_diff = p[0] - p[1];
wrap_around_hue = 170.0f;
wrap_around_hue = 255.0;
}
} else {
if (p[1] < p[0]) {
// Center red (at bottom)
else {
/* Center blue.
*/
c_max = p[2];
c_min = VIPS_MIN( p[1], p[0] );
secondary_diff = p[0] - p[1];
wrap_around_hue = 170.0;
}
}
else {
if( p[1] < p[0] ) {
/* Center red (at bottom)
*/
c_max = p[0];
c_min = p[2];
secondary_diff = p[1] - p[2];
wrap_around_hue = 0.0f;
} else {
// Center green
wrap_around_hue = 0.0;
}
else {
/* Center green
*/
c_max = p[1];
c_min = VIPS_MIN(p[2], p[0]);
c_min = VIPS_MIN( p[2], p[0] );
secondary_diff = p[2] - p[0];
wrap_around_hue = 85.0f;
wrap_around_hue = 85.0;
}
}
if (c_max == 0) {
if( c_max == 0 ) {
q[0] = 0;
q[1] = 0;
q[2] = 0;
} else {
}
else {
unsigned char delta;
q[2] = c_max;
delta = c_max - c_min;
if (delta == 0) {
if( delta == 0 )
q[0] = 0;
} else {
q[0] = (42.5f*(secondary_diff / (float) delta) + wrap_around_hue);
}
else
q[0] = 42.5 * (secondary_diff / (float) delta) +
wrap_around_hue;
q[1] = (( delta*255.0f / (float) c_max));
q[1] = delta * 255.0 / (float) c_max;
}
p += 3;

View File

@ -113,7 +113,7 @@ vips_scRGB2BW_line_16( unsigned short * restrict q, float * restrict p,
q += 1;
for( j = 0; j < extra_bands; j++ )
q[j] = VIPS_CLIP( 0, p[j] * 256.0, USHRT_MAX );
q[j] = VIPS_FCLIP( 0, p[j] * 256.0, USHRT_MAX );
p += extra_bands;
q += extra_bands;
}

View File

@ -142,7 +142,7 @@ vips_scRGB2sRGB_line_16( unsigned short * restrict q, float * restrict p,
q += 3;
for( j = 0; j < extra_bands; j++ )
q[j] = VIPS_CLIP( 0, p[j] * 256.0, USHRT_MAX );
q[j] = VIPS_FCLIP( 0, p[j] * 256.0, USHRT_MAX );
p += extra_bands;
q += extra_bands;
}

View File

@ -129,7 +129,7 @@ vips_buildlut_build_init( VipsBuildlut *lut )
/* Allow for being a bit off.
*/
if( fabs( v - VIPS_RINT( v ) ) > 0.001 ) {
if( VIPS_FABS( v - VIPS_RINT( v ) ) > 0.001 ) {
vips_error( class->nickname,
_( "x value row %d not an int" ), y );
return( -1 );

View File

@ -138,7 +138,7 @@ vips_logmat_build( VipsObject *object )
* is less than the min.
*/
if( val - last >= 0 &&
fabs( val ) < logmat->min_ampl )
VIPS_FABS( val ) < logmat->min_ampl )
break;
last = val;

View File

@ -204,7 +204,7 @@ read_header( FILE *fp, VipsImage *out, int *bits, int *ascii, int *msb_first )
*/
*msb_first = scale > 0;
vips_image_set_double( out,
"pfm-scale", fabs( scale ) );
"pfm-scale", VIPS_FABS( scale ) );
}
else {
int max_value;

View File

@ -398,7 +398,7 @@ vips_exif_set_double( ExifData *ed,
rv = exif_get_rational( entry->data + offset, bo );
old_value = (double) rv.numerator / rv.denominator;
if( fabs( old_value - value ) > 0.0001 ) {
if( VIPS_FABS( old_value - value ) > 0.0001 ) {
vips_exif_double_to_rational( value, &rv );
VIPS_DEBUG_MSG( "vips_exif_set_double: %u / %u\n",
@ -413,7 +413,7 @@ vips_exif_set_double( ExifData *ed,
srv = exif_get_srational( entry->data + offset, bo );
old_value = (double) srv.numerator / srv.denominator;
if( fabs( old_value - value ) > 0.0001 ) {
if( VIPS_FABS( old_value - value ) > 0.0001 ) {
vips_exif_double_to_srational( value, &srv );
VIPS_DEBUG_MSG( "vips_exif_set_double: %d / %d\n",

View File

@ -573,9 +573,9 @@ write_tiff_header( Write *write, Layer *layer )
*/
TIFFSetField( tif, TIFFTAG_RESOLUTIONUNIT, write->resunit );
TIFFSetField( tif, TIFFTAG_XRESOLUTION,
VIPS_CLIP( 0.01, write->xres, 1000000 ) );
VIPS_FCLIP( 0.01, write->xres, 1000000 ) );
TIFFSetField( tif, TIFFTAG_YRESOLUTION,
VIPS_CLIP( 0.01, write->yres, 1000000 ) );
VIPS_FCLIP( 0.01, write->yres, 1000000 ) );
if( write_embed_profile( write, tif ) ||
write_embed_xmp( write, tif ) ||

View File

@ -58,6 +58,31 @@ extern "C" {
#define VIPS_CLIP( A, V, B ) VIPS_MAX( (A), VIPS_MIN( (B), (V) ) )
#define VIPS_NUMBER( R ) ((int) (sizeof(R) / sizeof(R[0])))
/* The built-in isnan and isinf functions provided by gcc 4+ and clang are
 * up to 7x faster than their libc equivalent included from <math.h>.
 *
 * Use the compiler builtins for the other float helpers as well when we
 * can, and fall back to the <math.h> functions or the plain VIPS_MAX() /
 * VIPS_MIN() macros on other compilers.
 */
#if defined(__clang__) || (__GNUC__ >= 4)
#define VIPS_ISNAN( V ) __builtin_isnan( V )
#define VIPS_ISINF( V ) __builtin_isinf( V )
#define VIPS_FLOOR( V ) __builtin_floor( V )
#define VIPS_CEIL( V ) __builtin_ceil( V )
#define VIPS_RINT( V ) __builtin_rint( V )
#define VIPS_FABS( V ) __builtin_fabs( V )
#define VIPS_FMAX( A, B ) __builtin_fmax( A, B )
#define VIPS_FMIN( A, B ) __builtin_fmin( A, B )
#else
#define VIPS_ISNAN( V ) isnan( V )
#define VIPS_ISINF( V ) isinf( V )
#define VIPS_FLOOR( V ) floor( V )
#define VIPS_CEIL( V ) ceil( V )
/* Round half away from zero; note this variant yields an int.
 */
#define VIPS_RINT( R ) ((int) ((R) > 0 ? ((R) + 0.5) : ((R) - 0.5)))
/* Must not expand to VIPS_FABS() itself: a self-referential macro is not
 * re-expanded, so that would leave a call to an undeclared function.
 */
#define VIPS_FABS( V ) fabs( V )
#define VIPS_FMAX( A, B ) VIPS_MAX( A, B )
#define VIPS_FMIN( A, B ) VIPS_MIN( A, B )
#endif
#define VIPS_FCLIP( A, V, B ) VIPS_FMAX( (A), VIPS_FMIN( (B), (V) ) )
#define VIPS_SWAP( TYPE, A, B ) \
G_STMT_START { \
TYPE t = (A); \
@ -155,23 +180,6 @@ G_STMT_START { \
#define VIPS_CLIP_NONE( V, SEQ ) {}
/* The built-in isnan and isinf functions provided by gcc 4+ and clang are
* up to 7x faster than their libc equivalent included from <math.h>.
*/
#if defined(__clang__) || (__GNUC__ >= 4)
#define VIPS_ISNAN( V ) __builtin_isnan( V )
#define VIPS_ISINF( V ) __builtin_isinf( V )
#define VIPS_FLOOR( V ) __builtin_floor( V )
#define VIPS_CEIL( V ) __builtin_ceil( V )
#define VIPS_RINT( V ) __builtin_rint( V )
#else
#define VIPS_ISNAN( V ) isnan( V )
#define VIPS_ISINF( V ) isinf( V )
#define VIPS_FLOOR( V ) floor( V )
#define VIPS_CEIL( V ) ceil( V )
#define VIPS_RINT( R ) ((int) ((R) > 0 ? ((R) + 0.5) : ((R) - 0.5)))
#endif
/* Not all platforms have PATH_MAX (eg. Hurd) and we don't need a platform one
* anyway, just a static buffer big enough for almost any path.
*/