From dccc5d5180d6795e6446d18fe05a4ba1c7dcd767 Mon Sep 17 00:00:00 2001 From: John Cupitt Date: Sat, 30 Nov 2013 13:55:10 +0000 Subject: [PATCH] add __restrict__ markup to loop pointers helps auto-vec also remove a couple of if()s from inner loops, again, helps auto-vec --- .gitignore | 1 + ChangeLog | 1 + TODO | 15 ++++++++++++ libvips/arithmetic/abs.c | 28 +++++++++------------- libvips/arithmetic/add.c | 6 ++--- libvips/arithmetic/boolean.c | 12 +++++----- libvips/arithmetic/complex.c | 8 +++---- libvips/arithmetic/divide.c | 23 ++++++++---------- libvips/arithmetic/invert.c | 12 +++++----- libvips/arithmetic/linear.c | 8 +++---- libvips/arithmetic/math.c | 4 ++-- libvips/arithmetic/math2.c | 21 ++++++++--------- libvips/arithmetic/multiply.c | 12 +++++----- libvips/arithmetic/relational.c | 20 ++++++++-------- libvips/arithmetic/remainder.c | 41 ++++++++++++++------------------- libvips/arithmetic/round.c | 4 ++-- libvips/arithmetic/sign.c | 8 +++---- libvips/arithmetic/subtract.c | 6 ++--- libvips/arithmetic/unaryconst.c | 9 ++++---- 19 files changed, 119 insertions(+), 120 deletions(-) diff --git a/.gitignore b/.gitignore index 8cf92bc5..73f59c0d 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ Makefile.in TAGS tags *.o +*.vect Vips-8.0.gir Vips-8.0.typelib .*.swp diff --git a/ChangeLog b/ChangeLog index 2c68dfc4..43f75ba7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,7 @@ - added vips_gaussblur() convenience function - added --vips-profile, records and dumps thread timing info - added vipsprofile, visualises --vips-profile output +- auto-vectorization-friendly inner loops 20/11/13 started 7.36.5 - better cache sizing in unbuffered sequential mode diff --git a/TODO b/TODO index 3b339cb2..7b238beb 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,18 @@ +- check vectorizer on linear.c + + do some more packages, we've just done arithmetic so far + + time add with auto vec and with orc, is orc worthwhile? 
+ + make sure __restrict__ turns off if the compiler does not support it, is + there a configure thing? + + how much would alignment buy us? is there any way we can do this? probably + not, since we need to be able to generate any sub-area + + how about avg? do we need -ffast-math to vec that? + + - seen some leaks from vips dzsave --layout google wtc.jpg x diff --git a/libvips/arithmetic/abs.c b/libvips/arithmetic/abs.c index 8b268463..1faf999f 100644 --- a/libvips/arithmetic/abs.c +++ b/libvips/arithmetic/abs.c @@ -1,4 +1,4 @@ -/* im_abs() +/* absolute value * * Copyright: 1990, N. Dessipris, based on im_powtra() * Author: Nicos Dessipris @@ -93,25 +93,19 @@ vips_abs_build( VipsObject *object ) /* Integer abs operation: just test and negate. */ #define ABS_INT( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ int x; \ \ - for( x = 0; x < sz; x++ ) { \ - TYPE v = p[x]; \ - \ - if( v < 0 ) \ - q[x] = 0 - v; \ - else \ - q[x] = v; \ - } \ + for( x = 0; x < sz; x++ ) \ + q[x] = p[x] < 0 ? 0 - p[x] : p[x]; \ } /* Float abs operation: call fabs(). 
*/ #define ABS_FLOAT( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ int x; \ \ for( x = 0; x < sz; x++ ) \ @@ -124,8 +118,8 @@ vips_abs_build( VipsObject *object ) #ifdef HAVE_HYPOT #define ABS_COMPLEX( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ int x; \ \ for( x = 0; x < sz; x++ ) { \ @@ -137,8 +131,8 @@ vips_abs_build( VipsObject *object ) #else /*HAVE_HYPOT*/ #define ABS_COMPLEX( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ int x; \ \ for( x = 0; x < sz; x++ ) { \ diff --git a/libvips/arithmetic/add.c b/libvips/arithmetic/add.c index 30a13ea4..fcee7eb5 100644 --- a/libvips/arithmetic/add.c +++ b/libvips/arithmetic/add.c @@ -88,9 +88,9 @@ typedef VipsBinaryClass VipsAddClass; G_DEFINE_TYPE( VipsAdd, vips_add, VIPS_TYPE_BINARY ); #define LOOP( IN, OUT ) { \ - IN *left = (IN *) in[0]; \ - IN *right = (IN *) in[1]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ left = (IN *) in[0]; \ + IN * __restrict__ right = (IN *) in[1]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = left[x] + right[x]; \ diff --git a/libvips/arithmetic/boolean.c b/libvips/arithmetic/boolean.c index 9c2434cf..9f038ca5 100644 --- a/libvips/arithmetic/boolean.c +++ b/libvips/arithmetic/boolean.c @@ -108,18 +108,18 @@ vips_boolean_build( VipsObject *object ) } #define LOOP( TYPE, OP ) { \ - TYPE *left = (TYPE *) in[0]; \ - TYPE *right = (TYPE *) in[1]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = left[x] OP right[x]; \ } #define FLOOP( TYPE, OP ) { \ - TYPE *left = (TYPE *) in[0]; \ - 
TYPE *right = (TYPE *) in[1]; \ - int *q = (int *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + int * __restrict__ q = (int *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = ((int) left[x]) OP ((int) right[x]); \ diff --git a/libvips/arithmetic/complex.c b/libvips/arithmetic/complex.c index d610ad68..93095cb6 100644 --- a/libvips/arithmetic/complex.c +++ b/libvips/arithmetic/complex.c @@ -80,8 +80,8 @@ typedef VipsUnaryClass VipsComplexClass; G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY ); #define LOOP( IN, OUT, OP ) { \ - IN *p = (IN *) in[0]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ p = (IN *) in[0]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) { \ OP( q, p[x], 0.0 ); \ @@ -91,8 +91,8 @@ G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY ); } #define CLOOP( IN, OUT, OP ) { \ - IN *p = (IN *) in[0]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ p = (IN *) in[0]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) { \ OP( q, p[0], p[1] ); \ diff --git a/libvips/arithmetic/divide.c b/libvips/arithmetic/divide.c index a82c5718..cb9edb65 100644 --- a/libvips/arithmetic/divide.c +++ b/libvips/arithmetic/divide.c @@ -90,9 +90,9 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY ); /* This is going to be much slower */ #define CLOOP( TYPE ) { \ - TYPE *left = (TYPE *) in[0]; \ - TYPE *right = (TYPE *) in[1]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + TYPE * __restrict__ q = (TYPE *) out; \ int i; \ \ for( i = 0; i < sz; i++ ) { \ @@ -120,9 +120,9 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY ); #else /* USE_MODARG_DIV */ #define CLOOP( TYPE ) { \ - TYPE *left = (TYPE *) in[0]; \ - TYPE *right = (TYPE *) in[1]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + TYPE 
* __restrict__ q = (TYPE *) out; \ int i; \ \ for( i = 0; i < sz; i++ ) { \ @@ -157,15 +157,12 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY ); /* Real divide. Cast in to OUT before divide so we work for float output. */ #define RLOOP( IN, OUT ) { \ - IN *left = (IN *) in[0]; \ - IN *right = (IN *) in[1]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ left = (IN *) in[0]; \ + IN * __restrict__ right = (IN *) in[1]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) \ - if( right[x] == 0 ) \ - q[x] = 0; \ - else \ - q[x] = (OUT) left[x] / (OUT) right[x]; \ + q[x] = right[x] == 0 ? 0 : (OUT) left[x] / (OUT) right[x]; \ } static void diff --git a/libvips/arithmetic/invert.c b/libvips/arithmetic/invert.c index 87a66397..d9bac71d 100644 --- a/libvips/arithmetic/invert.c +++ b/libvips/arithmetic/invert.c @@ -69,24 +69,24 @@ typedef VipsUnaryClass VipsInvertClass; G_DEFINE_TYPE( VipsInvert, vips_invert, VIPS_TYPE_UNARY ); #define LOOP( TYPE, L ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = (L) - p[x]; \ } #define LOOPN( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = -1 * p[x]; \ } #define LOOPC( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) { \ q[0] = -1 * p[0]; \ diff --git a/libvips/arithmetic/linear.c b/libvips/arithmetic/linear.c index 98408b51..061f6120 100644 --- a/libvips/arithmetic/linear.c +++ b/libvips/arithmetic/linear.c @@ -163,8 +163,8 @@ vips_linear_build( VipsObject *object ) /* Non-complex input, any output. 
*/ #define LOOPN( IN, OUT ) { \ - IN *p = (IN *) in[0]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ p = (IN *) in[0]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( i = 0, x = 0; x < width; x++ ) \ for( k = 0; k < nb; k++, i++ ) \ @@ -174,8 +174,8 @@ vips_linear_build( VipsObject *object ) /* Complex input, complex output. */ #define LOOPCMPLXN( IN, OUT ) { \ - IN *p = (IN *) in[0]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ p = (IN *) in[0]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < width; x++ ) \ for( k = 0; k < nb; k++ ) { \ diff --git a/libvips/arithmetic/math.c b/libvips/arithmetic/math.c index e9182d46..1b649802 100644 --- a/libvips/arithmetic/math.c +++ b/libvips/arithmetic/math.c @@ -97,8 +97,8 @@ vips_math_build( VipsObject *object ) } #define LOOP( IN, OUT, OP ) { \ - IN *p = (IN *) in[0]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ p = (IN *) in[0]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = OP( p[x] ); \ diff --git a/libvips/arithmetic/math2.c b/libvips/arithmetic/math2.c index 16d57af3..36c8410a 100644 --- a/libvips/arithmetic/math2.c +++ b/libvips/arithmetic/math2.c @@ -102,9 +102,9 @@ vips_math2_build( VipsObject *object ) } #define LOOP( IN, OUT, OP ) { \ - IN *p1 = (IN *) in[0]; \ - IN *p2 = (IN *) in[1]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ p1 = (IN *) in[0]; \ + IN * __restrict__ p2 = (IN *) in[1]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) \ OP( q[x], p1[x], p2[x] ); \ @@ -137,12 +137,9 @@ vips_math2_build( VipsObject *object ) double left = (double) (X); \ double right = (double) (E); \ \ - if( left == 0.0 && right < 0.0 ) \ - /* Division by zero! Difficult to report tho' \ - */ \ - (Y) = 0.0; \ - else \ - (Y) = pow( left, right ); \ + /* Division by zero! Difficult to report tho' \ + */ \ + (Y) = (left == 0.0 && right < 0.0) ? 
0.0 : pow( left, right ); \ } #define WOP( Y, X, E ) POW( Y, E, X ) @@ -355,9 +352,9 @@ vips_math2_const_build( VipsObject *object ) } #define LOOPC( IN, OUT, OP ) { \ - IN *p = (IN *) in[0]; \ - OUT *q = (OUT *) out; \ - double *c = (double *) uconst->c_ready; \ + IN * __restrict__ p = (IN *) in[0]; \ + OUT * __restrict__ q = (OUT *) out; \ + double * __restrict__ c = (double *) uconst->c_ready; \ \ for( i = 0, x = 0; x < width; x++ ) \ for( b = 0; b < bands; b++, i++ ) \ diff --git a/libvips/arithmetic/multiply.c b/libvips/arithmetic/multiply.c index 5e2fd8f3..7e40d86d 100644 --- a/libvips/arithmetic/multiply.c +++ b/libvips/arithmetic/multiply.c @@ -81,9 +81,9 @@ G_DEFINE_TYPE( VipsMultiply, vips_multiply, VIPS_TYPE_BINARY ); /* Complex multiply. */ #define CLOOP( TYPE ) { \ - TYPE *left = (TYPE *) in[0]; \ - TYPE *right = (TYPE *) in[1]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) { \ double x1 = left[0]; \ @@ -104,9 +104,9 @@ G_DEFINE_TYPE( VipsMultiply, vips_multiply, VIPS_TYPE_BINARY ); /* Real multiply. 
*/ #define RLOOP( IN, OUT ) { \ - IN *left = (IN *) in[0]; \ - IN *right = (IN *) in[1]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ left = (IN *) in[0]; \ + IN * __restrict__ right = (IN *) in[1]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = left[x] * right[x]; \ diff --git a/libvips/arithmetic/relational.c b/libvips/arithmetic/relational.c index a6064f2a..80b0d46a 100644 --- a/libvips/arithmetic/relational.c +++ b/libvips/arithmetic/relational.c @@ -110,18 +110,18 @@ vips_relational_build( VipsObject *object ) } #define RLOOP( TYPE, ROP ) { \ - TYPE *left = (TYPE *) in[0]; \ - TYPE *right = (TYPE *) in[1]; \ - VipsPel *q = (VipsPel *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + VipsPel * __restrict__ q = (VipsPel *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = (left[x] ROP right[x]) ? 255 : 0; \ } #define CLOOP( TYPE, COP ) { \ - TYPE *left = (TYPE *) in[0]; \ - TYPE *right = (TYPE *) in[1]; \ - VipsPel *q = (VipsPel *) out; \ + TYPE * __restrict__ left = (TYPE *) in[0]; \ + TYPE * __restrict__ right = (TYPE *) in[1]; \ + VipsPel * __restrict__ q = (VipsPel *) out; \ \ for( x = 0; x < sz; x++ ) { \ q[x] = COP( left[0], left[1], right[0], right[1]) ? 
255 : 0; \ @@ -478,8 +478,8 @@ vips_relational_const_build( VipsObject *object ) } #define RLOOPC( TYPE, OP ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *c = (TYPE *) uconst->c_ready; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \ \ for( i = 0, x = 0; x < width; x++ ) \ for( b = 0; b < bands; b++, i++ ) \ @@ -487,10 +487,10 @@ vips_relational_const_build( VipsObject *object ) } #define CLOOPC( TYPE, OP ) { \ - TYPE *p = (TYPE *) in[0]; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ \ for( i = 0, x = 0; x < width; x++ ) { \ - TYPE *c = (TYPE *) uconst->c_ready; \ + TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \ \ for( b = 0; b < bands; b++, i++ ) { \ out[i] = OP( p[0], p[1], c[0], c[1]) ? 255 : 0; \ diff --git a/libvips/arithmetic/remainder.c b/libvips/arithmetic/remainder.c index f5ad0cfa..b53881fb 100644 --- a/libvips/arithmetic/remainder.c +++ b/libvips/arithmetic/remainder.c @@ -92,32 +92,26 @@ vips_remainder_build( VipsObject *object ) /* Integer remainder-after-division. */ #define IREMAINDER( TYPE ) { \ - TYPE *p1 = (TYPE *) in[0]; \ - TYPE *p2 = (TYPE *) in[1]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p1 = (TYPE *) in[0]; \ + TYPE * __restrict__ p2 = (TYPE *) in[1]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) \ - if( p2[x] ) \ - q[x] = p1[x] % p2[x]; \ - else \ - q[x] = -1; \ + q[x] = p2[x] ? p1[x] % p2[x] : -1; \ } /* Float remainder-after-division. */ #define FREMAINDER( TYPE ) { \ - TYPE *p1 = (TYPE *) in[0]; \ - TYPE *p2 = (TYPE *) in[1]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p1 = (TYPE *) in[0]; \ + TYPE * __restrict__ p2 = (TYPE *) in[1]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) { \ double a = p1[x]; \ double b = p2[x]; \ \ - if( b ) \ - q[x] = a - b * floor (a / b); \ - else \ - q[x] = -1; \ + q[x] = b ? 
a - b * floor (a / b) : -1; \ } \ } @@ -262,9 +256,9 @@ vips_remainder_const_build( VipsObject *object ) /* Integer remainder-after-divide, per-band constant. */ #define IREMAINDERCONST( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ - TYPE *c = (TYPE *) uconst->c_ready; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ + TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \ \ for( i = 0, x = 0; x < width; x++ ) \ for( b = 0; b < bands; b++, i++ ) \ @@ -274,19 +268,18 @@ vips_remainder_const_build( VipsObject *object ) /* Float remainder-after-divide, per-band constant. */ #define FREMAINDERCONST( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ - TYPE *c = (TYPE *) uconst->c_ready; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ + TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \ \ for( i = 0, x = 0; x < width; x++ ) \ for( b = 0; b < bands; b++, i++ ) { \ double left = p[i]; \ double right = c[b]; \ \ - if( right ) \ - q[i] = left - right * floor( left / right ); \ - else \ - q[i] = -1; \ + q[i] = right ? 
\ + left - right * floor( left / right ) : \ + -1; \ } \ } diff --git a/libvips/arithmetic/round.c b/libvips/arithmetic/round.c index 5a951f4a..602c19e2 100644 --- a/libvips/arithmetic/round.c +++ b/libvips/arithmetic/round.c @@ -85,8 +85,8 @@ vips_round_build( VipsObject *object ) } #define LOOP( TYPE, OP ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = OP( p[x] ); \ diff --git a/libvips/arithmetic/sign.c b/libvips/arithmetic/sign.c index 1476dff8..69f11e8e 100644 --- a/libvips/arithmetic/sign.c +++ b/libvips/arithmetic/sign.c @@ -58,8 +58,8 @@ typedef VipsUnaryClass VipsSignClass; G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY ); #define CSIGN( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - TYPE *q = (TYPE *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + TYPE * __restrict__ q = (TYPE *) out; \ int x; \ \ for( x = 0; x < sz; x++ ) { \ @@ -83,8 +83,8 @@ G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY ); } #define SIGN( TYPE ) { \ - TYPE *p = (TYPE *) in[0]; \ - signed char *q = (signed char *) out; \ + TYPE * __restrict__ p = (TYPE *) in[0]; \ + signed char * __restrict__ q = (signed char *) out; \ int x; \ \ for( x = 0; x < sz; x++ ) { \ diff --git a/libvips/arithmetic/subtract.c b/libvips/arithmetic/subtract.c index a5f6223d..d7fece09 100644 --- a/libvips/arithmetic/subtract.c +++ b/libvips/arithmetic/subtract.c @@ -83,9 +83,9 @@ typedef VipsBinaryClass VipsSubtractClass; G_DEFINE_TYPE( VipsSubtract, vips_subtract, VIPS_TYPE_BINARY ); #define LOOP( IN, OUT ) { \ - IN *left = (IN *) in[0]; \ - IN *right = (IN *) in[1]; \ - OUT *q = (OUT *) out; \ + IN * __restrict__ left = (IN *) in[0]; \ + IN * __restrict__ right = (IN *) in[1]; \ + OUT * __restrict__ q = (OUT *) out; \ \ for( x = 0; x < sz; x++ ) \ q[x] = left[x] - right[x]; \ diff --git a/libvips/arithmetic/unaryconst.c b/libvips/arithmetic/unaryconst.c index 
89a1f672..4740d374 100644 --- a/libvips/arithmetic/unaryconst.c +++ b/libvips/arithmetic/unaryconst.c @@ -52,7 +52,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY ); /* Cast a vector of double to a vector of TYPE, clipping to a range. */ #define CAST_CLIP( TYPE, N, X ) { \ - TYPE *tq = (TYPE *) q; \ + TYPE * __restrict__ tq = (TYPE *) q; \ \ for( i = 0; i < m; i++ ) { \ double v = p[VIPS_MIN( n - 1, i )]; \ @@ -64,7 +64,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY ); /* Cast a vector of double to a vector of TYPE. */ #define CAST( TYPE ) { \ - TYPE *tq = (TYPE *) q; \ + TYPE * __restrict__ tq = (TYPE *) q; \ \ for( i = 0; i < m; i++ ) \ tq[i] = (TYPE) p[VIPS_MIN( n - 1, i )]; \ @@ -73,7 +73,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY ); /* Cast a vector of double to a complex vector of TYPE. */ #define CASTC( TYPE ) { \ - TYPE *tq = (TYPE *) q; \ + TYPE * __restrict__ tq = (TYPE *) q; \ \ for( i = 0; i < m; i++ ) { \ tq[0] = (TYPE) p[VIPS_MIN( n - 1, i )]; \ @@ -86,7 +86,8 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY ); /* Cast a n-band vector of double to a m-band vector in another format. */ static VipsPel * -make_pixel( VipsObject *obj, int m, VipsBandFmt fmt, int n, double *p ) +make_pixel( VipsObject *obj, + int m, VipsBandFmt fmt, int n, double * __restrict__ p ) { VipsPel *q; int i;