add __restrict__ markup to loop pointers

this helps the compiler auto-vectorize the loops

also replace a couple of if()s in inner loops with conditional expressions, which again helps auto-vectorization
This commit is contained in:
John Cupitt 2013-11-30 13:55:10 +00:00
parent 29eed7b2b5
commit dccc5d5180
19 changed files with 119 additions and 120 deletions

1
.gitignore vendored
View File

@ -12,6 +12,7 @@ Makefile.in
TAGS TAGS
tags tags
*.o *.o
*.vect
Vips-8.0.gir Vips-8.0.gir
Vips-8.0.typelib Vips-8.0.typelib
.*.swp .*.swp

View File

@ -25,6 +25,7 @@
- added vips_gaussblur() convenience function - added vips_gaussblur() convenience function
- added --vips-profile, records and dumps thread timing info - added --vips-profile, records and dumps thread timing info
- added vipsprofile, visualises --vips-profile output - added vipsprofile, visualises --vips-profile output
- auto-vectorization-friendly inner loops
20/11/13 started 7.36.5 20/11/13 started 7.36.5
- better cache sizing in unbuffered sequential mode - better cache sizing in unbuffered sequential mode

15
TODO
View File

@ -1,3 +1,18 @@
- check the vectorizer output on linear.c
do some more packages: we've only done arithmetic so far
time add with auto-vectorization and with orc: is orc still worthwhile?
make sure __restrict__ is turned off if the compiler does not support it: is
there a configure test for this?
how much would alignment buy us? is there any way we can get it? probably
not, since we need to be able to generate any sub-area
how about avg? do we need -ffast-math to vectorize that?
- seen some leaks from - seen some leaks from
vips dzsave --layout google wtc.jpg x vips dzsave --layout google wtc.jpg x

View File

@ -1,4 +1,4 @@
/* im_abs() /* absolute value
* *
* Copyright: 1990, N. Dessipris, based on im_powtra() * Copyright: 1990, N. Dessipris, based on im_powtra()
* Author: Nicos Dessipris * Author: Nicos Dessipris
@ -93,25 +93,19 @@ vips_abs_build( VipsObject *object )
/* Integer abs operation: just test and negate. /* Integer abs operation: just test and negate.
*/ */
#define ABS_INT( TYPE ) { \ #define ABS_INT( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int x; \ int x; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) \
TYPE v = p[x]; \ q[x] = p[x] < 0 ? 0 - p[x] : p[x]; \
\
if( v < 0 ) \
q[x] = 0 - v; \
else \
q[x] = v; \
} \
} }
/* Float abs operation: call fabs(). /* Float abs operation: call fabs().
*/ */
#define ABS_FLOAT( TYPE ) { \ #define ABS_FLOAT( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int x; \ int x; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
@ -124,8 +118,8 @@ vips_abs_build( VipsObject *object )
#ifdef HAVE_HYPOT #ifdef HAVE_HYPOT
#define ABS_COMPLEX( TYPE ) { \ #define ABS_COMPLEX( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int x; \ int x; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
@ -137,8 +131,8 @@ vips_abs_build( VipsObject *object )
#else /*HAVE_HYPOT*/ #else /*HAVE_HYPOT*/
#define ABS_COMPLEX( TYPE ) { \ #define ABS_COMPLEX( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int x; \ int x; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \

View File

@ -88,9 +88,9 @@ typedef VipsBinaryClass VipsAddClass;
G_DEFINE_TYPE( VipsAdd, vips_add, VIPS_TYPE_BINARY ); G_DEFINE_TYPE( VipsAdd, vips_add, VIPS_TYPE_BINARY );
#define LOOP( IN, OUT ) { \ #define LOOP( IN, OUT ) { \
IN *left = (IN *) in[0]; \ IN * __restrict__ left = (IN *) in[0]; \
IN *right = (IN *) in[1]; \ IN * __restrict__ right = (IN *) in[1]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = left[x] + right[x]; \ q[x] = left[x] + right[x]; \

View File

@ -108,18 +108,18 @@ vips_boolean_build( VipsObject *object )
} }
#define LOOP( TYPE, OP ) { \ #define LOOP( TYPE, OP ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = left[x] OP right[x]; \ q[x] = left[x] OP right[x]; \
} }
#define FLOOP( TYPE, OP ) { \ #define FLOOP( TYPE, OP ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
int *q = (int *) out; \ int * __restrict__ q = (int *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = ((int) left[x]) OP ((int) right[x]); \ q[x] = ((int) left[x]) OP ((int) right[x]); \

View File

@ -80,8 +80,8 @@ typedef VipsUnaryClass VipsComplexClass;
G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY ); G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY );
#define LOOP( IN, OUT, OP ) { \ #define LOOP( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \ IN * __restrict__ p = (IN *) in[0]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
OP( q, p[x], 0.0 ); \ OP( q, p[x], 0.0 ); \
@ -91,8 +91,8 @@ G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY );
} }
#define CLOOP( IN, OUT, OP ) { \ #define CLOOP( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \ IN * __restrict__ p = (IN *) in[0]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
OP( q, p[0], p[1] ); \ OP( q, p[0], p[1] ); \

View File

@ -90,9 +90,9 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
/* This is going to be much slower */ /* This is going to be much slower */
#define CLOOP( TYPE ) { \ #define CLOOP( TYPE ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int i; \ int i; \
\ \
for( i = 0; i < sz; i++ ) { \ for( i = 0; i < sz; i++ ) { \
@ -120,9 +120,9 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
#else /* USE_MODARG_DIV */ #else /* USE_MODARG_DIV */
#define CLOOP( TYPE ) { \ #define CLOOP( TYPE ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int i; \ int i; \
\ \
for( i = 0; i < sz; i++ ) { \ for( i = 0; i < sz; i++ ) { \
@ -157,15 +157,12 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
/* Real divide: q[x] = left[x] / right[x], element by element.
 *
 * IN is the element type of both inputs (in[0] and in[1]); OUT is the
 * element type of the output buffer. Both operands are cast to OUT before
 * the divide so the macro also works for float output.
 *
 * A zero divisor stores 0. The value selected for that case must be the
 * constant 0, not q[x]: selecting q[x] would just keep whatever the output
 * buffer happened to contain, whereas the if/else form this replaces
 * stored 0. The single conditional expression (rather than if/else) is
 * kept to help auto-vectorization.
 *
 * Expects in, out, sz and x to be declared in the enclosing scope.
 */
#define RLOOP( IN, OUT ) { \
	IN * __restrict__ left = (IN *) in[0]; \
	IN * __restrict__ right = (IN *) in[1]; \
	OUT * __restrict__ q = (OUT *) out; \
	\
	for( x = 0; x < sz; x++ ) \
		q[x] = right[x] == 0 ? 0 : (OUT) left[x] / (OUT) right[x]; \
}
static void static void

View File

@ -69,24 +69,24 @@ typedef VipsUnaryClass VipsInvertClass;
G_DEFINE_TYPE( VipsInvert, vips_invert, VIPS_TYPE_UNARY ); G_DEFINE_TYPE( VipsInvert, vips_invert, VIPS_TYPE_UNARY );
#define LOOP( TYPE, L ) { \ #define LOOP( TYPE, L ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = (L) - p[x]; \ q[x] = (L) - p[x]; \
} }
#define LOOPN( TYPE ) { \ #define LOOPN( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = -1 * p[x]; \ q[x] = -1 * p[x]; \
} }
#define LOOPC( TYPE ) { \ #define LOOPC( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
q[0] = -1 * p[0]; \ q[0] = -1 * p[0]; \

View File

@ -163,8 +163,8 @@ vips_linear_build( VipsObject *object )
/* Non-complex input, any output. /* Non-complex input, any output.
*/ */
#define LOOPN( IN, OUT ) { \ #define LOOPN( IN, OUT ) { \
IN *p = (IN *) in[0]; \ IN * __restrict__ p = (IN *) in[0]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( i = 0, x = 0; x < width; x++ ) \ for( i = 0, x = 0; x < width; x++ ) \
for( k = 0; k < nb; k++, i++ ) \ for( k = 0; k < nb; k++, i++ ) \
@ -174,8 +174,8 @@ vips_linear_build( VipsObject *object )
/* Complex input, complex output. /* Complex input, complex output.
*/ */
#define LOOPCMPLXN( IN, OUT ) { \ #define LOOPCMPLXN( IN, OUT ) { \
IN *p = (IN *) in[0]; \ IN * __restrict__ p = (IN *) in[0]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < width; x++ ) \ for( x = 0; x < width; x++ ) \
for( k = 0; k < nb; k++ ) { \ for( k = 0; k < nb; k++ ) { \

View File

@ -97,8 +97,8 @@ vips_math_build( VipsObject *object )
} }
#define LOOP( IN, OUT, OP ) { \ #define LOOP( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \ IN * __restrict__ p = (IN *) in[0]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = OP( p[x] ); \ q[x] = OP( p[x] ); \

View File

@ -102,9 +102,9 @@ vips_math2_build( VipsObject *object )
} }
#define LOOP( IN, OUT, OP ) { \ #define LOOP( IN, OUT, OP ) { \
IN *p1 = (IN *) in[0]; \ IN * __restrict__ p1 = (IN *) in[0]; \
IN *p2 = (IN *) in[1]; \ IN * __restrict__ p2 = (IN *) in[1]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
OP( q[x], p1[x], p2[x] ); \ OP( q[x], p1[x], p2[x] ); \
@ -137,12 +137,9 @@ vips_math2_build( VipsObject *object )
double left = (double) (X); \ double left = (double) (X); \
double right = (double) (E); \ double right = (double) (E); \
\ \
if( left == 0.0 && right < 0.0 ) \
/* Division by zero! Difficult to report tho' \ /* Division by zero! Difficult to report tho' \
*/ \ */ \
(Y) = 0.0; \ (Y) = (left == 0.0 && right < 0.0) ? 0.0 : pow( left, right ); \
else \
(Y) = pow( left, right ); \
} }
#define WOP( Y, X, E ) POW( Y, E, X ) #define WOP( Y, X, E ) POW( Y, E, X )
@ -355,9 +352,9 @@ vips_math2_const_build( VipsObject *object )
} }
#define LOOPC( IN, OUT, OP ) { \ #define LOOPC( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \ IN * __restrict__ p = (IN *) in[0]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
double *c = (double *) uconst->c_ready; \ double * __restrict__ c = (double *) uconst->c_ready; \
\ \
for( i = 0, x = 0; x < width; x++ ) \ for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) \ for( b = 0; b < bands; b++, i++ ) \

View File

@ -81,9 +81,9 @@ G_DEFINE_TYPE( VipsMultiply, vips_multiply, VIPS_TYPE_BINARY );
/* Complex multiply. /* Complex multiply.
*/ */
#define CLOOP( TYPE ) { \ #define CLOOP( TYPE ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
double x1 = left[0]; \ double x1 = left[0]; \
@ -104,9 +104,9 @@ G_DEFINE_TYPE( VipsMultiply, vips_multiply, VIPS_TYPE_BINARY );
/* Real multiply. /* Real multiply.
*/ */
#define RLOOP( IN, OUT ) { \ #define RLOOP( IN, OUT ) { \
IN *left = (IN *) in[0]; \ IN * __restrict__ left = (IN *) in[0]; \
IN *right = (IN *) in[1]; \ IN * __restrict__ right = (IN *) in[1]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = left[x] * right[x]; \ q[x] = left[x] * right[x]; \

View File

@ -110,18 +110,18 @@ vips_relational_build( VipsObject *object )
} }
#define RLOOP( TYPE, ROP ) { \ #define RLOOP( TYPE, ROP ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
VipsPel *q = (VipsPel *) out; \ VipsPel * __restrict__ q = (VipsPel *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = (left[x] ROP right[x]) ? 255 : 0; \ q[x] = (left[x] ROP right[x]) ? 255 : 0; \
} }
#define CLOOP( TYPE, COP ) { \ #define CLOOP( TYPE, COP ) { \
TYPE *left = (TYPE *) in[0]; \ TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \ TYPE * __restrict__ right = (TYPE *) in[1]; \
VipsPel *q = (VipsPel *) out; \ VipsPel * __restrict__ q = (VipsPel *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
q[x] = COP( left[0], left[1], right[0], right[1]) ? 255 : 0; \ q[x] = COP( left[0], left[1], right[0], right[1]) ? 255 : 0; \
@ -478,8 +478,8 @@ vips_relational_const_build( VipsObject *object )
} }
#define RLOOPC( TYPE, OP ) { \ #define RLOOPC( TYPE, OP ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *c = (TYPE *) uconst->c_ready; \ TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\ \
for( i = 0, x = 0; x < width; x++ ) \ for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) \ for( b = 0; b < bands; b++, i++ ) \
@ -487,10 +487,10 @@ vips_relational_const_build( VipsObject *object )
} }
#define CLOOPC( TYPE, OP ) { \ #define CLOOPC( TYPE, OP ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
\ \
for( i = 0, x = 0; x < width; x++ ) { \ for( i = 0, x = 0; x < width; x++ ) { \
TYPE *c = (TYPE *) uconst->c_ready; \ TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\ \
for( b = 0; b < bands; b++, i++ ) { \ for( b = 0; b < bands; b++, i++ ) { \
out[i] = OP( p[0], p[1], c[0], c[1]) ? 255 : 0; \ out[i] = OP( p[0], p[1], c[0], c[1]) ? 255 : 0; \

View File

@ -92,32 +92,26 @@ vips_remainder_build( VipsObject *object )
/* Integer remainder-after-division. /* Integer remainder-after-division.
*/ */
#define IREMAINDER( TYPE ) { \ #define IREMAINDER( TYPE ) { \
TYPE *p1 = (TYPE *) in[0]; \ TYPE * __restrict__ p1 = (TYPE *) in[0]; \
TYPE *p2 = (TYPE *) in[1]; \ TYPE * __restrict__ p2 = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
if( p2[x] ) \ q[x] = p2[x] ? p1[x] % p2[x] : -1; \
q[x] = p1[x] % p2[x]; \
else \
q[x] = -1; \
} }
/* Float remainder-after-division. /* Float remainder-after-division.
*/ */
#define FREMAINDER( TYPE ) { \ #define FREMAINDER( TYPE ) { \
TYPE *p1 = (TYPE *) in[0]; \ TYPE * __restrict__ p1 = (TYPE *) in[0]; \
TYPE *p2 = (TYPE *) in[1]; \ TYPE * __restrict__ p2 = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
double a = p1[x]; \ double a = p1[x]; \
double b = p2[x]; \ double b = p2[x]; \
\ \
if( b ) \ q[x] = b ? a - b * floor (a / b) : -1; \
q[x] = a - b * floor (a / b); \
else \
q[x] = -1; \
} \ } \
} }
@ -262,9 +256,9 @@ vips_remainder_const_build( VipsObject *object )
/* Integer remainder-after-divide, per-band constant. /* Integer remainder-after-divide, per-band constant.
*/ */
#define IREMAINDERCONST( TYPE ) { \ #define IREMAINDERCONST( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
TYPE *c = (TYPE *) uconst->c_ready; \ TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\ \
for( i = 0, x = 0; x < width; x++ ) \ for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) \ for( b = 0; b < bands; b++, i++ ) \
@ -274,19 +268,18 @@ vips_remainder_const_build( VipsObject *object )
/* Float remainder-after-divide, per-band constant. /* Float remainder-after-divide, per-band constant.
*/ */
#define FREMAINDERCONST( TYPE ) { \ #define FREMAINDERCONST( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
TYPE *c = (TYPE *) uconst->c_ready; \ TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\ \
for( i = 0, x = 0; x < width; x++ ) \ for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) { \ for( b = 0; b < bands; b++, i++ ) { \
double left = p[i]; \ double left = p[i]; \
double right = c[b]; \ double right = c[b]; \
\ \
if( right ) \ q[i] = right ? \
q[i] = left - right * floor( left / right ); \ left - right * floor( left / right ) : \
else \ -1; \
q[i] = -1; \
} \ } \
} }

View File

@ -85,8 +85,8 @@ vips_round_build( VipsObject *object )
} }
#define LOOP( TYPE, OP ) { \ #define LOOP( TYPE, OP ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = OP( p[x] ); \ q[x] = OP( p[x] ); \

View File

@ -58,8 +58,8 @@ typedef VipsUnaryClass VipsSignClass;
G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY ); G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY );
#define CSIGN( TYPE ) { \ #define CSIGN( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \ TYPE * __restrict__ q = (TYPE *) out; \
int x; \ int x; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \
@ -83,8 +83,8 @@ G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY );
} }
#define SIGN( TYPE ) { \ #define SIGN( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \ TYPE * __restrict__ p = (TYPE *) in[0]; \
signed char *q = (signed char *) out; \ signed char * __restrict__ q = (signed char *) out; \
int x; \ int x; \
\ \
for( x = 0; x < sz; x++ ) { \ for( x = 0; x < sz; x++ ) { \

View File

@ -83,9 +83,9 @@ typedef VipsBinaryClass VipsSubtractClass;
G_DEFINE_TYPE( VipsSubtract, vips_subtract, VIPS_TYPE_BINARY ); G_DEFINE_TYPE( VipsSubtract, vips_subtract, VIPS_TYPE_BINARY );
#define LOOP( IN, OUT ) { \ #define LOOP( IN, OUT ) { \
IN *left = (IN *) in[0]; \ IN * __restrict__ left = (IN *) in[0]; \
IN *right = (IN *) in[1]; \ IN * __restrict__ right = (IN *) in[1]; \
OUT *q = (OUT *) out; \ OUT * __restrict__ q = (OUT *) out; \
\ \
for( x = 0; x < sz; x++ ) \ for( x = 0; x < sz; x++ ) \
q[x] = left[x] - right[x]; \ q[x] = left[x] - right[x]; \

View File

@ -52,7 +52,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a vector of double to a vector of TYPE, clipping to a range. /* Cast a vector of double to a vector of TYPE, clipping to a range.
*/ */
#define CAST_CLIP( TYPE, N, X ) { \ #define CAST_CLIP( TYPE, N, X ) { \
TYPE *tq = (TYPE *) q; \ TYPE * __restrict__ tq = (TYPE *) q; \
\ \
for( i = 0; i < m; i++ ) { \ for( i = 0; i < m; i++ ) { \
double v = p[VIPS_MIN( n - 1, i )]; \ double v = p[VIPS_MIN( n - 1, i )]; \
@ -64,7 +64,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a vector of double to a vector of TYPE. /* Cast a vector of double to a vector of TYPE.
*/ */
#define CAST( TYPE ) { \ #define CAST( TYPE ) { \
TYPE *tq = (TYPE *) q; \ TYPE * __restrict__ tq = (TYPE *) q; \
\ \
for( i = 0; i < m; i++ ) \ for( i = 0; i < m; i++ ) \
tq[i] = (TYPE) p[VIPS_MIN( n - 1, i )]; \ tq[i] = (TYPE) p[VIPS_MIN( n - 1, i )]; \
@ -73,7 +73,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a vector of double to a complex vector of TYPE. /* Cast a vector of double to a complex vector of TYPE.
*/ */
#define CASTC( TYPE ) { \ #define CASTC( TYPE ) { \
TYPE *tq = (TYPE *) q; \ TYPE * __restrict__ tq = (TYPE *) q; \
\ \
for( i = 0; i < m; i++ ) { \ for( i = 0; i < m; i++ ) { \
tq[0] = (TYPE) p[VIPS_MIN( n - 1, i )]; \ tq[0] = (TYPE) p[VIPS_MIN( n - 1, i )]; \
@ -86,7 +86,8 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a n-band vector of double to a m-band vector in another format. /* Cast a n-band vector of double to a m-band vector in another format.
*/ */
static VipsPel * static VipsPel *
make_pixel( VipsObject *obj, int m, VipsBandFmt fmt, int n, double *p ) make_pixel( VipsObject *obj,
int m, VipsBandFmt fmt, int n, double * __restrict__ p )
{ {
VipsPel *q; VipsPel *q;
int i; int i;