add __restrict__ markup to loop pointers

helps auto-vec

also remove a couple of if()s from inner loops, again, helps auto-vec
This commit is contained in:
John Cupitt 2013-11-30 13:55:10 +00:00
parent 29eed7b2b5
commit dccc5d5180
19 changed files with 119 additions and 120 deletions

1
.gitignore vendored
View File

@ -12,6 +12,7 @@ Makefile.in
TAGS
tags
*.o
*.vect
Vips-8.0.gir
Vips-8.0.typelib
.*.swp

View File

@ -25,6 +25,7 @@
- added vips_gaussblur() convenience function
- added --vips-profile, records and dumps thread timing info
- added vipsprofile, visualises --vips-profile output
- auto-vectorization-friendly inner loops
20/11/13 started 7.36.5
- better cache sizing in unbuffered sequential mode

15
TODO
View File

@ -1,3 +1,18 @@
- check vectorizer on linear.c
do some more packages, we've just done arithmetic so far
time add with auto vec and with orc, is orc worthwhile?
make sure __restrict__ turns off if the compiler does not support it, is
there a configure thing?
how much would alignment buy us? is there any way we can do this? probably
not, since we need to be able to generate any sub-area
how about avg? do we need -ffast-math to vec that?
- seen some leaks from
vips dzsave --layout google wtc.jpg x

View File

@ -1,4 +1,4 @@
/* im_abs()
/* absolute value
*
* Copyright: 1990, N. Dessipris, based on im_powtra()
* Author: Nicos Dessipris
@ -93,25 +93,19 @@ vips_abs_build( VipsObject *object )
/* Integer abs operation over sz elements of TYPE: values are read from
 * in[0] and written to out. A negative value v is stored as 0 - v; any
 * other value is copied through unchanged.
 *
 * in, out and sz are not declared here and must be supplied by the code
 * that expands the macro; the loop index x is local to the macro.
 */
#define ABS_INT( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
int x; \
\
for( x = 0; x < sz; x++ ) { \
TYPE v = p[x]; \
\
if( v < 0 ) \
q[x] = 0 - v; \
else \
q[x] = v; \
} \
for( x = 0; x < sz; x++ ) \
q[x] = p[x] < 0 ? 0 - p[x] : p[x]; \
}
/* Float abs operation: call fabs().
*/
#define ABS_FLOAT( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
int x; \
\
for( x = 0; x < sz; x++ ) \
@ -124,8 +118,8 @@ vips_abs_build( VipsObject *object )
#ifdef HAVE_HYPOT
#define ABS_COMPLEX( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
int x; \
\
for( x = 0; x < sz; x++ ) { \
@ -137,8 +131,8 @@ vips_abs_build( VipsObject *object )
#else /*HAVE_HYPOT*/
#define ABS_COMPLEX( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
int x; \
\
for( x = 0; x < sz; x++ ) { \

View File

@ -88,9 +88,9 @@ typedef VipsBinaryClass VipsAddClass;
G_DEFINE_TYPE( VipsAdd, vips_add, VIPS_TYPE_BINARY );
#define LOOP( IN, OUT ) { \
IN *left = (IN *) in[0]; \
IN *right = (IN *) in[1]; \
OUT *q = (OUT *) out; \
IN * __restrict__ left = (IN *) in[0]; \
IN * __restrict__ right = (IN *) in[1]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = left[x] + right[x]; \

View File

@ -108,18 +108,18 @@ vips_boolean_build( VipsObject *object )
}
/* Element-wise binary operation on two inputs of the same TYPE:
 * q[x] = left[x] OP right[x] for x in [0, sz). left and right are taken
 * from in[0] and in[1], q from out.
 *
 * The index x is not declared inside the macro, so the expanding code
 * must provide it along with in, out and sz.
 */
#define LOOP( TYPE, OP ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = left[x] OP right[x]; \
}
#define FLOOP( TYPE, OP ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
int *q = (int *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
int * __restrict__ q = (int *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = ((int) left[x]) OP ((int) right[x]); \

View File

@ -80,8 +80,8 @@ typedef VipsUnaryClass VipsComplexClass;
G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY );
#define LOOP( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \
OUT *q = (OUT *) out; \
IN * __restrict__ p = (IN *) in[0]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) { \
OP( q, p[x], 0.0 ); \
@ -91,8 +91,8 @@ G_DEFINE_TYPE( VipsComplex, vips_complex, VIPS_TYPE_UNARY );
}
#define CLOOP( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \
OUT *q = (OUT *) out; \
IN * __restrict__ p = (IN *) in[0]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) { \
OP( q, p[0], p[1] ); \

View File

@ -90,9 +90,9 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
/* This is going to be much slower */
#define CLOOP( TYPE ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE * __restrict__ q = (TYPE *) out; \
int i; \
\
for( i = 0; i < sz; i++ ) { \
@ -120,9 +120,9 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
#else /* USE_MODARG_DIV */
#define CLOOP( TYPE ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE * __restrict__ q = (TYPE *) out; \
int i; \
\
for( i = 0; i < sz; i++ ) { \
@ -157,15 +157,12 @@ G_DEFINE_TYPE( VipsDivide, vips_divide, VIPS_TYPE_BINARY );
/* Real divide. Both operands are cast to OUT before the divide so the
 * macro works for float output as well as integer output.
 *
 * A zero divisor produces 0 in the output. The result is chosen with a
 * conditional expression rather than an if/else statement so the loop body
 * stays a single assignment. The zero case must store the constant 0:
 * reading q[x] back would copy whatever the output buffer already held
 * instead of the 0 the if/else form stored.
 *
 * in, out, sz and the index x are not declared here and must be supplied
 * by the code that expands the macro.
 */
#define RLOOP( IN, OUT ) { \
	IN * __restrict__ left = (IN *) in[0]; \
	IN * __restrict__ right = (IN *) in[1]; \
	OUT * __restrict__ q = (OUT *) out; \
	\
	for( x = 0; x < sz; x++ ) \
		q[x] = right[x] == 0 ? \
			0 : (OUT) left[x] / (OUT) right[x]; \
}
static void

View File

@ -69,24 +69,24 @@ typedef VipsUnaryClass VipsInvertClass;
G_DEFINE_TYPE( VipsInvert, vips_invert, VIPS_TYPE_UNARY );
/* Subtract every input element from the value L: q[x] = (L) - p[x] for
 * x in [0, sz), reading TYPE values from in[0] and writing TYPE values
 * to out.
 *
 * x, sz, in and out are not declared here and must come from the code
 * that expands the macro.
 */
#define LOOP( TYPE, L ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = (L) - p[x]; \
}
/* Negate every element: q[x] = -1 * p[x] for x in [0, sz), reading TYPE
 * values from in[0] and writing TYPE values to out.
 *
 * x, sz, in and out are not declared here and must come from the code
 * that expands the macro.
 */
#define LOOPN( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = -1 * p[x]; \
}
#define LOOPC( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) { \
q[0] = -1 * p[0]; \

View File

@ -163,8 +163,8 @@ vips_linear_build( VipsObject *object )
/* Non-complex input, any output.
*/
#define LOOPN( IN, OUT ) { \
IN *p = (IN *) in[0]; \
OUT *q = (OUT *) out; \
IN * __restrict__ p = (IN *) in[0]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( i = 0, x = 0; x < width; x++ ) \
for( k = 0; k < nb; k++, i++ ) \
@ -174,8 +174,8 @@ vips_linear_build( VipsObject *object )
/* Complex input, complex output.
*/
#define LOOPCMPLXN( IN, OUT ) { \
IN *p = (IN *) in[0]; \
OUT *q = (OUT *) out; \
IN * __restrict__ p = (IN *) in[0]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < width; x++ ) \
for( k = 0; k < nb; k++ ) { \

View File

@ -97,8 +97,8 @@ vips_math_build( VipsObject *object )
}
#define LOOP( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \
OUT *q = (OUT *) out; \
IN * __restrict__ p = (IN *) in[0]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = OP( p[x] ); \

View File

@ -102,9 +102,9 @@ vips_math2_build( VipsObject *object )
}
#define LOOP( IN, OUT, OP ) { \
IN *p1 = (IN *) in[0]; \
IN *p2 = (IN *) in[1]; \
OUT *q = (OUT *) out; \
IN * __restrict__ p1 = (IN *) in[0]; \
IN * __restrict__ p2 = (IN *) in[1]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) \
OP( q[x], p1[x], p2[x] ); \
@ -137,12 +137,9 @@ vips_math2_build( VipsObject *object )
double left = (double) (X); \
double right = (double) (E); \
\
if( left == 0.0 && right < 0.0 ) \
/* Division by zero! Difficult to report tho' \
*/ \
(Y) = 0.0; \
else \
(Y) = pow( left, right ); \
/* Division by zero! Difficult to report tho' \
*/ \
(Y) = (left == 0.0 && right < 0.0) ? 0.0 : pow( left, right ); \
}
#define WOP( Y, X, E ) POW( Y, E, X )
@ -355,9 +352,9 @@ vips_math2_const_build( VipsObject *object )
}
#define LOOPC( IN, OUT, OP ) { \
IN *p = (IN *) in[0]; \
OUT *q = (OUT *) out; \
double *c = (double *) uconst->c_ready; \
IN * __restrict__ p = (IN *) in[0]; \
OUT * __restrict__ q = (OUT *) out; \
double * __restrict__ c = (double *) uconst->c_ready; \
\
for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) \

View File

@ -81,9 +81,9 @@ G_DEFINE_TYPE( VipsMultiply, vips_multiply, VIPS_TYPE_BINARY );
/* Complex multiply.
*/
#define CLOOP( TYPE ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) { \
double x1 = left[0]; \
@ -104,9 +104,9 @@ G_DEFINE_TYPE( VipsMultiply, vips_multiply, VIPS_TYPE_BINARY );
/* Real multiply.
*/
#define RLOOP( IN, OUT ) { \
IN *left = (IN *) in[0]; \
IN *right = (IN *) in[1]; \
OUT *q = (OUT *) out; \
IN * __restrict__ left = (IN *) in[0]; \
IN * __restrict__ right = (IN *) in[1]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = left[x] * right[x]; \

View File

@ -110,18 +110,18 @@ vips_relational_build( VipsObject *object )
}
/* Element-wise comparison of two inputs of the same TYPE using the
 * relational operator ROP. The result is written to out as VipsPel
 * values: 255 where left[x] ROP right[x] holds, 0 where it does not.
 *
 * x, sz, in and out are not declared here and must come from the code
 * that expands the macro.
 */
#define RLOOP( TYPE, ROP ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
VipsPel *q = (VipsPel *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
VipsPel * __restrict__ q = (VipsPel *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = (left[x] ROP right[x]) ? 255 : 0; \
}
#define CLOOP( TYPE, COP ) { \
TYPE *left = (TYPE *) in[0]; \
TYPE *right = (TYPE *) in[1]; \
VipsPel *q = (VipsPel *) out; \
TYPE * __restrict__ left = (TYPE *) in[0]; \
TYPE * __restrict__ right = (TYPE *) in[1]; \
VipsPel * __restrict__ q = (VipsPel *) out; \
\
for( x = 0; x < sz; x++ ) { \
q[x] = COP( left[0], left[1], right[0], right[1]) ? 255 : 0; \
@ -478,8 +478,8 @@ vips_relational_const_build( VipsObject *object )
}
#define RLOOPC( TYPE, OP ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *c = (TYPE *) uconst->c_ready; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\
for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) \
@ -487,10 +487,10 @@ vips_relational_const_build( VipsObject *object )
}
#define CLOOPC( TYPE, OP ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
\
for( i = 0, x = 0; x < width; x++ ) { \
TYPE *c = (TYPE *) uconst->c_ready; \
TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\
for( b = 0; b < bands; b++, i++ ) { \
out[i] = OP( p[0], p[1], c[0], c[1]) ? 255 : 0; \

View File

@ -92,32 +92,26 @@ vips_remainder_build( VipsObject *object )
/* Integer remainder-after-division over sz elements of TYPE:
 * q[x] = p1[x] % p2[x], with p1 and p2 taken from in[0] and in[1] and q
 * from out. Where the divisor p2[x] is zero the % is not evaluated and -1
 * is stored instead.
 *
 * x, sz, in and out are not declared here and must come from the code
 * that expands the macro.
 */
#define IREMAINDER( TYPE ) { \
TYPE *p1 = (TYPE *) in[0]; \
TYPE *p2 = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p1 = (TYPE *) in[0]; \
TYPE * __restrict__ p2 = (TYPE *) in[1]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) \
if( p2[x] ) \
q[x] = p1[x] % p2[x]; \
else \
q[x] = -1; \
q[x] = p2[x] ? p1[x] % p2[x] : -1; \
}
/* Float remainder-after-division over sz elements of TYPE. Each pair of
 * inputs is widened to double (a from in[0], b from in[1]) and the result
 * a - b * floor( a / b ) is stored to out as TYPE. Where the divisor b is
 * zero, -1 is stored instead.
 *
 * x, sz, in and out are not declared here and must come from the code
 * that expands the macro.
 */
#define FREMAINDER( TYPE ) { \
TYPE *p1 = (TYPE *) in[0]; \
TYPE *p2 = (TYPE *) in[1]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p1 = (TYPE *) in[0]; \
TYPE * __restrict__ p2 = (TYPE *) in[1]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) { \
double a = p1[x]; \
double b = p2[x]; \
\
if( b ) \
q[x] = a - b * floor (a / b); \
else \
q[x] = -1; \
q[x] = b ? a - b * floor (a / b) : -1; \
} \
}
@ -262,9 +256,9 @@ vips_remainder_const_build( VipsObject *object )
/* Integer remainder-after-divide, per-band constant.
*/
#define IREMAINDERCONST( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE *c = (TYPE *) uconst->c_ready; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\
for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) \
@ -274,19 +268,18 @@ vips_remainder_const_build( VipsObject *object )
/* Float remainder-after-divide, per-band constant. The outer loop runs x
 * over width and the inner loop runs b over bands, with i as the running
 * element index into p (from in[0]) and q (from out). The divisor for
 * band b is c[b], read from uconst->c_ready.
 *
 * Both operands are widened to double and the stored result is
 * left - right * floor( left / right ); where the constant is zero, -1 is
 * stored instead.
 *
 * i, x, b, width, bands, in, out and uconst are not declared here and must
 * come from the code that expands the macro.
 */
#define FREMAINDERCONST( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE *c = (TYPE *) uconst->c_ready; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
TYPE * __restrict__ c = (TYPE *) uconst->c_ready; \
\
for( i = 0, x = 0; x < width; x++ ) \
for( b = 0; b < bands; b++, i++ ) { \
double left = p[i]; \
double right = c[b]; \
\
if( right ) \
q[i] = left - right * floor( left / right ); \
else \
q[i] = -1; \
q[i] = right ? \
left - right * floor( left / right ) : \
-1; \
} \
}

View File

@ -85,8 +85,8 @@ vips_round_build( VipsObject *object )
}
#define LOOP( TYPE, OP ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = OP( p[x] ); \

View File

@ -58,8 +58,8 @@ typedef VipsUnaryClass VipsSignClass;
G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY );
#define CSIGN( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
TYPE *q = (TYPE *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
TYPE * __restrict__ q = (TYPE *) out; \
int x; \
\
for( x = 0; x < sz; x++ ) { \
@ -83,8 +83,8 @@ G_DEFINE_TYPE( VipsSign, vips_sign, VIPS_TYPE_UNARY );
}
#define SIGN( TYPE ) { \
TYPE *p = (TYPE *) in[0]; \
signed char *q = (signed char *) out; \
TYPE * __restrict__ p = (TYPE *) in[0]; \
signed char * __restrict__ q = (signed char *) out; \
int x; \
\
for( x = 0; x < sz; x++ ) { \

View File

@ -83,9 +83,9 @@ typedef VipsBinaryClass VipsSubtractClass;
G_DEFINE_TYPE( VipsSubtract, vips_subtract, VIPS_TYPE_BINARY );
#define LOOP( IN, OUT ) { \
IN *left = (IN *) in[0]; \
IN *right = (IN *) in[1]; \
OUT *q = (OUT *) out; \
IN * __restrict__ left = (IN *) in[0]; \
IN * __restrict__ right = (IN *) in[1]; \
OUT * __restrict__ q = (OUT *) out; \
\
for( x = 0; x < sz; x++ ) \
q[x] = left[x] - right[x]; \

View File

@ -52,7 +52,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a vector of double to a vector of TYPE, clipping to a range.
*/
#define CAST_CLIP( TYPE, N, X ) { \
TYPE *tq = (TYPE *) q; \
TYPE * __restrict__ tq = (TYPE *) q; \
\
for( i = 0; i < m; i++ ) { \
double v = p[VIPS_MIN( n - 1, i )]; \
@ -64,7 +64,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a vector of double to a vector of TYPE.
*/
#define CAST( TYPE ) { \
TYPE *tq = (TYPE *) q; \
TYPE * __restrict__ tq = (TYPE *) q; \
\
for( i = 0; i < m; i++ ) \
tq[i] = (TYPE) p[VIPS_MIN( n - 1, i )]; \
@ -73,7 +73,7 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a vector of double to a complex vector of TYPE.
*/
#define CASTC( TYPE ) { \
TYPE *tq = (TYPE *) q; \
TYPE * __restrict__ tq = (TYPE *) q; \
\
for( i = 0; i < m; i++ ) { \
tq[0] = (TYPE) p[VIPS_MIN( n - 1, i )]; \
@ -86,7 +86,8 @@ G_DEFINE_ABSTRACT_TYPE( VipsUnaryConst, vips_unary_const, VIPS_TYPE_UNARY );
/* Cast a n-band vector of double to a m-band vector in another format.
*/
static VipsPel *
make_pixel( VipsObject *obj, int m, VipsBandFmt fmt, int n, double *p )
make_pixel( VipsObject *obj,
int m, VipsBandFmt fmt, int n, double * __restrict__ p )
{
VipsPel *q;
int i;