Use the restrict keyword throughout -> sometimes 33% faster than before this round of changes

This commit is contained in:
Nicolas Robidoux 2009-03-16 04:47:13 +00:00
parent 1a7b57c347
commit 7261fe939b

View File

@@ -200,14 +200,14 @@
#include "templates.h" #include "templates.h"
#ifndef restrict #ifndef vips_restrict
#ifdef __restrict #ifdef __restrict
#define restrict __restrict #define vips_restrict __restrict
#else #else
#ifdef __restrict__ #ifdef __restrict__
#define restrict __restrict__ #define vips_restrict __restrict__
#else #else
#define restrict #define vips_restrict
#endif #endif
#endif #endif
#endif #endif
@@ -273,22 +273,21 @@ typedef struct _VipsInterpolateNohaloClass {
} VipsInterpolateNohaloClass; } VipsInterpolateNohaloClass;
static void inline static void inline
nohalo_sharp_level_1( nohalo_sharp_level_1( const double uno_two,
const double uno_two, const double uno_thr,
const double uno_thr, const double dos_one,
const double dos_one, const double dos_two,
const double dos_two, const double dos_thr,
const double dos_thr, const double dos_fou,
const double dos_fou, const double tre_one,
const double tre_one, const double tre_two,
const double tre_two, const double tre_thr,
const double tre_thr, const double tre_fou,
const double tre_fou, const double qua_two,
const double qua_two, const double qua_thr,
const double qua_thr, double* vips_restrict r1,
double *r1, double* vips_restrict r2,
double *r2, double* vips_restrict r3 )
double *r3 )
{ {
/* /*
* This function calculates the missing three double density pixel * This function calculates the missing three double density pixel
@@ -445,14 +444,14 @@ nohalo_sharp_level_1(
*/ */
#define NOHALO_SHARP_LEVEL_1_INTER( inter ) \ #define NOHALO_SHARP_LEVEL_1_INTER( inter ) \
template <typename T> static void inline \ template <typename T> static void inline \
nohalo_sharp_level_1_ ## inter( PEL *pout, \ nohalo_sharp_level_1_ ## inter( PEL* vips_restrict pout, \
const PEL *pin, \ const PEL* vips_restrict pin, \
const int bands, \ const int bands, \
const int lskip, \ const int lskip, \
const double relative_x, \ const double relative_x, \
const double relative_y ) \ const double relative_y ) \
{ \ { \
T* restrict out = (T *) pout; \ T* vips_restrict out = (T *) pout; \
\ \
const int relative_x_is_rite = ( relative_x >= 0. ); \ const int relative_x_is_rite = ( relative_x >= 0. ); \
const int relative_y_is_down = ( relative_y >= 0. ); \ const int relative_y_is_down = ( relative_y >= 0. ); \
@@ -463,7 +462,7 @@ nohalo_sharp_level_1(
const int corner_reflection_shift = \ const int corner_reflection_shift = \
relative_x_is_rite * bands + relative_y_is_down * lskip; \ relative_x_is_rite * bands + relative_y_is_down * lskip; \
\ \
const T* restrict in = ( (T *) pin ) + corner_reflection_shift; \ const T* vips_restrict in = ( (T *) pin ) + corner_reflection_shift; \
\ \
const int shift_1_pixel = sign_of_relative_x * bands; \ const int shift_1_pixel = sign_of_relative_x * bands; \
const int shift_1_row = sign_of_relative_y * lskip; \ const int shift_1_row = sign_of_relative_y * lskip; \
@@ -494,37 +493,38 @@ nohalo_sharp_level_1(
const double x_times_z_over_4 = .25 * x_times_z; \ const double x_times_z_over_4 = .25 * x_times_z; \
const double x_times_y_over_8 = .125 * ( x - x_times_z ); \ const double x_times_y_over_8 = .125 * ( x - x_times_z ); \
\ \
for( int band = 0; band < bands; band++ ) { \ int band = bands; \
double four_times_dos_twothr; \ \
double four_times_dostre_two; \ do \
double eight_times_dostre_twothr; \ { \
\ double four_times_dos_twothr; \
const double dos_two = in[dos_two_shift]; \ double four_times_dostre_two; \
\ double eight_times_dostre_twothr; \
nohalo_sharp_level_1( in[uno_two_shift], in[uno_thr_shift], \ \
in[dos_one_shift], dos_two, \ const double dos_two = in[dos_two_shift]; \
in[dos_thr_shift], in[dos_fou_shift], \ \
in[tre_one_shift], in[tre_two_shift], \ nohalo_sharp_level_1( in[uno_two_shift], in[uno_thr_shift], \
in[tre_thr_shift], in[tre_fou_shift], \ in[dos_one_shift], dos_two, \
in[qua_two_shift], in[qua_thr_shift], \ in[dos_thr_shift], in[dos_fou_shift], \
&four_times_dos_twothr, \ in[tre_one_shift], in[tre_two_shift], \
&four_times_dostre_two, \ in[tre_thr_shift], in[tre_fou_shift], \
&eight_times_dostre_twothr ); \ in[qua_two_shift], in[qua_thr_shift], \
\ &four_times_dos_twothr, \
in += 1; \ &four_times_dostre_two, \
\ &eight_times_dostre_twothr ); \
const T result = bilinear_ ## inter<T>( \ \
w_times_z, \ const T result = bilinear_ ## inter<T>( w_times_z, \
x_times_z_over_4, \ x_times_z_over_4, \
w_times_y_over_4, \ w_times_y_over_4, \
x_times_y_over_8, \ x_times_y_over_8, \
dos_two, \ dos_two, \
four_times_dos_twothr, \ four_times_dos_twothr, \
four_times_dostre_two, \ four_times_dostre_two, \
eight_times_dostre_twothr ); \ eight_times_dostre_twothr ); \
\ \
out[band] = result; \ in++; \
} \ *out++ = result; \
} while (--band); \
} }
NOHALO_SHARP_LEVEL_1_INTER( float ) NOHALO_SHARP_LEVEL_1_INTER( float )
@@ -539,11 +539,11 @@ G_DEFINE_TYPE( VipsInterpolateNohalo, vips_interpolate_nohalo,
} }
static void static void
vips_interpolate_nohalo_interpolate( VipsInterpolate *interpolate, vips_interpolate_nohalo_interpolate( VipsInterpolate* vips_restrict interpolate,
PEL *out, PEL* vips_restrict out,
REGION *in, REGION* vips_restrict in,
double absolute_x, double absolute_x,
double absolute_y ) double absolute_y )
{ {
/* /*
* VIPS versions of Nicolas's pixel addressing values. Double bands for * VIPS versions of Nicolas's pixel addressing values. Double bands for
@@ -567,17 +567,19 @@ vips_interpolate_nohalo_interpolate( VipsInterpolate *interpolate,
* position of the center of the convex hull of the 2x2 block of * position of the center of the convex hull of the 2x2 block of
* closest pixels. Similarly for y. Range of values: [-.5,.5). * closest pixels. Similarly for y. Range of values: [-.5,.5).
*/ */
const int iy = FAST_PSEUDO_FLOOR (absolute_y); const double absolute_y_minus_half = absolute_y - .5;
const double relative_y = ( absolute_y - .5 ) - iy; const double absolute_x_minus_half = absolute_x - .5;
const int ix = FAST_PSEUDO_FLOOR (absolute_x); const int iy = FAST_PSEUDO_FLOOR (absolute_y);
const double relative_x = ( absolute_x - .5 ) - ix; const double relative_y = absolute_y_minus_half - iy;
const int ix = FAST_PSEUDO_FLOOR (absolute_x);
const double relative_x = absolute_x_minus_half - ix;
/* /*
* Move the pointer to (the first band of) the top/left pixel * Move the pointer to (the first band of) the top/left pixel
* of the 2x2 group of pixel centers which contains the * of the 2x2 group of pixel centers which contains the
* sampling location in its convex hull: * sampling location in its convex hull:
*/ */
const PEL * restrict p = (PEL *) IM_REGION_ADDR( in, ix, iy ); const PEL * vips_restrict p = (PEL *) IM_REGION_ADDR( in, ix, iy );
#define CALL( T, inter ) \ #define CALL( T, inter ) \
nohalo_sharp_level_1_ ## inter<T>( out, \ nohalo_sharp_level_1_ ## inter<T>( out, \