From f026cbb1255b7219a76f461c0bc3092497ea9752 Mon Sep 17 00:00:00 2001 From: Nicolas Robidoux Date: Wed, 19 May 2010 23:04:51 +0000 Subject: [PATCH] nohalo and lbb resampler instruction order, macros and comment tweaks --- libvips/resample/lbb.cpp | 30 ++++--- libvips/resample/nohalo.cpp | 155 +++++++++++++++++++----------------- 2 files changed, 102 insertions(+), 83 deletions(-) diff --git a/libvips/resample/lbb.cpp b/libvips/resample/lbb.cpp index 37b431bc..4ef5e9ca 100644 --- a/libvips/resample/lbb.cpp +++ b/libvips/resample/lbb.cpp @@ -1,8 +1,8 @@ /* lbb (locally bounded bicubic) resampler * - * N. Robidoux, C. Racette and J. Cupitt 23-28/03/2010 + * N. Robidoux, C. Racette and J. Cupitt, 23-28/03/2010 * - * N. Robidoux 16-19/05/2010 + * N. Robidoux, 16-19/05/2010 */ /* @@ -50,8 +50,8 @@ * than images resampled with windowed sincs or other interpolatory * cubic spline filters. Specifically, LBB halos are narrower and the * over/undershoot amplitude is smaller. This is accomplished without - * a significant reduction in the smoothness of the result (compared - * to Catmull-Rom). + * significantly affecting the smoothness of the result (compared to + * Catmull-Rom). * * Another important property is that the resampled values are * contained within the range of nearby input values. Consequently, no @@ -60,8 +60,12 @@ * * LBB was developed by Nicolas Robidoux and Chantal Racette of the * Department of Mathematics and Computer Science of Laurentian - * University in the course of Chantal's Masters Thesis in - * Computational Sciences. + * University in the course of C. Racette's Masters thesis in + * Computational Sciences. Preliminary work directly leading to the + * LBB method and code was performed by C. Racette and N. Robidoux in + * the course of her honours thesis, and by N. Robidoux, A. Turcotte + * and E. Daoust during Google Summer of Code 2009 (through two awards + * made to GIMP to improve GEGL). */ /* @@ -78,21 +82,21 @@ * * --It is C^1 with continuous cross derivatives. * - * --When the limiters are inactive, LBB gives the same results as + * --When the limiters are inactive, LBB gives the same result as * Catmull-Rom. * * --When used on binary images, LBB gives results similar to bicubic * Hermite with all first derivatives---but not necessarily the - * cross derivatives--at the input pixel locations set to zero. + * cross derivatives (this last assertion needs to be double + * checked)--at input pixel locations set to zero. * * --The LBB reconstruction is locally bounded: Over each square * patch, the surface is contained between the minimum and the - * maximum values among the 16 nearest input pixel values (those in - * the stencil). + * maximum of the 16 nearest input pixel values. * * --Consequently, the LBB reconstruction is globally bounded between * the very smallest input pixel value and the very largest input - * pixel value. (It is not necessary to clamp results.) + * pixel value. It is not necessary to clamp results. * * The LBB method is based on the method of Ken Brodlie, Petros * Mashwama and Sohail Butt for constraining Hermite interpolants @@ -224,7 +228,9 @@ lbbicubic( const double c00, /* * Computation of the four min and four max over 3x3 input data - * sub-blocks of the 4x4 input stencil. + * sub-blocks of the 4x4 input stencil, performed with only 28 + * comparisons. If you can figure how to do this more efficiently, + * let us know. */ const double m1 = (dos_two <= dos_thr) ? dos_two : dos_thr ; const double M1 = (dos_two <= dos_thr) ? dos_thr : dos_two ; diff --git a/libvips/resample/nohalo.cpp b/libvips/resample/nohalo.cpp index ca540514..bcbfed1c 100644 --- a/libvips/resample/nohalo.cpp +++ b/libvips/resample/nohalo.cpp @@ -1,8 +1,8 @@ -/* Nohalo subdivision followed by LBB (Locally Bounded Bicubic) - * interpolation +/* nohalo subdivision followed by lbb (locally bounded bicubic) + * interpolation resampler * - * Nohalo level 1 with bilinear finishing scheme hacked for vips based - * on code by N. Robidoux by J. Cupitt, 20/1/09 + * Nohalo level 1 with bilinear finishing scheme hacked for VIPS by + * J. Cupitt based on code by N. Robidoux, 20/1/09 * * N. Robidoux and J. Cupitt, 4-17/3/09 * @@ -65,9 +65,13 @@ * * Nohalo with LBB finishing scheme was developed by Nicolas Robidoux * and Chantal Racette of the Department of Mathematics and Computer - * Science of Laurentian University in the course of Chantal's Masters - * Thesis in Computational Sciences, itself the continuation of - * Chantal's Honours Thesis in Mathematics. + * Science of Laurentian University in the course of C. Racette's + * Masters thesis in Computational Sciences. Preliminary work on + * Nohalo and monotone interpolation was performed by C. Racette and + * N. Robidoux in the course of her honours thesis, by N. Robidoux, + * A. Turcotte and E. Daoust during Google Summer of Code 2009 + * (through two awards made to GIMP to improve GEGL), and, earlier, by + * N. Robidoux, A. Turcotte, J. Cupitt, M. Gong and K. Martinez. */ /* @@ -258,15 +262,15 @@ typedef struct _VipsInterpolateNohaloClass { #define MINMOD(a,b,a_times_a,a_times_b) \ ( (a_times_b)>=0. ? 1. : 0. ) * ( (a_times_b)<(a_times_a) ? (b) : (a) ) -#define LBB_ABS(x) ( ((x)>=0.) ? (x) : -(x) ) -#define LBB_SIGN(x) ( ((x)>=0.) ? 1.0 : -1.0 ) +#define NOHALO_ABS(x) ( ((x)>=0.) ? (x) : -(x) ) +#define NOHALO_SIGN(x) ( ((x)>=0.) ? 1.0 : -1.0 ) /* * MIN and MAX macros set up so that I can put the likely winner in * the first argument (forward branch likely blah blah blah): */ -#define LBB_MIN(x,y) ( ((x)<=(y)) ? (x) : (y) ) -#define LBB_MAX(x,y) ( ((x)>=(y)) ? (x) : (y) ) +#define NOHALO_MIN(x,y) ( ((x)<=(y)) ? (x) : (y) ) +#define NOHALO_MAX(x,y) ( ((x)>=(y)) ? (x) : (y) ) static void inline @@ -789,8 +793,9 @@ lbbicubic( const double c00, * two commented out lines below. * * Overall, only 27 comparisons are needed (to compute 4 mins and 4 - * maxes!). Without the simplification, 28 comparisoins would be - * used. + * maxes!). Without the simplification, 28 comparisons would be + * used. If you can figure how to do this more efficiently, let us + * know. */ const double m1 = (dos_two <= dos_thr) ? dos_two : dos_thr ; const double M1 = (dos_two <= dos_thr) ? dos_thr : dos_two ; @@ -800,8 +805,8 @@ lbbicubic( const double c00, const double M4 = (qua_two <= qua_thr) ? qua_thr : qua_two ; const double m3 = (uno_two <= uno_thr) ? uno_two : uno_thr ; const double M3 = (uno_two <= uno_thr) ? uno_thr : uno_two ; - const double m5 = LBB_MIN( m1, m2 ); - const double M5 = LBB_MAX( M1, M2 ); + const double m5 = NOHALO_MIN( m1, m2 ); + const double M5 = NOHALO_MAX( M1, M2 ); const double m6 = (dos_one <= tre_one) ? dos_one : tre_one ; const double M6 = (dos_one <= tre_one) ? tre_one : dos_one ; const double m7 = (dos_fou <= tre_fou) ? dos_fou : tre_fou ; @@ -813,30 +818,30 @@ lbbicubic( const double c00, * lines can be replaced by the above, simpler, two lines without * changing the results. * - * const double m13 = LBB_MIN( m7, qua_fou ); - * const double M13 = LBB_MAX( M7, qua_fou ); + * const double m13 = NOHALO_MIN( m7, qua_fou ); + * const double M13 = NOHALO_MAX( M7, qua_fou ); * * This also allows reodering the comparisons to put space between * the computation of a result and its use. */ - const double m9 = LBB_MIN( m5, m4 ); - const double M9 = LBB_MAX( M5, M4 ); - const double m8 = LBB_MIN( m5, m3 ); - const double M8 = LBB_MAX( M5, M3 ); - const double m11 = LBB_MIN( m6, qua_one ); - const double M11 = LBB_MAX( M6, qua_one ); - const double m10 = LBB_MIN( m6, uno_one ); - const double M10 = LBB_MAX( M6, uno_one ); - const double m12 = LBB_MIN( m7, uno_fou ); - const double M12 = LBB_MAX( M7, uno_fou ); - const double min11 = LBB_MIN( m9, m13 ); - const double max11 = LBB_MAX( M9, M13 ); - const double min01 = LBB_MIN( m9, m11 ); - const double max01 = LBB_MAX( M9, M11 ); - const double min00 = LBB_MIN( m8, m10 ); - const double max00 = LBB_MAX( M8, M10 ); - const double min10 = LBB_MIN( m8, m12 ); - const double max10 = LBB_MAX( M8, M12 ); + const double m9 = NOHALO_MIN( m5, m4 ); + const double M9 = NOHALO_MAX( M5, M4 ); + const double m8 = NOHALO_MIN( m5, m3 ); + const double M8 = NOHALO_MAX( M5, M3 ); + const double m11 = NOHALO_MIN( m6, qua_one ); + const double M11 = NOHALO_MAX( M6, qua_one ); + const double m10 = NOHALO_MIN( m6, uno_one ); + const double M10 = NOHALO_MAX( M6, uno_one ); + const double m12 = NOHALO_MIN( m7, uno_fou ); + const double M12 = NOHALO_MAX( M7, uno_fou ); + const double min11 = NOHALO_MIN( m9, m13 ); + const double max11 = NOHALO_MAX( M9, M13 ); + const double min01 = NOHALO_MIN( m9, m11 ); + const double max01 = NOHALO_MAX( M9, M11 ); + const double min00 = NOHALO_MIN( m8, m10 ); + const double max00 = NOHALO_MAX( M8, M10 ); + const double min10 = NOHALO_MIN( m8, m12 ); + const double max10 = NOHALO_MAX( M8, M12 ); /* * The remainder of the "per channel" computation involves the * computation of: @@ -891,15 +896,15 @@ lbbicubic( const double c00, * them (except if the clamping sends a negative derivative to 0, in * which case the sign does not matter anyway). */ - const double sign_dzdx00 = LBB_SIGN( dble_dzdx00i ); - const double sign_dzdx10 = LBB_SIGN( dble_dzdx10i ); - const double sign_dzdx01 = LBB_SIGN( dble_dzdx01i ); - const double sign_dzdx11 = LBB_SIGN( dble_dzdx11i ); + const double sign_dzdx00 = NOHALO_SIGN( dble_dzdx00i ); + const double sign_dzdx10 = NOHALO_SIGN( dble_dzdx10i ); + const double sign_dzdx01 = NOHALO_SIGN( dble_dzdx01i ); + const double sign_dzdx11 = NOHALO_SIGN( dble_dzdx11i ); - const double sign_dzdy00 = LBB_SIGN( dble_dzdy00i ); - const double sign_dzdy10 = LBB_SIGN( dble_dzdy10i ); - const double sign_dzdy01 = LBB_SIGN( dble_dzdy01i ); - const double sign_dzdy11 = LBB_SIGN( dble_dzdy11i ); + const double sign_dzdy00 = NOHALO_SIGN( dble_dzdy00i ); + const double sign_dzdy10 = NOHALO_SIGN( dble_dzdy10i ); + const double sign_dzdy01 = NOHALO_SIGN( dble_dzdy01i ); + const double sign_dzdy11 = NOHALO_SIGN( dble_dzdy11i ); /* * Initial values of the cross-derivatives. Factors of 1/4 are left @@ -914,10 +919,10 @@ lbbicubic( const double c00, * Slope limiters. The key multiplier is 3 but we fold a factor of * 2, hence 6: */ - const double dble_slopelimit_00 = 6.0 * LBB_MIN( u00, v00 ); - const double dble_slopelimit_10 = 6.0 * LBB_MIN( u10, v10 ); - const double dble_slopelimit_01 = 6.0 * LBB_MIN( u01, v01 ); - const double dble_slopelimit_11 = 6.0 * LBB_MIN( u11, v11 ); + const double dble_slopelimit_00 = 6.0 * NOHALO_MIN( u00, v00 ); + const double dble_slopelimit_10 = 6.0 * NOHALO_MIN( u10, v10 ); + const double dble_slopelimit_01 = 6.0 * NOHALO_MIN( u01, v01 ); + const double dble_slopelimit_11 = 6.0 * NOHALO_MIN( u11, v11 ); /* * Clamped first derivatives: @@ -962,10 +967,10 @@ lbbicubic( const double c00, /* * Absolute values of the sums: */ - const double twelve_abs_sum00 = LBB_ABS( twelve_sum00 ); - const double twelve_abs_sum10 = LBB_ABS( twelve_sum10 ); - const double twelve_abs_sum01 = LBB_ABS( twelve_sum01 ); - const double twelve_abs_sum11 = LBB_ABS( twelve_sum11 ); + const double twelve_abs_sum00 = NOHALO_ABS( twelve_sum00 ); + const double twelve_abs_sum10 = NOHALO_ABS( twelve_sum10 ); + const double twelve_abs_sum01 = NOHALO_ABS( twelve_sum01 ); + const double twelve_abs_sum11 = NOHALO_ABS( twelve_sum11 ); /* * Scaled distances to the min: @@ -983,10 +988,10 @@ lbbicubic( const double c00, const double first_limit01 = twelve_abs_sum01 - u01_times_36; const double first_limit11 = twelve_abs_sum11 - u11_times_36; - const double quad_d2zdxdy00ii = LBB_MAX( quad_d2zdxdy00i, first_limit00 ); - const double quad_d2zdxdy10ii = LBB_MAX( quad_d2zdxdy10i, first_limit10 ); - const double quad_d2zdxdy01ii = LBB_MAX( quad_d2zdxdy01i, first_limit01 ); - const double quad_d2zdxdy11ii = LBB_MAX( quad_d2zdxdy11i, first_limit11 ); + const double quad_d2zdxdy00ii = NOHALO_MAX( quad_d2zdxdy00i, first_limit00 ); + const double quad_d2zdxdy10ii = NOHALO_MAX( quad_d2zdxdy10i, first_limit10 ); + const double quad_d2zdxdy01ii = NOHALO_MAX( quad_d2zdxdy01i, first_limit01 ); + const double quad_d2zdxdy11ii = NOHALO_MAX( quad_d2zdxdy11i, first_limit11 ); /* * Scaled distances to the max: @@ -1004,18 +1009,22 @@ lbbicubic( const double c00, const double second_limit01 = v01_times_36 - twelve_abs_sum01; const double second_limit11 = v11_times_36 - twelve_abs_sum11; - const double quad_d2zdxdy00iii = LBB_MIN( quad_d2zdxdy00ii, second_limit00 ); - const double quad_d2zdxdy10iii = LBB_MIN( quad_d2zdxdy10ii, second_limit10 ); - const double quad_d2zdxdy01iii = LBB_MIN( quad_d2zdxdy01ii, second_limit01 ); - const double quad_d2zdxdy11iii = LBB_MIN( quad_d2zdxdy11ii, second_limit11 ); + const double quad_d2zdxdy00iii = + NOHALO_MIN( quad_d2zdxdy00ii, second_limit00 ); + const double quad_d2zdxdy10iii = + NOHALO_MIN( quad_d2zdxdy10ii, second_limit10 ); + const double quad_d2zdxdy01iii = + NOHALO_MIN( quad_d2zdxdy01ii, second_limit01 ); + const double quad_d2zdxdy11iii = + NOHALO_MIN( quad_d2zdxdy11ii, second_limit11 ); /* * Absolute values of the differences: */ - const double twelve_abs_dif00 = LBB_ABS( twelve_dif00 ); - const double twelve_abs_dif10 = LBB_ABS( twelve_dif10 ); - const double twelve_abs_dif01 = LBB_ABS( twelve_dif01 ); - const double twelve_abs_dif11 = LBB_ABS( twelve_dif11 ); + const double twelve_abs_dif00 = NOHALO_ABS( twelve_dif00 ); + const double twelve_abs_dif10 = NOHALO_ABS( twelve_dif10 ); + const double twelve_abs_dif01 = NOHALO_ABS( twelve_dif01 ); + const double twelve_abs_dif11 = NOHALO_ABS( twelve_dif11 ); /* * Third cross-derivative limiter: @@ -1025,10 +1034,14 @@ lbbicubic( const double c00, const double third_limit01 = twelve_abs_dif01 - v01_times_36; const double third_limit11 = twelve_abs_dif11 - v11_times_36; - const double quad_d2zdxdy00iiii = LBB_MAX( quad_d2zdxdy00iii, third_limit00); - const double quad_d2zdxdy10iiii = LBB_MAX( quad_d2zdxdy10iii, third_limit10); - const double quad_d2zdxdy01iiii = LBB_MAX( quad_d2zdxdy01iii, third_limit01); - const double quad_d2zdxdy11iiii = LBB_MAX( quad_d2zdxdy11iii, third_limit11); + const double quad_d2zdxdy00iiii = + NOHALO_MAX( quad_d2zdxdy00iii, third_limit00); + const double quad_d2zdxdy10iiii = + NOHALO_MAX( quad_d2zdxdy10iii, third_limit10); + const double quad_d2zdxdy01iiii = + NOHALO_MAX( quad_d2zdxdy01iii, third_limit01); + const double quad_d2zdxdy11iiii = + NOHALO_MAX( quad_d2zdxdy11iii, third_limit11); /* * Fourth cross-derivative limiter: @@ -1038,10 +1051,10 @@ lbbicubic( const double c00, const double fourth_limit01 = u01_times_36 - twelve_abs_dif01; const double fourth_limit11 = u11_times_36 - twelve_abs_dif11; - const double quad_d2zdxdy00 = LBB_MIN( quad_d2zdxdy00iiii, fourth_limit00); - const double quad_d2zdxdy10 = LBB_MIN( quad_d2zdxdy10iiii, fourth_limit10); - const double quad_d2zdxdy01 = LBB_MIN( quad_d2zdxdy01iiii, fourth_limit01); - const double quad_d2zdxdy11 = LBB_MIN( quad_d2zdxdy11iiii, fourth_limit11); + const double quad_d2zdxdy00 = NOHALO_MIN( quad_d2zdxdy00iiii, fourth_limit00); + const double quad_d2zdxdy10 = NOHALO_MIN( quad_d2zdxdy10iiii, fourth_limit10); + const double quad_d2zdxdy01 = NOHALO_MIN( quad_d2zdxdy01iiii, fourth_limit01); + const double quad_d2zdxdy11 = NOHALO_MIN( quad_d2zdxdy11iiii, fourth_limit11); /* * Four times the part of the result which only uses cross