nohalo and lbb resampler instruction order, macros and comment tweaks

This commit is contained in:
Nicolas Robidoux 2010-05-19 23:04:51 +00:00
parent c4f3b8dd51
commit f026cbb125
2 changed files with 102 additions and 83 deletions

View File

@ -1,8 +1,8 @@
/* lbb (locally bounded bicubic) resampler
*
* N. Robidoux, C. Racette and J. Cupitt 23-28/03/2010
* N. Robidoux, C. Racette and J. Cupitt, 23-28/03/2010
*
* N. Robidoux 16-19/05/2010
* N. Robidoux, 16-19/05/2010
*/
/*
@ -50,8 +50,8 @@
* than images resampled with windowed sincs or other interpolatory
* cubic spline filters. Specifically, LBB halos are narrower and the
* over/undershoot amplitude is smaller. This is accomplished without
* a significant reduction in the smoothness of the result (compared
* to Catmull-Rom).
* significantly affecting the smoothness of the result (compared to
* Catmull-Rom).
*
* Another important property is that the resampled values are
* contained within the range of nearby input values. Consequently, no
@ -60,8 +60,12 @@
*
* LBB was developed by Nicolas Robidoux and Chantal Racette of the
* Department of Mathematics and Computer Science of Laurentian
* University in the course of Chantal's Masters Thesis in
* Computational Sciences.
* University in the course of C. Racette's Masters thesis in
* Computational Sciences. Preliminary work directly leading to the
* LBB method and code was performed by C. Racette and N. Robidoux in
* the course of her honours thesis, and by N. Robidoux, A. Turcotte
* and E. Daoust during Google Summer of Code 2009 (through two awards
* made to GIMP to improve GEGL).
*/
/*
@ -78,21 +82,21 @@
*
* --It is C^1 with continuous cross derivatives.
*
* --When the limiters are inactive, LBB gives the same results as
* --When the limiters are inactive, LBB gives the same result as
* Catmull-Rom.
*
* --When used on binary images, LBB gives results similar to bicubic
* Hermite with all first derivatives---but not necessarily the
* cross derivatives--at the input pixel locations set to zero.
* cross derivatives (this last assertion needs to be double
* checked)--at input pixel locations set to zero.
*
* --The LBB reconstruction is locally bounded: Over each square
* patch, the surface is contained between the minimum and the
* maximum values among the 16 nearest input pixel values (those in
* the stencil).
* maximum of the 16 nearest input pixel values.
*
* --Consequently, the LBB reconstruction is globally bounded between
* the very smallest input pixel value and the very largest input
* pixel value. (It is not necessary to clamp results.)
* pixel value. It is not necessary to clamp results.
*
* The LBB method is based on the method of Ken Brodlie, Petros
* Mashwama and Sohail Butt for constraining Hermite interpolants
@ -224,7 +228,9 @@ lbbicubic( const double c00,
/*
* Computation of the four min and four max over 3x3 input data
* sub-blocks of the 4x4 input stencil.
* sub-blocks of the 4x4 input stencil, performed with only 28
* comparisons. If you can figure how to do this more efficiently,
* let us know.
*/
const double m1 = (dos_two <= dos_thr) ? dos_two : dos_thr ;
const double M1 = (dos_two <= dos_thr) ? dos_thr : dos_two ;

View File

@ -1,8 +1,8 @@
/* Nohalo subdivision followed by LBB (Locally Bounded Bicubic)
* interpolation
/* nohalo subdivision followed by lbb (locally bounded bicubic)
* interpolation resampler
*
* Nohalo level 1 with bilinear finishing scheme hacked for vips based
* on code by N. Robidoux by J. Cupitt, 20/1/09
* Nohalo level 1 with bilinear finishing scheme hacked for VIPS by
* J. Cupitt based on code by N. Robidoux, 20/1/09
*
* N. Robidoux and J. Cupitt, 4-17/3/09
*
@ -65,9 +65,13 @@
*
* Nohalo with LBB finishing scheme was developed by Nicolas Robidoux
* and Chantal Racette of the Department of Mathematics and Computer
* Science of Laurentian University in the course of Chantal's Masters
* Thesis in Computational Sciences, itself the continuation of
* Chantal's Honours Thesis in Mathematics.
* Science of Laurentian University in the course of C. Racette's
* Masters thesis in Computational Sciences. Preliminary work on
* Nohalo and monotone interpolation was performed by C. Racette and
* N. Robidoux in the course of her honours thesis, by N. Robidoux,
* A. Turcotte and E. Daoust during Google Summer of Code 2009
* (through two awards made to GIMP to improve GEGL), and, earlier, by
* N. Robidoux, A. Turcotte, J. Cupitt, M. Gong and K. Martinez.
*/
/*
@ -258,15 +262,15 @@ typedef struct _VipsInterpolateNohaloClass {
#define MINMOD(a,b,a_times_a,a_times_b) \
( (a_times_b)>=0. ? 1. : 0. ) * ( (a_times_b)<(a_times_a) ? (b) : (a) )
#define LBB_ABS(x) ( ((x)>=0.) ? (x) : -(x) )
#define LBB_SIGN(x) ( ((x)>=0.) ? 1.0 : -1.0 )
#define NOHALO_ABS(x) ( ((x)>=0.) ? (x) : -(x) )
#define NOHALO_SIGN(x) ( ((x)>=0.) ? 1.0 : -1.0 )
/*
* MIN and MAX macros set up so that I can put the likely winner in
* the first argument (forward branch likely blah blah blah):
*/
#define LBB_MIN(x,y) ( ((x)<=(y)) ? (x) : (y) )
#define LBB_MAX(x,y) ( ((x)>=(y)) ? (x) : (y) )
#define NOHALO_MIN(x,y) ( ((x)<=(y)) ? (x) : (y) )
#define NOHALO_MAX(x,y) ( ((x)>=(y)) ? (x) : (y) )
static void inline
@ -789,8 +793,9 @@ lbbicubic( const double c00,
* two commented out lines below.
*
* Overall, only 27 comparisons are needed (to compute 4 mins and 4
* maxes!). Without the simplification, 28 comparisoins would be
* used.
* maxes!). Without the simplification, 28 comparisons would be
* used. If you can figure how to do this more efficiently, let us
* know.
*/
const double m1 = (dos_two <= dos_thr) ? dos_two : dos_thr ;
const double M1 = (dos_two <= dos_thr) ? dos_thr : dos_two ;
@ -800,8 +805,8 @@ lbbicubic( const double c00,
const double M4 = (qua_two <= qua_thr) ? qua_thr : qua_two ;
const double m3 = (uno_two <= uno_thr) ? uno_two : uno_thr ;
const double M3 = (uno_two <= uno_thr) ? uno_thr : uno_two ;
const double m5 = LBB_MIN( m1, m2 );
const double M5 = LBB_MAX( M1, M2 );
const double m5 = NOHALO_MIN( m1, m2 );
const double M5 = NOHALO_MAX( M1, M2 );
const double m6 = (dos_one <= tre_one) ? dos_one : tre_one ;
const double M6 = (dos_one <= tre_one) ? tre_one : dos_one ;
const double m7 = (dos_fou <= tre_fou) ? dos_fou : tre_fou ;
@ -813,30 +818,30 @@ lbbicubic( const double c00,
* lines can be replaced by the above, simpler, two lines without
* changing the results.
*
* const double m13 = LBB_MIN( m7, qua_fou );
* const double M13 = LBB_MAX( M7, qua_fou );
* const double m13 = NOHALO_MIN( m7, qua_fou );
* const double M13 = NOHALO_MAX( M7, qua_fou );
*
* This also allows reodering the comparisons to put space between
* the computation of a result and its use.
*/
const double m9 = LBB_MIN( m5, m4 );
const double M9 = LBB_MAX( M5, M4 );
const double m8 = LBB_MIN( m5, m3 );
const double M8 = LBB_MAX( M5, M3 );
const double m11 = LBB_MIN( m6, qua_one );
const double M11 = LBB_MAX( M6, qua_one );
const double m10 = LBB_MIN( m6, uno_one );
const double M10 = LBB_MAX( M6, uno_one );
const double m12 = LBB_MIN( m7, uno_fou );
const double M12 = LBB_MAX( M7, uno_fou );
const double min11 = LBB_MIN( m9, m13 );
const double max11 = LBB_MAX( M9, M13 );
const double min01 = LBB_MIN( m9, m11 );
const double max01 = LBB_MAX( M9, M11 );
const double min00 = LBB_MIN( m8, m10 );
const double max00 = LBB_MAX( M8, M10 );
const double min10 = LBB_MIN( m8, m12 );
const double max10 = LBB_MAX( M8, M12 );
const double m9 = NOHALO_MIN( m5, m4 );
const double M9 = NOHALO_MAX( M5, M4 );
const double m8 = NOHALO_MIN( m5, m3 );
const double M8 = NOHALO_MAX( M5, M3 );
const double m11 = NOHALO_MIN( m6, qua_one );
const double M11 = NOHALO_MAX( M6, qua_one );
const double m10 = NOHALO_MIN( m6, uno_one );
const double M10 = NOHALO_MAX( M6, uno_one );
const double m12 = NOHALO_MIN( m7, uno_fou );
const double M12 = NOHALO_MAX( M7, uno_fou );
const double min11 = NOHALO_MIN( m9, m13 );
const double max11 = NOHALO_MAX( M9, M13 );
const double min01 = NOHALO_MIN( m9, m11 );
const double max01 = NOHALO_MAX( M9, M11 );
const double min00 = NOHALO_MIN( m8, m10 );
const double max00 = NOHALO_MAX( M8, M10 );
const double min10 = NOHALO_MIN( m8, m12 );
const double max10 = NOHALO_MAX( M8, M12 );
/*
* The remainder of the "per channel" computation involves the
* computation of:
@ -891,15 +896,15 @@ lbbicubic( const double c00,
* them (except if the clamping sends a negative derivative to 0, in
* which case the sign does not matter anyway).
*/
const double sign_dzdx00 = LBB_SIGN( dble_dzdx00i );
const double sign_dzdx10 = LBB_SIGN( dble_dzdx10i );
const double sign_dzdx01 = LBB_SIGN( dble_dzdx01i );
const double sign_dzdx11 = LBB_SIGN( dble_dzdx11i );
const double sign_dzdx00 = NOHALO_SIGN( dble_dzdx00i );
const double sign_dzdx10 = NOHALO_SIGN( dble_dzdx10i );
const double sign_dzdx01 = NOHALO_SIGN( dble_dzdx01i );
const double sign_dzdx11 = NOHALO_SIGN( dble_dzdx11i );
const double sign_dzdy00 = LBB_SIGN( dble_dzdy00i );
const double sign_dzdy10 = LBB_SIGN( dble_dzdy10i );
const double sign_dzdy01 = LBB_SIGN( dble_dzdy01i );
const double sign_dzdy11 = LBB_SIGN( dble_dzdy11i );
const double sign_dzdy00 = NOHALO_SIGN( dble_dzdy00i );
const double sign_dzdy10 = NOHALO_SIGN( dble_dzdy10i );
const double sign_dzdy01 = NOHALO_SIGN( dble_dzdy01i );
const double sign_dzdy11 = NOHALO_SIGN( dble_dzdy11i );
/*
* Initial values of the cross-derivatives. Factors of 1/4 are left
@ -914,10 +919,10 @@ lbbicubic( const double c00,
* Slope limiters. The key multiplier is 3 but we fold a factor of
* 2, hence 6:
*/
const double dble_slopelimit_00 = 6.0 * LBB_MIN( u00, v00 );
const double dble_slopelimit_10 = 6.0 * LBB_MIN( u10, v10 );
const double dble_slopelimit_01 = 6.0 * LBB_MIN( u01, v01 );
const double dble_slopelimit_11 = 6.0 * LBB_MIN( u11, v11 );
const double dble_slopelimit_00 = 6.0 * NOHALO_MIN( u00, v00 );
const double dble_slopelimit_10 = 6.0 * NOHALO_MIN( u10, v10 );
const double dble_slopelimit_01 = 6.0 * NOHALO_MIN( u01, v01 );
const double dble_slopelimit_11 = 6.0 * NOHALO_MIN( u11, v11 );
/*
* Clamped first derivatives:
@ -962,10 +967,10 @@ lbbicubic( const double c00,
/*
* Absolute values of the sums:
*/
const double twelve_abs_sum00 = LBB_ABS( twelve_sum00 );
const double twelve_abs_sum10 = LBB_ABS( twelve_sum10 );
const double twelve_abs_sum01 = LBB_ABS( twelve_sum01 );
const double twelve_abs_sum11 = LBB_ABS( twelve_sum11 );
const double twelve_abs_sum00 = NOHALO_ABS( twelve_sum00 );
const double twelve_abs_sum10 = NOHALO_ABS( twelve_sum10 );
const double twelve_abs_sum01 = NOHALO_ABS( twelve_sum01 );
const double twelve_abs_sum11 = NOHALO_ABS( twelve_sum11 );
/*
* Scaled distances to the min:
@ -983,10 +988,10 @@ lbbicubic( const double c00,
const double first_limit01 = twelve_abs_sum01 - u01_times_36;
const double first_limit11 = twelve_abs_sum11 - u11_times_36;
const double quad_d2zdxdy00ii = LBB_MAX( quad_d2zdxdy00i, first_limit00 );
const double quad_d2zdxdy10ii = LBB_MAX( quad_d2zdxdy10i, first_limit10 );
const double quad_d2zdxdy01ii = LBB_MAX( quad_d2zdxdy01i, first_limit01 );
const double quad_d2zdxdy11ii = LBB_MAX( quad_d2zdxdy11i, first_limit11 );
const double quad_d2zdxdy00ii = NOHALO_MAX( quad_d2zdxdy00i, first_limit00 );
const double quad_d2zdxdy10ii = NOHALO_MAX( quad_d2zdxdy10i, first_limit10 );
const double quad_d2zdxdy01ii = NOHALO_MAX( quad_d2zdxdy01i, first_limit01 );
const double quad_d2zdxdy11ii = NOHALO_MAX( quad_d2zdxdy11i, first_limit11 );
/*
* Scaled distances to the max:
@ -1004,18 +1009,22 @@ lbbicubic( const double c00,
const double second_limit01 = v01_times_36 - twelve_abs_sum01;
const double second_limit11 = v11_times_36 - twelve_abs_sum11;
const double quad_d2zdxdy00iii = LBB_MIN( quad_d2zdxdy00ii, second_limit00 );
const double quad_d2zdxdy10iii = LBB_MIN( quad_d2zdxdy10ii, second_limit10 );
const double quad_d2zdxdy01iii = LBB_MIN( quad_d2zdxdy01ii, second_limit01 );
const double quad_d2zdxdy11iii = LBB_MIN( quad_d2zdxdy11ii, second_limit11 );
const double quad_d2zdxdy00iii =
NOHALO_MIN( quad_d2zdxdy00ii, second_limit00 );
const double quad_d2zdxdy10iii =
NOHALO_MIN( quad_d2zdxdy10ii, second_limit10 );
const double quad_d2zdxdy01iii =
NOHALO_MIN( quad_d2zdxdy01ii, second_limit01 );
const double quad_d2zdxdy11iii =
NOHALO_MIN( quad_d2zdxdy11ii, second_limit11 );
/*
* Absolute values of the differences:
*/
const double twelve_abs_dif00 = LBB_ABS( twelve_dif00 );
const double twelve_abs_dif10 = LBB_ABS( twelve_dif10 );
const double twelve_abs_dif01 = LBB_ABS( twelve_dif01 );
const double twelve_abs_dif11 = LBB_ABS( twelve_dif11 );
const double twelve_abs_dif00 = NOHALO_ABS( twelve_dif00 );
const double twelve_abs_dif10 = NOHALO_ABS( twelve_dif10 );
const double twelve_abs_dif01 = NOHALO_ABS( twelve_dif01 );
const double twelve_abs_dif11 = NOHALO_ABS( twelve_dif11 );
/*
* Third cross-derivative limiter:
@ -1025,10 +1034,14 @@ lbbicubic( const double c00,
const double third_limit01 = twelve_abs_dif01 - v01_times_36;
const double third_limit11 = twelve_abs_dif11 - v11_times_36;
const double quad_d2zdxdy00iiii = LBB_MAX( quad_d2zdxdy00iii, third_limit00);
const double quad_d2zdxdy10iiii = LBB_MAX( quad_d2zdxdy10iii, third_limit10);
const double quad_d2zdxdy01iiii = LBB_MAX( quad_d2zdxdy01iii, third_limit01);
const double quad_d2zdxdy11iiii = LBB_MAX( quad_d2zdxdy11iii, third_limit11);
const double quad_d2zdxdy00iiii =
NOHALO_MAX( quad_d2zdxdy00iii, third_limit00);
const double quad_d2zdxdy10iiii =
NOHALO_MAX( quad_d2zdxdy10iii, third_limit10);
const double quad_d2zdxdy01iiii =
NOHALO_MAX( quad_d2zdxdy01iii, third_limit01);
const double quad_d2zdxdy11iiii =
NOHALO_MAX( quad_d2zdxdy11iii, third_limit11);
/*
* Fourth cross-derivative limiter:
@ -1038,10 +1051,10 @@ lbbicubic( const double c00,
const double fourth_limit01 = u01_times_36 - twelve_abs_dif01;
const double fourth_limit11 = u11_times_36 - twelve_abs_dif11;
const double quad_d2zdxdy00 = LBB_MIN( quad_d2zdxdy00iiii, fourth_limit00);
const double quad_d2zdxdy10 = LBB_MIN( quad_d2zdxdy10iiii, fourth_limit10);
const double quad_d2zdxdy01 = LBB_MIN( quad_d2zdxdy01iiii, fourth_limit01);
const double quad_d2zdxdy11 = LBB_MIN( quad_d2zdxdy11iiii, fourth_limit11);
const double quad_d2zdxdy00 = NOHALO_MIN( quad_d2zdxdy00iiii, fourth_limit00);
const double quad_d2zdxdy10 = NOHALO_MIN( quad_d2zdxdy10iiii, fourth_limit10);
const double quad_d2zdxdy01 = NOHALO_MIN( quad_d2zdxdy01iiii, fourth_limit01);
const double quad_d2zdxdy11 = NOHALO_MIN( quad_d2zdxdy11iiii, fourth_limit11);
/*
* Four times the part of the result which only uses cross