From f026cbb1255b7219a76f461c0bc3092497ea9752 Mon Sep 17 00:00:00 2001
From: Nicolas Robidoux <nicolas.robidoux@gmail.com>
Date: Wed, 19 May 2010 23:04:51 +0000
Subject: [PATCH] nohalo and lbb resampler instruction order, macros and
 comment tweaks

---
 libvips/resample/lbb.cpp    |  30 ++++---
 libvips/resample/nohalo.cpp | 155 +++++++++++++++++++-----------------
 2 files changed, 102 insertions(+), 83 deletions(-)

diff --git a/libvips/resample/lbb.cpp b/libvips/resample/lbb.cpp
index 37b431bc..4ef5e9ca 100644
--- a/libvips/resample/lbb.cpp
+++ b/libvips/resample/lbb.cpp
@@ -1,8 +1,8 @@
 /* lbb (locally bounded bicubic) resampler
  *
- * N. Robidoux, C. Racette and J. Cupitt 23-28/03/2010
+ * N. Robidoux, C. Racette and J. Cupitt, 23-28/03/2010
  *
- * N. Robidoux 16-19/05/2010
+ * N. Robidoux, 16-19/05/2010
  */
 
 /*
@@ -50,8 +50,8 @@
  * than images resampled with windowed sincs or other interpolatory
  * cubic spline filters. Specifically, LBB halos are narrower and the
  * over/undershoot amplitude is smaller. This is accomplished without
- * a significant reduction in the smoothness of the result (compared
- * to Catmull-Rom).
+ * significantly affecting the smoothness of the result (compared to
+ * Catmull-Rom).
  *
  * Another important property is that the resampled values are
  * contained within the range of nearby input values. Consequently, no
@@ -60,8 +60,12 @@
  *
  * LBB was developed by Nicolas Robidoux and Chantal Racette of the
  * Department of Mathematics and Computer Science of Laurentian
- * University in the course of Chantal's Masters Thesis in
- * Computational Sciences.
+ * University in the course of C. Racette's Masters thesis in
+ * Computational Sciences. Preliminary work directly leading to the
+ * LBB method and code was performed by C. Racette and N. Robidoux in
+ * the course of her honours thesis, and by N. Robidoux, A. Turcotte
+ * and E. Daoust during Google Summer of Code 2009 (through two awards
+ * made to GIMP to improve GEGL).
  */
 
 /*
@@ -78,21 +82,21 @@
  *
  * --It is C^1 with continuous cross derivatives.
  *
- * --When the limiters are inactive, LBB gives the same results as
+ * --When the limiters are inactive, LBB gives the same result as
  *   Catmull-Rom.
  *
  * --When used on binary images, LBB gives results similar to bicubic
  *   Hermite with all first derivatives---but not necessarily the
- *   cross derivatives--at the input pixel locations set to zero.
+ *   cross derivatives (this last assertion needs to be double
+ *   checked)--at input pixel locations set to zero.
  *
  * --The LBB reconstruction is locally bounded: Over each square
  *   patch, the surface is contained between the minimum and the
- *   maximum values among the 16 nearest input pixel values (those in
- *   the stencil).
+ *   maximum of the 16 nearest input pixel values.
  *
  * --Consequently, the LBB reconstruction is globally bounded between
  *   the very smallest input pixel value and the very largest input
- *   pixel value. (It is not necessary to clamp results.)
+ *   pixel value. It is not necessary to clamp results.
  *
  * The LBB method is based on the method of Ken Brodlie, Petros
  * Mashwama and Sohail Butt for constraining Hermite interpolants
@@ -224,7 +228,9 @@ lbbicubic( const double c00,
 
   /*
    * Computation of the four min and four max over 3x3 input data
-   * sub-blocks of the 4x4 input stencil.
+   * sub-blocks of the 4x4 input stencil, performed with only 28
+   * comparisons. If you can figure how to do this more efficiently,
+   * let us know.
    */
   const double m1    = (dos_two <= dos_thr) ? dos_two : dos_thr  ;
   const double M1    = (dos_two <= dos_thr) ? dos_thr : dos_two  ;
diff --git a/libvips/resample/nohalo.cpp b/libvips/resample/nohalo.cpp
index ca540514..bcbfed1c 100644
--- a/libvips/resample/nohalo.cpp
+++ b/libvips/resample/nohalo.cpp
@@ -1,8 +1,8 @@
-/* Nohalo subdivision followed by LBB (Locally Bounded Bicubic)
- * interpolation
+/* nohalo subdivision followed by lbb (locally bounded bicubic)
+ * interpolation resampler
  *
- * Nohalo level 1 with bilinear finishing scheme hacked for vips based
- * on code by N. Robidoux by J. Cupitt, 20/1/09
+ * Nohalo level 1 with bilinear finishing scheme hacked for VIPS by
+ * J. Cupitt based on code by N. Robidoux, 20/1/09
  *
  * N. Robidoux and J. Cupitt, 4-17/3/09
  *
@@ -65,9 +65,13 @@
  *
  * Nohalo with LBB finishing scheme was developed by Nicolas Robidoux
  * and Chantal Racette of the Department of Mathematics and Computer
- * Science of Laurentian University in the course of Chantal's Masters
- * Thesis in Computational Sciences, itself the continuation of
- * Chantal's Honours Thesis in Mathematics.
+ * Science of Laurentian University in the course of C. Racette's
+ * Masters thesis in Computational Sciences. Preliminary work on
+ * Nohalo and monotone interpolation was performed by C. Racette and
+ * N. Robidoux in the course of her honours thesis, by N. Robidoux,
+ * A. Turcotte and E. Daoust during Google Summer of Code 2009
+ * (through two awards made to GIMP to improve GEGL), and, earlier, by
+ * N. Robidoux, A. Turcotte, J. Cupitt, M. Gong and K. Martinez.
  */
 
 /*
@@ -258,15 +262,15 @@ typedef struct _VipsInterpolateNohaloClass {
 #define MINMOD(a,b,a_times_a,a_times_b) \
   ( (a_times_b)>=0. ? 1. : 0. ) * ( (a_times_b)<(a_times_a) ? (b) : (a) )
 
-#define LBB_ABS(x)  ( ((x)>=0.) ? (x) : -(x) )
-#define LBB_SIGN(x) ( ((x)>=0.) ? 1.0 : -1.0 )
+#define NOHALO_ABS(x)  ( ((x)>=0.) ? (x) : -(x) )
+#define NOHALO_SIGN(x) ( ((x)>=0.) ? 1.0 : -1.0 )
 
 /*
  * MIN and MAX macros set up so that I can put the likely winner in
  * the first argument (forward branch likely blah blah blah):
  */
-#define LBB_MIN(x,y) ( ((x)<=(y)) ? (x) : (y) )
-#define LBB_MAX(x,y) ( ((x)>=(y)) ? (x) : (y) )
+#define NOHALO_MIN(x,y) ( ((x)<=(y)) ? (x) : (y) )
+#define NOHALO_MAX(x,y) ( ((x)>=(y)) ? (x) : (y) )
 
 
 static void inline
@@ -789,8 +793,9 @@ lbbicubic( const double c00,
    * two commented out lines below.
    *
    * Overall, only 27 comparisons are needed (to compute 4 mins and 4
-   * maxes!). Without the simplification, 28 comparisoins would be
-   * used.
+   * maxes!). Without the simplification, 28 comparisons would be
+   * used. If you can figure how to do this more efficiently, let us
+   * know.
    */
   const double m1    = (dos_two <= dos_thr) ? dos_two : dos_thr  ;
   const double M1    = (dos_two <= dos_thr) ? dos_thr : dos_two  ;
@@ -800,8 +805,8 @@ lbbicubic( const double c00,
   const double M4    = (qua_two <= qua_thr) ? qua_thr : qua_two  ;
   const double m3    = (uno_two <= uno_thr) ? uno_two : uno_thr  ;
   const double M3    = (uno_two <= uno_thr) ? uno_thr : uno_two  ;
-  const double m5    = LBB_MIN(               m1,       m2      );
-  const double M5    = LBB_MAX(               M1,       M2      );
+  const double m5    = NOHALO_MIN(            m1,       m2      );
+  const double M5    = NOHALO_MAX(            M1,       M2      );
   const double m6    = (dos_one <= tre_one) ? dos_one : tre_one  ;
   const double M6    = (dos_one <= tre_one) ? tre_one : dos_one  ;
   const double m7    = (dos_fou <= tre_fou) ? dos_fou : tre_fou  ;
@@ -813,30 +818,30 @@ lbbicubic( const double c00,
    * lines can be replaced by the above, simpler, two lines without
    * changing the results.
    *
-   * const double m13   = LBB_MIN(               m7,       qua_fou );
-   * const double M13   = LBB_MAX(               M7,       qua_fou );
+   * const double m13   = NOHALO_MIN(               m7,       qua_fou );
+   * const double M13   = NOHALO_MAX(               M7,       qua_fou );
    *
    * This also allows reodering the comparisons to put space between
    * the computation of a result and its use.
    */
-  const double m9    = LBB_MIN(               m5,       m4      );
-  const double M9    = LBB_MAX(               M5,       M4      );
-  const double m8    = LBB_MIN(               m5,       m3      );
-  const double M8    = LBB_MAX(               M5,       M3      );
-  const double m11   = LBB_MIN(               m6,       qua_one );
-  const double M11   = LBB_MAX(               M6,       qua_one );
-  const double m10   = LBB_MIN(               m6,       uno_one );
-  const double M10   = LBB_MAX(               M6,       uno_one );
-  const double m12   = LBB_MIN(               m7,       uno_fou );
-  const double M12   = LBB_MAX(               M7,       uno_fou );
-  const double min11 = LBB_MIN(               m9,       m13     );
-  const double max11 = LBB_MAX(               M9,       M13     );
-  const double min01 = LBB_MIN(               m9,       m11     );
-  const double max01 = LBB_MAX(               M9,       M11     );
-  const double min00 = LBB_MIN(               m8,       m10     );
-  const double max00 = LBB_MAX(               M8,       M10     );
-  const double min10 = LBB_MIN(               m8,       m12     );
-  const double max10 = LBB_MAX(               M8,       M12     );
+  const double m9    = NOHALO_MIN(            m5,       m4      );
+  const double M9    = NOHALO_MAX(            M5,       M4      );
+  const double m8    = NOHALO_MIN(            m5,       m3      );
+  const double M8    = NOHALO_MAX(            M5,       M3      );
+  const double m11   = NOHALO_MIN(            m6,       qua_one );
+  const double M11   = NOHALO_MAX(            M6,       qua_one );
+  const double m10   = NOHALO_MIN(            m6,       uno_one );
+  const double M10   = NOHALO_MAX(            M6,       uno_one );
+  const double m12   = NOHALO_MIN(            m7,       uno_fou );
+  const double M12   = NOHALO_MAX(            M7,       uno_fou );
+  const double min11 = NOHALO_MIN(            m9,       m13     );
+  const double max11 = NOHALO_MAX(            M9,       M13     );
+  const double min01 = NOHALO_MIN(            m9,       m11     );
+  const double max01 = NOHALO_MAX(            M9,       M11     );
+  const double min00 = NOHALO_MIN(            m8,       m10     );
+  const double max00 = NOHALO_MAX(            M8,       M10     );
+  const double min10 = NOHALO_MIN(            m8,       m12     );
+  const double max10 = NOHALO_MAX(            M8,       M12     );
   /*
    * The remainder of the "per channel" computation involves the
    * computation of:
@@ -891,15 +896,15 @@ lbbicubic( const double c00,
    * them (except if the clamping sends a negative derivative to 0, in
    * which case the sign does not matter anyway).
    */
-  const double sign_dzdx00 = LBB_SIGN( dble_dzdx00i );
-  const double sign_dzdx10 = LBB_SIGN( dble_dzdx10i );
-  const double sign_dzdx01 = LBB_SIGN( dble_dzdx01i );
-  const double sign_dzdx11 = LBB_SIGN( dble_dzdx11i );
+  const double sign_dzdx00 = NOHALO_SIGN( dble_dzdx00i );
+  const double sign_dzdx10 = NOHALO_SIGN( dble_dzdx10i );
+  const double sign_dzdx01 = NOHALO_SIGN( dble_dzdx01i );
+  const double sign_dzdx11 = NOHALO_SIGN( dble_dzdx11i );
 
-  const double sign_dzdy00 = LBB_SIGN( dble_dzdy00i );
-  const double sign_dzdy10 = LBB_SIGN( dble_dzdy10i );
-  const double sign_dzdy01 = LBB_SIGN( dble_dzdy01i );
-  const double sign_dzdy11 = LBB_SIGN( dble_dzdy11i );
+  const double sign_dzdy00 = NOHALO_SIGN( dble_dzdy00i );
+  const double sign_dzdy10 = NOHALO_SIGN( dble_dzdy10i );
+  const double sign_dzdy01 = NOHALO_SIGN( dble_dzdy01i );
+  const double sign_dzdy11 = NOHALO_SIGN( dble_dzdy11i );
 
   /*
    * Initial values of the cross-derivatives. Factors of 1/4 are left
@@ -914,10 +919,10 @@ lbbicubic( const double c00,
    * Slope limiters. The key multiplier is 3 but we fold a factor of
    * 2, hence 6:
    */
-  const double dble_slopelimit_00 = 6.0 * LBB_MIN( u00, v00 );
-  const double dble_slopelimit_10 = 6.0 * LBB_MIN( u10, v10 );
-  const double dble_slopelimit_01 = 6.0 * LBB_MIN( u01, v01 );
-  const double dble_slopelimit_11 = 6.0 * LBB_MIN( u11, v11 );
+  const double dble_slopelimit_00 = 6.0 * NOHALO_MIN( u00, v00 );
+  const double dble_slopelimit_10 = 6.0 * NOHALO_MIN( u10, v10 );
+  const double dble_slopelimit_01 = 6.0 * NOHALO_MIN( u01, v01 );
+  const double dble_slopelimit_11 = 6.0 * NOHALO_MIN( u11, v11 );
 
   /*
    * Clamped first derivatives:
@@ -962,10 +967,10 @@ lbbicubic( const double c00,
   /*
    * Absolute values of the sums:
    */
-  const double twelve_abs_sum00 = LBB_ABS( twelve_sum00 );
-  const double twelve_abs_sum10 = LBB_ABS( twelve_sum10 );
-  const double twelve_abs_sum01 = LBB_ABS( twelve_sum01 );
-  const double twelve_abs_sum11 = LBB_ABS( twelve_sum11 );
+  const double twelve_abs_sum00 = NOHALO_ABS( twelve_sum00 );
+  const double twelve_abs_sum10 = NOHALO_ABS( twelve_sum10 );
+  const double twelve_abs_sum01 = NOHALO_ABS( twelve_sum01 );
+  const double twelve_abs_sum11 = NOHALO_ABS( twelve_sum11 );
 
   /*
    * Scaled distances to the min:
@@ -983,10 +988,10 @@ lbbicubic( const double c00,
   const double first_limit01 = twelve_abs_sum01 - u01_times_36;
   const double first_limit11 = twelve_abs_sum11 - u11_times_36;
 
-  const double quad_d2zdxdy00ii = LBB_MAX( quad_d2zdxdy00i, first_limit00 );
-  const double quad_d2zdxdy10ii = LBB_MAX( quad_d2zdxdy10i, first_limit10 );
-  const double quad_d2zdxdy01ii = LBB_MAX( quad_d2zdxdy01i, first_limit01 );
-  const double quad_d2zdxdy11ii = LBB_MAX( quad_d2zdxdy11i, first_limit11 );
+  const double quad_d2zdxdy00ii = NOHALO_MAX( quad_d2zdxdy00i, first_limit00 );
+  const double quad_d2zdxdy10ii = NOHALO_MAX( quad_d2zdxdy10i, first_limit10 );
+  const double quad_d2zdxdy01ii = NOHALO_MAX( quad_d2zdxdy01i, first_limit01 );
+  const double quad_d2zdxdy11ii = NOHALO_MAX( quad_d2zdxdy11i, first_limit11 );
 
   /*
    * Scaled distances to the max:
@@ -1004,18 +1009,22 @@ lbbicubic( const double c00,
   const double second_limit01 = v01_times_36 - twelve_abs_sum01;
   const double second_limit11 = v11_times_36 - twelve_abs_sum11;
 
-  const double quad_d2zdxdy00iii = LBB_MIN( quad_d2zdxdy00ii, second_limit00 );
-  const double quad_d2zdxdy10iii = LBB_MIN( quad_d2zdxdy10ii, second_limit10 );
-  const double quad_d2zdxdy01iii = LBB_MIN( quad_d2zdxdy01ii, second_limit01 );
-  const double quad_d2zdxdy11iii = LBB_MIN( quad_d2zdxdy11ii, second_limit11 );
+  const double quad_d2zdxdy00iii =
+    NOHALO_MIN( quad_d2zdxdy00ii, second_limit00 );
+  const double quad_d2zdxdy10iii =
+    NOHALO_MIN( quad_d2zdxdy10ii, second_limit10 );
+  const double quad_d2zdxdy01iii =
+    NOHALO_MIN( quad_d2zdxdy01ii, second_limit01 );
+  const double quad_d2zdxdy11iii =
+    NOHALO_MIN( quad_d2zdxdy11ii, second_limit11 );
 
   /*
    * Absolute values of the differences:
    */
-  const double twelve_abs_dif00 = LBB_ABS( twelve_dif00 );
-  const double twelve_abs_dif10 = LBB_ABS( twelve_dif10 );
-  const double twelve_abs_dif01 = LBB_ABS( twelve_dif01 );
-  const double twelve_abs_dif11 = LBB_ABS( twelve_dif11 );
+  const double twelve_abs_dif00 = NOHALO_ABS( twelve_dif00 );
+  const double twelve_abs_dif10 = NOHALO_ABS( twelve_dif10 );
+  const double twelve_abs_dif01 = NOHALO_ABS( twelve_dif01 );
+  const double twelve_abs_dif11 = NOHALO_ABS( twelve_dif11 );
 
   /*
    * Third cross-derivative limiter:
@@ -1025,10 +1034,14 @@ lbbicubic( const double c00,
   const double third_limit01 = twelve_abs_dif01 - v01_times_36;
   const double third_limit11 = twelve_abs_dif11 - v11_times_36;
 
-  const double quad_d2zdxdy00iiii = LBB_MAX( quad_d2zdxdy00iii, third_limit00);
-  const double quad_d2zdxdy10iiii = LBB_MAX( quad_d2zdxdy10iii, third_limit10);
-  const double quad_d2zdxdy01iiii = LBB_MAX( quad_d2zdxdy01iii, third_limit01);
-  const double quad_d2zdxdy11iiii = LBB_MAX( quad_d2zdxdy11iii, third_limit11);
+  const double quad_d2zdxdy00iiii =
+    NOHALO_MAX( quad_d2zdxdy00iii, third_limit00);
+  const double quad_d2zdxdy10iiii =
+    NOHALO_MAX( quad_d2zdxdy10iii, third_limit10);
+  const double quad_d2zdxdy01iiii =
+    NOHALO_MAX( quad_d2zdxdy01iii, third_limit01);
+  const double quad_d2zdxdy11iiii =
+    NOHALO_MAX( quad_d2zdxdy11iii, third_limit11);
 
   /*
    * Fourth cross-derivative limiter:
@@ -1038,10 +1051,10 @@ lbbicubic( const double c00,
   const double fourth_limit01 = u01_times_36 - twelve_abs_dif01;
   const double fourth_limit11 = u11_times_36 - twelve_abs_dif11;
 
-  const double quad_d2zdxdy00 = LBB_MIN( quad_d2zdxdy00iiii, fourth_limit00);
-  const double quad_d2zdxdy10 = LBB_MIN( quad_d2zdxdy10iiii, fourth_limit10);
-  const double quad_d2zdxdy01 = LBB_MIN( quad_d2zdxdy01iiii, fourth_limit01);
-  const double quad_d2zdxdy11 = LBB_MIN( quad_d2zdxdy11iiii, fourth_limit11);
+  const double quad_d2zdxdy00 = NOHALO_MIN( quad_d2zdxdy00iiii, fourth_limit00);
+  const double quad_d2zdxdy10 = NOHALO_MIN( quad_d2zdxdy10iiii, fourth_limit10);
+  const double quad_d2zdxdy01 = NOHALO_MIN( quad_d2zdxdy01iiii, fourth_limit01);
+  const double quad_d2zdxdy11 = NOHALO_MIN( quad_d2zdxdy11iiii, fourth_limit11);
 
   /*
    * Four times the part of the result which only uses cross