From b23a1ece6c610a88f036f866505ea35ce8eb580d Mon Sep 17 00:00:00 2001 From: Nicolas Robidoux Date: Wed, 4 Mar 2009 22:09:54 +0000 Subject: [PATCH] nohalo resampler simplification/speed up --- libsrc/resample/nohalo.cpp | 575 +++++++++++++++++-------------------- 1 file changed, 269 insertions(+), 306 deletions(-) diff --git a/libsrc/resample/nohalo.cpp b/libsrc/resample/nohalo.cpp index bfe6bc36..29d41053 100644 --- a/libsrc/resample/nohalo.cpp +++ b/libsrc/resample/nohalo.cpp @@ -5,19 +5,20 @@ This file is part of VIPS. - VIPS is free software; you can redistribute it and/or modify - it under the terms of the GNU Lesser General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. + VIPS is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU Lesser General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. - You should have received a copy of the GNU Lesser General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + You should have received a copy of the GNU Lesser General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA */ @@ -30,15 +31,16 @@ /* * 2009 (c) Nicolas Robidoux * - * Thanks: Geert Jordaens, John Cupitt, Minglun Gong, Øyvind Kolås and - * Sven Neumann for useful comments and code. + * Nicolas thanks Geert Jordaens, John Cupitt, Minglun Gong, Øyvind + * Kolås and Sven Neumann for useful comments and code. * - * Acknowledgement: Nicolas Robidoux's research on nohalo funded in - * part by an NSERC (National Science and Engineering Research Council - * of Canada) Discovery Grant. + * Nicolas Robidoux's research on nohalo funded in part by an NSERC + * (National Science and Engineering Research Council of Canada) + * Discovery Grant. */ /* Hacked for vips by J. Cupitt, 20/1/09 + * Tweaks by N. Robidoux, 03/3/09 */ /* @@ -63,9 +65,9 @@ * not change the answer, and consequently do not increase its * quality. * - * ============================================================ - * WARNING: THIS CODE ONLY IMPLEMENTS THE LOWEST QUALITY NOHALO - * ============================================================ + * =================================================== + * THIS CODE ONLY IMPLEMENTS THE LOWEST QUALITY NOHALO + * =================================================== * * This code implement nohalo for (quality) level = 1. Nohalo for * higher quality levels will be implemented later. @@ -114,11 +116,7 @@ * * The value of the reconstructed intensity surface at any point * depends on the values of (at most) 12 nearby input values, located - * in a "cross" centered at the closest four input pixel centers. For - * computational expediency, the input values corresponding to the - * nearest 21 input pixel locations (5x5 minus the four corners) - * should be made available through a data pointer. The code then - * selects the needed ones from this enlarged stencil. + * in a "cross" centered at the closest four input pixel centers. * * =========================================================== * When level = infinity, nohalo's intensity surface is smooth @@ -140,25 +138,26 @@ * * (Except possibly near the boundary: it is easy to make this * property carry over everywhere but this requires a tuned abyss - * policy or building the boundary conditions inside the sampler.) - * Nohalo is exact on linear intensity profiles, meaning that if the - * input pixel values (in the stencil) are obtained from a function of - * the form f(x,y) = a + b*x + c*y (a, b, c constants), then the - * computed pixel value is exactly the value of f(x,y) at the - * asked-for sampling location. The boundary condition which is - * emulated by VIPS throught the "extend" extension of the input - * image---this corresponds to the nearest neighbour abyss - * policy---does NOT make this resampler exact on linears at the - * boundary. It does, however, guarantee that no clamping is required - * even when resampled values are computed at positions outside of the - * extent of the input image (when extrapolation is required). + * policy---linear extrapolation, say---or building the boundary + * conditions inside the sampler.) Nohalo is exact on linear + * intensity profiles, meaning that if the input pixel values (in the + * stencil) are obtained from a function of the form f(x,y) = a + b*x + * + c*y (a, b, c constants), then the computed pixel value is exactly + * the value of f(x,y) at the asked-for sampling location. The + * boundary condition which is emulated by VIPS throught the "extend" + * extension of the input image---this corresponds to the nearest + * neighbour abyss policy---does NOT make this resampler exact on + * linears at the boundary. It does, however, guarantee that no + * clamping is required even when resampled values are computed at + * positions outside of the extent of the input image (when + * extrapolation is required). * * =================== * Nohalo is nonlinear * =================== * * In particular, resampling a sum of images may not be the same as - * summing the resamples (this occurs even without taking into account + * summing the resamples. (This occurs even without taking into account * over and underflow issues: images can only take values within a * banded range, and consequently no sampler is truly linear.) * @@ -188,10 +187,6 @@ * intensity surface can be treated as if smooth.) */ -/* -#define DEBUG - */ - #ifdef HAVE_CONFIG_H #include #endif /*HAVE_CONFIG_H*/ @@ -274,260 +269,236 @@ typedef struct _VipsInterpolateNohaloClass { } VipsInterpolateNohaloClass; -/* Calculate the four results surrounding the target point, our caller does - * bilinear interpolation of them. - */ - static void inline nohalo_sharp_level_1( + const double uno_two, + const double uno_thr, + const double dos_one, + const double dos_two, const double dos_thr, const double dos_fou, + const double tre_one, const double tre_two, const double tre_thr, const double tre_fou, - const double tre_fiv, const double qua_two, const double qua_thr, - const double qua_fou, - const double qua_fiv, - const double cin_thr, - const double cin_fou, double *r1, double *r2, double *r3 ) { - /* Start of copy-paste from Nicolas's source. - */ - - /* - * THE ENLARGED STENCIL (prior to entering this function): - * - * The potentially needed input pixel values are described by the - * following stencil, where (ix,iy) are the coordinates of the - * closest input pixel center (with ties resolved arbitrarily). - * - * Spanish abbreviations are used to label positions from top to - * bottom (rows), English ones to label positions from left to right - * (columns). - * - * (ix-1,iy-2) (ix,iy-2) (ix+1,iy-2) - * = uno_two = uno_thr = uno_fou - * - * (ix-2,iy-1) (ix-1,iy-1) (ix,iy-1) (ix+1,iy-1) (ix+2,iy-1) - * = dos_one = dos_two = dos_thr = dos_fou = dos_fiv - * - * (ix-2,iy) (ix-1,iy) (ix,iy) (ix+1,iy) (ix+2,iy) - * = tre_one = tre_two = tre_thr = tre_fou = tre_fiv - * - * (ix-2,iy+1) (ix-1,iy+1) (ix,iy+1) (ix+1,iy+1) (ix+2,iy+1) - * = qua_one = qua_two = qua_thr = qua_fou = qua_fiv - * - * (ix-1,iy+2) (ix,iy+2) (ix+1,iy+2) - * = cin_two = cin_thr = cin_fou - * - * THE STENCIL OF ACTUALLY READ VALUES: - * - * The above is the "enlarged" stencil: about half the values will - * not be used. Once symmetry has been used to assume that the - * sampling point is to the right and bottom of tre_thr---this is - * done by implicitly reflecting the data if needed---the actually - * used input values are named thus: - * - * dos_thr dos_fou - * - * tre_two tre_thr tre_fou tre_fiv - * - * qua_two qua_thr qua_fou qua_fiv - * - * cin_thr cin_fou - * - * (If, for exammple, relative_x_is_left is 1 but relative_y_is___up - * = 0, then dos_fou in this post-reflexion reduced stencil really - * corresponds to dos_two in the "enlarged" one, etc.) - * - * Given that the reflexions are performed "outside of the - * nohalo_sharp_level_1 function," the above 12 input values are the - * only ones which are read from the buffer. + /* + * This function calculates the missing three double density pixel + * values. The caller does bilinear interpolation on them and + * dos_two. + */ + /* + * THE STENCIL OF INPUT VALUES: + * + * Nohalo's stencil is the same as, say, Catmull-Rom, with the + * exception that the four corner values are not used: + * + * (ix-1,iy-2) (ix,iy-2) + * = uno_two = uno_thr + * + * (ix-2,iy-1) (ix-1,iy-1) (ix,iy-1) (ix+1,iy-1) + * = dos_one = dos_two = dos_thr = dos_fou + * + * (ix-2,iy) (ix-1,iy) (ix,iy) (ix+1,iy) + * = tre_one = tre_two = tre_thr = tre_fou + * + * (ix-1,iy+1) (ix,iy+1) + * = qua_two = qua_thr + * + * The indices associated with the values shown above are in the + * case that the resampling point is closer to (ix-1,iy-1) than the + * other three central positions. Pointer arithmetic is used to + * implicitly reflect the input stencil in the other three cases, + * For example, if the sampling position is closer to dos_two (that + * is, if relative_x_is_rite = 1 but relative_y_is_down = 0 below), + * then dos_two corresponds to (ix,iy-1), dos_thr corresponds to + * (ix-1,iy-1) etc. Consequently, the three missing double density + * values are halfway between dos_two and dos_thr, halfway between + * dos_two and tre_two, and at the average of the four central + * positions. */ - /* * Computation of the nonlinear slopes: If two consecutive pixel * value differences have the same sign, the smallest one (in * absolute value) is taken to be the corresponding slope; if the * two consecutive pixel value differences don't have the same sign, - * the corresponding slope is set to 0. + * the corresponding slope is set to 0. (In other words, apply + * minmod to comsecutive slopes.) */ + /* + * Dos(s) horizontal differences: + */ + const double prem_dos = dos_two - dos_one; + const double deux_dos = dos_thr - dos_two; + const double troi_dos = dos_fou - dos_thr; /* * Tre(s) horizontal differences: */ + const double prem_tre = tre_two - tre_one; const double deux_tre = tre_thr - tre_two; const double troi_tre = tre_fou - tre_thr; - const double quat_tre = tre_fiv - tre_fou; /* - * Qua(ttro) horizontal differences: + * Two vertical differences: */ - const double deux_qua = qua_thr - qua_two; - const double troi_qua = qua_fou - qua_thr; - const double quat_qua = qua_fiv - qua_fou; + const double prem_two = dos_two - uno_two; + const double deux_two = tre_two - dos_two; + const double troi_two = qua_two - tre_two; /* * Thr(ee) vertical differences: */ + const double prem_thr = dos_thr - uno_thr; const double deux_thr = tre_thr - dos_thr; const double troi_thr = qua_thr - tre_thr; - const double quat_thr = cin_thr - qua_thr; - /* - * Fou(r) vertical differences: - */ - const double deux_fou = tre_fou - dos_fou; - const double troi_fou = qua_fou - tre_fou; - const double quat_fou = cin_fou - qua_fou; - - /* - * Tre: - */ - const double half_sign_deux_tre = deux_tre >= 0. ? .5 : -.5; - const double half_sign_troi_tre = troi_tre >= 0. ? .5 : -.5; - const double half_sign_quat_tre = quat_tre >= 0. ? .5 : -.5; - /* - * Qua: - */ - const double half_sign_deux_qua = deux_qua >= 0. ? .5 : -.5; - const double half_sign_troi_qua = troi_qua >= 0. ? .5 : -.5; - const double half_sign_quat_qua = quat_qua >= 0. ? .5 : -.5; - /* - * Thr: - */ - const double half_sign_deux_thr = deux_thr >= 0. ? .5 : -.5; - const double half_sign_troi_thr = troi_thr >= 0. ? .5 : -.5; - const double half_sign_quat_thr = quat_thr >= 0. ? .5 : -.5; - /* - * Fou: - */ - const double half_sign_deux_fou = deux_fou >= 0. ? .5 : -.5; - const double half_sign_troi_fou = troi_fou >= 0. ? .5 : -.5; - const double half_sign_quat_fou = quat_fou >= 0. ? .5 : -.5; /* * Useful later: */ - const double tre_thr_plus_tre_fou = tre_thr + tre_fou; - const double tre_thr_plus_qua_thr = tre_thr + qua_thr; - const double qua_fou_minus_tre_thr = qua_fou - tre_thr; + const double dos_two_plus_dos_thr = dos_two + dos_thr; + const double dos_two_plus_tre_two = dos_two + tre_two; + /* + * Dos: + */ + const double half_sign_prem_dos = prem_dos >= 0. ? .5 : -.5; + const double half_sign_deux_dos = deux_dos >= 0. ? .5 : -.5; + const double half_sign_troi_dos = troi_dos >= 0. ? .5 : -.5; /* * Tre: */ + const double half_sign_prem_tre = prem_tre >= 0. ? .5 : -.5; + const double half_sign_deux_tre = deux_tre >= 0. ? .5 : -.5; + const double half_sign_troi_tre = troi_tre >= 0. ? .5 : -.5; + /* + * Two: + */ + const double half_sign_prem_two = prem_two >= 0. ? .5 : -.5; + const double half_sign_deux_two = deux_two >= 0. ? .5 : -.5; + const double half_sign_troi_two = troi_two >= 0. ? .5 : -.5; + /* + * Thr: + */ + const double half_sign_prem_thr = prem_thr >= 0. ? .5 : -.5; + const double half_sign_deux_thr = deux_thr >= 0. ? .5 : -.5; + const double half_sign_troi_thr = troi_thr >= 0. ? .5 : -.5; + + /* + * Dos: + */ + const double half_abs_prem_dos = half_sign_prem_dos * prem_dos; + const double sign_dos_two_horizo = half_sign_prem_dos + half_sign_deux_dos; + const double half_abs_deux_dos = half_sign_deux_dos * deux_dos; + const double sign_dos_thr_horizo = half_sign_deux_dos + half_sign_troi_dos; + const double half_abs_troi_dos = half_sign_troi_dos * troi_dos; + /* + * Two: + */ + const double half_abs_prem_two = half_sign_prem_two * prem_two; + const double sign_dos_two_vertic = half_sign_prem_two + half_sign_deux_two; + const double half_abs_deux_two = half_sign_deux_two * deux_two; + const double sign_tre_two_vertic = half_sign_deux_two + half_sign_troi_two; + const double half_abs_troi_two = half_sign_troi_two * troi_two; + /* + * Tre: + */ + const double half_abs_prem_tre = half_sign_prem_tre * prem_tre; + const double sign_tre_two_horizo = half_sign_prem_tre + half_sign_deux_tre; const double half_abs_deux_tre = half_sign_deux_tre * deux_tre; const double sign_tre_thr_horizo = half_sign_deux_tre + half_sign_troi_tre; const double half_abs_troi_tre = half_sign_troi_tre * troi_tre; - const double sign_tre_fou_horizo = half_sign_troi_tre + half_sign_quat_tre; - const double half_abs_quat_tre = half_sign_quat_tre * quat_tre; /* * Thr: */ + const double half_abs_prem_thr = half_sign_prem_thr * prem_thr; + const double sign_dos_thr_vertic = half_sign_prem_thr + half_sign_deux_thr; const double half_abs_deux_thr = half_sign_deux_thr * deux_thr; const double sign_tre_thr_vertic = half_sign_deux_thr + half_sign_troi_thr; const double half_abs_troi_thr = half_sign_troi_thr * troi_thr; - const double sign_qua_thr_vertic = half_sign_troi_thr + half_sign_quat_thr; - const double half_abs_quat_thr = half_sign_quat_thr * quat_thr; - /* - * Qua: - */ - const double half_abs_deux_qua = half_sign_deux_qua * deux_qua; - const double sign_qua_thr_horizo = half_sign_deux_qua + half_sign_troi_qua; - const double half_abs_troi_qua = half_sign_troi_qua * troi_qua; - const double sign_qua_fou_horizo = half_sign_troi_qua + half_sign_quat_qua; - const double half_abs_quat_qua = half_sign_quat_qua * quat_qua; - /* - * Fou: - */ - const double half_abs_deux_fou = half_sign_deux_fou * deux_fou; - const double sign_tre_fou_vertic = half_sign_deux_fou + half_sign_troi_fou; - const double half_abs_troi_fou = half_sign_troi_fou * troi_fou; - const double sign_qua_fou_vertic = half_sign_troi_fou + half_sign_quat_fou; - const double half_abs_quat_fou = half_sign_quat_fou * quat_fou; + /* + * Useful later: + */ + const double tre_thr_minus_dos_two = deux_thr + deux_dos; + + /* + * Dos: + */ + const double half_size_dos_two_horizo = + FAST_MIN( half_abs_prem_dos, half_abs_deux_dos ); + const double half_size_dos_thr_horizo = + FAST_MIN( half_abs_troi_dos, half_abs_deux_dos ); + /* + * Two: + */ + const double half_size_dos_two_vertic = + FAST_MIN( half_abs_prem_two, half_abs_deux_two ); + const double half_size_tre_two_vertic = + FAST_MIN( half_abs_troi_two, half_abs_deux_two ); /* * Tre: */ + const double half_size_tre_two_horizo = + FAST_MIN( half_abs_prem_tre, half_abs_deux_tre ); const double half_size_tre_thr_horizo = - FAST_MIN( half_abs_deux_tre, half_abs_troi_tre ); - const double half_size_tre_fou_horizo = - FAST_MIN( half_abs_quat_tre, half_abs_troi_tre ); + FAST_MIN( half_abs_troi_tre, half_abs_deux_tre ); /* * Thr: */ + const double half_size_dos_thr_vertic = + FAST_MIN( half_abs_prem_thr, half_abs_deux_thr ); const double half_size_tre_thr_vertic = - FAST_MIN( half_abs_deux_thr, half_abs_troi_thr ); - const double half_size_qua_thr_vertic = - FAST_MIN( half_abs_quat_thr, half_abs_troi_thr ); - /* - * Qua: - */ - const double half_size_qua_thr_horizo = - FAST_MIN( half_abs_deux_qua, half_abs_troi_qua ); - const double half_size_qua_fou_horizo = - FAST_MIN( half_abs_quat_qua, half_abs_troi_qua ); - /* - * Fou: - */ - const double half_size_tre_fou_vertic = - FAST_MIN( half_abs_deux_fou, half_abs_troi_fou ); - const double half_size_qua_fou_vertic = - FAST_MIN( half_abs_quat_fou, half_abs_troi_fou ); + FAST_MIN( half_abs_troi_thr, half_abs_deux_thr ); /* - * Compute the needed "right" (at the boundary between two input + * Compute the needed "right" (at the boundary between one input * pixel areas) double resolution pixel value: */ - /* - * Tre: - */ - const double two_times_tre_thrfou = - tre_thr_plus_tre_fou + const double two_times_dos_twothr = + dos_two_plus_dos_thr + - sign_tre_thr_horizo * half_size_tre_thr_horizo + sign_dos_two_horizo * half_size_dos_two_horizo - - sign_tre_fou_horizo * half_size_tre_fou_horizo; + sign_dos_thr_horizo * half_size_dos_thr_horizo; /* * Compute the needed "down" double resolution pixel value: */ - /* - * Thr: - */ - const double two_times_trequa_thr = - tre_thr_plus_qua_thr + const double two_times_dostre_two = + dos_two_plus_tre_two + - sign_tre_thr_vertic * half_size_tre_thr_vertic + sign_dos_two_vertic * half_size_dos_two_vertic - - sign_qua_thr_vertic * half_size_qua_thr_vertic; + sign_tre_two_vertic * half_size_tre_two_vertic; /* - * Compute the "diagonal" (at the boundary between four input + * Compute the "diagonal" (at the boundary between thrr input * pixel areas) double resolution pixel value: */ - const double four_times_trequa_thrfou = - qua_fou_minus_tre_thr + const double four_times_dostre_twothr = + tre_thr_minus_dos_two + - sign_qua_thr_horizo * half_size_qua_thr_horizo + sign_tre_two_horizo * half_size_tre_two_horizo - - sign_qua_fou_horizo * half_size_qua_fou_horizo + sign_tre_thr_horizo * half_size_tre_thr_horizo + - sign_tre_fou_vertic * half_size_tre_fou_vertic + sign_dos_thr_vertic * half_size_dos_thr_vertic - - sign_qua_fou_vertic * half_size_qua_fou_vertic + sign_tre_thr_vertic * half_size_tre_thr_vertic + - two_times_tre_thrfou + two_times_dos_twothr + - two_times_trequa_thr; + two_times_dostre_two; - /* End of copy-paste from Nicolas' source. - */ - - *r1 = two_times_tre_thrfou; - *r2 = two_times_trequa_thr; - *r3 = four_times_trequa_thrfou; + /* + * Return the newly computed double density values: + */ + *r1 = two_times_dos_twothr; + *r2 = two_times_dostre_two; + *r3 = four_times_dostre_twothr; } /* Call nohalo_sharp_level_1 with an interpolator as a parameter. @@ -536,94 +507,85 @@ nohalo_sharp_level_1( */ #define NOHALO_SHARP_LEVEL_1_INTER( inter ) \ template static void inline \ - nohalo_sharp_level_1_ ## inter( PEL *pout, \ - const PEL *pin, \ - const int bands, \ - const int lskip, \ - const double relative_x, \ - const double relative_y ) \ + nohalo_sharp_level_1_ ## inter( PEL *pout, \ + const PEL *pin, \ + const int bands, \ + const int lskip, \ + const double relative_x, \ + const double relative_y ) \ { \ T* restrict out = (T *) pout; \ - const T* restrict in = (T *) pin; \ \ - const int relative_x_is_left = ( relative_x < 0. ); \ - const int relative_y_is___up = ( relative_y < 0. ); \ + const int relative_x_is_rite = ( relative_x >= 0. ); \ + const int relative_y_is_down = ( relative_y >= 0. ); \ + \ + const int sign_of_relative_x = 2 * relative_x_is_rite - 1; \ + const int sign_of_relative_y = 2 * relative_y_is_down - 1; \ \ const int corner_reflection_shift = \ - ( -2 + 4 * relative_x_is_left ) * bands \ - + \ - ( -2 + 4 * relative_y_is___up ) * lskip; \ + relative_x_is_rite * bands + relative_y_is_down * lskip; \ \ - const int sign_of_relative_x = 1 - 2 * relative_x_is_left; \ - const int sign_of_relative_y = 1 - 2 * relative_y_is___up; \ - \ - const double x = ( 2 * sign_of_relative_x ) * relative_x; \ - const double y = ( 2 * sign_of_relative_y ) * relative_y; \ - \ - const double x_times_y = x * y; \ - const double w_times_y = y - x_times_y; \ - const double x_times_z = x - x_times_y; \ - const double w_times_z = 1. - x - w_times_y; \ - \ - const double x_times_y_over_4 = .25 * x_times_y; \ - const double w_times_y_over_2 = .5 * w_times_y; \ - const double x_times_z_over_2 = .5 * x_times_z; \ + const T* restrict in = ( (T *) pin ) + corner_reflection_shift; \ \ const int shift_1_pixel = sign_of_relative_x * bands; \ const int shift_1_row = sign_of_relative_y * lskip; \ \ - const int b1 = shift_1_pixel + corner_reflection_shift; \ - const int b2 = 2 * shift_1_pixel + corner_reflection_shift; \ - const int b3 = 3 * shift_1_pixel + corner_reflection_shift; \ - const int b4 = 4 * shift_1_pixel + corner_reflection_shift; \ + const double w = ( 2 * sign_of_relative_x ) * relative_x; \ + const double z = ( 2 * sign_of_relative_y ) * relative_y; \ \ - const int l1 = shift_1_row; \ - const int l2 = 2 * shift_1_row; \ - const int l3 = 3 * shift_1_row; \ - const int l4 = 4 * shift_1_row; \ + const int uno_two_shift = shift_1_row; \ + const int uno_thr_shift = shift_1_row - shift_1_pixel; \ \ - for( int z = 0; z < bands; z++ ) { \ - const T dos_thr = in[b2 + l1]; \ - const T dos_fou = in[b3 + l1]; \ + const int dos_one_shift = shift_1_pixel; \ + const int dos_two_shift = 0; \ + const int dos_thr_shift = -shift_1_pixel; \ + const int dos_fou_shift = -2 * shift_1_pixel; \ + \ + const int tre_one_shift = dos_one_shift - shift_1_row; \ + const int tre_two_shift = -shift_1_row; \ + const int tre_thr_shift = dos_thr_shift - shift_1_row; \ + const int tre_fou_shift = dos_fou_shift - shift_1_row; \ + \ + const int qua_two_shift = tre_two_shift - shift_1_row; \ + const int qua_thr_shift = tre_thr_shift - shift_1_row; \ + \ + const double x = 1. - w; \ + const double w_times_z = w * z; \ + const double x_times_z = x * z; \ + const double w_times_y_over_2 = .5 * ( w - w_times_z ); \ + const double x_times_z_over_2 = .5 * x_times_z; \ + const double x_times_y_over_4 = .25 * ( x - x_times_z ); \ + \ + for( int band = 0; band < bands; band++ ) { \ + double two_times_dos_twothr; \ + double two_times_dostre_two; \ + double four_times_dostre_twothr; \ \ - const T tre_two = in[b1 + l2]; \ - const T tre_thr = in[b2 + l2]; \ - const T tre_fou = in[b3 + l2]; \ - const T tre_fiv = in[b4 + l2]; \ - \ - const T qua_two = in[b1 + l3]; \ - const T qua_thr = in[b2 + l3]; \ - const T qua_fou = in[b3 + l3]; \ - const T qua_fiv = in[b4 + l3]; \ - \ - const T cin_thr = in[b2 + l4]; \ - const T cin_fou = in[b3 + l4]; \ - \ - double two_times_tre_thrfou; \ - double two_times_trequa_thr; \ - double four_times_trequa_thrfou; \ + const double dos_two = in[dos_two_shift]; \ \ - nohalo_sharp_level_1( dos_thr, dos_fou, \ - tre_two, tre_thr, tre_fou, tre_fiv, \ - qua_two, qua_thr, qua_fou, qua_fiv, \ - cin_thr, cin_fou, \ - &two_times_tre_thrfou, \ - &two_times_trequa_thr, \ - &four_times_trequa_thrfou ); \ + nohalo_sharp_level_1( in[uno_two_shift], in[uno_thr_shift], \ + in[dos_one_shift], dos_two, \ + in[dos_thr_shift], in[dos_fou_shift], \ + in[tre_one_shift], in[tre_two_shift], \ + in[tre_thr_shift], in[tre_fou_shift], \ + in[qua_two_shift], in[qua_thr_shift], \ + &two_times_dos_twothr, \ + &two_times_dostre_two, \ + &four_times_dostre_twothr ); \ + \ + in += 1; \ \ const T result = bilinear_ ## inter( \ w_times_z, \ x_times_z_over_2, \ w_times_y_over_2, \ x_times_y_over_4, \ - tre_thr, \ - two_times_tre_thrfou, \ - two_times_trequa_thr, \ - four_times_trequa_thrfou ); \ + dos_two, \ + two_times_dos_twothr, \ + two_times_dostre_two, \ + four_times_dostre_twothr ); \ \ - out[z] = result; \ - \ - in += 1; \ + out[band] = result; \ } \ } @@ -640,44 +602,45 @@ G_DEFINE_TYPE( VipsInterpolateNohalo, vips_interpolate_nohalo, static void vips_interpolate_nohalo_interpolate( VipsInterpolate *interpolate, - PEL *out, - REGION *in, - double absolute_x, - double absolute_y ) + PEL *out, + REGION *in, + double absolute_x, + double absolute_y ) { /* * floor's surrogate FAST_PSEUDO_FLOOR is used to make sure that the * transition through 0 is smooth. If it is known that absolute_x - * and absolute_y will never be less than -.5, plain cast---that is, - * const int ix = absolute_x + .5---should be used instead. Any - * function which agrees with floor for non-integer values, and + * and absolute_y will never be less than 0, plain cast---that is, + * const int ix = absolute_x---should be used instead. Actually, + * any function which agrees with floor for non-integer values, and * picks one of the two possibilities for integer values, can be - * used. + * used. FAST_PSEUDO_FLOOR fits the bill. */ - const int ix = FAST_PSEUDO_FLOOR (absolute_x + 0.5); - const int iy = FAST_PSEUDO_FLOOR (absolute_y + 0.5); + const int ix = FAST_PSEUDO_FLOOR (absolute_x); + const int iy = FAST_PSEUDO_FLOOR (absolute_y); - /* Move the pointer to (the first band of) the central - pixel of the extended 5x5 stencil (tre_thr): - */ - const PEL * restrict p = - (PEL *) IM_REGION_ADDR( in, ix, iy ); + /* + * Move the pointer to (the first band of) the top/left pixel + * of the 2x2 group of pixel centers which contains the + * sampling location in its convex hull: + */ + const PEL * restrict p = (PEL *) IM_REGION_ADDR( in, ix, iy ); - /* VIPS versions of Nicolas's pixel addressing values. - */ - const int bands = in->im->Bands; - const int lskip = - IM_REGION_LSKIP( in ) / IM_IMAGE_SIZEOF_ELEMENT( in->im ); + /* + * VIPS versions of Nicolas's pixel addressing values: + */ + const int bands = in->im->Bands; + const int lskip = IM_REGION_LSKIP( in ) / IM_IMAGE_SIZEOF_ELEMENT( in->im ); /* * x is the x-coordinate of the sampling point relative to the - * position of the tre_thr pixel center. Similarly for y. Range of - * values: (-.5,.5]. + * position of the center of the convex hull of the 2x2 block of + * closest pixels. Similarly for y. Range of values: [-.5,.5). */ - const double relative_x = absolute_x - ix; - const double relative_y = absolute_y - iy; + const double relative_x = absolute_x - ix - .5; + const double relative_y = absolute_y - iy - .5; -#define CALL( T, inter ) \ +#define CALL( T, inter ) \ nohalo_sharp_level_1_ ## inter( out, \ p, \ bands, \ @@ -750,11 +713,11 @@ vips_interpolate_nohalo_class_init( VipsInterpolateNohaloClass *klass ) VIPS_INTERPOLATE_CLASS( klass ); object_class->nickname = "nohalo"; - object_class->description = _( "Bilinear plus edge enhance" ); + object_class->description = _( "Edge-enhancing bilinear" ); interpolate_class->interpolate = vips_interpolate_nohalo_interpolate; - interpolate_class->window_size = 5; + interpolate_class->window_size = 4; } static void