libvips/libvips/resample/templates.h

/* various interpolation templates
 */

/*

    This file is part of VIPS.

    VIPS is free software; you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
    02110-1301  USA

 */

/*

    These files are distributed with VIPS - http://www.vips.ecs.soton.ac.uk

 */

/*
 * FAST_PSEUDO_FLOOR is a floor and floorf replacement which has been
 * found to be faster on several linux boxes than the library
 * version. It returns the floor of its argument unless the argument
 * is a negative integer, in which case it returns one less than the
 * floor. For example:
 *
 * FAST_PSEUDO_FLOOR(0.5) = 0
 *
 * FAST_PSEUDO_FLOOR(0.) = 0
 *
 * FAST_PSEUDO_FLOOR(-.5) = -1
 *
 * as expected, but
 *
 * FAST_PSEUDO_FLOOR(-1.) = -2
 *
 * The locations of the discontinuities of FAST_PSEUDO_FLOOR are the
 * same as floor and floorf; it is just that at negative integers the
 * function is discontinuous on the right instead of the left.
 */
#define FAST_PSEUDO_FLOOR(x) ( (int)(x) - ( (x) < 0. ) )

/*
 * Various casts which assume that the data is already in range. (That
 * is, they are to be used with monotone samplers.)
 */
template <typename T> static T inline
to_fptypes( const double val )
{
	const T newval = val;

	return( newval );
}

template <typename T> static T inline
to_withsign( const double val )
{
	const int sign_of_val = 2 * ( val >= 0. ) - 1;
	const int rounded_abs_val = .5 + sign_of_val * val;
	const T newval = sign_of_val * rounded_abs_val;

	return( newval );
}

template <typename T> static T inline
to_nosign( const double val )
{
	const T newval = .5 + val;

	return( newval );
}

/*
 * Various bilinear implementation templates. Note that no clampling
 * is used: There is an assumption that the data is such that
 * over/underflow is not an issue:
 */

/*
 * Bilinear interpolation for float and double types. The first four
 * inputs are weights, the last four are the corresponding pixel
 * values:
 */
template <typename T> static T inline
bilinear_fptypes(
	const double w_times_z,
	const double x_times_z,
	const double w_times_y,
	const double x_times_y,
	const double tre_thr,
	const double tre_thrfou,
	const double trequa_thr,
	const double trequa_thrfou )
{
	const T newval =
		w_times_z * tre_thr +
		x_times_z * tre_thrfou +
		w_times_y * trequa_thr +
		x_times_y * trequa_thrfou;

	return( newval );
}

/*
 * Bilinear interpolation for signed integer types:
 */
template <typename T> static T inline
bilinear_withsign(
	const double w_times_z,
	const double x_times_z,
	const double w_times_y,
	const double x_times_y,
	const double tre_thr,
	const double tre_thrfou,
	const double trequa_thr,
	const double trequa_thrfou )
{
	const double val =
		w_times_z * tre_thr +
		x_times_z * tre_thrfou +
		w_times_y * trequa_thr +
		x_times_y * trequa_thrfou;

	const int sign_of_val = 2 * ( val >= 0. ) - 1;

	const int rounded_abs_val = .5 + sign_of_val * val;

	const T newval = sign_of_val * rounded_abs_val;

	return( newval );
}

/*
 * Bilinear Interpolation for unsigned integer types:
 */
template <typename T> static T inline
bilinear_nosign(
	const double w_times_z,
	const double x_times_z,
	const double w_times_y,
	const double x_times_y,
	const double tre_thr,
	const double tre_thrfou,
	const double trequa_thr,
	const double trequa_thrfou )
{
	const T newval =
		w_times_z * tre_thr +
		x_times_z * tre_thrfou +
		w_times_y * trequa_thr +
		x_times_y * trequa_thrfou +
		0.5;

	return( newval );
}

/*
 * Bicubic (Catmull-Rom) interpolation templates:
 */

static int inline
unsigned_fixed_round( int v )
{
	const int round_by = VIPS_INTERPOLATE_SCALE >> 1;

	return( (v + round_by) >> VIPS_INTERPOLATE_SHIFT );
}

/* Fixed-point integer bicubic, used for 8 and 16-bit types.
 */
template <typename T> static int inline
bicubic_unsigned_int(
	const T uno_one, const T uno_two, const T uno_thr, const T uno_fou,
	const T dos_one, const T dos_two, const T dos_thr, const T dos_fou,
	const T tre_one, const T tre_two, const T tre_thr, const T tre_fou,
	const T qua_one, const T qua_two, const T qua_thr, const T qua_fou,
	const int* restrict cx, const int* restrict cy )
{
	const int r0 = unsigned_fixed_round(
		cx[0] * uno_one +
		cx[1] * uno_two +
		cx[2] * uno_thr +
		cx[3] * uno_fou );

	const int r1 = unsigned_fixed_round(
		cx[0] * dos_one +
		cx[1] * dos_two +
		cx[2] * dos_thr +
		cx[3] * dos_fou );

	const int r2 = unsigned_fixed_round(
		cx[0] * tre_one +
		cx[1] * tre_two +
		cx[2] * tre_thr +
		cx[3] * tre_fou );

	const int r3 = unsigned_fixed_round(
		cx[0] * qua_one +
		cx[1] * qua_two +
		cx[2] * qua_thr +
		cx[3] * qua_fou );

	return( unsigned_fixed_round(
		cy[0] * r0 +
		cy[1] * r1 +
		cy[2] * r2 +
		cy[3] * r3 ) );
}

static int inline
signed_fixed_round( int v )
{
	const int sign_of_v = 2 * (v > 0) - 1;
	const int round_by = sign_of_v * (VIPS_INTERPOLATE_SCALE >> 1);

	return( (v + round_by) >> VIPS_INTERPOLATE_SHIFT );
}

/* Fixed-point integer bicubic, used for 8 and 16-bit types.
 */
template <typename T> static int inline
bicubic_signed_int(
	const T uno_one, const T uno_two, const T uno_thr, const T uno_fou,
	const T dos_one, const T dos_two, const T dos_thr, const T dos_fou,
	const T tre_one, const T tre_two, const T tre_thr, const T tre_fou,
	const T qua_one, const T qua_two, const T qua_thr, const T qua_fou,
	const int* restrict cx, const int* restrict cy )
{
	const int r0 = signed_fixed_round(
		cx[0] * uno_one +
		cx[1] * uno_two +
		cx[2] * uno_thr +
		cx[3] * uno_fou );

	const int r1 = signed_fixed_round(
		cx[0] * dos_one +
		cx[1] * dos_two +
		cx[2] * dos_thr +
		cx[3] * dos_fou );

	const int r2 = signed_fixed_round(
		cx[0] * tre_one +
		cx[1] * tre_two +
		cx[2] * tre_thr +
		cx[3] * tre_fou );

	const int r3 = signed_fixed_round(
		cx[0] * qua_one +
		cx[1] * qua_two +
		cx[2] * qua_thr +
		cx[3] * qua_fou );

	return( signed_fixed_round(
		cy[0] * r0 +
		cy[1] * r1 +
		cy[2] * r2 +
		cy[3] * r3 ) );
}

/* Floating-point bicubic, used for int/float/double types.
 */
template <typename T> static T inline
bicubic_float(
	const T uno_one, const T uno_two, const T uno_thr, const T uno_fou,
	const T dos_one, const T dos_two, const T dos_thr, const T dos_fou,
	const T tre_one, const T tre_two, const T tre_thr, const T tre_fou,
	const T qua_one, const T qua_two, const T qua_thr, const T qua_fou,
	const double* restrict cx, const double* restrict cy )
{
	return(
		cy[0] * (cx[0] * uno_one +
			 cx[1] * uno_two +
			 cx[2] * uno_thr +
			 cx[3] * uno_fou)
                +
		cy[1] * (cx[0] * dos_one +
			 cx[1] * dos_two +
			 cx[2] * dos_thr +
			 cx[3] * dos_fou)
                +
		cy[2] * (cx[0] * tre_one +
			 cx[1] * tre_two +
			 cx[2] * tre_thr +
			 cx[3] * tre_fou)
                +
		cy[3] * (cx[0] * qua_one +
			 cx[1] * qua_two +
			 cx[2] * qua_thr +
			 cx[3] * qua_fou) );
}

/* Given an offset in [0,1] (we can have x == 1 when building tables),
 * calculate c0, c1, c2, c3, the catmull-rom coefficients. This is called
 * from the interpolator as well as from the table builder.
 */
static void inline
calculate_coefficients_catmull( const double x, double c[4] )
{
	/* Nicolas believes that the following is an hitherto unknown
	 * hyper-efficient method of computing Catmull-Rom coefficients. It
	 * only uses 4* & 1+ & 5- for a total of only 10 flops to compute
	 * four coefficients.
	 */
	const double cr1  = 1. - x;
	const double cr2  = -.5 * x;
	const double cr3  = cr1 * cr2;
	const double cone = cr1 * cr3;
	const double cfou = x * cr3;
	const double cr4  = cfou - cone;
	const double ctwo = cr1 - cone + cr4;
	const double cthr = x - cfou - cr4;

	g_assert( x >= 0. && x <= 1. );

	c[0] = cone;
	c[3] = cfou;
	c[1] = ctwo;
	c[2] = cthr;
}