finish hl3 version
vl3 nest
This commit is contained in:
parent
a26291a426
commit
013102de01
@ -138,7 +138,7 @@ reduceh_signed_int_tab( VipsPel *pout, const VipsPel *pin,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Floating-point version, for int/float types.
|
/* Floating-point version.
|
||||||
*/
|
*/
|
||||||
template <typename T>
|
template <typename T>
|
||||||
static void inline
|
static void inline
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
*
|
*
|
||||||
* 29/1/16
|
* 29/1/16
|
||||||
* - from shrinkh.c
|
* - from shrinkh.c
|
||||||
|
* 10/3/16
|
||||||
|
* - add other kernels
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -156,6 +158,56 @@ vips_reducehl3_make_mask( VipsKernel kernel, double x, double *c )
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* A 4-point interpolation on uint8 is the most common case ... unroll that.
|
||||||
|
*
|
||||||
|
* The inner loop here won't vectorise, but our inner loop doesn't run for
|
||||||
|
* long enough for vectorisation to be useful :-( gcc says it needs about an
|
||||||
|
* 11-point kernel for the vector version to be worthwhile.
|
||||||
|
*/
|
||||||
|
static void inline
|
||||||
|
reducehl3_unsigned_uint8_4tab( VipsPel *out, const VipsPel *in,
|
||||||
|
const int bands, const int *cx )
|
||||||
|
{
|
||||||
|
const int b1 = bands;
|
||||||
|
const int b2 = b1 + b1;
|
||||||
|
const int b3 = b1 + b2;
|
||||||
|
|
||||||
|
const int c0 = cx[0];
|
||||||
|
const int c1 = cx[1];
|
||||||
|
const int c2 = cx[2];
|
||||||
|
const int c3 = cx[3];
|
||||||
|
|
||||||
|
for( int z = 0; z < bands; z++ ) {
|
||||||
|
int cubich = unsigned_fixed_round(
|
||||||
|
c0 * in[0] +
|
||||||
|
c1 * in[b1] +
|
||||||
|
c2 * in[b2] +
|
||||||
|
c3 * in[b3] );
|
||||||
|
|
||||||
|
cubich = VIPS_CLIP( 0, cubich, 255 );
|
||||||
|
|
||||||
|
out[z] = cubich;
|
||||||
|
|
||||||
|
in += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Our inner loop. Operate on elements of size T, gather results in an
|
||||||
|
* intermediate of type IT.
|
||||||
|
*/
|
||||||
|
template <typename T, typename IT>
|
||||||
|
static IT
|
||||||
|
reducehl3_sum( const T * restrict in, int bands, const IT * restrict c, int n )
|
||||||
|
{
|
||||||
|
IT sum;
|
||||||
|
|
||||||
|
sum = 0;
|
||||||
|
for( int i = 0; i < n; i++ )
|
||||||
|
sum += c[i] * in[i * bands];
|
||||||
|
|
||||||
|
return( sum );
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, int max_value>
|
template <typename T, int max_value>
|
||||||
static void inline
|
static void inline
|
||||||
reducehl3_unsigned_int_tab( VipsReducehl3 *reducehl3,
|
reducehl3_unsigned_int_tab( VipsReducehl3 *reducehl3,
|
||||||
@ -167,13 +219,9 @@ reducehl3_unsigned_int_tab( VipsReducehl3 *reducehl3,
|
|||||||
|
|
||||||
for( int z = 0; z < bands; z++ ) {
|
for( int z = 0; z < bands; z++ ) {
|
||||||
int sum;
|
int sum;
|
||||||
|
|
||||||
sum = 0;
|
sum = reducehl3_sum<T, int>(in, bands, cx, reducehl3->n_points);
|
||||||
for( int i = 0; i < reducehl3->n_points; i++ )
|
|
||||||
sum += cx[i] * in[i * bands];
|
|
||||||
|
|
||||||
sum = unsigned_fixed_round( sum );
|
sum = unsigned_fixed_round( sum );
|
||||||
|
|
||||||
sum = VIPS_CLIP( 0, sum, max_value );
|
sum = VIPS_CLIP( 0, sum, max_value );
|
||||||
|
|
||||||
out[z] = sum;
|
out[z] = sum;
|
||||||
@ -182,6 +230,113 @@ reducehl3_unsigned_int_tab( VipsReducehl3 *reducehl3,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, int min_value, int max_value>
|
||||||
|
static void inline
|
||||||
|
reducehl3_signed_int_tab( VipsReducehl3 *reducehl3,
|
||||||
|
VipsPel *pout, const VipsPel *pin,
|
||||||
|
const int bands, const int * restrict cx )
|
||||||
|
{
|
||||||
|
T* restrict out = (T *) pout;
|
||||||
|
const T* restrict in = (T *) pin;
|
||||||
|
|
||||||
|
for( int z = 0; z < bands; z++ ) {
|
||||||
|
int sum;
|
||||||
|
|
||||||
|
sum = reducehl3_sum<T, int>(in, bands, cx, reducehl3->n_points);
|
||||||
|
sum = signed_fixed_round( sum );
|
||||||
|
sum = VIPS_CLIP( min_value, sum, max_value );
|
||||||
|
|
||||||
|
out[z] = sum;
|
||||||
|
|
||||||
|
in += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Floating-point version.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
static void inline
|
||||||
|
reducehl3_float_tab( VipsReducehl3 *reducehl3,
|
||||||
|
VipsPel *pout, const VipsPel *pin,
|
||||||
|
const int bands, const double *cx )
|
||||||
|
{
|
||||||
|
T* restrict out = (T *) pout;
|
||||||
|
const T* restrict in = (T *) pin;
|
||||||
|
|
||||||
|
for( int z = 0; z < bands; z++ ) {
|
||||||
|
out[z] = reducehl3_sum<T, double>
|
||||||
|
(in, bands, cx, reducehl3->n_points);
|
||||||
|
in += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 32-bit int output needs a double intermediate.
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <typename T, int max_value>
|
||||||
|
static void inline
|
||||||
|
reducehl3_unsigned_int32_tab( VipsReducehl3 *reducehl3,
|
||||||
|
VipsPel *pout, const VipsPel *pin,
|
||||||
|
const int bands, const double * restrict cx )
|
||||||
|
{
|
||||||
|
T* restrict out = (T *) pout;
|
||||||
|
const T* restrict in = (T *) pin;
|
||||||
|
|
||||||
|
for( int z = 0; z < bands; z++ ) {
|
||||||
|
double sum;
|
||||||
|
|
||||||
|
sum = reducehl3_sum<T, double>
|
||||||
|
(in, bands, cx, reducehl3->n_points);
|
||||||
|
out[z] = VIPS_CLIP( 0, sum, max_value );
|
||||||
|
|
||||||
|
in += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, int min_value, int max_value>
|
||||||
|
static void inline
|
||||||
|
reducehl3_signed_int32_tab( VipsReducehl3 *reducehl3,
|
||||||
|
VipsPel *pout, const VipsPel *pin,
|
||||||
|
const int bands, const double * restrict cx )
|
||||||
|
{
|
||||||
|
T* restrict out = (T *) pout;
|
||||||
|
const T* restrict in = (T *) pin;
|
||||||
|
|
||||||
|
for( int z = 0; z < bands; z++ ) {
|
||||||
|
double sum;
|
||||||
|
|
||||||
|
sum = reducehl3_sum<T, double>
|
||||||
|
(in, bands, cx, reducehl3->n_points);
|
||||||
|
sum = VIPS_CLIP( min_value, sum, max_value );
|
||||||
|
out[z] = sum;
|
||||||
|
|
||||||
|
in += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ultra-high-quality version for double images.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
static void inline
|
||||||
|
reducehl3_notab( VipsReducehl3 *reducehl3,
|
||||||
|
VipsPel *pout, const VipsPel *pin,
|
||||||
|
const int bands, double x )
|
||||||
|
{
|
||||||
|
T* restrict out = (T *) pout;
|
||||||
|
const T* restrict in = (T *) pin;
|
||||||
|
|
||||||
|
double cx[MAX_POINTS];
|
||||||
|
|
||||||
|
vips_reducehl3_make_mask( reducehl3->kernel, x, cx );
|
||||||
|
|
||||||
|
for( int z = 0; z < bands; z++ ) {
|
||||||
|
out[z] = reducehl3_sum<T, double>
|
||||||
|
(in, bands, cx, reducehl3->n_points);
|
||||||
|
|
||||||
|
in += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
vips_reducehl3_gen( VipsRegion *out_region, void *seq,
|
vips_reducehl3_gen( VipsRegion *out_region, void *seq,
|
||||||
void *a, void *b, gboolean *stop )
|
void *a, void *b, gboolean *stop )
|
||||||
@ -231,12 +386,63 @@ vips_reducehl3_gen( VipsRegion *out_region, void *seq,
|
|||||||
|
|
||||||
switch( in->BandFmt ) {
|
switch( in->BandFmt ) {
|
||||||
case VIPS_FORMAT_UCHAR:
|
case VIPS_FORMAT_UCHAR:
|
||||||
reducehl3_unsigned_int_tab
|
if( reducehl3->n_points == 4 )
|
||||||
<unsigned char, UCHAR_MAX>(
|
reducehl3_unsigned_uint8_4tab(
|
||||||
|
q, p, bands, cxi );
|
||||||
|
else
|
||||||
|
reducehl3_unsigned_int_tab
|
||||||
|
<unsigned char, UCHAR_MAX>(
|
||||||
|
reducehl3,
|
||||||
|
q, p, bands, cxi );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_CHAR:
|
||||||
|
reducehl3_signed_int_tab
|
||||||
|
<signed char, SCHAR_MIN, SCHAR_MAX>(
|
||||||
reducehl3,
|
reducehl3,
|
||||||
q, p, bands, cxi );
|
q, p, bands, cxi );
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_USHORT:
|
||||||
|
reducehl3_unsigned_int_tab
|
||||||
|
<unsigned short, USHRT_MAX>(
|
||||||
|
reducehl3,
|
||||||
|
q, p, bands, cxi );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_SHORT:
|
||||||
|
reducehl3_signed_int_tab
|
||||||
|
<signed short, SHRT_MIN, SHRT_MAX>(
|
||||||
|
reducehl3,
|
||||||
|
q, p, bands, cxi );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_UINT:
|
||||||
|
reducehl3_unsigned_int32_tab
|
||||||
|
<unsigned int, INT_MAX>(
|
||||||
|
reducehl3,
|
||||||
|
q, p, bands, cxf );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_INT:
|
||||||
|
reducehl3_signed_int32_tab
|
||||||
|
<signed int, INT_MIN, INT_MAX>(
|
||||||
|
reducehl3,
|
||||||
|
q, p, bands, cxf );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_FLOAT:
|
||||||
|
case VIPS_FORMAT_COMPLEX:
|
||||||
|
reducehl3_float_tab<float>( reducehl3,
|
||||||
|
q, p, bands, cxf );
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VIPS_FORMAT_DOUBLE:
|
||||||
|
case VIPS_FORMAT_DPCOMPLEX:
|
||||||
|
reducehl3_notab<double>( reducehl3,
|
||||||
|
q, p, bands, X - ix );
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user