still trying to get reducevl3 to vectorise
This commit is contained in:
parent
f58d941d99
commit
4d10bd12f9
@ -65,8 +65,12 @@ typedef struct _VipsResampleClass {
|
||||
|
||||
GType vips_resample_get_type( void );
|
||||
|
||||
int vips_reducehl3_get_points( VipsKernel kernel );
|
||||
void vips_reducehl3_make_mask( VipsKernel kernel, double x, double *c );
|
||||
/* The max size of the vector we use.
|
||||
*/
|
||||
#define MAX_POINTS (6)
|
||||
|
||||
int vips_reduce_get_points( VipsKernel kernel );
|
||||
void vips_reduce_make_mask( VipsKernel kernel, double x, double *c );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -64,10 +64,6 @@
|
||||
* 1D resampling kernels.
|
||||
*/
|
||||
|
||||
/* The max size of the vector we use.
|
||||
*/
|
||||
#define MAX_POINTS (6)
|
||||
|
||||
typedef struct _VipsReducehl3 {
|
||||
VipsResample parent_instance;
|
||||
|
||||
@ -101,7 +97,7 @@ G_DEFINE_TYPE( VipsReducehl3, vips_reducehl3, VIPS_TYPE_RESAMPLE );
|
||||
/* Get n points.
|
||||
*/
|
||||
int
|
||||
vips_reducehl3_get_points( VipsKernel kernel )
|
||||
vips_reduce_get_points( VipsKernel kernel )
|
||||
{
|
||||
switch( kernel ) {
|
||||
case VIPS_KERNEL_NEAREST:
|
||||
@ -128,7 +124,7 @@ vips_reducehl3_get_points( VipsKernel kernel )
|
||||
/* Calculate a mask.
|
||||
*/
|
||||
void
|
||||
vips_reducehl3_make_mask( VipsKernel kernel, double x, double *c )
|
||||
vips_reduce_make_mask( VipsKernel kernel, double x, double *c )
|
||||
{
|
||||
switch( kernel ) {
|
||||
case VIPS_KERNEL_NEAREST:
|
||||
@ -136,8 +132,8 @@ vips_reducehl3_make_mask( VipsKernel kernel, double x, double *c )
|
||||
break;
|
||||
|
||||
case VIPS_KERNEL_LINEAR:
|
||||
c[0] = x;
|
||||
c[1] = 1.0 - x;
|
||||
c[0] = 1.0 - x;
|
||||
c[1] = x;
|
||||
break;
|
||||
|
||||
case VIPS_KERNEL_CUBIC:
|
||||
@ -192,22 +188,6 @@ reducehl3_unsigned_uint8_4tab( VipsPel *out, const VipsPel *in,
|
||||
}
|
||||
}
|
||||
|
||||
/* Our inner loop. Operate on elements of size T, gather results in an
|
||||
* intermediate of type IT.
|
||||
*/
|
||||
template <typename T, typename IT>
|
||||
static IT
|
||||
reducehl3_sum( const T * restrict in, int bands, const IT * restrict c, int n )
|
||||
{
|
||||
IT sum;
|
||||
|
||||
sum = 0;
|
||||
for( int i = 0; i < n; i++ )
|
||||
sum += c[i] * in[i * bands];
|
||||
|
||||
return( sum );
|
||||
}
|
||||
|
||||
template <typename T, int max_value>
|
||||
static void inline
|
||||
reducehl3_unsigned_int_tab( VipsReducehl3 *reducehl3,
|
||||
@ -216,11 +196,12 @@ reducehl3_unsigned_int_tab( VipsReducehl3 *reducehl3,
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
const int n = reducehl3->n_points;
|
||||
|
||||
for( int z = 0; z < bands; z++ ) {
|
||||
int sum;
|
||||
|
||||
sum = reducehl3_sum<T, int>(in, bands, cx, reducehl3->n_points);
|
||||
sum = reduce_sum<T, int>( in, bands, cx, n );
|
||||
sum = unsigned_fixed_round( sum );
|
||||
sum = VIPS_CLIP( 0, sum, max_value );
|
||||
|
||||
@ -238,11 +219,12 @@ reducehl3_signed_int_tab( VipsReducehl3 *reducehl3,
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
const int n = reducehl3->n_points;
|
||||
|
||||
for( int z = 0; z < bands; z++ ) {
|
||||
int sum;
|
||||
|
||||
sum = reducehl3_sum<T, int>(in, bands, cx, reducehl3->n_points);
|
||||
sum = reduce_sum<T, int>( in, bands, cx, n );
|
||||
sum = signed_fixed_round( sum );
|
||||
sum = VIPS_CLIP( min_value, sum, max_value );
|
||||
|
||||
@ -262,10 +244,10 @@ reducehl3_float_tab( VipsReducehl3 *reducehl3,
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
const int n = reducehl3->n_points;
|
||||
|
||||
for( int z = 0; z < bands; z++ ) {
|
||||
out[z] = reducehl3_sum<T, double>
|
||||
(in, bands, cx, reducehl3->n_points);
|
||||
out[z] = reduce_sum<T, double>( in, bands, cx, n );
|
||||
in += 1;
|
||||
}
|
||||
}
|
||||
@ -281,12 +263,12 @@ reducehl3_unsigned_int32_tab( VipsReducehl3 *reducehl3,
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
const int n = reducehl3->n_points;
|
||||
|
||||
for( int z = 0; z < bands; z++ ) {
|
||||
double sum;
|
||||
|
||||
sum = reducehl3_sum<T, double>
|
||||
(in, bands, cx, reducehl3->n_points);
|
||||
sum = reduce_sum<T, double>( in, bands, cx, n );
|
||||
out[z] = VIPS_CLIP( 0, sum, max_value );
|
||||
|
||||
in += 1;
|
||||
@ -301,12 +283,12 @@ reducehl3_signed_int32_tab( VipsReducehl3 *reducehl3,
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
const int n = reducehl3->n_points;
|
||||
|
||||
for( int z = 0; z < bands; z++ ) {
|
||||
double sum;
|
||||
|
||||
sum = reducehl3_sum<T, double>
|
||||
(in, bands, cx, reducehl3->n_points);
|
||||
sum = reduce_sum<T, double>( in, bands, cx, n );
|
||||
sum = VIPS_CLIP( min_value, sum, max_value );
|
||||
out[z] = sum;
|
||||
|
||||
@ -324,14 +306,14 @@ reducehl3_notab( VipsReducehl3 *reducehl3,
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
const int n = reducehl3->n_points;
|
||||
|
||||
double cx[MAX_POINTS];
|
||||
|
||||
vips_reducehl3_make_mask( reducehl3->kernel, x, cx );
|
||||
vips_reduce_make_mask( reducehl3->kernel, x, cx );
|
||||
|
||||
for( int z = 0; z < bands; z++ ) {
|
||||
out[z] = reducehl3_sum<T, double>
|
||||
(in, bands, cx, reducehl3->n_points);
|
||||
out[z] = reduce_sum<T, double>( in, bands, cx, n );
|
||||
|
||||
in += 1;
|
||||
}
|
||||
@ -488,9 +470,9 @@ vips_reducehl3_build( VipsObject *object )
|
||||
|
||||
/* Build the tables of pre-computed coefficients.
|
||||
*/
|
||||
reducehl3->n_points = vips_reducehl3_get_points( reducehl3->kernel );
|
||||
reducehl3->n_points = vips_reduce_get_points( reducehl3->kernel );
|
||||
for( int x = 0; x < VIPS_TRANSFORM_SCALE + 1; x++ ) {
|
||||
vips_reducehl3_make_mask( reducehl3->kernel,
|
||||
vips_reduce_make_mask( reducehl3->kernel,
|
||||
(float) x / VIPS_TRANSFORM_SCALE,
|
||||
reducehl3->matrixf[x] );
|
||||
|
||||
@ -575,7 +557,7 @@ vips_reducehl3_class_init( VipsReducehl3Class *reducehl3_class )
|
||||
|
||||
VIPS_ARG_ENUM( reducehl3_class, "kernel", 3,
|
||||
_( "Kernel" ),
|
||||
_( "Resamling kernel" ),
|
||||
_( "Resampling kernel" ),
|
||||
VIPS_ARGUMENT_OPTIONAL_INPUT,
|
||||
G_STRUCT_OFFSET( VipsReducehl3, kernel ),
|
||||
VIPS_TYPE_KERNEL, VIPS_KERNEL_CUBIC );
|
||||
|
@ -2,6 +2,8 @@
|
||||
*
|
||||
* 29/1/16
|
||||
* - from shrinkv.c
|
||||
* 10/3/16
|
||||
* - add other kernels
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -51,25 +53,34 @@
|
||||
#include "presample.h"
|
||||
#include "templates.h"
|
||||
|
||||
/* The max size of the vector we use.
|
||||
*/
|
||||
#define MAX_POINTS (6)
|
||||
|
||||
typedef struct _VipsReducevl3 {
|
||||
VipsResample parent_instance;
|
||||
|
||||
double yshrink; /* Shrink factor */
|
||||
|
||||
/* The thing we use to make the kernel.
|
||||
*/
|
||||
VipsKernel kernel;
|
||||
|
||||
/* Number of points in kernel.
|
||||
*/
|
||||
int n_points;
|
||||
|
||||
/* Precalculated interpolation matrices. int (used for pel
|
||||
* sizes up to short), and double (for all others). We go to
|
||||
* scale + 1 so we can round-to-nearest safely.
|
||||
*/
|
||||
int matrixi[VIPS_TRANSFORM_SCALE + 1][MAX_POINTS];
|
||||
double matrixf[VIPS_TRANSFORM_SCALE + 1][MAX_POINTS];
|
||||
|
||||
} VipsReducevl3;
|
||||
|
||||
typedef VipsResampleClass VipsReducevl3Class;
|
||||
|
||||
/* Precalculated interpolation matrices. int (used for pel
|
||||
* sizes up to short), and double (for all others). We go to
|
||||
* scale + 1 so we can round-to-nearest safely.
|
||||
*/
|
||||
|
||||
const int n_points = 6;
|
||||
|
||||
static int vips_reducevl3_matrixi[VIPS_TRANSFORM_SCALE + 1][n_points];
|
||||
static double vips_reducevl3_matrixf[VIPS_TRANSFORM_SCALE + 1][n_points];
|
||||
|
||||
/* We need C linkage for this.
|
||||
*/
|
||||
extern "C" {
|
||||
@ -78,29 +89,33 @@ G_DEFINE_TYPE( VipsReducevl3, vips_reducevl3, VIPS_TYPE_RESAMPLE );
|
||||
|
||||
template <typename T, int max_value>
|
||||
static void inline
|
||||
reducevl3_unsigned_int_tab( VipsPel *pout, const VipsPel *pin,
|
||||
reducevl3_unsigned_int_tab( VipsReducevl3 *reducevl3,
|
||||
VipsPel *pout, const VipsPel *pin,
|
||||
const int ne, const int lskip,
|
||||
const int * restrict cy )
|
||||
{
|
||||
T* restrict out = (T *) pout;
|
||||
const T* restrict in = (T *) pin;
|
||||
|
||||
const int n = reducevl3->n_points;
|
||||
const int l1 = lskip / sizeof( T );
|
||||
const int round_by = VIPS_INTERPOLATE_SCALE >> 1;
|
||||
|
||||
for( int z = 0; z < ne; z++ ) {
|
||||
int sum;
|
||||
|
||||
sum = 0;
|
||||
for( int i = 0; i < n_points; i++ )
|
||||
sum += cy[i] * in[i * l1];
|
||||
sum = 0;
|
||||
for( int i = 0; i < n; i++ )
|
||||
sum += cy[i] * in[z + i * l1];
|
||||
|
||||
sum = unsigned_fixed_round( sum );
|
||||
sum = (sum + round_by) >> VIPS_INTERPOLATE_SHIFT;
|
||||
|
||||
sum = VIPS_CLIP( 0, sum, max_value );
|
||||
//sum = reduce_sum<T, int>( in, l1, cy, n );
|
||||
//sum = unsigned_fixed_round( sum );
|
||||
//sum = VIPS_CLIP( 0, sum, max_value );
|
||||
|
||||
out[z] = sum;
|
||||
|
||||
in += 1;
|
||||
//in += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,7 +144,7 @@ vips_reducevl3_gen( VipsRegion *out_region, void *seq,
|
||||
s.left = r->left;
|
||||
s.top = r->top * reducevl3->yshrink;
|
||||
s.width = r->width;
|
||||
s.height = r->height * reducevl3->yshrink + n_points;
|
||||
s.height = r->height * reducevl3->yshrink + reducevl3->n_points;
|
||||
if( vips_region_prepare( ir, &s ) )
|
||||
return( -1 );
|
||||
|
||||
@ -142,14 +157,15 @@ vips_reducevl3_gen( VipsRegion *out_region, void *seq,
|
||||
const int sy = Y * VIPS_TRANSFORM_SCALE * 2;
|
||||
const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
|
||||
const int ty = (siy + 1) >> 1;
|
||||
const int *cyi = vips_reducevl3_matrixi[ty];
|
||||
const double *cyf = vips_reducevl3_matrixf[ty];
|
||||
const int *cyi = reducevl3->matrixi[ty];
|
||||
const double *cyf = reducevl3->matrixf[ty];
|
||||
const int lskip = VIPS_REGION_LSKIP( ir );
|
||||
|
||||
switch( in->BandFmt ) {
|
||||
case VIPS_FORMAT_UCHAR:
|
||||
reducevl3_unsigned_int_tab
|
||||
<unsigned char, UCHAR_MAX>(
|
||||
reducevl3,
|
||||
q, p, ne, lskip, cyi );
|
||||
break;
|
||||
|
||||
@ -191,6 +207,19 @@ vips_reducevl3_build( VipsObject *object )
|
||||
if( reducevl3->yshrink == 1 )
|
||||
return( vips_image_write( in, resample->out ) );
|
||||
|
||||
/* Build the tables of pre-computed coefficients.
|
||||
*/
|
||||
reducevl3->n_points = vips_reduce_get_points( reducevl3->kernel );
|
||||
for( int y = 0; y < VIPS_TRANSFORM_SCALE + 1; y++ ) {
|
||||
vips_reduce_make_mask( reducevl3->kernel,
|
||||
(float) y / VIPS_TRANSFORM_SCALE,
|
||||
reducevl3->matrixf[y] );
|
||||
|
||||
for( int i = 0; i < reducevl3->n_points; i++ )
|
||||
reducevl3->matrixi[y][i] = reducevl3->matrixf[y][i] *
|
||||
VIPS_INTERPOLATE_SCALE;
|
||||
}
|
||||
|
||||
/* Unpack for processing.
|
||||
*/
|
||||
if( vips_image_decode( in, &t[0] ) )
|
||||
@ -200,8 +229,8 @@ vips_reducevl3_build( VipsObject *object )
|
||||
/* Add new pixels around the input so we can interpolate at the edges.
|
||||
*/
|
||||
if( vips_embed( in, &t[1],
|
||||
0, n_points / 2,
|
||||
in->Xsize, in->Ysize + n_points - 1,
|
||||
0, reducevl3->n_points / 2,
|
||||
in->Xsize, in->Ysize + reducevl3->n_points - 1,
|
||||
"extend", VIPS_EXTEND_COPY,
|
||||
NULL ) )
|
||||
return( -1 );
|
||||
@ -217,7 +246,7 @@ vips_reducevl3_build( VipsObject *object )
|
||||
* example, vipsthumbnail knows the true reduce factor (including the
|
||||
* fractional part), we just see the integer part here.
|
||||
*/
|
||||
resample->out->Ysize = (in->Ysize - n_points + 1) / reducevl3->yshrink;
|
||||
resample->out->Ysize = (in->Ysize - reducevl3->n_points + 1) / reducevl3->yshrink;
|
||||
if( resample->out->Ysize <= 0 ) {
|
||||
vips_error( object_class->nickname,
|
||||
"%s", _( "image has shrunk to nothing" ) );
|
||||
@ -264,24 +293,20 @@ vips_reducevl3_class_init( VipsReducevl3Class *reducevl3_class )
|
||||
G_STRUCT_OFFSET( VipsReducevl3, yshrink ),
|
||||
1, 1000000, 1 );
|
||||
|
||||
/* Build the tables of pre-computed coefficients.
|
||||
*/
|
||||
for( int y = 0; y < VIPS_TRANSFORM_SCALE + 1; y++ ) {
|
||||
calculate_coefficients_lanczos( 3,
|
||||
(float) y / VIPS_TRANSFORM_SCALE,
|
||||
vips_reducevl3_matrixf[y] );
|
||||
VIPS_ARG_ENUM( reducevl3_class, "kernel", 3,
|
||||
_( "Kernel" ),
|
||||
_( "Resampling kernel" ),
|
||||
VIPS_ARGUMENT_OPTIONAL_INPUT,
|
||||
G_STRUCT_OFFSET( VipsReducevl3, kernel ),
|
||||
VIPS_TYPE_KERNEL, VIPS_KERNEL_CUBIC );
|
||||
|
||||
for( int i = 0; i < n_points; i++ )
|
||||
vips_reducevl3_matrixi[y][i] =
|
||||
vips_reducevl3_matrixf[y][i] *
|
||||
VIPS_INTERPOLATE_SCALE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
vips_reducevl3_init( VipsReducevl3 *reducevl3 )
|
||||
{
|
||||
reducevl3->kernel = VIPS_KERNEL_CUBIC;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -291,8 +316,12 @@ vips_reducevl3_init( VipsReducevl3 *reducevl3 )
|
||||
* @yshrink: vertical reduce
|
||||
* @...: %NULL-terminated list of optional named arguments
|
||||
*
|
||||
* Optional arguments:
|
||||
*
|
||||
* @kernel: #VipsKernel to use to interpolate (default: cubic)
|
||||
*
|
||||
* Reduce @in vertically by a float factor. The pixels in @out are
|
||||
* interpolated with a 1D cubic mask. This operation will not work well for
|
||||
* interpolated with a 1D mask. This operation will not work well for
|
||||
* a reduction of more than a factor of two.
|
||||
*
|
||||
* This is a very low-level operation: see vips_resize() for a more
|
||||
|
@ -339,3 +339,19 @@ calculate_coefficients_lanczos( int a, const double x, double *c )
|
||||
c[i] = l;
|
||||
}
|
||||
}
|
||||
|
||||
/* Our inner loop for resampling with a convolution. Operate on elements of
|
||||
* size T, gather results in an intermediate of type IT.
|
||||
*/
|
||||
template <typename T, typename IT>
|
||||
static IT
|
||||
reduce_sum( const T * restrict in, int stride, const IT * restrict c, int n )
|
||||
{
|
||||
IT sum;
|
||||
|
||||
sum = 0;
|
||||
for( int i = 0; i < n; i++ )
|
||||
sum += c[i] * in[i * stride];
|
||||
|
||||
return( sum );
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user