all done
This commit is contained in:
parent
36f21138d7
commit
008fd21728
@ -27,6 +27,7 @@
|
||||
- better gobject-introspection annotations, thanks astavale
|
||||
- vips_image_write() severs all links between images, when it can ... thanks
|
||||
Warren and Nakilon
|
||||
- vector path for convolution is more accurate and can handle larger masks
|
||||
|
||||
29/8/17 started 8.5.9
|
||||
- make --fail stop jpeg read on any libjpeg warning, thanks @mceachen
|
||||
|
@ -76,6 +76,8 @@
|
||||
* - remove pts for a small speedup
|
||||
* 12/10/17
|
||||
* - fix leak of vectors, thanks MHeimbuc
|
||||
* 14/10/17
|
||||
* - switch to half-float for vector path
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -108,8 +110,8 @@
|
||||
/*
|
||||
#define DEBUG
|
||||
#define DEBUG_PIXELS
|
||||
*/
|
||||
#define DEBUG_COMPILE
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
@ -124,13 +126,6 @@
|
||||
|
||||
#include "pconvolution.h"
|
||||
|
||||
/* We do the 8-bit vector path with fixed-point arithmetic. We use 3.5 bits
|
||||
* for the mask coefficients, so our range is -4 to +3.99, after using scale
|
||||
* on the mask.
|
||||
*/
|
||||
#define FIXED_BITS (5)
|
||||
#define FIXED_SCALE (1 << FIXED_BITS)
|
||||
|
||||
/* Larger than this and we fall back to C.
|
||||
*/
|
||||
#define MAX_PASS (20)
|
||||
@ -154,6 +149,7 @@ typedef struct {
|
||||
/* An int version of M.
|
||||
*/
|
||||
VipsImage *iM;
|
||||
int n_point; /* w * h for our matrix */
|
||||
|
||||
/* We make a smaller version of the mask with the zeros squeezed out.
|
||||
*/
|
||||
@ -161,7 +157,7 @@ typedef struct {
|
||||
int *coeff; /* Array of non-zero mask coefficients */
|
||||
int *coeff_pos; /* Index of each nnz element in mask->coeff */
|
||||
|
||||
/* And a half float version for a vector path. mant has the signed
|
||||
/* And a half float version for the vector path. mant has the signed
|
||||
* 8-bit mantissas in [-1, +1), sexp has the exponent shift after the
|
||||
* mul and before the add, and exp has the final exponent shift before
|
||||
* write-back.
|
||||
@ -169,7 +165,6 @@ typedef struct {
|
||||
int *mant;
|
||||
int sexp;
|
||||
int exp;
|
||||
int n_point; /* Number of points in fixed-point array */
|
||||
|
||||
/* The set of passes we need for this mask.
|
||||
*/
|
||||
@ -180,10 +175,6 @@ typedef struct {
|
||||
*/
|
||||
int r;
|
||||
VipsVector *vector;
|
||||
|
||||
/* Remove later.
|
||||
*/
|
||||
int *fixed;
|
||||
} VipsConvi;
|
||||
|
||||
typedef VipsConvolutionClass VipsConviClass;
|
||||
@ -353,11 +344,13 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
|
||||
|
||||
char source[256];
|
||||
char off[256];
|
||||
char rnd[256];
|
||||
char sexp[256];
|
||||
char coeff[256];
|
||||
|
||||
/* Exclude zero elements.
|
||||
*/
|
||||
if( !convi->fixed[i] )
|
||||
if( !convi->mant[i] )
|
||||
continue;
|
||||
|
||||
/* The source. sl0 is the first scanline in the mask.
|
||||
@ -379,9 +372,16 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
|
||||
* of the image and coefficient are interesting, so we can take
|
||||
* the bottom half of a 16x16->32 multiply.
|
||||
*/
|
||||
CONST( coeff, convi->fixed[i], 2 );
|
||||
CONST( coeff, convi->mant[i], 2 );
|
||||
ASM3( "mullw", "value", "value", coeff );
|
||||
|
||||
/* Shift right before add to prevent overflow on large masks.
|
||||
*/
|
||||
CONST( sexp, convi->sexp, 2 );
|
||||
CONST( rnd, 1 << (convi->sexp - 1), 2 );
|
||||
ASM3( "addw", "value", "value", rnd );
|
||||
ASM3( "shrsw", "value", "value", sexp );
|
||||
|
||||
/* We accumulate the signed 16-bit result in sum. Saturated
|
||||
* add.
|
||||
*/
|
||||
@ -420,8 +420,8 @@ vips_convi_compile_clip( VipsConvi *convi )
|
||||
int offset = VIPS_RINT( vips_image_get_offset( M ) );
|
||||
|
||||
VipsVector *v;
|
||||
char c16[256];
|
||||
char c5[256];
|
||||
char rnd[256];
|
||||
char exp[256];
|
||||
char c0[256];
|
||||
char c255[256];
|
||||
char off[256];
|
||||
@ -436,10 +436,10 @@ vips_convi_compile_clip( VipsConvi *convi )
|
||||
*/
|
||||
TEMP( "value", 2 );
|
||||
|
||||
CONST( c16, 16, 2 );
|
||||
ASM3( "addw", "value", "r", c16 );
|
||||
CONST( c5, 5, 2 );
|
||||
ASM3( "shrsw", "value", "value", c5 );
|
||||
CONST( rnd, 1 << (convi->exp - 1), 2 );
|
||||
ASM3( "addw", "value", "r", rnd );
|
||||
CONST( exp, convi->exp, 2 );
|
||||
ASM3( "shrsw", "value", "value", exp );
|
||||
|
||||
CONST( off, offset, 2 );
|
||||
ASM3( "addw", "value", "value", off );
|
||||
@ -852,8 +852,7 @@ vips__image_intize( VipsImage *in, VipsImage **out )
|
||||
static int
|
||||
vips_convi_intize( VipsConvi *convi, VipsImage *M )
|
||||
{
|
||||
int n_point = M->Xsize * M->Ysize;
|
||||
|
||||
int n_point;
|
||||
VipsImage *t;
|
||||
double scale;
|
||||
double *scaled;
|
||||
@ -862,6 +861,10 @@ vips_convi_intize( VipsConvi *convi, VipsImage *M )
|
||||
int shift;
|
||||
int i;
|
||||
|
||||
n_point = M->Xsize * M->Ysize;
|
||||
|
||||
g_assert( convi->n_point == n_point );
|
||||
|
||||
if( vips_check_matrix( "vips2imask", M, &t ) )
|
||||
return( -1 );
|
||||
|
||||
@ -1003,7 +1006,7 @@ vips_convi_build( VipsObject *object )
|
||||
|
||||
in = convolution->in;
|
||||
M = convolution->M;
|
||||
convi->n_point = n_point = M->Xsize * M->Ysize;
|
||||
convi->n_point = M->Xsize * M->Ysize;
|
||||
|
||||
if( vips_embed( in, &t[0],
|
||||
M->Xsize / 2, M->Ysize / 2,
|
||||
@ -1042,12 +1045,15 @@ vips_convi_build( VipsObject *object )
|
||||
convi->iM = M = t[1];
|
||||
|
||||
coeff = VIPS_MATRIX( M, 0, 0 );
|
||||
n_point = M->Xsize * M->Ysize;
|
||||
if( !(convi->coeff = VIPS_ARRAY( object, n_point, int )) ||
|
||||
!(convi->coeff_pos = VIPS_ARRAY( object, n_point, int )) )
|
||||
!(convi->coeff_pos =
|
||||
VIPS_ARRAY( object, n_point, int )) )
|
||||
return( -1 );
|
||||
|
||||
/* Squeeze out zero mask elements.
|
||||
*/
|
||||
convi->nnz = 0;
|
||||
for( i = 0; i < n_point; i++ )
|
||||
if( coeff[i] ) {
|
||||
convi->coeff[convi->nnz] = coeff[i];
|
||||
@ -1127,7 +1133,7 @@ vips_convi_init( VipsConvi *convi )
|
||||
* The output image always has the same #VipsBandFormat as the input image.
|
||||
*
|
||||
* For #VIPS_FORMAT_UCHAR images, vips_convi() uses a fast vector path based on
|
||||
* fixed-point arithmetic. This can produce slightly different results.
|
||||
* half-float arithmetic. This can produce slightly different results.
|
||||
* Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or
|
||||
* vips_vector_set_enabled().
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user