all done
This commit is contained in:
parent
36f21138d7
commit
008fd21728
@ -27,6 +27,7 @@
|
|||||||
- better gobject-introspection annotations, thanks astavale
|
- better gobject-introspection annotations, thanks astavale
|
||||||
- vips_image_write() severs all links between images, when it can ... thanks
|
- vips_image_write() severs all links between images, when it can ... thanks
|
||||||
Warren and Nakilon
|
Warren and Nakilon
|
||||||
|
- vector path for convolution is more accurate and can handle larger masks
|
||||||
|
|
||||||
29/8/17 started 8.5.9
|
29/8/17 started 8.5.9
|
||||||
- make --fail stop jpeg read on any libjpeg warning, thanks @mceachen
|
- make --fail stop jpeg read on any libjpeg warning, thanks @mceachen
|
||||||
|
@ -76,6 +76,8 @@
|
|||||||
* - remove pts for a small speedup
|
* - remove pts for a small speedup
|
||||||
* 12/10/17
|
* 12/10/17
|
||||||
* - fix leak of vectors, thanks MHeimbuc
|
* - fix leak of vectors, thanks MHeimbuc
|
||||||
|
* 14/10/17
|
||||||
|
* - switch to half-float for vector path
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -108,8 +110,8 @@
|
|||||||
/*
|
/*
|
||||||
#define DEBUG
|
#define DEBUG
|
||||||
#define DEBUG_PIXELS
|
#define DEBUG_PIXELS
|
||||||
*/
|
|
||||||
#define DEBUG_COMPILE
|
#define DEBUG_COMPILE
|
||||||
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef HAVE_CONFIG_H
|
||||||
#include <config.h>
|
#include <config.h>
|
||||||
@ -124,13 +126,6 @@
|
|||||||
|
|
||||||
#include "pconvolution.h"
|
#include "pconvolution.h"
|
||||||
|
|
||||||
/* We do the 8-bit vector path with fixed-point arithmetic. We use a 3.5 format (3 signed integer bits, 5 fractional bits)
|
|
||||||
* for the mask coefficients, so our range is -4 to +3.99, after using scale
|
|
||||||
* on the mask.
|
|
||||||
*/
|
|
||||||
#define FIXED_BITS (5)
|
|
||||||
#define FIXED_SCALE (1 << FIXED_BITS)
|
|
||||||
|
|
||||||
/* Larger than this and we fall back to C.
|
/* Larger than this and we fall back to C.
|
||||||
*/
|
*/
|
||||||
#define MAX_PASS (20)
|
#define MAX_PASS (20)
|
||||||
@ -154,6 +149,7 @@ typedef struct {
|
|||||||
/* An int version of M.
|
/* An int version of M.
|
||||||
*/
|
*/
|
||||||
VipsImage *iM;
|
VipsImage *iM;
|
||||||
|
int n_point; /* w * h for our matrix */
|
||||||
|
|
||||||
/* We make a smaller version of the mask with the zeros squeezed out.
|
/* We make a smaller version of the mask with the zeros squeezed out.
|
||||||
*/
|
*/
|
||||||
@ -161,7 +157,7 @@ typedef struct {
|
|||||||
int *coeff; /* Array of non-zero mask coefficients */
|
int *coeff; /* Array of non-zero mask coefficients */
|
||||||
int *coeff_pos; /* Index of each nnz element in mask->coeff */
|
int *coeff_pos; /* Index of each nnz element in mask->coeff */
|
||||||
|
|
||||||
/* And a half float version for a vector path. mant has the signed
|
/* And a half float version for the vector path. mant has the signed
|
||||||
* 8-bit mantissas in [-1, +1), sexp has the exponent shift after the
|
* 8-bit mantissas in [-1, +1), sexp has the exponent shift after the
|
||||||
* mul and before the add, and exp has the final exponent shift before
|
* mul and before the add, and exp has the final exponent shift before
|
||||||
* write-back.
|
* write-back.
|
||||||
@ -169,7 +165,6 @@ typedef struct {
|
|||||||
int *mant;
|
int *mant;
|
||||||
int sexp;
|
int sexp;
|
||||||
int exp;
|
int exp;
|
||||||
int n_point; /* Number of points in fixed-point array */
|
|
||||||
|
|
||||||
/* The set of passes we need for this mask.
|
/* The set of passes we need for this mask.
|
||||||
*/
|
*/
|
||||||
@ -180,10 +175,6 @@ typedef struct {
|
|||||||
*/
|
*/
|
||||||
int r;
|
int r;
|
||||||
VipsVector *vector;
|
VipsVector *vector;
|
||||||
|
|
||||||
/* Remove later.
|
|
||||||
*/
|
|
||||||
int *fixed;
|
|
||||||
} VipsConvi;
|
} VipsConvi;
|
||||||
|
|
||||||
typedef VipsConvolutionClass VipsConviClass;
|
typedef VipsConvolutionClass VipsConviClass;
|
||||||
@ -353,11 +344,13 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
|
|||||||
|
|
||||||
char source[256];
|
char source[256];
|
||||||
char off[256];
|
char off[256];
|
||||||
|
char rnd[256];
|
||||||
|
char sexp[256];
|
||||||
char coeff[256];
|
char coeff[256];
|
||||||
|
|
||||||
/* Exclude zero elements.
|
/* Exclude zero elements.
|
||||||
*/
|
*/
|
||||||
if( !convi->fixed[i] )
|
if( !convi->mant[i] )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* The source. sl0 is the first scanline in the mask.
|
/* The source. sl0 is the first scanline in the mask.
|
||||||
@ -379,9 +372,16 @@ vips_convi_compile_section( VipsConvi *convi, VipsImage *in, Pass *pass )
|
|||||||
* of the image and coefficient are interesting, so we can take
|
* of the image and coefficient are interesting, so we can take
|
||||||
* the bottom half of a 16x16->32 multiply.
|
* the bottom half of a 16x16->32 multiply.
|
||||||
*/
|
*/
|
||||||
CONST( coeff, convi->fixed[i], 2 );
|
CONST( coeff, convi->mant[i], 2 );
|
||||||
ASM3( "mullw", "value", "value", coeff );
|
ASM3( "mullw", "value", "value", coeff );
|
||||||
|
|
||||||
|
/* Round and shift right before accumulating into sum, to prevent overflow on large masks.
|
||||||
|
*/
|
||||||
|
CONST( sexp, convi->sexp, 2 );
|
||||||
|
CONST( rnd, 1 << (convi->sexp - 1), 2 );
|
||||||
|
ASM3( "addw", "value", "value", rnd );
|
||||||
|
ASM3( "shrsw", "value", "value", sexp );
|
||||||
|
|
||||||
/* We accumulate the signed 16-bit result in sum. Saturated
|
/* We accumulate the signed 16-bit result in sum. Saturated
|
||||||
* add.
|
* add.
|
||||||
*/
|
*/
|
||||||
@ -420,8 +420,8 @@ vips_convi_compile_clip( VipsConvi *convi )
|
|||||||
int offset = VIPS_RINT( vips_image_get_offset( M ) );
|
int offset = VIPS_RINT( vips_image_get_offset( M ) );
|
||||||
|
|
||||||
VipsVector *v;
|
VipsVector *v;
|
||||||
char c16[256];
|
char rnd[256];
|
||||||
char c5[256];
|
char exp[256];
|
||||||
char c0[256];
|
char c0[256];
|
||||||
char c255[256];
|
char c255[256];
|
||||||
char off[256];
|
char off[256];
|
||||||
@ -436,10 +436,10 @@ vips_convi_compile_clip( VipsConvi *convi )
|
|||||||
*/
|
*/
|
||||||
TEMP( "value", 2 );
|
TEMP( "value", 2 );
|
||||||
|
|
||||||
CONST( c16, 16, 2 );
|
CONST( rnd, 1 << (convi->exp - 1), 2 );
|
||||||
ASM3( "addw", "value", "r", c16 );
|
ASM3( "addw", "value", "r", rnd );
|
||||||
CONST( c5, 5, 2 );
|
CONST( exp, convi->exp, 2 );
|
||||||
ASM3( "shrsw", "value", "value", c5 );
|
ASM3( "shrsw", "value", "value", exp );
|
||||||
|
|
||||||
CONST( off, offset, 2 );
|
CONST( off, offset, 2 );
|
||||||
ASM3( "addw", "value", "value", off );
|
ASM3( "addw", "value", "value", off );
|
||||||
@ -852,8 +852,7 @@ vips__image_intize( VipsImage *in, VipsImage **out )
|
|||||||
static int
|
static int
|
||||||
vips_convi_intize( VipsConvi *convi, VipsImage *M )
|
vips_convi_intize( VipsConvi *convi, VipsImage *M )
|
||||||
{
|
{
|
||||||
int n_point = M->Xsize * M->Ysize;
|
int n_point;
|
||||||
|
|
||||||
VipsImage *t;
|
VipsImage *t;
|
||||||
double scale;
|
double scale;
|
||||||
double *scaled;
|
double *scaled;
|
||||||
@ -862,6 +861,10 @@ vips_convi_intize( VipsConvi *convi, VipsImage *M )
|
|||||||
int shift;
|
int shift;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
n_point = M->Xsize * M->Ysize;
|
||||||
|
|
||||||
|
g_assert( convi->n_point == n_point );
|
||||||
|
|
||||||
if( vips_check_matrix( "vips2imask", M, &t ) )
|
if( vips_check_matrix( "vips2imask", M, &t ) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
@ -1003,7 +1006,7 @@ vips_convi_build( VipsObject *object )
|
|||||||
|
|
||||||
in = convolution->in;
|
in = convolution->in;
|
||||||
M = convolution->M;
|
M = convolution->M;
|
||||||
convi->n_point = n_point = M->Xsize * M->Ysize;
|
convi->n_point = M->Xsize * M->Ysize;
|
||||||
|
|
||||||
if( vips_embed( in, &t[0],
|
if( vips_embed( in, &t[0],
|
||||||
M->Xsize / 2, M->Ysize / 2,
|
M->Xsize / 2, M->Ysize / 2,
|
||||||
@ -1042,12 +1045,15 @@ vips_convi_build( VipsObject *object )
|
|||||||
convi->iM = M = t[1];
|
convi->iM = M = t[1];
|
||||||
|
|
||||||
coeff = VIPS_MATRIX( M, 0, 0 );
|
coeff = VIPS_MATRIX( M, 0, 0 );
|
||||||
|
n_point = M->Xsize * M->Ysize;
|
||||||
if( !(convi->coeff = VIPS_ARRAY( object, n_point, int )) ||
|
if( !(convi->coeff = VIPS_ARRAY( object, n_point, int )) ||
|
||||||
!(convi->coeff_pos = VIPS_ARRAY( object, n_point, int )) )
|
!(convi->coeff_pos =
|
||||||
|
VIPS_ARRAY( object, n_point, int )) )
|
||||||
return( -1 );
|
return( -1 );
|
||||||
|
|
||||||
/* Squeeze out zero mask elements.
|
/* Squeeze out zero mask elements.
|
||||||
*/
|
*/
|
||||||
|
convi->nnz = 0;
|
||||||
for( i = 0; i < n_point; i++ )
|
for( i = 0; i < n_point; i++ )
|
||||||
if( coeff[i] ) {
|
if( coeff[i] ) {
|
||||||
convi->coeff[convi->nnz] = coeff[i];
|
convi->coeff[convi->nnz] = coeff[i];
|
||||||
@ -1127,7 +1133,7 @@ vips_convi_init( VipsConvi *convi )
|
|||||||
* The output image always has the same #VipsBandFormat as the input image.
|
* The output image always has the same #VipsBandFormat as the input image.
|
||||||
*
|
*
|
||||||
* For #VIPS_FORMAT_UCHAR images, vips_convi() uses a fast vector path based on
|
* For #VIPS_FORMAT_UCHAR images, vips_convi() uses a fast vector path based on
|
||||||
* fixed-point arithmetic. This can produce slightly different results.
|
* half-float arithmetic. This can produce slightly different results from the C path.
|
||||||
* Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or
|
* Disable the vector path with `--vips-novector` or `VIPS_NOVECTOR` or
|
||||||
* vips_vector_set_enabled().
|
* vips_vector_set_enabled().
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user