remove the orc stuff from arithmetic

the orc vector code is no faster than gcc auto-vectorisation
This commit is contained in:
John Cupitt 2013-12-02 11:22:08 +00:00
parent f387d13106
commit 7c43e3d332
19 changed files with 70 additions and 230 deletions

1
.gitignore vendored
View File

@ -12,7 +12,6 @@ Makefile.in
TAGS
tags
*.o
*.vect
Vips-8.0.gir
Vips-8.0.typelib
.*.swp

9
TODO
View File

@ -1,12 +1,8 @@
- check vectorizer on linear.c
do some more packages, we've just done arithmetic so far
time add with auto vec and with orc, is orc worthwhile?
make sure __restrict__ turns off if the compiler does not support it, is
there a configure thing?
how much would alignment buy us? is there any way we can do this? probably
not, since we need to be able to generate any sub-area
@ -15,6 +11,9 @@
not seen more than x2 from auto-vec of abs(), perhaps the bool ops?
- seen some leaks from
vips dzsave --layout google wtc.jpg x

View File

@ -195,7 +195,7 @@ vips_abs_buffer( VipsArithmetic *arithmetic,
/* Format doesn't change with abs, other than complex -> real.
*/
static const VipsBandFormat vips_bandfmt_abs[10] = {
static const VipsBandFormat vips_abs_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, C, US, S, UI, I, F, F, D, D
};
@ -210,7 +210,7 @@ vips_abs_class_init( VipsAbsClass *class )
object_class->description = _( "absolute value of an image" );
object_class->build = vips_abs_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_abs );
aclass->format_table = vips_abs_format_table;
aclass->process_line = vips_abs_buffer;
}

View File

@ -36,6 +36,8 @@
* - argh vector int/uint was broken
* 4/4/11
* - rewrite as a class
* 2/12/13
* - remove vector code, gcc autovec with -O3 is now as fast
*/
/*
@ -108,50 +110,35 @@ add_buffer( VipsArithmetic *arithmetic, VipsPel *out, VipsPel **in, int width )
(vips_band_format_iscomplex( vips_image_get_format( im ) ) ?
2 : 1);
VipsVector *v;
int x;
if( (v = vips_arithmetic_get_vector( class,
vips_image_get_format( im ) )) ) {
VipsExecutor ex;
/* Add all input types. Keep types here in sync with
* vips_add_format_table[] below.
*/
switch( vips_image_get_format( im ) ) {
case VIPS_FORMAT_UCHAR:
LOOP( unsigned char, unsigned short ); break;
case VIPS_FORMAT_CHAR:
LOOP( signed char, signed short ); break;
case VIPS_FORMAT_USHORT:
LOOP( unsigned short, unsigned int ); break;
case VIPS_FORMAT_SHORT:
LOOP( signed short, signed int ); break;
case VIPS_FORMAT_UINT:
LOOP( unsigned int, unsigned int ); break;
case VIPS_FORMAT_INT:
LOOP( signed int, signed int ); break;
vips_executor_set_program( &ex, v, sz );
vips_executor_set_array( &ex, v->s[0], in[0] );
vips_executor_set_array( &ex, v->s[1], in[1] );
vips_executor_set_destination( &ex, out );
case VIPS_FORMAT_FLOAT:
case VIPS_FORMAT_COMPLEX:
LOOP( float, float ); break;
vips_executor_run( &ex );
}
else {
int x;
case VIPS_FORMAT_DOUBLE:
case VIPS_FORMAT_DPCOMPLEX:
LOOP( double, double ); break;
/* Add all input types. Keep types here in sync with
* bandfmt_add[] below.
*/
switch( vips_image_get_format( im ) ) {
case VIPS_FORMAT_UCHAR:
LOOP( unsigned char, unsigned short ); break;
case VIPS_FORMAT_CHAR:
LOOP( signed char, signed short ); break;
case VIPS_FORMAT_USHORT:
LOOP( unsigned short, unsigned int ); break;
case VIPS_FORMAT_SHORT:
LOOP( signed short, signed int ); break;
case VIPS_FORMAT_UINT:
LOOP( unsigned int, unsigned int ); break;
case VIPS_FORMAT_INT:
LOOP( signed int, signed int ); break;
case VIPS_FORMAT_FLOAT:
case VIPS_FORMAT_COMPLEX:
LOOP( float, float ); break;
case VIPS_FORMAT_DOUBLE:
case VIPS_FORMAT_DPCOMPLEX:
LOOP( double, double ); break;
default:
g_assert( 0 );
}
default:
g_assert( 0 );
}
}
@ -171,7 +158,7 @@ add_buffer( VipsArithmetic *arithmetic, VipsPel *out, VipsPel **in, int width )
/* Type promotion for addition. Sign and value preserving. Make sure these
* match the case statement in add_buffer() above.
*/
static const VipsBandFormat bandfmt_add[10] = {
static const VipsBandFormat vips_add_format_table[10] = {
/* UC C US S UI I F X D DX */
US, S, UI, I, UI, I, F, X, D, DX
};
@ -181,48 +168,11 @@ vips_add_class_init( VipsAddClass *class )
{
VipsObjectClass *object_class = (VipsObjectClass *) class;
VipsArithmeticClass *aclass = VIPS_ARITHMETIC_CLASS( class );
VipsVector *v;
object_class->nickname = "add";
object_class->description = _( "add two images" );
vips_arithmetic_set_format_table( aclass, bandfmt_add );
v = vips_arithmetic_get_program( aclass, VIPS_FORMAT_UCHAR );
vips_vector_asm2( v, "convubw", "t1", "s1" );
vips_vector_asm2( v, "convubw", "t2", "s2" );
vips_vector_asm3( v, "addw", "d1", "t1", "t2" );
v = vips_arithmetic_get_program( aclass, VIPS_FORMAT_CHAR );
vips_vector_asm2( v, "convsbw", "t1", "s1" );
vips_vector_asm2( v, "convsbw", "t2", "s2" );
vips_vector_asm3( v, "addw", "d1", "t1", "t2" );
v = vips_arithmetic_get_program( aclass, VIPS_FORMAT_USHORT );
vips_vector_asm2( v, "convuwl", "t1", "s1" );
vips_vector_asm2( v, "convuwl", "t2", "s2" );
vips_vector_asm3( v, "addl", "d1", "t1", "t2" );
v = vips_arithmetic_get_program( aclass, VIPS_FORMAT_SHORT );
vips_vector_asm2( v, "convswl", "t1", "s1" );
vips_vector_asm2( v, "convswl", "t2", "s2" );
vips_vector_asm3( v, "addl", "d1", "t1", "t2" );
/*
uint/int are a little slower than C, on a c2d anyway
float/double/complex are not handled well
v = vips_arithmetic_get_vector( aclass, VIPS_FORMAT_UINT );
vips_vector_asm3( v, "addl", "d1", "s1", "s2" );
v = vips_arithmetic_get_vector( aclass, VIPS_FORMAT_INT );
vips_vector_asm3( v, "addl", "d1", "s1", "s2" );
*/
vips_arithmetic_compile( aclass );
aclass->format_table = vips_add_format_table;
aclass->process_line = add_buffer;
}

View File

@ -594,86 +594,6 @@ vips_arithmetic_init( VipsArithmetic *arithmetic )
arithmetic->base_bands = 1;
}
void
vips_arithmetic_set_format_table( VipsArithmeticClass *class,
const VipsBandFormat *format_table )
{
int i;
g_assert( !class->format_table );
class->format_table = format_table;
for( i = 0; i < VIPS_FORMAT_LAST; i++ ) {
int isize = vips_format_sizeof( i );
int osize = vips_format_sizeof( (int) format_table[i] );
VipsVector *v;
v = vips_vector_new( "arithmetic", osize );
vips_vector_source_name( v, "s1", isize );
vips_vector_source_name( v, "s2", isize );
vips_vector_temporary( v, "t1", osize );
vips_vector_temporary( v, "t2", osize );
class->vectors[i] = v;
}
}
/* Get the stub for this program ... use _get_vector() to get the compiled
* code.
*/
VipsVector *
vips_arithmetic_get_program( VipsArithmeticClass *class, VipsBandFormat fmt )
{
g_assert( (int) fmt >= 0 && (int) fmt < VIPS_FORMAT_LAST );
g_assert( !class->vector_program[fmt] );
class->vector_program[fmt] = TRUE;
return( class->vectors[fmt] );
}
/* Get the compiled code for this type, if available.
*/
VipsVector *
vips_arithmetic_get_vector( VipsArithmeticClass *class, VipsBandFormat fmt )
{
g_assert( fmt >= 0 && fmt < VIPS_FORMAT_LAST );
if( !vips_vector_isenabled() ||
!class->vector_program[fmt] )
return( NULL );
return( class->vectors[fmt] );
}
void
vips_arithmetic_compile( VipsArithmeticClass *class )
{
int i;
g_assert( class->format_table );
for( i = 0; i < VIPS_FORMAT_LAST; i++ )
if( class->vector_program[i] &&
!vips_vector_compile( class->vectors[i] ) )
/* If compilation fails, turn off the vector for this
* type.
*/
class->vector_program[i] = FALSE;
#ifdef DEBUG
printf( "vips_arithmetic_compile: " );
for( i = 0; i < VIPS_FORMAT_LAST; i++ )
if( class->vector_program[i] )
printf( "%s ",
vips_enum_nick( VIPS_TYPE_BAND_FORMAT, i ) );
printf( "\n" );
#endif /*DEBUG*/
}
/* Called from iofuncs to init all operations in this dir. Use a plugin system
* instead?
*/

View File

@ -191,7 +191,7 @@ vips_boolean_buffer( VipsArithmetic *arithmetic,
/* Type conversions for boolean.
*/
static const VipsBandFormat vips_bandfmt_boolean[10] = {
static const VipsBandFormat vips_boolean_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, C, US, S, UI, I, I, I, I, I,
};
@ -210,7 +210,7 @@ vips_boolean_class_init( VipsBooleanClass *class )
object_class->description = _( "boolean operation on two images" );
object_class->build = vips_boolean_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_boolean );
aclass->format_table = vips_boolean_format_table;
aclass->process_line = vips_boolean_buffer;
@ -513,7 +513,7 @@ vips_boolean_const_class_init( VipsBooleanConstClass *class )
_( "boolean operations against a constant" );
object_class->build = vips_boolean_const_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_boolean );
aclass->format_table = vips_boolean_format_table;
aclass->process_line = vips_boolean_const_buffer;

View File

@ -196,7 +196,7 @@ vips_complex_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_complex[10] = {
static const VipsBandFormat vips_complex_format_table[10] = {
/* UC C US S UI I F X D DX */
X, X, X, X, X, X, X, X, DX, DX
};
@ -215,7 +215,7 @@ vips_complex_class_init( VipsComplexClass *class )
object_class->description =
_( "perform a complex operation on an image" );
vips_arithmetic_set_format_table( aclass, vips_bandfmt_complex );
aclass->format_table = vips_complex_format_table;
aclass->process_line = vips_complex_buffer;
@ -475,7 +475,7 @@ vips_complex2_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_complex2[10] = {
static const VipsBandFormat vips_complex2_format_table[10] = {
/* UC C US S UI I F X D DX */
X, X, X, X, X, X, X, X, DX, DX
};
@ -494,7 +494,7 @@ vips_complex2_class_init( VipsComplex2Class *class )
object_class->description =
_( "perform a binary complex operation on two images" );
vips_arithmetic_set_format_table( aclass, vips_bandfmt_complex2 );
aclass->format_table = vips_complex2_format_table;
aclass->process_line = vips_complex2_buffer;
@ -689,7 +689,7 @@ vips_complexget_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_complexget[10] = {
static const VipsBandFormat vips_complexget_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, C, US, S, UI, I, F, F, D, D
};
@ -708,7 +708,7 @@ vips_complexget_class_init( VipsComplexgetClass *class )
object_class->description = _( "get a component from a complex image" );
object_class->build = vips_complexget_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_complexget );
aclass->format_table = vips_complexget_format_table;
aclass->process_line = vips_complexget_buffer;
@ -891,7 +891,7 @@ vips_complexform_buffer( VipsArithmetic *arithmetic,
/* Type promotion for complexform. Sign and value preserving. Make sure
* these match the case statement in vips_complexform_buffer() above.
*/
static int vips_bandfmt_complexform[10] = {
static int vips_complexform_format_table[10] = {
/* UC C US S UI I F X D DX */
X, X, X, X, X, X, X, X, DX,DX
};
@ -907,7 +907,7 @@ vips_complexform_class_init( VipsComplexformClass *class )
_( "form a complex image from two real images" );
object_class->build = vips_complexform_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_complexform );
aclass->format_table = vips_complexform_format_table;
aclass->process_line = vips_complexform_buffer;
}

View File

@ -211,7 +211,7 @@ vips_divide_buffer( VipsArithmetic *arithmetic,
/* Type promotion for division. Sign and value preserving. Make sure
* these match the case statement in vips_divide_buffer() above.
*/
static int vips_bandfmt_divide[10] = {
static int vips_divide_format_table[10] = {
/* UC C US S UI I F X D DX */
F, F, F, F, F, F, F, X, D, DX
};
@ -225,7 +225,7 @@ vips_divide_class_init( VipsDivideClass *class )
object_class->nickname = "divide";
object_class->description = _( "divide two images" );
vips_arithmetic_set_format_table( aclass, vips_bandfmt_divide );
aclass->format_table = vips_divide_format_table;
aclass->process_line = vips_divide_buffer;
}

View File

@ -150,7 +150,7 @@ vips_invert_buffer( VipsArithmetic *arithmetic,
/* Format doesn't change with invert.
*/
static const VipsBandFormat vips_bandfmt_invert[10] = {
static const VipsBandFormat vips_invert_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, C, US, S, UI, I, F, X, D, DX
};
@ -164,7 +164,7 @@ vips_invert_class_init( VipsInvertClass *class )
object_class->nickname = "invert";
object_class->description = _( "invert an image" );
vips_arithmetic_set_format_table( aclass, vips_bandfmt_invert );
aclass->format_table = vips_invert_format_table;
aclass->process_line = vips_invert_buffer;
}

View File

@ -257,7 +257,7 @@ vips_linear_buffer( VipsArithmetic *arithmetic,
/* Integer formats become float with linear; float and complex formats are unchanged.
*/
static const VipsBandFormat vips_bandfmt_linear[10] = {
static const VipsBandFormat vips_linear_format_table[10] = {
/* UC C US S UI I F X D DX */
F, F, F, F, F, F, F, X, D, DX
};
@ -276,8 +276,7 @@ vips_linear_class_init( VipsLinearClass *class )
object_class->description = _( "calculate (a * in + b)" );
object_class->build = vips_linear_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_linear );
aclass->format_table = vips_linear_format_table;
aclass->process_line = vips_linear_buffer;
VIPS_ARG_BOXED( class, "a", 110,

View File

@ -180,7 +180,7 @@ vips_math_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_math[10] = {
static const VipsBandFormat vips_math_format_table[10] = {
/* UC C US S UI I F X D DX */
F, F, F, F, F, F, F, X, D, DX
};
@ -199,8 +199,7 @@ vips_math_class_init( VipsMathClass *class )
object_class->description = _( "perform a math function on an image" );
object_class->build = vips_math_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_math );
aclass->format_table = vips_math_format_table;
aclass->process_line = vips_math_buffer;
VIPS_ARG_ENUM( class, "math", 200,

View File

@ -178,7 +178,7 @@ vips_math2_buffer( VipsArithmetic *arithmetic,
/* Type promotion for math2. Keep in sync with vips_math2_buffer() above.
*/
static int vips_bandfmt_math2[10] = {
static int vips_math2_format_table[10] = {
/* UC C US S UI I F X D DX */
F, F, F, F, F, F, F, X, D, DX
};
@ -197,8 +197,7 @@ vips_math2_class_init( VipsMath2Class *class )
object_class->description = _( "binary math operations" );
object_class->build = vips_math2_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_math2 );
aclass->format_table = vips_math2_format_table;
aclass->process_line = vips_math2_buffer;
VIPS_ARG_ENUM( class, "math2", 200,
@ -395,8 +394,7 @@ vips_math2_const_class_init( VipsMath2ConstClass *class )
object_class->description = _( "pow( @in, @c )" );
object_class->build = vips_math2_const_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_math2 );
aclass->format_table = vips_math2_format_table;
aclass->process_line = vips_math2_const_buffer;
VIPS_ARG_ENUM( class, "math2", 200,

View File

@ -172,8 +172,7 @@ vips_multiply_class_init( VipsMultiplyClass *class )
object_class->nickname = "multiply";
object_class->description = _( "multiply two images" );
vips_arithmetic_set_format_table( aclass, vips_multiply_format_table );
aclass->format_table = vips_multiply_format_table;
aclass->process_line = vips_multiply_buffer;
}

View File

@ -87,14 +87,6 @@ typedef struct _VipsArithmeticClass {
*/
const VipsBandFormat *format_table;
/* A vector program for each input type.
*/
VipsVector *vectors[VIPS_FORMAT_LAST];
/* ... and if we've set a program for this format.
*/
gboolean vector_program[VIPS_FORMAT_LAST];
/* The buffer processor.
*/
VipsArithmeticProcessFn process_line;
@ -102,14 +94,6 @@ typedef struct _VipsArithmeticClass {
GType vips_arithmetic_get_type( void );
void vips_arithmetic_set_format_table( VipsArithmeticClass *klass,
const VipsBandFormat *format_table );
VipsVector *vips_arithmetic_get_vector( VipsArithmeticClass *klass,
VipsBandFormat fmt );
void vips_arithmetic_compile( VipsArithmeticClass *klass );
VipsVector *vips_arithmetic_get_program( VipsArithmeticClass *klass,
VipsBandFormat fmt );
#ifdef __cplusplus
}
#endif /*__cplusplus*/

View File

@ -200,7 +200,7 @@ vips_relational_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_relational[10] = {
static const VipsBandFormat vips_relational_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, UC, UC, UC, UC, UC, UC, UC, UC, UC
};
@ -220,8 +220,7 @@ vips_relational_class_init( VipsRelationalClass *class )
_( "a relational operation on a pair of images" );
object_class->build = vips_relational_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_relational );
aclass->format_table = vips_relational_format_table;
aclass->process_line = vips_relational_buffer;
VIPS_ARG_ENUM( class, "relational", 200,
@ -557,8 +556,7 @@ vips_relational_const_class_init( VipsRelationalConstClass *class )
_( "relational operations against a constant" );
object_class->build = vips_relational_const_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_relational );
aclass->format_table = vips_relational_format_table;
aclass->process_line = vips_relational_const_buffer;
VIPS_ARG_ENUM( class, "relational", 200,

View File

@ -154,7 +154,7 @@ vips_remainder_buffer( VipsArithmetic *arithmetic,
/* Type promotion for remainder. Keep in sync with vips_remainder_buffer() above.
*/
static int vips_bandfmt_remainder[10] = {
static int vips_remainder_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, C, US, S, UI, I, F, X, D, DX
};
@ -174,8 +174,7 @@ vips_remainder_class_init( VipsRemainderClass *class )
_( "remainder after integer division of two images" );
object_class->build = vips_remainder_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_remainder );
aclass->format_table = vips_remainder_format_table;
aclass->process_line = vips_remainder_buffer;
}
@ -324,8 +323,7 @@ vips_remainder_const_class_init( VipsRemainderConstClass *class )
"and a constant" );
object_class->build = vips_remainder_const_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_remainder );
aclass->format_table = vips_remainder_format_table;
aclass->process_line = vips_remainder_const_buffer;
}

View File

@ -142,7 +142,7 @@ vips_round_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_round[10] = {
static const VipsBandFormat vips_round_format_table[10] = {
/* UC C US S UI I F X D DX */
UC, C, US, S, UI, I, F, X, D, DX
};
@ -161,8 +161,7 @@ vips_round_class_init( VipsRoundClass *class )
object_class->description = _( "perform a round function on an image" );
object_class->build = vips_round_build;
vips_arithmetic_set_format_table( aclass, vips_bandfmt_round );
aclass->format_table = vips_round_format_table;
aclass->process_line = vips_round_buffer;
VIPS_ARG_ENUM( class, "round", 200,

View File

@ -137,7 +137,7 @@ vips_sign_buffer( VipsArithmetic *arithmetic,
#define D VIPS_FORMAT_DOUBLE
#define DX VIPS_FORMAT_DPCOMPLEX
static const VipsBandFormat vips_bandfmt_sign[10] = {
static const VipsBandFormat vips_sign_format_table[10] = {
/* UC C US S UI I F X D DX */
C, C, C, C, C, C, C, X, C, DX
};
@ -151,8 +151,7 @@ vips_sign_class_init( VipsSignClass *class )
object_class->nickname = "sign";
object_class->description = _( "unit vector of pixel" );
vips_arithmetic_set_format_table( aclass, vips_bandfmt_sign );
aclass->format_table = vips_sign_format_table;
aclass->process_line = vips_sign_buffer;
}

View File

@ -147,7 +147,7 @@ vips_subtract_buffer( VipsArithmetic *arithmetic,
/* Type promotion for subtraction. Sign and value preserving. Make sure these
* match the case statement in vips_subtract_buffer() above.
*/
static const VipsBandFormat bandfmt_subtract[10] = {
static const VipsBandFormat vips_subtract_format_table[10] = {
/* UC C US S UI I F X D DX */
S, S, I, I, I, I, F, X, D, DX
};
@ -161,8 +161,7 @@ vips_subtract_class_init( VipsSubtractClass *class )
object_class->nickname = "subtract";
object_class->description = _( "subtract two images" );
vips_arithmetic_set_format_table( aclass, bandfmt_subtract );
aclass->format_table = vips_subtract_format_table;
aclass->process_line = vips_subtract_buffer;
}