vector im_add was broken for int/uint

oops, im_add() with Orc was broken for int and uint. Added more tests
to nip2 to pick this up earlier in future.
This commit is contained in:
John Cupitt 2011-02-28 11:15:09 +00:00
parent 3378c2ba2c
commit 3e112602f2
2 changed files with 33 additions and 38 deletions

View File

@ -32,6 +32,7 @@
files larger than memory files larger than memory
- removed man pages, we are all gtk-doc now - removed man pages, we are all gtk-doc now
- im_jpeg2vips() ignores weird APP1 chunks - im_jpeg2vips() ignores weird APP1 chunks
- im_add() for int/uint was broken
30/11/10 started 7.24.0 30/11/10 started 7.24.0
- bump for new stable - bump for new stable

View File

@ -32,6 +32,8 @@
* - remove oil support again ... we'll try Orc instead * - remove oil support again ... we'll try Orc instead
* 29/10/10 * 29/10/10
* - move to VipsVector for Orc support * - move to VipsVector for Orc support
* 28/2/11
* - argh vector int/uint was broken
*/ */
/* /*
@ -350,42 +352,25 @@ im__arith_binary( const char *domain,
return( 0 ); return( 0 );
} }
/* Type promotion for addition. Sign and value preserving. Make sure these VipsVector *
* match the case statement in add_buffer() above. im__init_program( VipsVector *vectors[IM_BANDFMT_LAST],
*/ VipsBandFmt format_table[IM_BANDFMT_LAST], VipsBandFmt fmt )
static int bandfmt_add[10] = {
/* UC C US S UI I F X D DX */
US, S, UI, I, UI, I, F, X, D, DX
};
void
im__init_programs( VipsVector *vectors[IM_BANDFMT_LAST],
int format_table[IM_BANDFMT_LAST] )
{ {
int fmt;
for( fmt = 0; fmt < IM_BANDFMT_LAST; fmt++ ) {
int isize = im__sizeof_bandfmt[fmt]; int isize = im__sizeof_bandfmt[fmt];
int osize = im__sizeof_bandfmt[format_table[fmt]]; int osize = im__sizeof_bandfmt[format_table[fmt]];
VipsVector *v; VipsVector *v;
/* float and double are not handled (well) by ORC. v = vips_vector_new( "binary arith", osize );
*/
if( fmt == IM_BANDFMT_DOUBLE ||
fmt == IM_BANDFMT_FLOAT ||
fmt == IM_BANDFMT_COMPLEX ||
fmt == IM_BANDFMT_DPCOMPLEX )
continue;
v = vectors[fmt] =
vips_vector_new( "binary arith", osize );
vips_vector_source_name( v, "s1", isize ); vips_vector_source_name( v, "s1", isize );
vips_vector_source_name( v, "s2", isize ); vips_vector_source_name( v, "s2", isize );
vips_vector_temporary( v, "t1", osize ); vips_vector_temporary( v, "t1", osize );
vips_vector_temporary( v, "t2", osize ); vips_vector_temporary( v, "t2", osize );
}
vectors[fmt] = v;
return( v );
} }
void void
@ -408,6 +393,14 @@ im__compile_programs( VipsVector *vectors[IM_BANDFMT_LAST] )
#endif /*DEBUG*/ #endif /*DEBUG*/
} }
/* Type promotion for addition. Sign and value preserving. Make sure these
* match the case statement in add_buffer() above.
*/
static int bandfmt_add[10] = {
/* UC C US S UI I F X D DX */
US, S, UI, I, UI, I, F, X, D, DX
};
static void static void
build_programs( void ) build_programs( void )
{ {
@ -419,24 +412,22 @@ build_programs( void )
return; return;
done = TRUE; done = TRUE;
im__init_programs( add_vectors, bandfmt_add ); v = im__init_program( add_vectors, bandfmt_add, IM_BANDFMT_UCHAR );
v = add_vectors[IM_BANDFMT_UCHAR];
vips_vector_asm2( v, "convubw", "t1", "s1" ); vips_vector_asm2( v, "convubw", "t1", "s1" );
vips_vector_asm2( v, "convubw", "t2", "s2" ); vips_vector_asm2( v, "convubw", "t2", "s2" );
vips_vector_asm3( v, "addw", "d1", "t1", "t2" ); vips_vector_asm3( v, "addw", "d1", "t1", "t2" );
v = add_vectors[IM_BANDFMT_CHAR]; v = im__init_program( add_vectors, bandfmt_add, IM_BANDFMT_CHAR );
vips_vector_asm2( v, "convsbw", "t1", "s1" ); vips_vector_asm2( v, "convsbw", "t1", "s1" );
vips_vector_asm2( v, "convsbw", "t2", "s2" ); vips_vector_asm2( v, "convsbw", "t2", "s2" );
vips_vector_asm3( v, "addw", "d1", "t1", "t2" ); vips_vector_asm3( v, "addw", "d1", "t1", "t2" );
v = add_vectors[IM_BANDFMT_USHORT]; v = im__init_program( add_vectors, bandfmt_add, IM_BANDFMT_USHORT );
vips_vector_asm2( v, "convuwl", "t1", "s1" ); vips_vector_asm2( v, "convuwl", "t1", "s1" );
vips_vector_asm2( v, "convuwl", "t2", "s2" ); vips_vector_asm2( v, "convuwl", "t2", "s2" );
vips_vector_asm3( v, "addl", "d1", "t1", "t2" ); vips_vector_asm3( v, "addl", "d1", "t1", "t2" );
v = add_vectors[IM_BANDFMT_SHORT]; v = im__init_program( add_vectors, bandfmt_add, IM_BANDFMT_SHORT );
vips_vector_asm2( v, "convswl", "t1", "s1" ); vips_vector_asm2( v, "convswl", "t1", "s1" );
vips_vector_asm2( v, "convswl", "t2", "s2" ); vips_vector_asm2( v, "convswl", "t2", "s2" );
vips_vector_asm3( v, "addl", "d1", "t1", "t2" ); vips_vector_asm3( v, "addl", "d1", "t1", "t2" );
@ -445,11 +436,14 @@ build_programs( void )
uint/int are a little slower than C, on a c2d anyway uint/int are a little slower than C, on a c2d anyway
v = add_vectors[IM_BANDFMT_UINT]; float/double/complex are not handled well
v = im__init_program( add_vectors, IM_BANDFMT_UINT );
vips_vector_asm3( v, "addl", "d1", "s1", "s2" ); vips_vector_asm3( v, "addl", "d1", "s1", "s2" );
v = add_vectors[IM_BANDFMT_INT]; v = im__init_program( add_vectors, IM_BANDFMT_INT );
vips_vector_asm3( v, "addl", "d1", "s1", "s2" ); vips_vector_asm3( v, "addl", "d1", "s1", "s2" );
*/ */
im__compile_programs( add_vectors ); im__compile_programs( add_vectors );