tiny polish to hist_find

slightly faster again
This commit is contained in:
John Cupitt 2016-02-28 12:08:06 +00:00
parent e46e2b7b28
commit e582f13d13
2 changed files with 46 additions and 28 deletions

View File

@ -12,6 +12,7 @@
- sharpen defaults now suitable for screen output - sharpen defaults now suitable for screen output
- better handling of deprecated args in python - better handling of deprecated args in python
- much better handling of arrayimage command-line args - much better handling of arrayimage command-line args
- faster hist_find (Lovell Fuller)
27/1/16 started 8.2.3 27/1/16 started 8.2.3
- fix a crash with SPARC byte-order labq vips images - fix a crash with SPARC byte-order labq vips images

View File

@ -24,6 +24,8 @@
* - cast @in to u8/u16. * - cast @in to u8/u16.
* 12/8/13 * 12/8/13
* - redo as a class * - redo as a class
* 28/2/16 lovell
* - unroll common cases
*/ */
/* /*
@ -233,50 +235,62 @@ vips_hist_find_uchar_scan( VipsStatistic *statistic,
void *seq, int x, int y, void *in, int n ) void *seq, int x, int y, void *in, int n )
{ {
Histogram *hist = (Histogram *) seq; Histogram *hist = (Histogram *) seq;
unsigned int *bins = hist->bins[0];
int nb = statistic->ready->Bands; int nb = statistic->ready->Bands;
VipsPel *p = (VipsPel *) in; VipsPel *p = (VipsPel *) in;
int i, j, z; int j;
/* The inner loop cannot be auto-vectorized by the compiler. /* The inner loop cannot be auto-vectorized by the compiler.
* Images with 1-4 bands are manually unrolled to improve * Unroll for common cases.
* performance for the most common cases by a factor of two.
*/ */
switch( nb ) { switch( nb ) {
case 1: case 1:
for( i = 0, j = 0; j < n; j++, i++ ) for( j = 0; j < n; j++ )
hist->bins[0][p[i]] += 1; bins[p[j]] += 1;
break; break;
case 2: case 2:
for( i = 0, j = 0; j < n; j++ ) { for( j = 0; j < n; j++ ) {
hist->bins[0][p[i]] += 1; hist->bins[0][p[0]] += 1;
hist->bins[1][p[i + 1]] += 1; hist->bins[1][p[1]] += 1;
i += 2;
p += 2;
} }
break; break;
case 3: case 3:
for( i = 0, j = 0; j < n; j++ ) { for( j = 0; j < n; j++ ) {
hist->bins[0][p[i]] += 1; hist->bins[0][p[0]] += 1;
hist->bins[1][p[i + 1]] += 1; hist->bins[1][p[1]] += 1;
hist->bins[2][p[i + 2]] += 1; hist->bins[2][p[2]] += 1;
i += 3;
p += 3;
} }
break; break;
case 4: case 4:
for( i = 0, j = 0; j < n; j++ ) { for( j = 0; j < n; j++ ) {
hist->bins[0][p[i]] += 1; hist->bins[0][p[0]] += 1;
hist->bins[1][p[i + 1]] += 1; hist->bins[1][p[1]] += 1;
hist->bins[2][p[i + 2]] += 1; hist->bins[2][p[2]] += 1;
hist->bins[3][p[i + 3]] += 1; hist->bins[3][p[3]] += 1;
i += 4;
p += 4;
} }
break; break;
default: default:
/* Loop when >4 bands /* Loop when >4 bands
*/ */
for( i = 0, j = 0; j < n; j++ ) for( j = 0; j < n; j++ ) {
for( z = 0; z < nb; z++, i++ ) int z;
hist->bins[z][p[i]] += 1;
for( z = 0; z < nb; z++ )
hist->bins[z][p[z]] += 1;
p += nb;
}
} }
/* Note the maximum. /* Note the maximum.
@ -321,11 +335,11 @@ vips_hist_find_ushort_scan( VipsStatistic *statistic,
int nb = statistic->ready->Bands; int nb = statistic->ready->Bands;
unsigned short *p = (unsigned short *) in; unsigned short *p = (unsigned short *) in;
int i, j, z; int j, z;
for( i = 0, j = 0; j < n; j++ ) for( j = 0; j < n; j++ ) {
for( z = 0; z < nb; z++, i++ ) { for( z = 0; z < nb; z++ ) {
int v = p[i]; int v = p[z];
/* Adjust maximum. /* Adjust maximum.
*/ */
@ -335,6 +349,9 @@ vips_hist_find_ushort_scan( VipsStatistic *statistic,
hist->bins[z][v] += 1; hist->bins[z][v] += 1;
} }
p += nb;
}
/* Note the maximum. /* Note the maximum.
*/ */
hist->mx = mx; hist->mx = mx;