diff --git a/libvips/resample/bicubic.cpp b/libvips/resample/bicubic.cpp
index 97e4a403..7388ab06 100644
--- a/libvips/resample/bicubic.cpp
+++ b/libvips/resample/bicubic.cpp
@@ -79,16 +79,15 @@ typedef VipsInterpolate VipsInterpolateBicubic;
 typedef VipsInterpolateClass VipsInterpolateBicubicClass;
 
 /* Precalculated interpolation matrices. int (used for pel
- * sizes up to short), and double (for all others). We go to
- * scale + 1 so we can round-to-nearest safely.
+ * sizes up to short), and double (for all others).
  */
 
 /* We could keep a large set of 2d 4x4 matricies, but this actually
  * works out slower since for many resizes the thing will no longer
  * fit in L1.
  */
-static int vips_bicubic_matrixi[VIPS_TRANSFORM_SCALE + 1][4];
-static double vips_bicubic_matrixf[VIPS_TRANSFORM_SCALE + 1][4];
+static int vips_bicubic_matrixi[VIPS_TRANSFORM_SCALE][4];
+static double vips_bicubic_matrixf[VIPS_TRANSFORM_SCALE][4];
 
 /* We need C linkage for this.
  */
@@ -498,19 +497,13 @@ static void
 vips_interpolate_bicubic_interpolate( VipsInterpolate *interpolate,
 	void *out, VipsRegion *in, double x, double y )
 {
-	/* Find the mask index. We round-to-nearest, so we need to generate 
-	 * indexes in 0 to VIPS_TRANSFORM_SCALE, 2^n + 1 values. We multiply 
-	 * by 2 more than we need to, add one, mask, then shift down again to 
-	 * get the extra range.
+	/* Find the mask index: the fractional part, scaled and truncated.
 	 */
-	const int sx = x * VIPS_TRANSFORM_SCALE * 2;
-	const int sy = y * VIPS_TRANSFORM_SCALE * 2;
+	const int sx = x * VIPS_TRANSFORM_SCALE;
+	const int sy = y * VIPS_TRANSFORM_SCALE;
 
-	const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
-	const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
-
-	const int tx = (six + 1) >> 1;
-	const int ty = (siy + 1) >> 1;
+	const int tx = sx & (VIPS_TRANSFORM_SCALE - 1);
+	const int ty = sy & (VIPS_TRANSFORM_SCALE - 1);
 
 	/* We know x/y are always positive, so we can just (int) them. 
 	 */
@@ -643,7 +636,7 @@ vips_interpolate_bicubic_class_init( VipsInterpolateBicubicClass *iclass )
 
 	/* Build the tables of pre-computed coefficients.
 	 */
-	for( int x = 0; x < VIPS_TRANSFORM_SCALE + 1; x++ ) {
+	for( int x = 0; x < VIPS_TRANSFORM_SCALE; x++ ) {
 		calculate_coefficients_catmull( vips_bicubic_matrixf[x], 
 			(float) x / VIPS_TRANSFORM_SCALE ); 
 
diff --git a/libvips/resample/reduceh.cpp b/libvips/resample/reduceh.cpp
index 07ddff32..cc829c01 100644
--- a/libvips/resample/reduceh.cpp
+++ b/libvips/resample/reduceh.cpp
@@ -11,6 +11,7 @@
  * 6/6/20 kleisauke
  * 	- deprecate @centre option, it's now always on
  * 	- fix pixel shift
+ * 	- remove unnecessary round-to-nearest behaviour
  */
 
 /*
@@ -78,11 +79,10 @@ typedef struct _VipsReduceh {
 	double hoffset;
 
 	/* Precalculated interpolation matrices. int (used for pel
-	 * sizes up to short), and double (for all others). We go to
-	 * scale + 1 so we can round-to-nearest safely.
+	 * sizes up to short), and double (for all others).
 	 */
-	int *matrixi[VIPS_TRANSFORM_SCALE + 1];
-	double *matrixf[VIPS_TRANSFORM_SCALE + 1];
+	int *matrixi[VIPS_TRANSFORM_SCALE];
+	double *matrixf[VIPS_TRANSFORM_SCALE];
 
 	/* Deprecated.
 	 */
@@ -320,7 +320,7 @@ vips_reduceh_gen( VipsRegion *out_region, void *seq,
 
 	VIPS_GATE_START( "vips_reduceh_gen: work" ); 
 
-	for( int y = 0; y < r->height; y ++ ) { 
+	for( int y = 0; y < r->height; y++ ) { 
 		VipsPel *p0;
 		VipsPel *q;
 
@@ -346,9 +346,8 @@ vips_reduceh_gen( VipsRegion *out_region, void *seq,
 		for( int x = 0; x < r->width; x++ ) {
 			const int ix = (int) X;
 			VipsPel *p = p0 + ix * ps;
-			const int sx = X * VIPS_TRANSFORM_SCALE * 2;
-			const int six = sx & (VIPS_TRANSFORM_SCALE * 2 - 1);
-			const int tx = (six + 1) >> 1;
+			const int sx = X * VIPS_TRANSFORM_SCALE;
+			const int tx = sx & (VIPS_TRANSFORM_SCALE - 1);
 			const int *cxi = reduceh->matrixi[tx];
 			const double *cxf = reduceh->matrixf[tx];
 
@@ -480,7 +479,7 @@ vips_reduceh_build( VipsObject *object )
 
 	/* Build the tables of pre-computed coefficients.
 	 */
-	for( int x = 0; x < VIPS_TRANSFORM_SCALE + 1; x++ ) {
+	for( int x = 0; x < VIPS_TRANSFORM_SCALE; x++ ) {
 		reduceh->matrixf[x] = 
 			VIPS_ARRAY( object, reduceh->n_point, double ); 
 		reduceh->matrixi[x] = 
diff --git a/libvips/resample/reducev.cpp b/libvips/resample/reducev.cpp
index 7f876433..fa965134 100644
--- a/libvips/resample/reducev.cpp
+++ b/libvips/resample/reducev.cpp
@@ -21,6 +21,7 @@
  * 	- deprecate @centre option, it's now always on
  * 	- fix pixel shift
  * 	- speed up the mask construction for uchar/ushort images
+ * 	- remove unnecessary round-to-nearest behaviour
  */
 
 /*
@@ -117,15 +118,14 @@ typedef struct _VipsReducev {
 	double voffset;
 
 	/* Precalculated interpolation matrices. int (used for pel
-	 * sizes up to short), and double (for all others). We go to
-	 * scale + 1 so we can round-to-nearest safely.
+	 * sizes up to short), and double (for all others).
 	 */
-	int *matrixi[VIPS_TRANSFORM_SCALE + 1];
-	double *matrixf[VIPS_TRANSFORM_SCALE + 1];
+	int *matrixi[VIPS_TRANSFORM_SCALE];
+	double *matrixf[VIPS_TRANSFORM_SCALE];
 
 	/* And another set for orc: we want 2.6 precision.
 	 */
-	int *matrixo[VIPS_TRANSFORM_SCALE + 1];
+	int *matrixo[VIPS_TRANSFORM_SCALE];
 
 	/* The passes we generate for this mask.
 	 */
@@ -154,7 +154,7 @@ vips_reducev_finalize( GObject *gobject )
 	for( int i = 0; i < reducev->n_pass; i++ )
 		VIPS_FREEF( vips_vector_free, reducev->pass[i].vector );
 	reducev->n_pass = 0;
-	for( int i = 0; i < VIPS_TRANSFORM_SCALE + 1; i++ ) {
+	for( int i = 0; i < VIPS_TRANSFORM_SCALE; i++ ) {
 		VIPS_FREE( reducev->matrixf[i] );
 		VIPS_FREE( reducev->matrixi[i] );
 		VIPS_FREE( reducev->matrixo[i] );
@@ -550,14 +550,13 @@ vips_reducev_gen( VipsRegion *out_region, void *vseq,
 	double Y = (r->top + 0.5) * reducev->vshrink - 0.5 - 
 		reducev->voffset;
 
-	for( int y = 0; y < r->height; y ++ ) { 
+	for( int y = 0; y < r->height; y++ ) { 
 		VipsPel *q = 
 			VIPS_REGION_ADDR( out_region, r->left, r->top + y );
 		const int py = (int) Y;
 		VipsPel *p = VIPS_REGION_ADDR( ir, r->left, py );
-		const int sy = Y * VIPS_TRANSFORM_SCALE * 2;
-		const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
-		const int ty = (siy + 1) >> 1;
+		const int sy = Y * VIPS_TRANSFORM_SCALE;
+		const int ty = sy & (VIPS_TRANSFORM_SCALE - 1);
 		const int *cyi = reducev->matrixi[ty];
 		const double *cyf = reducev->matrixf[ty];
 		const int lskip = VIPS_REGION_LSKIP( ir );
@@ -675,13 +674,12 @@ vips_reducev_vector_gen( VipsRegion *out_region, void *vseq,
 	double Y = (r->top + 0.5) * reducev->vshrink - 0.5 - 
 		reducev->voffset;
 
-	for( int y = 0; y < r->height; y ++ ) { 
+	for( int y = 0; y < r->height; y++ ) { 
 		VipsPel *q = 
 			VIPS_REGION_ADDR( out_region, r->left, r->top + y );
 		const int py = (int) Y;
-		const int sy = Y * VIPS_TRANSFORM_SCALE * 2;
-		const int siy = sy & (VIPS_TRANSFORM_SCALE * 2 - 1);
-		const int ty = (siy + 1) >> 1;
+		const int sy = Y * VIPS_TRANSFORM_SCALE;
+		const int ty = sy & (VIPS_TRANSFORM_SCALE - 1);
 		const int *cyo = reducev->matrixo[ty];
 
 #ifdef DEBUG_PIXELS
@@ -742,7 +740,7 @@ vips_reducev_raw( VipsReducev *reducev, VipsImage *in, VipsImage **out )
 	 */
 	if( in->BandFmt == VIPS_FORMAT_UCHAR &&
 		vips_vector_isenabled() ) 
-		for( int y = 0; y < VIPS_TRANSFORM_SCALE + 1; y++ ) {
+		for( int y = 0; y < VIPS_TRANSFORM_SCALE; y++ ) {
 			reducev->matrixo[y] = 
 				VIPS_ARRAY( NULL, reducev->n_point, int ); 
 			if( !reducev->matrixo[y] )
@@ -854,7 +852,7 @@ vips_reducev_build( VipsObject *object )
 
 	/* Build the tables of pre-computed coefficients.
 	 */
-	for( int y = 0; y < VIPS_TRANSFORM_SCALE + 1; y++ ) {
+	for( int y = 0; y < VIPS_TRANSFORM_SCALE; y++ ) {
 		reducev->matrixf[y] = 
 			VIPS_ARRAY( NULL, reducev->n_point, double ); 
 		reducev->matrixi[y] =