Implement floating point gradient computation, v2.

This patch modifies the gradient walker to be able to generate floating
point values directly in addition to a8r8g8b8 32 bit values.  This is
then used by the various gradient implementations to render in floating
point when asked to do so, instead of rendering to a8r8g8b8 and then
expanding to floating point as they were doing previously.

Changes since v1 (mlankhorst):
- Implement pixman_gradient_walker_pixel_32 without calling
  pixman_gradient_walker_pixel_float, to prevent performance degradation.
  Suggested by Adam Jackson.
- Fix whitespace errors.
- Remove unnecessary function prototypes in pixman-private.h

Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
[mlankhorst: Add comment about pixman_contract_from_float,
             based on Basille's suggestion]
Acked-by: Basile Clement <basile-pixman@clement.pm>
diff --git a/pixman/pixman-conical-gradient.c b/pixman/pixman-conical-gradient.c
index 8bb46ae..a39e20c 100644
--- a/pixman/pixman-conical-gradient.c
+++ b/pixman/pixman-conical-gradient.c
@@ -51,7 +51,10 @@
 }
 
 static uint32_t *
-conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+conical_get_scanline (pixman_iter_t                 *iter,
+		      const uint32_t                *mask,
+		      int                            Bpp,
+		      pixman_gradient_walker_write_t write_pixel)
 {
     pixman_image_t *image = iter->image;
     int x = iter->x;
@@ -61,7 +64,7 @@
 
     gradient_t *gradient = (gradient_t *)image;
     conical_gradient_t *conical = (conical_gradient_t *)image;
-    uint32_t       *end = buffer + width;
+    uint32_t       *end = buffer + width * (Bpp / 4);
     pixman_gradient_walker_t walker;
     pixman_bool_t affine = TRUE;
     double cx = 1.;
@@ -109,11 +112,12 @@
 	    {
 		double t = coordinates_to_parameter (rx, ry, conical->angle);
 
-		*buffer = _pixman_gradient_walker_pixel (
-		    &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
+		write_pixel (&walker,
+			     (pixman_fixed_48_16_t)pixman_double_to_fixed (t),
+			     buffer);
 	    }
 
-	    ++buffer;
+	    buffer += (Bpp / 4);
 
 	    rx += cx;
 	    ry += cy;
@@ -144,11 +148,12 @@
 
 		t = coordinates_to_parameter (x, y, conical->angle);
 
-		*buffer = _pixman_gradient_walker_pixel (
-		    &walker, (pixman_fixed_48_16_t)pixman_double_to_fixed (t));
+		write_pixel (&walker,
+			     (pixman_fixed_48_16_t)pixman_double_to_fixed (t),
+			     buffer);
 	    }
 
-	    ++buffer;
+	    buffer += (Bpp / 4);
 
 	    rx += cx;
 	    ry += cy;
@@ -161,14 +166,17 @@
 }
 
 static uint32_t *
+conical_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{
+    return conical_get_scanline (iter, mask, 4,
+				 _pixman_gradient_walker_write_narrow);
+}
+
+static uint32_t *
 conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
 {
-    uint32_t *buffer = conical_get_scanline_narrow (iter, NULL);
-
-    pixman_expand_to_float (
-	(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
-
-    return buffer;
+    return conical_get_scanline (iter, NULL, 16,
+				 _pixman_gradient_walker_write_wide);
 }
 
 void
diff --git a/pixman/pixman-gradient-walker.c b/pixman/pixman-gradient-walker.c
index 822f8e6..af4df58 100644
--- a/pixman/pixman-gradient-walker.c
+++ b/pixman/pixman-gradient-walker.c
@@ -122,10 +122,9 @@
 	    left_c = right_c;
     }
 
-    /* The alpha channel is scaled to be in the [0, 255] interval,
-     * and the red/green/blue channels are scaled to be in [0, 1].
+    /* The alpha/red/green/blue channels are scaled to be in [0, 1].
      * This ensures that after premultiplication all channels will
-     * be in the [0, 255] interval.
+     * be in the [0, 1] interval.
      */
     la = (left_c->alpha * (1.0f/257.0f));
     lr = (left_c->red * (1.0f/257.0f));
@@ -143,7 +142,7 @@
     if (FLOAT_IS_ZERO (rx - lx) || left_x == INT32_MIN || right_x == INT32_MAX)
     {
 	walker->a_s = walker->r_s = walker->g_s = walker->b_s = 0.0f;
-	walker->a_b = (la + ra) / 2.0f;
+	walker->a_b = (la + ra) / 510.0f;
 	walker->r_b = (lr + rr) / 510.0f;
 	walker->g_b = (lg + rg) / 510.0f;
 	walker->b_b = (lb + rb) / 510.0f;
@@ -152,12 +151,12 @@
     {
 	float w_rec = 1.0f / (rx - lx);
 
-	walker->a_b = (la * rx - ra * lx) * w_rec;
+	walker->a_b = (la * rx - ra * lx) * w_rec * (1.0f/255.0f);
 	walker->r_b = (lr * rx - rr * lx) * w_rec * (1.0f/255.0f);
 	walker->g_b = (lg * rx - rg * lx) * w_rec * (1.0f/255.0f);
 	walker->b_b = (lb * rx - rb * lx) * w_rec * (1.0f/255.0f);
 
-	walker->a_s = (ra - la) * w_rec;
+	walker->a_s = (ra - la) * w_rec * (1.0f/255.0f);
 	walker->r_s = (rr - lr) * w_rec * (1.0f/255.0f);
 	walker->g_s = (rg - lg) * w_rec * (1.0f/255.0f);
 	walker->b_s = (rb - lb) * w_rec * (1.0f/255.0f);
@@ -169,34 +168,97 @@
     walker->need_reset = FALSE;
 }
 
-uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
-                               pixman_fixed_48_16_t      x)
+static argb_t
+pixman_gradient_walker_pixel_float (pixman_gradient_walker_t *walker,
+				    pixman_fixed_48_16_t      x)
 {
-    float a, r, g, b;
-    uint8_t a8, r8, g8, b8;
-    uint32_t v;
+    argb_t f;
     float y;
 
     if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
-        gradient_walker_reset (walker, x);
+	gradient_walker_reset (walker, x);
 
     y = x * (1.0f / 65536.0f);
 
-    a = walker->a_s * y + walker->a_b;
-    r = a * (walker->r_s * y + walker->r_b);
-    g = a * (walker->g_s * y + walker->g_b);
-    b = a * (walker->b_s * y + walker->b_b);
+    f.a = walker->a_s * y + walker->a_b;
+    f.r = f.a * (walker->r_s * y + walker->r_b);
+    f.g = f.a * (walker->g_s * y + walker->g_b);
+    f.b = f.a * (walker->b_s * y + walker->b_b);
 
-    a8 = a + 0.5f;
-    r8 = r + 0.5f;
-    g8 = g + 0.5f;
-    b8 = b + 0.5f;
+    return f;
+}
 
-    v = ((a8 << 24) & 0xff000000) |
-        ((r8 << 16) & 0x00ff0000) |
-        ((g8 <<  8) & 0x0000ff00) |
-        ((b8 >>  0) & 0x000000ff);
+static uint32_t
+pixman_gradient_walker_pixel_32 (pixman_gradient_walker_t *walker,
+				 pixman_fixed_48_16_t      x)
+{
+    argb_t f;
+    float y;
 
-    return v;
+    if (walker->need_reset || x < walker->left_x || x >= walker->right_x)
+	gradient_walker_reset (walker, x);
+
+    y = x * (1.0f / 65536.0f);
+
+    /* Instead of [0...1] for ARGB, we want [0...255],
+     * multiply alpha with 255 and the color channels
+     * also get multiplied by the alpha multiplier.
+     *
+     * We don't use pixman_contract_from_float because it causes a 2x
+     * slowdown to do so, and the values are already normalized,
+     * so we don't have to worry about values < 0.f or > 1.f
+     */
+    f.a = 255.f * (walker->a_s * y + walker->a_b);
+    f.r = f.a * (walker->r_s * y + walker->r_b);
+    f.g = f.a * (walker->g_s * y + walker->g_b);
+    f.b = f.a * (walker->b_s * y + walker->b_b);
+
+    return (((uint8_t)(f.a + .5f) << 24) & 0xff000000) |
+           (((uint8_t)(f.r + .5f) << 16) & 0x00ff0000) |
+           (((uint8_t)(f.g + .5f) <<  8) & 0x0000ff00) |
+           (((uint8_t)(f.b + .5f) >>  0) & 0x000000ff);
+}
+
+void
+_pixman_gradient_walker_write_narrow (pixman_gradient_walker_t *walker,
+				      pixman_fixed_48_16_t      x,
+				      uint32_t                 *buffer)
+{
+    *buffer = pixman_gradient_walker_pixel_32 (walker, x);
+}
+
+void
+_pixman_gradient_walker_write_wide (pixman_gradient_walker_t *walker,
+				    pixman_fixed_48_16_t      x,
+				    uint32_t                 *buffer)
+{
+    *(argb_t *)buffer = pixman_gradient_walker_pixel_float (walker, x);
+}
+
+void
+_pixman_gradient_walker_fill_narrow (pixman_gradient_walker_t *walker,
+				     pixman_fixed_48_16_t      x,
+				     uint32_t                 *buffer,
+				     uint32_t                 *end)
+{
+    register uint32_t color;
+
+    color = pixman_gradient_walker_pixel_32 (walker, x);
+    while (buffer < end)
+	*buffer++ = color;
+}
+
+void
+_pixman_gradient_walker_fill_wide (pixman_gradient_walker_t *walker,
+				   pixman_fixed_48_16_t      x,
+				   uint32_t                 *buffer,
+				   uint32_t                 *end)
+{
+    register argb_t color;
+    argb_t *buffer_wide = (argb_t *)buffer;
+    argb_t *end_wide    = (argb_t *)end;
+
+    color = pixman_gradient_walker_pixel_float (walker, x);
+    while (buffer_wide < end_wide)
+	*buffer_wide++ = color;
 }
diff --git a/pixman/pixman-linear-gradient.c b/pixman/pixman-linear-gradient.c
index 40c8c9f..3f52850 100644
--- a/pixman/pixman-linear-gradient.c
+++ b/pixman/pixman-linear-gradient.c
@@ -89,8 +89,11 @@
 }
 
 static uint32_t *
-linear_get_scanline_narrow (pixman_iter_t  *iter,
-			    const uint32_t *mask)
+linear_get_scanline (pixman_iter_t                 *iter,
+		     const uint32_t                *mask,
+		     int                            Bpp,
+		     pixman_gradient_walker_write_t write_pixel,
+		     pixman_gradient_walker_fill_t  fill_pixel)
 {
     pixman_image_t *image  = iter->image;
     int             x      = iter->x;
@@ -103,7 +106,7 @@
     pixman_fixed_48_16_t dx, dy;
     gradient_t *gradient = (gradient_t *)image;
     linear_gradient_t *linear = (linear_gradient_t *)image;
-    uint32_t *end = buffer + width;
+    uint32_t *end = buffer + width * (Bpp / 4);
     pixman_gradient_walker_t walker;
 
     _pixman_gradient_walker_init (&walker, gradient, image->common.repeat);
@@ -137,7 +140,7 @@
     if (l == 0 || unit.vector[2] == 0)
     {
 	/* affine transformation only */
-        pixman_fixed_32_32_t t, next_inc;
+	pixman_fixed_32_32_t t, next_inc;
 	double inc;
 
 	if (l == 0 || v.vector[2] == 0)
@@ -152,7 +155,7 @@
 	    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
 		(l * (double) v.vector[2]);
 	    v2 = v.vector[2] * (1. / pixman_fixed_1);
-	    t = ((dx * v.vector[0] + dy * v.vector[1]) - 
+	    t = ((dx * v.vector[0] + dy * v.vector[1]) -
 		 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
 	    inc = (dx * unit.vector[0] + dy * unit.vector[1]) * invden;
 	}
@@ -160,11 +163,7 @@
 
 	if (((pixman_fixed_32_32_t )(inc * width)) == 0)
 	{
-	    register uint32_t color;
-
-	    color = _pixman_gradient_walker_pixel (&walker, t);
-	    while (buffer < end)
-		*buffer++ = color;
+	    fill_pixel (&walker, t, buffer, end);
 	}
 	else
 	{
@@ -175,12 +174,11 @@
 	    {
 		if (!mask || *mask++)
 		{
-		    *buffer = _pixman_gradient_walker_pixel (&walker,
-							     t + next_inc);
+		    write_pixel (&walker, t + next_inc, buffer);
 		}
 		i++;
 		next_inc = inc * i;
-		buffer++;
+		buffer += (Bpp / 4);
 	    }
 	}
     }
@@ -202,14 +200,14 @@
 		    invden = pixman_fixed_1 * (double) pixman_fixed_1 /
 			(l * (double) v.vector[2]);
 		    v2 = v.vector[2] * (1. / pixman_fixed_1);
-		    t = ((dx * v.vector[0] + dy * v.vector[1]) - 
+		    t = ((dx * v.vector[0] + dy * v.vector[1]) -
 			 (dx * linear->p1.x + dy * linear->p1.y) * v2) * invden;
 		}
 
-		*buffer = _pixman_gradient_walker_pixel (&walker, t);
+		write_pixel (&walker, t, buffer);
 	    }
 
-	    ++buffer;
+	    buffer += (Bpp / 4);
 
 	    v.vector[0] += unit.vector[0];
 	    v.vector[1] += unit.vector[1];
@@ -223,14 +221,21 @@
 }
 
 static uint32_t *
+linear_get_scanline_narrow (pixman_iter_t  *iter,
+			    const uint32_t *mask)
+{
+    return linear_get_scanline (iter, mask, 4,
+				_pixman_gradient_walker_write_narrow,
+				_pixman_gradient_walker_fill_narrow);
+}
+
+
+static uint32_t *
 linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
 {
-    uint32_t *buffer = linear_get_scanline_narrow (iter, NULL);
-
-    pixman_expand_to_float (
-	(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
-
-    return buffer;
+    return linear_get_scanline (iter, NULL, 16,
+				_pixman_gradient_walker_write_wide,
+				_pixman_gradient_walker_fill_wide);
 }
 
 void
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 73a5414..1bd9695 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -363,9 +363,38 @@
 _pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
                                pixman_fixed_48_16_t      pos);
 
-uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
-                               pixman_fixed_48_16_t      x);
+typedef void (*pixman_gradient_walker_write_t) (
+    pixman_gradient_walker_t *walker,
+    pixman_fixed_48_16_t      x,
+    uint32_t                 *buffer);
+
+void
+_pixman_gradient_walker_write_narrow(pixman_gradient_walker_t *walker,
+				     pixman_fixed_48_16_t      x,
+				     uint32_t                 *buffer);
+
+void
+_pixman_gradient_walker_write_wide(pixman_gradient_walker_t *walker,
+				   pixman_fixed_48_16_t      x,
+				   uint32_t                 *buffer);
+
+typedef void (*pixman_gradient_walker_fill_t) (
+    pixman_gradient_walker_t *walker,
+    pixman_fixed_48_16_t      x,
+    uint32_t                 *buffer,
+    uint32_t                 *end);
+
+void
+_pixman_gradient_walker_fill_narrow(pixman_gradient_walker_t *walker,
+				    pixman_fixed_48_16_t      x,
+				    uint32_t                 *buffer,
+				    uint32_t                 *end);
+
+void
+_pixman_gradient_walker_fill_wide(pixman_gradient_walker_t *walker,
+				  pixman_fixed_48_16_t      x,
+				  uint32_t                 *buffer,
+				  uint32_t                 *end);
 
 /*
  * Edges
diff --git a/pixman/pixman-radial-gradient.c b/pixman/pixman-radial-gradient.c
index 6a21796..0367d78 100644
--- a/pixman/pixman-radial-gradient.c
+++ b/pixman/pixman-radial-gradient.c
@@ -66,15 +66,18 @@
     return x1 * x2 + y1 * y2 + z1 * z2;
 }
 
-static uint32_t
-radial_compute_color (double                    a,
-		      double                    b,
-		      double                    c,
-		      double                    inva,
-		      double                    dr,
-		      double                    mindr,
-		      pixman_gradient_walker_t *walker,
-		      pixman_repeat_t           repeat)
+static void
+radial_write_color (double                         a,
+		    double                         b,
+		    double                         c,
+		    double                         inva,
+		    double                         dr,
+		    double                         mindr,
+		    pixman_gradient_walker_t      *walker,
+		    pixman_repeat_t                repeat,
+		    int                            Bpp,
+		    pixman_gradient_walker_write_t write_pixel,
+		    uint32_t                      *buffer)
 {
     /*
      * In this function error propagation can lead to bad results:
@@ -99,21 +102,25 @@
 	double t;
 
 	if (b == 0)
-	    return 0;
+	{
+	    memset (buffer, 0, Bpp);
+	    return;
+	}
 
 	t = pixman_fixed_1 / 2 * c / b;
 	if (repeat == PIXMAN_REPEAT_NONE)
 	{
 	    if (0 <= t && t <= pixman_fixed_1)
-		return _pixman_gradient_walker_pixel (walker, t);
+		return write_pixel (walker, t, buffer);
 	}
 	else
 	{
 	    if (t * dr >= mindr)
-		return _pixman_gradient_walker_pixel (walker, t);
+		return write_pixel (walker, t, buffer);
 	}
 
-	return 0;
+	memset (buffer, 0, Bpp);
+	return;
     }
 
     discr = fdot (b, a, 0, b, -c, 0);
@@ -139,24 +146,28 @@
 	if (repeat == PIXMAN_REPEAT_NONE)
 	{
 	    if (0 <= t0 && t0 <= pixman_fixed_1)
-		return _pixman_gradient_walker_pixel (walker, t0);
+		return write_pixel (walker, t0, buffer);
 	    else if (0 <= t1 && t1 <= pixman_fixed_1)
-		return _pixman_gradient_walker_pixel (walker, t1);
+		return write_pixel (walker, t1, buffer);
 	}
 	else
 	{
 	    if (t0 * dr >= mindr)
-		return _pixman_gradient_walker_pixel (walker, t0);
+		return write_pixel (walker, t0, buffer);
 	    else if (t1 * dr >= mindr)
-		return _pixman_gradient_walker_pixel (walker, t1);
+		return write_pixel (walker, t1, buffer);
 	}
     }
 
-    return 0;
+    memset (buffer, 0, Bpp);
+    return;
 }
 
 static uint32_t *
-radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+radial_get_scanline (pixman_iter_t                 *iter,
+		     const uint32_t                *mask,
+		     int                            Bpp,
+		     pixman_gradient_walker_write_t write_pixel)
 {
     /*
      * Implementation of radial gradients following the PDF specification.
@@ -247,7 +258,7 @@
 
     gradient_t *gradient = (gradient_t *)image;
     radial_gradient_t *radial = (radial_gradient_t *)image;
-    uint32_t *end = buffer + width;
+    uint32_t *end = buffer + width * (Bpp / 4);
     pixman_gradient_walker_t walker;
     pixman_vector_t v, unit;
 
@@ -330,18 +341,21 @@
 	{
 	    if (!mask || *mask++)
 	    {
-		*buffer = radial_compute_color (radial->a, b, c,
-						radial->inva,
-						radial->delta.radius,
-						radial->mindr,
-						&walker,
-						image->common.repeat);
+		radial_write_color (radial->a, b, c,
+				    radial->inva,
+				    radial->delta.radius,
+				    radial->mindr,
+				    &walker,
+				    image->common.repeat,
+				    Bpp,
+				    write_pixel,
+				    buffer);
 	    }
 
 	    b += db;
 	    c += dc;
 	    dc += ddc;
-	    ++buffer;
+	    buffer += (Bpp / 4);
 	}
     }
     else
@@ -375,20 +389,23 @@
 			      pdx, pdy, radial->c1.radius);
 		    /*  / pixman_fixed_1 / pixman_fixed_1 */
 
-		    *buffer = radial_compute_color (radial->a, b, c,
-						    radial->inva,
-						    radial->delta.radius,
-						    radial->mindr,
-						    &walker,
-						    image->common.repeat);
+		    radial_write_color (radial->a, b, c,
+					radial->inva,
+					radial->delta.radius,
+					radial->mindr,
+					&walker,
+					image->common.repeat,
+					Bpp,
+					write_pixel,
+					buffer);
 		}
 		else
 		{
-		    *buffer = 0;
+		    memset (buffer, 0, Bpp);
 		}
 	    }
 
-	    ++buffer;
+	    buffer += (Bpp / 4);
 
 	    v.vector[0] += unit.vector[0];
 	    v.vector[1] += unit.vector[1];
@@ -401,14 +418,17 @@
 }
 
 static uint32_t *
+radial_get_scanline_narrow (pixman_iter_t *iter, const uint32_t *mask)
+{
+    return radial_get_scanline (iter, mask, 4,
+				_pixman_gradient_walker_write_narrow);
+}
+
+static uint32_t *
 radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
 {
-    uint32_t *buffer = radial_get_scanline_narrow (iter, NULL);
-
-    pixman_expand_to_float (
-	(argb_t *)buffer, buffer, PIXMAN_a8r8g8b8, iter->width);
-
-    return buffer;
+    return radial_get_scanline (iter, NULL, 16,
+				_pixman_gradient_walker_write_wide);
 }
 
 void
@@ -422,11 +442,11 @@
 
 PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_radial_gradient (const pixman_point_fixed_t *  inner,
-                                     const pixman_point_fixed_t *  outer,
-                                     pixman_fixed_t                inner_radius,
-                                     pixman_fixed_t                outer_radius,
-                                     const pixman_gradient_stop_t *stops,
-                                     int                           n_stops)
+				     const pixman_point_fixed_t *  outer,
+				     pixman_fixed_t                inner_radius,
+				     pixman_fixed_t                outer_radius,
+				     const pixman_gradient_stop_t *stops,
+				     int                           n_stops)
 {
     pixman_image_t *image;
     radial_gradient_t *radial;