bits: optimise fetching width==1 repeats

While profiling ign.com, 20% of the entire render time was spent in this
single operation:

<< /content //COLOR_ALPHA /width 480 /height 800 >> surface context
<< /width 1 /height 677 /format //ARGB32 /source <|!!!@jGb!m5gD']#$jFHGWtZcK&2i)Up=!TuR9`G<8;ZQp[FQk;emL9ibhbEL&NTh-j63LhHo$E=mSG,0p71`cRJHcget4%<S\X+~> >> image pattern
  //EXTEND_REPEAT set-extend
  set-source
n 0 0 480 677 rectangle
fill+
pop

which is a simple composition of a single-pixel-wide image. Sadly this
is a workaround for the lack of independent repeat-x/y handling in cairo
and pixman. Worse still, width-1 images are the worst case for the
general repeat path. So special-case width == 1: fetch the row's single
pixel once and replicate it across the requested span.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index f540c76..f382c65 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -935,6 +935,41 @@
 MAKE_FETCHERS (normal_r5g6b5,    r5g6b5,   PIXMAN_REPEAT_NORMAL)
 
 static void
+replicate_pixel_32 (bits_image_t *   bits,
+		    int              x,
+		    int              y,
+		    int              width,
+		    uint32_t *       buffer)
+{
+    uint32_t color;
+    uint32_t *end;
+
+    color = bits->fetch_pixel_32 (bits, x, y);
+
+    end = buffer + width;
+    while (buffer < end)
+	*(buffer++) = color;
+}
+
+static void
+replicate_pixel_64 (bits_image_t *   bits,
+		    int              x,
+		    int              y,
+		    int              width,
+		    uint32_t *       b)
+{
+    uint64_t color;
+    uint64_t *buffer = (uint64_t *)b;
+    uint64_t *end;
+
+    color = bits->fetch_pixel_64 (bits, x, y);
+
+    end = buffer + width;
+    while (buffer < end)
+	*(buffer++) = color;
+}
+
+static void
 bits_image_fetch_solid_32 (pixman_image_t * image,
                            int              x,
                            int              y,
@@ -942,14 +977,7 @@
                            uint32_t *       buffer,
                            const uint32_t * mask)
 {
-    uint32_t color;
-    uint32_t *end;
-
-    color = image->bits.fetch_pixel_32 (&image->bits, 0, 0);
-
-    end = buffer + width;
-    while (buffer < end)
-	*(buffer++) = color;
+    replicate_pixel_32 (&image->bits, 0, 0, width, buffer);
 }
 
 static void
@@ -960,15 +988,7 @@
                            uint32_t *       b,
                            const uint32_t * unused)
 {
-    uint64_t color;
-    uint64_t *buffer = (uint64_t *)b;
-    uint64_t *end;
-
-    color = image->bits.fetch_pixel_64 (&image->bits, 0, 0);
-
-    end = buffer + width;
-    while (buffer < end)
-	*(buffer++) = color;
+    replicate_pixel_64 (&image->bits, 0, 0, width, b);
 }
 
 static void
@@ -1031,6 +1051,16 @@
     while (y >= image->height)
 	y -= image->height;
 
+    if (image->width == 1)
+    {
+	if (wide)
+	    replicate_pixel_64 (image, 0, y, width, buffer);
+	else
+	    replicate_pixel_32 (image, 0, y, width, buffer);
+
+	return;
+    }
+
     while (width)
     {
 	while (x < 0)