Support more flexibility in qcms output format.

This will help support Chrome and should also let us output directly to a
cairo-compatible format.

Reviewed-by: Benoit Girard <b56girard@gmail.com>
Mozilla-bug: 791422
diff --git a/qcmsint.h b/qcmsint.h
index b9b7e1c..15c18ff 100644
--- a/qcmsint.h
+++ b/qcmsint.h
@@ -295,3 +295,26 @@
                                           size_t length);
 
 extern qcms_bool qcms_supports_iccv4;
+
+#ifdef NATIVE_OUTPUT /* 4-byte pixels for both RGB and RGBA output; channel order depends on IS_LITTLE_ENDIAN */
+# define RGB_OUTPUT_COMPONENTS 4 /* bytes the dest pointer advances per pixel in RGB transforms */
+# define RGBA_OUTPUT_COMPONENTS 4 /* bytes the dest pointer advances per pixel in RGBA transforms */
+# ifdef IS_LITTLE_ENDIAN
+#  define OUTPUT_A_INDEX 3 /* byte offsets within one pixel: B, G, R, A */
+#  define OUTPUT_R_INDEX 2
+#  define OUTPUT_G_INDEX 1
+#  define OUTPUT_B_INDEX 0
+# else
+#  define OUTPUT_A_INDEX 0 /* byte offsets within one pixel: A, R, G, B */
+#  define OUTPUT_R_INDEX 1
+#  define OUTPUT_G_INDEX 2
+#  define OUTPUT_B_INDEX 3
+# endif
+#else /* !NATIVE_OUTPUT: R, G, B (3 bytes) or R, G, B, A (4 bytes) in that byte order */
+# define RGB_OUTPUT_COMPONENTS 3
+# define RGBA_OUTPUT_COMPONENTS 4
+# define OUTPUT_R_INDEX 0
+# define OUTPUT_G_INDEX 1
+# define OUTPUT_B_INDEX 2
+# define OUTPUT_A_INDEX 3
+#endif /* NATIVE_OUTPUT */
diff --git a/transform-sse1.c b/transform-sse1.c
index cdd4b27..ecca4cc 100644
--- a/transform-sse1.c
+++ b/transform-sse1.c
@@ -117,10 +117,10 @@
         src += 3;
 
         /* use calc'd indices to output RGB values */
-        dest[0] = otdata_r[output[0]];
-        dest[1] = otdata_g[output[1]];
-        dest[2] = otdata_b[output[2]];
-        dest += 3;
+        dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+        dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+        dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
+        dest += RGB_OUTPUT_COMPONENTS;
     }
 
     /* handle final (maybe only) pixel */
@@ -142,9 +142,9 @@
     result = _mm_movehl_ps(result, result);
     *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
 
-    dest[0] = otdata_r[output[0]];
-    dest[1] = otdata_g[output[1]];
-    dest[2] = otdata_b[output[2]];
+    dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+    dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+    dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
 
     _mm_empty();
 }
@@ -219,7 +219,7 @@
         vec_b = _mm_mul_ps(vec_b, mat2);
 
         /* store alpha for this pixel; load alpha for next */
-        dest[3] = alpha;
+        dest[OUTPUT_A_INDEX] = alpha;
         alpha   = src[3];
 
         /* crunch, crunch, crunch */
@@ -240,9 +240,9 @@
         src += 4;
 
         /* use calc'd indices to output RGB values */
-        dest[0] = otdata_r[output[0]];
-        dest[1] = otdata_g[output[1]];
-        dest[2] = otdata_b[output[2]];
-        dest += 4;
+        dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+        dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+        dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
+        dest += RGBA_OUTPUT_COMPONENTS;
     }
 
@@ -256,7 +256,7 @@
     vec_g = _mm_mul_ps(vec_g, mat1);
     vec_b = _mm_mul_ps(vec_b, mat2);
 
-    dest[3] = alpha;
+    dest[OUTPUT_A_INDEX] = alpha;
 
     vec_r  = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
     vec_r  = _mm_max_ps(min, vec_r);
@@ -267,9 +267,9 @@
     result = _mm_movehl_ps(result, result);
     *((__m64 *)&output[2]) = _mm_cvtps_pi32(result);
 
-    dest[0] = otdata_r[output[0]];
-    dest[1] = otdata_g[output[1]];
-    dest[2] = otdata_b[output[2]];
+    dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+    dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+    dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
 
     _mm_empty();
 }
diff --git a/transform-sse2.c b/transform-sse2.c
index 634396a..7c814e4 100644
--- a/transform-sse2.c
+++ b/transform-sse2.c
@@ -116,10 +116,10 @@
         src += 3;
 
         /* use calc'd indices to output RGB values */
-        dest[0] = otdata_r[output[0]];
-        dest[1] = otdata_g[output[1]];
-        dest[2] = otdata_b[output[2]];
-        dest += 3;
+        dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+        dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+        dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
+        dest += RGB_OUTPUT_COMPONENTS;
     }
 
     /* handle final (maybe only) pixel */
@@ -139,9 +139,9 @@
 
     _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
 
-    dest[0] = otdata_r[output[0]];
-    dest[1] = otdata_g[output[1]];
-    dest[2] = otdata_b[output[2]];
+    dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+    dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+    dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
 }
 
 void qcms_transform_data_rgba_out_lut_sse2(qcms_transform *transform,
@@ -214,7 +214,7 @@
         vec_b = _mm_mul_ps(vec_b, mat2);
 
         /* store alpha for this pixel; load alpha for next */
-        dest[3] = alpha;
+        dest[OUTPUT_A_INDEX] = alpha;
         alpha   = src[3];
 
         /* crunch, crunch, crunch */
@@ -233,10 +233,10 @@
         src += 4;
 
         /* use calc'd indices to output RGB values */
-        dest[0] = otdata_r[output[0]];
-        dest[1] = otdata_g[output[1]];
-        dest[2] = otdata_b[output[2]];
-        dest += 4;
+        dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+        dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+        dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
+        dest += RGBA_OUTPUT_COMPONENTS;
     }
 
     /* handle final (maybe only) pixel */
@@ -249,7 +249,7 @@
     vec_g = _mm_mul_ps(vec_g, mat1);
     vec_b = _mm_mul_ps(vec_b, mat2);
 
-    dest[3] = alpha;
+    dest[OUTPUT_A_INDEX] = alpha;
 
     vec_r  = _mm_add_ps(vec_r, _mm_add_ps(vec_g, vec_b));
     vec_r  = _mm_max_ps(min, vec_r);
@@ -258,9 +258,9 @@
 
     _mm_store_si128((__m128i*)output, _mm_cvtps_epi32(result));
 
-    dest[0] = otdata_r[output[0]];
-    dest[1] = otdata_g[output[1]];
-    dest[2] = otdata_b[output[2]];
+    dest[OUTPUT_R_INDEX] = otdata_r[output[0]];
+    dest[OUTPUT_G_INDEX] = otdata_g[output[1]];
+    dest[OUTPUT_B_INDEX] = otdata_b[output[2]];
 }
 
 
diff --git a/transform.c b/transform.c
index ce271fb..51ab83f 100644
--- a/transform.c
+++ b/transform.c
@@ -251,9 +251,10 @@
 		float out_device_g = pow(out_linear_g, transform->out_gamma_g);
 		float out_device_b = pow(out_linear_b, transform->out_gamma_b);
 
-		*dest++ = clamp_u8(255*out_device_r);
-		*dest++ = clamp_u8(255*out_device_g);
-		*dest++ = clamp_u8(255*out_device_b);
+		dest[OUTPUT_R_INDEX] = clamp_u8(255*out_device_r);
+		dest[OUTPUT_G_INDEX] = clamp_u8(255*out_device_g);
+		dest[OUTPUT_B_INDEX] = clamp_u8(255*out_device_b);
+		dest += RGB_OUTPUT_COMPONENTS;
 	}
 }
 #endif
@@ -271,9 +272,10 @@
 		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 
-		*dest++ = clamp_u8(out_device_r*255);
-		*dest++ = clamp_u8(out_device_g*255);
-		*dest++ = clamp_u8(out_device_b*255);
+		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
+		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
+		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
+		dest += RGB_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -297,10 +299,11 @@
 		out_device_g = lut_interp_linear(linear, transform->output_gamma_lut_g, transform->output_gamma_lut_g_length);
 		out_device_b = lut_interp_linear(linear, transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 
-		*dest++ = clamp_u8(out_device_r*255);
-		*dest++ = clamp_u8(out_device_g*255);
-		*dest++ = clamp_u8(out_device_b*255);
-		*dest++ = alpha;
+		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
+		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
+		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
+		dest[OUTPUT_A_INDEX] = alpha;
+		dest += RGBA_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -317,9 +320,10 @@
 		/* we could round here... */
 		gray = linear * PRECACHE_OUTPUT_MAX;
 
-		*dest++ = transform->output_table_r->data[gray];
-		*dest++ = transform->output_table_g->data[gray];
-		*dest++ = transform->output_table_b->data[gray];
+		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
+		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
+		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
+		dest += RGB_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -336,10 +340,11 @@
 		/* we could round here... */
 		gray = linear * PRECACHE_OUTPUT_MAX;
 
-		*dest++ = transform->output_table_r->data[gray];
-		*dest++ = transform->output_table_g->data[gray];
-		*dest++ = transform->output_table_b->data[gray];
-		*dest++ = alpha;
+		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[gray];
+		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[gray];
+		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[gray];
+		dest[OUTPUT_A_INDEX] = alpha;
+		dest += RGBA_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -370,9 +375,10 @@
 		g = out_linear_g * PRECACHE_OUTPUT_MAX;
 		b = out_linear_b * PRECACHE_OUTPUT_MAX;
 
-		*dest++ = transform->output_table_r->data[r];
-		*dest++ = transform->output_table_g->data[g];
-		*dest++ = transform->output_table_b->data[b];
+		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
+		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
+		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
+		dest += RGB_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -404,10 +410,11 @@
 		g = out_linear_g * PRECACHE_OUTPUT_MAX;
 		b = out_linear_b * PRECACHE_OUTPUT_MAX;
 
-		*dest++ = transform->output_table_r->data[r];
-		*dest++ = transform->output_table_g->data[g];
-		*dest++ = transform->output_table_b->data[b];
-		*dest++ = alpha;
+		dest[OUTPUT_R_INDEX] = transform->output_table_r->data[r];
+		dest[OUTPUT_G_INDEX] = transform->output_table_g->data[g];
+		dest[OUTPUT_B_INDEX] = transform->output_table_b->data[b];
+		dest[OUTPUT_A_INDEX] = alpha;
+		dest += RGBA_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -577,10 +584,11 @@
 		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 
-		*dest++ = clamp_u8(clut_r*255.0f);
-		*dest++ = clamp_u8(clut_g*255.0f);
-		*dest++ = clamp_u8(clut_b*255.0f);
-		*dest++ = in_a;
+		dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
+		dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
+		dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
+		dest[OUTPUT_A_INDEX] = in_a;
+		dest += RGBA_OUTPUT_COMPONENTS;
 	}	
 }
 
@@ -691,9 +699,10 @@
 		clut_g = c0_g + c1_g*rx + c2_g*ry + c3_g*rz;
 		clut_b = c0_b + c1_b*rx + c2_b*ry + c3_b*rz;
 
-		*dest++ = clamp_u8(clut_r*255.0f);
-		*dest++ = clamp_u8(clut_g*255.0f);
-		*dest++ = clamp_u8(clut_b*255.0f);
+		dest[OUTPUT_R_INDEX] = clamp_u8(clut_r*255.0f);
+		dest[OUTPUT_G_INDEX] = clamp_u8(clut_g*255.0f);
+		dest[OUTPUT_B_INDEX] = clamp_u8(clut_b*255.0f);
+		dest += RGB_OUTPUT_COMPONENTS;
 	}	
 }
 
@@ -726,9 +735,10 @@
 		out_device_b = lut_interp_linear(out_linear_b, 
 				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 
-		*dest++ = clamp_u8(out_device_r*255);
-		*dest++ = clamp_u8(out_device_g*255);
-		*dest++ = clamp_u8(out_device_b*255);
+		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
+		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
+		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
+		dest += RGB_OUTPUT_COMPONENTS;
 	}
 }
 
@@ -762,10 +772,11 @@
 		out_device_b = lut_interp_linear(out_linear_b, 
 				transform->output_gamma_lut_b, transform->output_gamma_lut_b_length);
 
-		*dest++ = clamp_u8(out_device_r*255);
-		*dest++ = clamp_u8(out_device_g*255);
-		*dest++ = clamp_u8(out_device_b*255);
-		*dest++ = alpha;
+		dest[OUTPUT_R_INDEX] = clamp_u8(out_device_r*255);
+		dest[OUTPUT_G_INDEX] = clamp_u8(out_device_g*255);
+		dest[OUTPUT_B_INDEX] = clamp_u8(out_device_b*255);
+		dest[OUTPUT_A_INDEX] = alpha;
+		dest += RGBA_OUTPUT_COMPONENTS;
 	}
 }