Start each pipeline with (x,y) in (dr,dg) registers for the shader.

Image shaders need to do some geometry work before sampling the image colors:
  1) determine dst coordinates
  2) map back to src coordinates
  3) tiling

Feeding (x,y) through as (dr,dg) registers makes step 1) easy, perhaps trivial, while leaving (r,g,b,a) with their usual meanings, "the color", starting with the paint color.

This is easy to tweak into something like (x+0.5, y+0.5, 1) in (dr,dg,db) once this lands.  Mostly I just want to get all the uninteresting boilerplate out of the way first.

BUG=skia:

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4791
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: Ia07815d942ded6672dc1df785caf80a508fc8f37
Reviewed-on: https://skia-review.googlesource.com/4791
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
index 624b795..15576c4 100644
--- a/bench/SkRasterPipelineBench.cpp
+++ b/bench/SkRasterPipelineBench.cpp
@@ -47,7 +47,7 @@
         auto compiled = p.compile();
 
         while (loops --> 0) {
-            compiled(0, N);
+            compiled(0,0, N);
         }
     }
 };
diff --git a/src/core/SkColorSpaceXform_A2B.cpp b/src/core/SkColorSpaceXform_A2B.cpp
index 04ecf77..e54cb27 100644
--- a/src/core/SkColorSpaceXform_A2B.cpp
+++ b/src/core/SkColorSpaceXform_A2B.cpp
@@ -113,7 +113,7 @@
 
     auto p = pipeline.compile();
 
-    p(0, count);
+    p(0,0, count);
 
     return true;
 }
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 76ca68a..ccd38ac 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -74,8 +74,8 @@
         return hash_fn(data, bytes, seed);
     }
 
-    extern
-    std::function<void(size_t, size_t)> (*compile_pipeline)(const SkRasterPipeline::Stage*, int);
+    extern std::function<void(size_t, size_t, size_t)>
+    (*compile_pipeline)(const SkRasterPipeline::Stage*, int);
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/core/SkPM4fPriv.h b/src/core/SkPM4fPriv.h
index 304a49f..aedb0ad 100644
--- a/src/core/SkPM4fPriv.h
+++ b/src/core/SkPM4fPriv.h
@@ -126,7 +126,7 @@
                                SkColorSpace::MakeNamed(SkColorSpace::kSRGB_Named).get(), dst);
         p.append(SkRasterPipeline::store_f32, &color4f_ptr);
 
-        p.compile()(0,1);
+        p.compile()(0,0,1);
     } else {
         // Linear gamma, dst gamut.
         swizzle_rb(SkNx_cast<float>(Sk4b::Load(&color)) * (1/255.0f)).store(&color4f);
diff --git a/src/core/SkRasterPipeline.cpp b/src/core/SkRasterPipeline.cpp
index 54de679..c768f1c 100644
--- a/src/core/SkRasterPipeline.cpp
+++ b/src/core/SkRasterPipeline.cpp
@@ -22,6 +22,6 @@
     }
 }
 
-std::function<void(size_t, size_t)> SkRasterPipeline::compile() const {
+std::function<void(size_t, size_t, size_t)> SkRasterPipeline::compile() const {  // args: (x, y, n)
     return SkOpts::compile_pipeline(fStages, fNum);
 }
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 361793b..13d4e28 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -91,8 +91,8 @@
     // Append all stages to this pipeline.
     void extend(const SkRasterPipeline&);
 
-    // Runs the pipeline walking x through [x,x+n).
-    std::function<void(size_t x, size_t n)> compile() const;
+    // Runs the pipeline walking x through [x,x+n), holding y constant.
+    std::function<void(size_t x, size_t y, size_t n)> compile() const;
 
     struct Stage {
         StockStage stage;
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 5a7ee67..71267a8 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -50,10 +50,10 @@
     SkRasterPipeline fShader;
 
     // These functions are compiled lazily when first used.
-    std::function<void(size_t, size_t)> fBlitH         = nullptr,
-                                        fBlitAntiH     = nullptr,
-                                        fBlitMaskA8    = nullptr,
-                                        fBlitMaskLCD16 = nullptr;
+    std::function<void(size_t, size_t, size_t)> fBlitH         = nullptr,
+                                                fBlitAntiH     = nullptr,
+                                                fBlitMaskA8    = nullptr,
+                                                fBlitMaskLCD16 = nullptr;
 
     // These values are pointed to by the compiled blit functions
     // above, which allows us to adjust them from call to call.
@@ -131,7 +131,7 @@
 
     if (is_constant) {
         pipeline->append(SkRasterPipeline::store_f32, &paintColor);
-        pipeline->compile()(0,1);
+        pipeline->compile()(0,0, 1);
 
         *pipeline = SkRasterPipeline();
         pipeline->append(SkRasterPipeline::constant_color, paintColor);
@@ -206,7 +206,7 @@
     }
 
     fDstPtr = fDst.writable_addr(0,y);
-    fBlitH(x,w);
+    fBlitH(x,y, w);
 }
 
 void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
@@ -234,7 +234,7 @@
             case 0xff: this->blitH(x,y,run); break;
             default:
                 fConstantCoverage = *aa * (1/255.0f);
-                fBlitAntiH(x, run);
+                fBlitAntiH(x,y, run);
         }
         x    += run;
         runs += run;
@@ -283,11 +283,11 @@
         switch (mask.fFormat) {
             case SkMask::kA8_Format:
                 fMaskPtr = mask.getAddr8(x,y)-x;
-                fBlitMaskA8(x, clip.width());
+                fBlitMaskA8(x,y, clip.width());
                 break;
             case SkMask::kLCD16_Format:
                 fMaskPtr = mask.getAddrLCD16(x,y)-x;
-                fBlitMaskLCD16(x, clip.width());
+                fBlitMaskLCD16(x,y, clip.width());
                 break;
             default:
                 // TODO
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index a687627..dbdd00d 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -626,23 +626,23 @@
     struct Memset16 {
         uint16_t** dst;
         uint16_t val;
-        void operator()(size_t x, size_t n) { sk_memset16(*dst + x, val, n); }
+        void operator()(size_t x, size_t /*y*/, size_t n) { sk_memset16(*dst + x, val, n); }
     };
 
     struct Memset32 {
         uint32_t** dst;
         uint32_t val;
-        void operator()(size_t x, size_t n) { sk_memset32(*dst + x, val, n); }
+        void operator()(size_t x, size_t /*y*/, size_t n) { sk_memset32(*dst + x, val, n); }
     };
 
     struct Memset64 {
         uint64_t** dst;
         uint64_t val;
-        void operator()(size_t x, size_t n) { sk_memset64(*dst + x, val, n); }
+        void operator()(size_t x, size_t /*y*/, size_t n) { sk_memset64(*dst + x, val, n); }
     };
 
-    SI std::function<void(size_t, size_t)> compile_pipeline(const SkRasterPipeline::Stage* stages,
-                                                            int nstages) {
+    SI std::function<void(size_t, size_t, size_t)>
+    compile_pipeline(const SkRasterPipeline::Stage* stages, int nstages) {
         if (nstages == 2 && stages[0].stage == SkRasterPipeline::constant_color) {
             SkPM4f src = *(const SkPM4f*)stages[0].ctx;
             void* dst = stages[1].ctx;
@@ -679,16 +679,22 @@
                 fBody[nstages-1].ctx = fTail[nstages-1].ctx = stages[nstages-1].ctx;
             }
 
-            void operator()(size_t x, size_t n) {
+            void operator()(size_t x, size_t y, size_t n) {  // Walk [x,x+n) with y held constant.
                 SkNf v;  // Fastest to start uninitialized.
 
+                float dx[] = { 0,1,2,3,4,5,6,7 };      // Per-lane x offsets.
+                SkNf X  = SkNf(x) + SkNf::Load(dx),    // Lane i starts at x+i, not a splat of x.
+                     Y  = SkNf(y),                     // Same y in every lane.
+                     DX = SkNf(static_cast<float>(N)); // Uniform step, in lockstep with x += N.
+
                 while (n >= N) {
-                    fBodyStart(fBody, x, v,v,v,v, v,v,v,v);
+                    fBodyStart(fBody, x, v,v,v,v, X,Y,v,v);
+                    X += DX;
                     x += N;
                     n -= N;
                 }
                 if (n) {
-                    fTailStart(fTail, x,n, v,v,v,v, v,v,v,v);
+                    fTailStart(fTail, x,n, v,v,v,v, X,Y,v,v);
                 }
             }
 
diff --git a/tests/SkRasterPipelineTest.cpp b/tests/SkRasterPipelineTest.cpp
index 96798fc..e3f2ca2 100644
--- a/tests/SkRasterPipelineTest.cpp
+++ b/tests/SkRasterPipelineTest.cpp
@@ -25,7 +25,7 @@
     p.append(SkRasterPipeline::load_d_f16, &load_d_ctx);
     p.append(SkRasterPipeline::srcover);
     p.append(SkRasterPipeline::store_f16, &store_ctx);
-    p.compile()(0, 1);
+    p.compile()(0,0, 1);
 
     // We should see half-intensity magenta.
     REPORTER_ASSERT(r, ((result >>  0) & 0xffff) == 0x3800);
@@ -37,7 +37,7 @@
 DEF_TEST(SkRasterPipeline_empty, r) {
     // No asserts... just a test that this is safe to run.
     SkRasterPipeline p;
-    p.compile()(0,20);
+    p.compile()(0,0, 20);  // x=0, y=0, n=20
 }
 
 DEF_TEST(SkRasterPipeline_nonsense, r) {
@@ -45,5 +45,5 @@
     // srcover() calls st->next(); this makes sure we've always got something there to call.
     SkRasterPipeline p;
     p.append(SkRasterPipeline::srcover);
-    p.compile()(0, 20);
+    p.compile()(0,0, 20);
 }