More shader preliminaries / refactoring

    - thread the CTM through blitter creation and SkShader::appendStages()
    - make the blitter apply paint-alpha modulation (srcin) once, instead of each shader

TBR=herb@google.com

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4830
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I8161e6b3864c4e48e4d47d5ad40a56a13c02fee8
Reviewed-on: https://skia-review.googlesource.com/4830
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
diff --git a/include/core/SkShader.h b/include/core/SkShader.h
index 48e8419..d873ad1 100644
--- a/include/core/SkShader.h
+++ b/include/core/SkShader.h
@@ -475,7 +475,8 @@
     SK_DEFINE_FLATTENABLE_TYPE(SkShader)
     SK_DECLARE_FLATTENABLE_REGISTRAR_GROUP()
 
-    bool appendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*) const;
+    bool appendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*,
+                      const SkMatrix& ctm) const;
 
 protected:
     void flatten(SkWriteBuffer&) const override;
@@ -508,7 +509,8 @@
         return nullptr;
     }
 
-    virtual bool onAppendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*) const {
+    virtual bool onAppendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*,
+                                const SkMatrix& ctm) const {
         return false;
     }
 
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index 35a213b..af3cc04 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -858,7 +858,7 @@
         p->setColor(0);
     }
 
-    if (SkBlitter* blitter = SkCreateRasterPipelineBlitter(device, *paint, allocator)) {
+    if (SkBlitter* blitter = SkCreateRasterPipelineBlitter(device, *paint, matrix, allocator)) {
         return blitter;
     }
 
diff --git a/src/core/SkColorShader.cpp b/src/core/SkColorShader.cpp
index 69d9e46..072e2a9 100644
--- a/src/core/SkColorShader.cpp
+++ b/src/core/SkColorShader.cpp
@@ -309,34 +309,21 @@
     return choose_blitprocs(&fPM4f, info, state);
 }
 
-// To shade a constant color:
-//    1) move the paint color to dst registers
-//    2) load the constant color into the src registers
-//    3) srcin, s' = s*da, modulating the src color by the paint alpha.
-
 bool SkColorShader::onAppendStages(SkRasterPipeline* p,
                                    SkColorSpace* dst,
-                                   SkFallbackAlloc* scratch) const {
+                                   SkFallbackAlloc* scratch,
+                                   const SkMatrix& ctm) const {
     auto color = scratch->make<SkPM4f>(SkPM4f_from_SkColor(fColor, dst));
-    p->append(SkRasterPipeline::move_src_dst);
     p->append(SkRasterPipeline::constant_color, color);
-    if (!append_gamut_transform(p, scratch,
-                                SkColorSpace::MakeNamed(SkColorSpace::kSRGB_Named).get(), dst)) {
-        return false;
-    }
-    p->append(SkRasterPipeline::srcin);
-    return true;
+    return append_gamut_transform(p, scratch,
+                                  SkColorSpace::MakeNamed(SkColorSpace::kSRGB_Named).get(), dst);
 }
 
 bool SkColor4Shader::onAppendStages(SkRasterPipeline* p,
                                     SkColorSpace* dst,
-                                    SkFallbackAlloc* scratch) const {
+                                    SkFallbackAlloc* scratch,
+                                    const SkMatrix& ctm) const {
     auto color = scratch->make<SkPM4f>(fColor4.premul());
-    p->append(SkRasterPipeline::move_src_dst);
     p->append(SkRasterPipeline::constant_color, color);
-    if (!append_gamut_transform(p, scratch, fColorSpace.get(), dst)) {
-        return false;
-    }
-    p->append(SkRasterPipeline::srcin);
-    return true;
+    return append_gamut_transform(p, scratch, fColorSpace.get(), dst);
 }
diff --git a/src/core/SkColorShader.h b/src/core/SkColorShader.h
index 9aee365..1ca4752 100644
--- a/src/core/SkColorShader.h
+++ b/src/core/SkColorShader.h
@@ -65,7 +65,8 @@
         *lum = fColor;
         return true;
     }
-    bool onAppendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*) const override;
+    bool onAppendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*,
+                        const SkMatrix& ctm) const override;
 
 private:
     SkColor fColor;
@@ -120,7 +121,8 @@
         *lum = fCachedByteColor;
         return true;
     }
-    bool onAppendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*) const override;
+    bool onAppendStages(SkRasterPipeline*, SkColorSpace*, SkFallbackAlloc*,
+                        const SkMatrix& ctm) const override;
 
 private:
     sk_sp<SkColorSpace> fColorSpace;
diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h
index 46b2b7e..aa5deb2 100644
--- a/src/core/SkCoreBlitters.h
+++ b/src/core/SkCoreBlitters.h
@@ -212,6 +212,7 @@
 
 
 // Returns nullptr if no SkRasterPipeline blitter can be constructed for this paint.
-SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap&, const SkPaint&, SkTBlitterAllocator*);
+SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap&, const SkPaint&, const SkMatrix& ctm,
+                                         SkTBlitterAllocator*);
 
 #endif
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 13d4e28..2e7f3ee 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -55,7 +55,8 @@
 // the Stage*.  This mostly matters on 64-bit Windows where every register is precious.
 
 #define SK_RASTER_PIPELINE_STAGES(M)                             \
-    M(move_src_dst) M(clamp_0) M(clamp_a) M(unpremul) M(premul)  \
+    M(move_src_dst) M(swap_src_dst)                              \
+    M(clamp_0) M(clamp_a) M(unpremul) M(premul)                  \
     M(constant_color) M(store_f32)                               \
     M(load_s_565)  M(load_d_565)  M(store_565)                   \
     M(load_s_srgb) M(load_d_srgb) M(store_srgb)                  \
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 71267a8..0adf33a 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -20,7 +20,8 @@
 
 class SkRasterPipelineBlitter : public SkBlitter {
 public:
-    static SkBlitter* Create(const SkPixmap&, const SkPaint&, SkTBlitterAllocator*);
+    static SkBlitter* Create(const SkPixmap&, const SkPaint&, const SkMatrix& ctm,
+                             SkTBlitterAllocator*);
 
     SkRasterPipelineBlitter(SkPixmap dst, SkBlendMode blend, SkPM4f paintColor)
         : fDst(dst)
@@ -71,8 +72,9 @@
 
 SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap& dst,
                                          const SkPaint& paint,
+                                         const SkMatrix& ctm,
                                          SkTBlitterAllocator* alloc) {
-    return SkRasterPipelineBlitter::Create(dst, paint, alloc);
+    return SkRasterPipelineBlitter::Create(dst, paint, ctm, alloc);
 }
 
 static bool supported(const SkImageInfo& info) {
@@ -86,6 +88,7 @@
 
 SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
                                            const SkPaint& paint,
+                                           const SkMatrix& ctm,
                                            SkTBlitterAllocator* alloc) {
     auto blitter = alloc->createT<SkRasterPipelineBlitter>(
             dst,
@@ -114,11 +117,17 @@
     pipeline->append(SkRasterPipeline::constant_color, paintColor);
 
     if (shader) {
-        is_opaque   = is_opaque && shader->isOpaque();
-        is_constant = shader->isConstant();
-        if (!shader->appendStages(pipeline, dst.colorSpace(), &blitter->fScratchFallback)) {
+        // At this point the paint color is in (r,g,b,a) and dst-space (x,y) is in (dr,dg).
+        // Swap them so the shader sees the paint color in (dr,dg,db,da) and (x,y) in (r,g).
+        pipeline->append(SkRasterPipeline::swap_src_dst);
+        if (!shader->appendStages(pipeline, dst.colorSpace(), &blitter->fScratchFallback, ctm)) {
             return earlyOut();
         }
+        // srcin, s' = s * da, i.e. modulate the output of the shader by the paint alpha.
+        pipeline->append(SkRasterPipeline::srcin);
+
+        is_opaque   = is_opaque && shader->isOpaque();
+        is_constant = shader->isConstant();
     }
 
     if (colorFilter) {
diff --git a/src/core/SkShader.cpp b/src/core/SkShader.cpp
index c232cf7..057365b 100644
--- a/src/core/SkShader.cpp
+++ b/src/core/SkShader.cpp
@@ -259,8 +259,9 @@
 
 bool SkShader::appendStages(SkRasterPipeline* pipeline,
                             SkColorSpace* dst,
-                            SkFallbackAlloc* scratch) const {
-    return this->onAppendStages(pipeline, dst, scratch);
+                            SkFallbackAlloc* scratch,
+                            const SkMatrix& ctm) const {
+    return this->onAppendStages(pipeline, dst, scratch, ctm);
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 1ca4322..4584f3d 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -229,6 +229,13 @@
     da = a;
 }
 
+STAGE(swap_src_dst, true) {
+    SkTSwap(r, dr);
+    SkTSwap(g, dg);
+    SkTSwap(b, db);
+    SkTSwap(a, da);
+}
+
 // The default shader produces a constant color (from the SkPaint).
 STAGE(constant_color, true) {
     auto color = (const SkPM4f*)ctx;