Hook SkColorSpaceXform_A2B into parametric and table raster pipeline stages

BUG:664864

GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=4913
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Change-Id: I909152f1abba60803f0ce2f970eec1f8f1816d78
Reviewed-on: https://skia-review.googlesource.com/4913
Commit-Queue: Matt Sarett <msarett@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
diff --git a/src/core/SkColorSpaceXformPriv.h b/src/core/SkColorSpaceXformPriv.h
index c2418a9..03be68a 100644
--- a/src/core/SkColorSpaceXformPriv.h
+++ b/src/core/SkColorSpaceXformPriv.h
@@ -12,13 +12,11 @@
 #include "SkHalf.h"
 #include "SkSRGB.h"
 
-#define AI SK_ALWAYS_INLINE
-
 #define SkCSXformPrintfDefined 0
 #define SkCSXformPrintf(...)
 
 // Interpolating lookup in a variably sized table.
-static AI float interp_lut(float input, const float* table, int tableSize) {
+static inline float interp_lut(float input, const float* table, int tableSize) {
     float index = input * (tableSize - 1);
     float diff = index - sk_float_floor2int(index);
     return table[(int) sk_float_floor2int(index)] * (1.0f - diff) +
@@ -28,7 +26,7 @@
 // Inverse table lookup.  Ex: what index corresponds to the input value?  This will
 // have strange results when the table is non-increasing.  But any sane gamma
 // function will be increasing.
-static float inverse_interp_lut(float input, const float* table, int tableSize) {
+static inline float inverse_interp_lut(float input, const float* table, int tableSize) {
     if (input <= table[0]) {
         return table[0];
     } else if (input >= table[tableSize - 1]) {
diff --git a/src/core/SkColorSpaceXform_A2B.cpp b/src/core/SkColorSpaceXform_A2B.cpp
index e54cb27..ead48f3 100644
--- a/src/core/SkColorSpaceXform_A2B.cpp
+++ b/src/core/SkColorSpaceXform_A2B.cpp
@@ -21,52 +21,6 @@
 
 #define AI SK_ALWAYS_INLINE
 
-namespace {
-
-class ApplyParametric {
-public:
-    ApplyParametric(const SkColorSpaceTransferFn& fn)
-        : fFn(fn)
-    {}
-
-    float operator()(float x) const {
-        float y;
-        if (x >= fFn.fD) {
-            y = ::powf(fFn.fA * x + fFn.fB, fFn.fG) + fFn.fC;
-        } else {
-            y = fFn.fE * x + fFn.fF;
-        }
-        if (y >= 1.f) {
-            return 1.f;
-        } else if (y >= 0.f) {
-            return y;
-        }
-        return 0.f;
-    }
-
-private:
-    SkColorSpaceTransferFn fFn;
-};
-
-class ApplyTable {
-public:
-    ApplyTable(const float* table, int size)
-        : fTable(table)
-        , fSize(size)
-    {}
-
-    float operator()(float x) const {
-        return interp_lut(x, fTable, fSize);
-    }
-
-private:
-    const float* fTable;
-    int          fSize;
-};
-
-}
-
-///////////////////////////////////////////////////////////////////////////////////////////////////
 bool SkColorSpaceXform_A2B::onApply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat,
                                     const void* src, int count, SkAlphaType alphaType) const {
     SkRasterPipeline pipeline;
@@ -226,34 +180,43 @@
                 if (kLinear_SkGammaNamed != e.gammaNamed()) {
                     SkCSXformPrintf("Gamma stage added: %s\n",
                                     debugGammaNamed[(int)e.gammaNamed()]);
-                    addGamma(ApplyParametric(gammanamed_to_parametric(e.gammaNamed())),
-                             kRGB_Channels);
+                    SkColorSpaceTransferFn fn = gammanamed_to_parametric(e.gammaNamed());
+                    this->addTransferFn(fn, kRGB_Channels);
+
+                    fElementsPipeline.append(SkRasterPipeline::clamp_0);
+                    fElementsPipeline.append(SkRasterPipeline::clamp_1);
                 }
                 break;
             case SkColorSpace_A2B::Element::Type::kGammas: {
-                    const SkGammas& gammas = e.gammas();
-                    SkCSXformPrintf("Gamma stage added:");
-                    for (int channel = 0; channel < 3; ++channel) {
-                        SkCSXformPrintf("  %s", debugGammas[(int)gammas.type(channel)]);
-                    }
-                    SkCSXformPrintf("\n");
-                    bool gammaNeedsRef = false;
-                    for (int channel = 0; channel < 3; ++channel) {
-                        if (SkGammas::Type::kTable_Type == gammas.type(channel)) {
-                            addGamma(ApplyTable(gammas.table(channel),
-                                                gammas.data(channel).fTable.fSize),
-                                                static_cast<Channels>(channel));
-                            gammaNeedsRef = true;
-                        } else {
-                            addGamma(ApplyParametric(gamma_to_parametric(gammas, channel)),
-                                     static_cast<Channels>(channel));
-                        }
-                    }
-                    if (gammaNeedsRef) {
-                        fGammaRefs.push_back(sk_ref_sp(&gammas));
+                const SkGammas& gammas = e.gammas();
+                SkCSXformPrintf("Gamma stage added:");
+                for (int channel = 0; channel < 3; ++channel) {
+                    SkCSXformPrintf("  %s", debugGammas[(int)gammas.type(channel)]);
+                }
+                SkCSXformPrintf("\n");
+                bool gammaNeedsRef = false;
+                for (int channel = 0; channel < 3; ++channel) {
+                    if (SkGammas::Type::kTable_Type == gammas.type(channel)) {
+                        SkTableTransferFn table = {
+                                gammas.table(channel),
+                                gammas.data(channel).fTable.fSize,
+                        };
+
+                        this->addTableFn(table, static_cast<Channels>(channel));
+                        gammaNeedsRef = true;
+                    } else {
+                        SkColorSpaceTransferFn fn = gamma_to_parametric(gammas, channel);
+                        this->addTransferFn(fn, static_cast<Channels>(channel));
                     }
                 }
+                if (gammaNeedsRef) {
+                    fGammaRefs.push_back(sk_ref_sp(&gammas));
+                }
+
+                fElementsPipeline.append(SkRasterPipeline::clamp_0);
+                fElementsPipeline.append(SkRasterPipeline::clamp_1);
                 break;
+            }
             case SkColorSpace_A2B::Element::Type::kCLUT:
                 SkCSXformPrintf("CLUT stage added [%d][%d][%d]\n", e.colorLUT().fGridPoints[0],
                                 e.colorLUT().fGridPoints[1], e.colorLUT().fGridPoints[2]);
@@ -283,42 +246,74 @@
 
     if (kNonStandard_SkGammaNamed != dstSpace->gammaNamed()) {
         if (!fLinearDstGamma) {
-            addGamma(ApplyParametric(
-                            invert_parametric(gammanamed_to_parametric(dstSpace->gammaNamed()))),
-                     kRGB_Channels);
+            SkColorSpaceTransferFn fn =
+                    invert_parametric(gammanamed_to_parametric(dstSpace->gammaNamed()));
+            this->addTransferFn(fn, kRGB_Channels);
+            fElementsPipeline.append(SkRasterPipeline::clamp_0);
+            fElementsPipeline.append(SkRasterPipeline::clamp_1);
         }
     } else {
         for (int channel = 0; channel < 3; ++channel) {
             const SkGammas& gammas = *dstSpace->gammas();
             if (SkGammas::Type::kTable_Type == gammas.type(channel)) {
-                fGammaTables.push_front(build_inverse_table(gammas.table(channel),
-                                                            gammas.data(channel).fTable.fSize));
-                addGamma(ApplyTable(fGammaTables.front().data(), fGammaTables.front().size()),
-                         static_cast<Channels>(channel));
+                std::vector<float> storage = build_inverse_table(gammas.table(channel),
+                                                                 gammas.data(channel).fTable.fSize);
+                SkTableTransferFn table = {
+                        storage.data(),
+                        (int) storage.size(),
+                };
+                fTableStorage.push_front(std::move(storage));
+
+                this->addTableFn(table, static_cast<Channels>(channel));
             } else {
-                addGamma(ApplyParametric(invert_parametric(gamma_to_parametric(gammas, channel))),
-                         static_cast<Channels>(channel));
+                SkColorSpaceTransferFn fn = invert_parametric(gamma_to_parametric(gammas, channel));
+                this->addTransferFn(fn, static_cast<Channels>(channel));
             }
         }
+
+        fElementsPipeline.append(SkRasterPipeline::clamp_0);
+        fElementsPipeline.append(SkRasterPipeline::clamp_1);
     }
 }
 
-void SkColorSpaceXform_A2B::addGamma(std::function<float(float)> fn, Channels channels) {
-    fGammaFunctions.push_front(std::move(fn));
+void SkColorSpaceXform_A2B::addTransferFn(const SkColorSpaceTransferFn& fn, Channels channels) {
+    fTransferFns.push_front(fn);  // stored copy; the stages appended below point at it
     switch (channels) {
         case kRGB_Channels:
-            fElementsPipeline.append(SkRasterPipeline::fn_1_r, &fGammaFunctions.front());
-            fElementsPipeline.append(SkRasterPipeline::fn_1_g, &fGammaFunctions.front());
-            fElementsPipeline.append(SkRasterPipeline::fn_1_b, &fGammaFunctions.front());
+            fElementsPipeline.append(SkRasterPipeline::parametric_r, &fTransferFns.front());
+            fElementsPipeline.append(SkRasterPipeline::parametric_g, &fTransferFns.front());
+            fElementsPipeline.append(SkRasterPipeline::parametric_b, &fTransferFns.front());
             break;
         case kR_Channels:
-            fElementsPipeline.append(SkRasterPipeline::fn_1_r, &fGammaFunctions.front());
+            fElementsPipeline.append(SkRasterPipeline::parametric_r, &fTransferFns.front());
             break;
         case kG_Channels:
-            fElementsPipeline.append(SkRasterPipeline::fn_1_g, &fGammaFunctions.front());
+            fElementsPipeline.append(SkRasterPipeline::parametric_g, &fTransferFns.front());
             break;
         case kB_Channels:
-            fElementsPipeline.append(SkRasterPipeline::fn_1_b, &fGammaFunctions.front());
+            fElementsPipeline.append(SkRasterPipeline::parametric_b, &fTransferFns.front());
+            break;
+        default:
+            SkASSERT(false);  // not one of the four Channels values handled above
+    }
+}
+
+void SkColorSpaceXform_A2B::addTableFn(const SkTableTransferFn& fn, Channels channels) {
+    fTableTransferFns.push_front(fn);
+    switch (channels) {
+        case kRGB_Channels:
+            fElementsPipeline.append(SkRasterPipeline::table_r, &fTableTransferFns.front());
+            fElementsPipeline.append(SkRasterPipeline::table_g, &fTableTransferFns.front());
+            fElementsPipeline.append(SkRasterPipeline::table_b, &fTableTransferFns.front());
+            break;
+        case kR_Channels:
+            fElementsPipeline.append(SkRasterPipeline::table_r, &fTableTransferFns.front());
+            break;
+        case kG_Channels:
+            fElementsPipeline.append(SkRasterPipeline::table_g, &fTableTransferFns.front());
+            break;
+        case kB_Channels:
+            fElementsPipeline.append(SkRasterPipeline::table_b, &fTableTransferFns.front());
             break;
         default:
             SkASSERT(false);
@@ -346,7 +341,5 @@
     SkASSERT(matrix.get(3, 3) == 1.f);
     fElementsPipeline.append(SkRasterPipeline::matrix_3x4, m.data());
     fElementsPipeline.append(SkRasterPipeline::clamp_0);
-    fElementsPipeline.append(SkRasterPipeline::clamp_a);
+    fElementsPipeline.append(SkRasterPipeline::clamp_1);
 }
-
-
diff --git a/src/core/SkColorSpaceXform_A2B.h b/src/core/SkColorSpaceXform_A2B.h
index 681261a..6beda28 100644
--- a/src/core/SkColorSpaceXform_A2B.h
+++ b/src/core/SkColorSpaceXform_A2B.h
@@ -19,6 +19,10 @@
 class SkColorSpace_A2B;
 class SkColorSpace_XYZ;
 
+struct SkTableTransferFn {  // Pointer + length view of a transfer-function lookup table.
+    const float* fData;     // table entries; not owned here, the storage is kept alive elsewhere
+    int          fSize;     // number of entries reachable through fData
+};
 
 class SkColorSpaceXform_A2B : public SkColorSpaceXform_Base {
 public:
@@ -34,19 +38,26 @@
         kG_Channels   =  1,
         kB_Channels   =  2
     };
-    void addGamma(std::function<float(float)> fn, Channels channels);
+
+
+
+    void addTransferFn(const SkColorSpaceTransferFn& fn, Channels channels);
+    void addTableFn(const SkTableTransferFn& table, Channels channels);
 
     void addMatrix(const SkMatrix44& matrix);
 
-    SkRasterPipeline                               fElementsPipeline;
-    bool                                           fLinearDstGamma;
+    SkRasterPipeline                             fElementsPipeline;
+    bool                                         fLinearDstGamma;
+
     // storage used by the pipeline
-    std::forward_list<std::function<float(float)>> fGammaFunctions;
-    std::forward_list<std::vector<float>>          fMatrices;
-    std::forward_list<std::vector<float>>          fGammaTables;
-    std::vector<sk_sp<const SkColorLookUpTable>>   fCLUTs;
+    std::forward_list<SkColorSpaceTransferFn>    fTransferFns;
+    std::forward_list<SkTableTransferFn>         fTableTransferFns;
+    std::forward_list<std::vector<float>>        fMatrices;
+    std::vector<sk_sp<const SkColorLookUpTable>> fCLUTs;
+
     // these are here to maintain ownership of tables used in the pipeline
-    std::vector<sk_sp<const SkGammas>>             fGammaRefs;
+    std::forward_list<std::vector<float>>        fTableStorage;
+    std::vector<sk_sp<const SkGammas>>           fGammaRefs;
 
     friend class SkColorSpaceXform;
 };
diff --git a/src/core/SkRasterPipeline.h b/src/core/SkRasterPipeline.h
index 484f283..b361ab6 100644
--- a/src/core/SkRasterPipeline.h
+++ b/src/core/SkRasterPipeline.h
@@ -57,7 +57,7 @@
 #define SK_RASTER_PIPELINE_STAGES(M)                             \
     M(trace) M(registers)                                        \
     M(move_src_dst) M(swap_src_dst)                              \
-    M(clamp_0) M(clamp_a) M(unpremul) M(premul)                  \
+    M(clamp_0) M(clamp_a) M(clamp_1) M(unpremul) M(premul)       \
     M(constant_color) M(store_f32)                               \
     M(load_s_565)  M(load_d_565)  M(store_565)                   \
     M(load_s_srgb) M(load_d_srgb) M(store_srgb)                  \
@@ -72,7 +72,6 @@
     M(colorburn) M(colordodge) M(darken) M(difference)           \
     M(exclusion) M(hardlight) M(lighten) M(overlay) M(softlight) \
     M(luminance_to_alpha) M(matrix_3x4) M(matrix_4x5)            \
-    M(fn_1_r) M(fn_1_g) M(fn_1_b)                                \
     M(parametric_r) M(parametric_g) M(parametric_b)              \
     M(table_r) M(table_g) M(table_b)                             \
     M(color_lookup_table) M(lab_to_xyz) M(swap_rb)
diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h
index 087a630..f1aa250 100644
--- a/src/opts/SkRasterPipeline_opts.h
+++ b/src/opts/SkRasterPipeline_opts.h
@@ -10,6 +10,8 @@
 
 #include "SkColorPriv.h"
 #include "SkColorLookUpTable.h"
+#include "SkColorSpaceXform_A2B.h"
+#include "SkColorSpaceXformPriv.h"
 #include "SkHalf.h"
 #include "SkPM4f.h"
 #include "SkPM4fPriv.h"
@@ -232,6 +234,13 @@
     b = SkNf::Min(b, a);
 }
 
+STAGE(clamp_1, true) {  // Caps a, r, g and b at 1.0f; no lower bound is applied here.
+    a = SkNf::Min(a, 1.0f);
+    r = SkNf::Min(r, 1.0f);
+    g = SkNf::Min(g, 1.0f);
+    b = SkNf::Min(b, 1.0f);
+}
+
 STAGE(unpremul, true) {
     r *= a.invert();
     g *= a.invert();
@@ -585,51 +594,22 @@
     b = parametric(b, *(const SkColorSpaceTransferFn*)ctx);
 }
 
-SI SkNf table(const SkNf& v, const float t[1024]) {
-    SkNi ix = SkNx_cast<int>(SkNf::Max(0, SkNf::Min(v, 1)) * 1023 + 0.5);
-
-    float result[N];   // TODO: vgatherdps?
+SI SkNf table(const SkNf& v, const SkTableTransferFn& table) {  // interp_lut() on each element
+    float result[N];  // scalar scratch, one slot per element of v
     for (int i = 0; i < N; i++) {
-        result[i] = t[ix[i]];
+        result[i] = interp_lut(v[i], table.fData, table.fSize);  // NOTE(review): v[i] unclamped
     }
     return SkNf::Load(result);
 }
 
 STAGE(table_r, true) {
-    r = table(r, (const float*)ctx);
+    r = table(r, *(const SkTableTransferFn*)ctx);  // ctx: the SkTableTransferFn given to append()
 }
 STAGE(table_g, true) {
-    g = table(g, (const float*)ctx);
+    g = table(g, *(const SkTableTransferFn*)ctx);  // ctx: the SkTableTransferFn given to append()
 }
 STAGE(table_b, true) {
-    b = table(b, (const float*)ctx);
-}
-
-STAGE(fn_1_r, true) {
-    auto fn = (const std::function<float(float)>*)ctx;
-    float result[N];
-    for (int i = 0; i < N; ++i) {
-        result[i] = (*fn)(r[i]);
-    }
-    r = SkNf::Load(result);
-}
-
-STAGE(fn_1_g, true) {
-    auto fn = (const std::function<float(float)>*)ctx;
-    float result[N];
-    for (int i = 0; i < N; ++i) {
-        result[i] = (*fn)(g[i]);
-    }
-    g = SkNf::Load(result);
-}
-
-STAGE(fn_1_b, true) {
-    auto fn = (const std::function<float(float)>*)ctx;
-    float result[N];
-    for (int i = 0; i < N; ++i) {
-        result[i] = (*fn)(b[i]);
-    }
-    b = SkNf::Load(result);
+    b = table(b, *(const SkTableTransferFn*)ctx);  // ctx: the SkTableTransferFn given to append()
 }
 
 STAGE(color_lookup_table, true) {