Make SkSmallAllocator obey the RAII invariants and move to heap structures when needed.

The biggest change is to the API which allowed code to bypass the
destruction invariants. This destruction bypass feature was needed in
only one use, and is totally encapsulated using createWithIniterT.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2488523003

Committed: https://skia.googlesource.com/skia/+/d5dc657b8c3ac916f98005dafdedafe02f023449
Committed: https://skia.googlesource.com/skia/+/c18b5f8f57a4efc5d5d1e399ed8bd3bd02c592ab
Review-Url: https://codereview.chromium.org/2488523003
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index b814325..35a213b 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -896,14 +896,15 @@
         size_t contextSize = shader->contextSize(rec);
         if (contextSize) {
             // Try to create the ShaderContext
-            void* storage = allocator->reserveT<SkShader::Context>(contextSize);
-            shaderContext = shader->createContext(rec, storage);
+            shaderContext = allocator->createWithIniter(
+                contextSize,
+                [&rec, shader](void* storage) {
+                    return shader->createContext(rec, storage);
+                });
             if (!shaderContext) {
-                allocator->freeLast();
                 return allocator->createT<SkNullBlitter>();
             }
             SkASSERT(shaderContext);
-            SkASSERT((void*) shaderContext == storage);
         } else {
             return allocator->createT<SkNullBlitter>();
         }
diff --git a/src/core/SkCanvas.cpp b/src/core/SkCanvas.cpp
index a8d73a9..eddf106 100644
--- a/src/core/SkCanvas.cpp
+++ b/src/core/SkCanvas.cpp
@@ -496,9 +496,11 @@
         }
 
         if (SkDrawLooper* looper = paint.getLooper()) {
-            void* buffer = fLooperContextAllocator.reserveT<SkDrawLooper::Context>(
-                    looper->contextSize());
-            fLooperContext = looper->createContext(canvas, buffer);
+            fLooperContext = fLooperContextAllocator.createWithIniter(
+                looper->contextSize(),
+                [&](void* buffer) {
+                    return looper->createContext(canvas, buffer);
+                });
             fIsSimple = false;
         } else {
             fLooperContext = nullptr;
diff --git a/src/core/SkDrawLooper.cpp b/src/core/SkDrawLooper.cpp
index aa53f2e..e372e8f 100644
--- a/src/core/SkDrawLooper.cpp
+++ b/src/core/SkDrawLooper.cpp
@@ -15,9 +15,12 @@
 bool SkDrawLooper::canComputeFastBounds(const SkPaint& paint) const {
     SkCanvas canvas;
     SkSmallAllocator<1, 32> allocator;
-    void* buffer = allocator.reserveT<SkDrawLooper::Context>(this->contextSize());
 
-    SkDrawLooper::Context* context = this->createContext(&canvas, buffer);
+    SkDrawLooper::Context* context = allocator.createWithIniter(
+        this->contextSize(),
+        [&](void* buffer) {
+            return this->createContext(&canvas, buffer);
+        });
     for (;;) {
         SkPaint p(paint);
         if (context->next(&canvas, &p)) {
@@ -39,10 +42,13 @@
 
     SkCanvas canvas;
     SkSmallAllocator<1, 32> allocator;
-    void* buffer = allocator.reserveT<SkDrawLooper::Context>(this->contextSize());
 
     *dst = src;   // catch case where there are no loops
-    SkDrawLooper::Context* context = this->createContext(&canvas, buffer);
+    SkDrawLooper::Context* context = allocator.createWithIniter(
+        this->contextSize(),
+        [&](void* buffer) {
+            return this->createContext(&canvas, buffer);
+        });
     for (bool firstTime = true;; firstTime = false) {
         SkPaint p(paint);
         if (context->next(&canvas, &p)) {
diff --git a/src/core/SkRasterPipelineBlitter.cpp b/src/core/SkRasterPipelineBlitter.cpp
index 2cd84f0..5a7ee67 100644
--- a/src/core/SkRasterPipelineBlitter.cpp
+++ b/src/core/SkRasterPipelineBlitter.cpp
@@ -93,8 +93,7 @@
             SkPM4f_from_SkColor(paint.getColor(), dst.colorSpace()));
 
     auto earlyOut = [&] {
-        blitter->~SkRasterPipelineBlitter();
-        alloc->freeLast();
+        alloc->deleteLast();
         return nullptr;
     };
 
diff --git a/src/core/SkSmallAllocator.h b/src/core/SkSmallAllocator.h
index 13b1505..efe0bc7 100644
--- a/src/core/SkSmallAllocator.h
+++ b/src/core/SkSmallAllocator.h
@@ -8,132 +8,145 @@
 #ifndef SkSmallAllocator_DEFINED
 #define SkSmallAllocator_DEFINED
 
-#include "SkTDArray.h"
+#include "SkTArray.h"
 #include "SkTypes.h"
 
-#include <new>
+#include <functional>
+#include <type_traits>
 #include <utility>
 
+
+// max_align_t is needed to calculate the alignment for createWithIniterT when the T used is an
+// abstract type. The complication with max_align_t is that it is defined differently for
+// different builds.
+namespace {
+#if defined(SK_BUILD_FOR_WIN32) || defined(SK_BUILD_FOR_MAC)
+    // Use std::max_align_t for compiles that follow the standard.
+    #include <cstddef>
+    using SystemAlignment = std::max_align_t;
+#else
+    // Ubuntu compiles don't have std::max_align_t defined, but MSVC does not define max_align_t.
+    #include <stddef.h>
+    using SystemAlignment = max_align_t;
+#endif
+}
+
 /*
  *  Template class for allocating small objects without additional heap memory
- *  allocations. kMaxObjects is a hard limit on the number of objects that can
- *  be allocated using this class. After that, attempts to create more objects
- *  with this class will assert and return nullptr.
+ *  allocations.
  *
  *  kTotalBytes is the total number of bytes provided for storage for all
  *  objects created by this allocator. If an object to be created is larger
  *  than the storage (minus storage already used), it will be allocated on the
  *  heap. This class's destructor will handle calling the destructor for each
  *  object it allocated and freeing its memory.
- *
- *  Current the class always aligns each allocation to 16-bytes to be safe, but future
- *  may reduce this to only the alignment that is required per alloc.
  */
-template<uint32_t kMaxObjects, size_t kTotalBytes>
+template<uint32_t kExpectedObjects, size_t kTotalBytes>
 class SkSmallAllocator : SkNoncopyable {
 public:
-    SkSmallAllocator()
-    : fStorageUsed(0)
-    , fNumObjects(0)
-    {}
-
     ~SkSmallAllocator() {
         // Destruct in reverse order, in case an earlier object points to a
         // later object.
-        while (fNumObjects > 0) {
-            fNumObjects--;
-            Rec* rec = &fRecs[fNumObjects];
-            rec->fKillProc(rec->fObj);
-            // Safe to do if fObj is in fStorage, since fHeapStorage will
-            // point to nullptr.
-            sk_free(rec->fHeapStorage);
+        while (fRecs.count() > 0) {
+            this->deleteLast();
         }
     }
 
     /*
      *  Create a new object of type T. Its lifetime will be handled by this
      *  SkSmallAllocator.
-     *  Note: If kMaxObjects have been created by this SkSmallAllocator, nullptr
-     *  will be returned.
      */
     template<typename T, typename... Args>
     T* createT(Args&&... args) {
-        void* buf = this->reserveT<T>();
-        if (nullptr == buf) {
-            return nullptr;
-        }
+        void* buf = this->reserve(sizeof(T), DefaultDestructor<T>);
         return new (buf) T(std::forward<Args>(args)...);
     }
 
     /*
-     *  Reserve a specified amount of space (must be enough space for one T).
-     *  The space will be in fStorage if there is room, or on the heap otherwise.
-     *  Either way, this class will call ~T() in its destructor and free the heap
-     *  allocation if necessary.
-     *  Unlike createT(), this method will not call the constructor of T.
+     * Create a new object of size using initer to initialize the memory. The initer function has
+     * the signature T* initer(void* storage). If initer is unable to initialize the memory it
+     * should return nullptr where SkSmallAllocator will free the memory.
      */
-    template<typename T> void* reserveT(size_t storageRequired = sizeof(T)) {
-        SkASSERT(fNumObjects < kMaxObjects);
-        SkASSERT(storageRequired >= sizeof(T));
-        if (kMaxObjects == fNumObjects) {
-            return nullptr;
-        }
-        const size_t storageRemaining = sizeof(fStorage) - fStorageUsed;
-        Rec* rec = &fRecs[fNumObjects];
-        if (storageRequired > storageRemaining) {
-            // Allocate on the heap. Ideally we want to avoid this situation.
+    template <typename Initer>
+    auto createWithIniter(size_t size, Initer initer) -> decltype(initer(nullptr)) {
+        using ObjType = typename std::remove_pointer<decltype(initer(nullptr))>::type;
+        SkASSERT(size >= sizeof(ObjType));
 
-            // With the gm composeshader_bitmap2, storage required is 4476
-            // and storage remaining is 3392. Increasing the base storage
-            // causes google 3 tests to fail.
-
-            rec->fStorageSize = 0;
-            rec->fHeapStorage = sk_malloc_throw(storageRequired);
-            rec->fObj = static_cast<void*>(rec->fHeapStorage);
-        } else {
-            // There is space in fStorage.
-            rec->fStorageSize = storageRequired;
-            rec->fHeapStorage = nullptr;
-            rec->fObj = static_cast<void*>(fStorage + fStorageUsed);
-            fStorageUsed += storageRequired;
+        void* storage = this->reserve(size, DefaultDestructor<ObjType>);
+        auto candidate = initer(storage);
+        if (!candidate) {
+            // Initializing didn't workout so free the memory.
+            this->freeLast();
         }
-        rec->fKillProc = DestroyT<T>;
-        fNumObjects++;
-        return rec->fObj;
+
+        return candidate;
     }
 
     /*
-     *  Free the memory reserved last without calling the destructor.
-     *  Can be used in a nested way, i.e. after reserving A and B, calling
-     *  freeLast once will free B and calling it again will free A.
+     * Free the last object allocated and call its destructor. This can be called multiple times
+     * removing objects from the pool in reverse order.
      */
-    void freeLast() {
-        SkASSERT(fNumObjects > 0);
-        Rec* rec = &fRecs[fNumObjects - 1];
-        sk_free(rec->fHeapStorage);
-        fStorageUsed -= rec->fStorageSize;
-
-        fNumObjects--;
+    void deleteLast() {
+        SkASSERT(fRecs.count() > 0);
+        Rec& rec = fRecs.back();
+        rec.fDestructor(rec.fObj);
+        this->freeLast();
     }
 
 private:
+    using Destructor = void(*)(void*);
     struct Rec {
-        size_t fStorageSize;  // 0 if allocated on heap
-        void*  fObj;
-        void*  fHeapStorage;
-        void   (*fKillProc)(void*);
+        char*      fObj;
+        Destructor fDestructor;
     };
 
     // Used to call the destructor for allocated objects.
     template<typename T>
-    static void DestroyT(void* ptr) {
+    static void DefaultDestructor(void* ptr) {
         static_cast<T*>(ptr)->~T();
     }
 
-    alignas(16) char fStorage[kTotalBytes];
-    size_t           fStorageUsed;  // Number of bytes used so far.
-    uint32_t         fNumObjects;
-    Rec              fRecs[kMaxObjects];
+    static constexpr size_t kAlignment = alignof(SystemAlignment);
+
+    static constexpr size_t AlignSize(size_t size) {
+        return (size + kAlignment - 1) & ~(kAlignment - 1);
+    }
+
+    // Reserve storageRequired from fStorage if possible otherwise allocate on the heap.
+    void* reserve(size_t storageRequired, Destructor destructor) {
+        // Make sure that all allocations stay aligned by rounding the storageRequired up to the
+        // aligned value.
+        char* objectStart = fStorageEnd;
+        char* objectEnd = objectStart + AlignSize(storageRequired);
+        Rec& rec = fRecs.push_back();
+        if (objectEnd > &fStorage[kTotalBytes]) {
+            // Allocate on the heap. Ideally we want to avoid this situation.
+            rec.fObj = new char [storageRequired];
+        } else {
+            // There is space in fStorage.
+            rec.fObj = objectStart;
+            fStorageEnd = objectEnd;
+        }
+        rec.fDestructor = destructor;
+        return rec.fObj;
+    }
+
+    void freeLast() {
+        Rec& rec = fRecs.back();
+        if (std::less<char*>()(rec.fObj, fStorage)
+            || !std::less<char*>()(rec.fObj, &fStorage[kTotalBytes])) {
+            delete [] rec.fObj;
+        } else {
+            fStorageEnd = rec.fObj;
+        }
+        fRecs.pop_back();
+    }
+
+    SkSTArray<kExpectedObjects, Rec, true> fRecs;
+    char*                                  fStorageEnd {fStorage};
+    // Since char have an alignment of 1, it should be forced onto an alignment the compiler
+    // expects which is the alignment of std::max_align_t.
+    alignas (kAlignment) char              fStorage[kTotalBytes];
 };
 
 #endif // SkSmallAllocator_DEFINED
diff --git a/tests/LayerDrawLooperTest.cpp b/tests/LayerDrawLooperTest.cpp
index 4897fd2..7b3aa29 100644
--- a/tests/LayerDrawLooperTest.cpp
+++ b/tests/LayerDrawLooperTest.cpp
@@ -60,8 +60,11 @@
     SkPaint paint;
     auto looper(looperBuilder.detach());
     SkSmallAllocator<1, 32> allocator;
-    void* buffer = allocator.reserveT<SkDrawLooper::Context>(looper->contextSize());
-    SkDrawLooper::Context* context = looper->createContext(&canvas, buffer);
+    SkDrawLooper::Context* context = allocator.createWithIniter(
+        looper->contextSize(),
+        [&](void* buffer) {
+            return looper->createContext(&canvas, buffer);
+        });
 
     // The back layer should come first.
     REPORTER_ASSERT(reporter, context->next(&canvas, &paint));
@@ -100,8 +103,11 @@
     SkPaint paint;
     auto looper(looperBuilder.detach());
     SkSmallAllocator<1, 32> allocator;
-    void* buffer = allocator.reserveT<SkDrawLooper::Context>(looper->contextSize());
-    SkDrawLooper::Context* context = looper->createContext(&canvas, buffer);
+    SkDrawLooper::Context* context = allocator.createWithIniter(
+        looper->contextSize(),
+        [&](void* buffer) {
+            return looper->createContext(&canvas, buffer);
+        });
 
     // The back layer should come first.
     REPORTER_ASSERT(reporter, context->next(&canvas, &paint));
@@ -140,8 +146,11 @@
     SkPaint paint;
     sk_sp<SkDrawLooper> looper(looperBuilder.detach());
     SkSmallAllocator<1, 32> allocator;
-    void* buffer = allocator.reserveT<SkDrawLooper::Context>(looper->contextSize());
-    SkDrawLooper::Context* context = looper->createContext(&canvas, buffer);
+    SkDrawLooper::Context* context = allocator.createWithIniter(
+        looper->contextSize(),
+        [&](void* buffer) {
+            return looper->createContext(&canvas, buffer);
+        });
 
     // The back layer should come first.
     REPORTER_ASSERT(reporter, context->next(&canvas, &paint));
diff --git a/tests/SmallAllocatorTest.cpp b/tests/SmallAllocatorTest.cpp
index 774e0c9..a5f45b1 100644
--- a/tests/SmallAllocatorTest.cpp
+++ b/tests/SmallAllocatorTest.cpp
@@ -30,7 +30,7 @@
 template<uint32_t kMaxObjects, size_t kBytes> void test_allocator(skiatest::Reporter* reporter) {
     {
         SkSmallAllocator<kMaxObjects, kBytes> alloc;
-        for (uint32_t i = 0; i < kMaxObjects; ++i) {
+        for (uint32_t i = 0; i < kMaxObjects + 1; ++i) {
             CountingClass* c = alloc.template createT<CountingClass>();
             REPORTER_ASSERT(reporter, c != nullptr);
             REPORTER_ASSERT(reporter, CountingClass::GetCount() == static_cast<int>(i+1));
@@ -43,18 +43,15 @@
 // were created in fStorage or on the heap.
 DEF_TEST(SmallAllocator_destructor, reporter) {
     // Four times as many bytes as objects will never require any heap
-    // allocations (since SkAlign4(sizeof(CountingClass)) == 4 and the allocator
-    // will stop once it reaches kMaxObjects).
+    // allocations (since SkAlign4(sizeof(CountingClass)) == 4).
     test_allocator<5, 20>(reporter);
     test_allocator<10, 40>(reporter);
     test_allocator<20, 80>(reporter);
 
-#ifndef SK_DEBUG
     // Allowing less bytes than objects means some will be allocated on the
     // heap. Don't run these in debug where we assert.
     test_allocator<50, 20>(reporter);
     test_allocator<100, 20>(reporter);
-#endif
 }
 
 class Dummy {
@@ -81,3 +78,16 @@
     REPORTER_ASSERT(reporter, container != nullptr);
     REPORTER_ASSERT(reporter, container->getDummy() == &d);
 }
+
+// Test that using a createWithIniterT works as expected.
+DEF_TEST(SmallAllocator_initer, reporter) {
+    SkSmallAllocator<1, 8> alloc;
+    Dummy d;
+    DummyContainer* container = alloc.createWithIniter(
+        sizeof(DummyContainer),
+        [&](void* storage) {
+            return new (storage) DummyContainer(&d);
+        });
+    REPORTER_ASSERT(reporter, container != nullptr);
+    REPORTER_ASSERT(reporter, container->getDummy() == &d);
+}