Merge pull request #514 from smola/use-cache-delta

cache: reuse object cache for delta resolution, use LRU policy
diff --git a/plumbing/cache/common.go b/plumbing/cache/common.go
index 7b90c55..9efc26c 100644
--- a/plumbing/cache/common.go
+++ b/plumbing/cache/common.go
@@ -11,8 +11,14 @@
 
 type FileSize int64
 
+// Object is an interface to an object cache.
 type Object interface {
-	Add(o plumbing.EncodedObject)
-	Get(k plumbing.Hash) plumbing.EncodedObject
+	// Put puts the given object into the cache. Whether this object will
+	// actually be put into the cache or not is implementation specific.
+	Put(o plumbing.EncodedObject)
+	// Get gets an object from the cache given its hash. The second return value
+	// is true if the object was returned, and false otherwise.
+	Get(k plumbing.Hash) (plumbing.EncodedObject, bool)
+	// Clear clears every object from the cache.
 	Clear()
 }
diff --git a/plumbing/cache/object.go b/plumbing/cache/object.go
deleted file mode 100644
index 44238ce..0000000
--- a/plumbing/cache/object.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package cache
-
-import "gopkg.in/src-d/go-git.v4/plumbing"
-
-const (
-	initialQueueSize = 20
-	MaxSize          = 10 * MiByte
-)
-
-type ObjectFIFO struct {
-	objects map[plumbing.Hash]plumbing.EncodedObject
-	order   *queue
-
-	maxSize    FileSize
-	actualSize FileSize
-}
-
-// NewObjectFIFO returns an Object cache that keeps the newest objects that fit
-// into the specific memory size
-func NewObjectFIFO(size FileSize) *ObjectFIFO {
-	return &ObjectFIFO{
-		objects: make(map[plumbing.Hash]plumbing.EncodedObject),
-		order:   newQueue(initialQueueSize),
-		maxSize: size,
-	}
-}
-
-// Add adds a new object to the cache. If the object size is greater than the
-// cache size, the object is not added.
-func (c *ObjectFIFO) Add(o plumbing.EncodedObject) {
-	// if the size of the object is bigger or equal than the cache size,
-	// skip it
-	if FileSize(o.Size()) >= c.maxSize {
-		return
-	}
-
-	// if the object is into the cache, do not add it again
-	if _, ok := c.objects[o.Hash()]; ok {
-		return
-	}
-
-	// delete the oldest object if cache is full
-	if c.actualSize >= c.maxSize {
-		h := c.order.Pop()
-		o := c.objects[h]
-		if o != nil {
-			c.actualSize -= FileSize(o.Size())
-			delete(c.objects, h)
-		}
-	}
-
-	c.objects[o.Hash()] = o
-	c.order.Push(o.Hash())
-	c.actualSize += FileSize(o.Size())
-}
-
-// Get returns an object by his hash. If the object is not found in the cache, it
-// returns nil
-func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject {
-	return c.objects[k]
-}
-
-// Clear the content of this object cache
-func (c *ObjectFIFO) Clear() {
-	c.objects = make(map[plumbing.Hash]plumbing.EncodedObject)
-	c.order = newQueue(initialQueueSize)
-	c.actualSize = 0
-}
diff --git a/plumbing/cache/object_lru.go b/plumbing/cache/object_lru.go
new file mode 100644
index 0000000..e4c3160
--- /dev/null
+++ b/plumbing/cache/object_lru.go
@@ -0,0 +1,84 @@
+package cache
+
+import (
+	"container/list"
+
+	"gopkg.in/src-d/go-git.v4/plumbing"
+)
+
+// ObjectLRU implements an object cache with an LRU eviction policy and a
+// maximum size (measured in object size).
+type ObjectLRU struct {
+	MaxSize FileSize
+
+	actualSize FileSize
+	ll         *list.List
+	cache      map[interface{}]*list.Element
+}
+
+// NewObjectLRU creates a new ObjectLRU with the given maximum size. The maximum
+// size will never be exceeded.
+func NewObjectLRU(maxSize FileSize) *ObjectLRU {
+	return &ObjectLRU{MaxSize: maxSize}
+}
+
+// Put puts an object into the cache. If the object is already in the cache, it
+// will be marked as used. Otherwise, it will be inserted. A single object might
+// be evicted to make room for the new object.
+func (c *ObjectLRU) Put(obj plumbing.EncodedObject) {
+	if c.cache == nil {
+		c.actualSize = 0
+		c.cache = make(map[interface{}]*list.Element, 1000)
+		c.ll = list.New()
+	}
+
+	key := obj.Hash()
+	if ee, ok := c.cache[key]; ok {
+		c.ll.MoveToFront(ee)
+		ee.Value = obj
+		return
+	}
+
+	objSize := FileSize(obj.Size())
+
+	if objSize >= c.MaxSize {
+		return
+	}
+
+	if c.actualSize+objSize > c.MaxSize {
+		last := c.ll.Back()
+		lastObj := last.Value.(plumbing.EncodedObject)
+		lastSize := FileSize(lastObj.Size())
+
+		c.ll.Remove(last)
+		delete(c.cache, lastObj.Hash())
+		c.actualSize -= lastSize
+
+		if c.actualSize+objSize > c.MaxSize {
+			return
+		}
+	}
+
+	ee := c.ll.PushFront(obj)
+	c.cache[key] = ee
+	c.actualSize += objSize
+}
+
+// Get returns an object by its hash. It marks the object as used. If the object
+// is not in the cache, (nil, false) will be returned.
+func (c *ObjectLRU) Get(k plumbing.Hash) (plumbing.EncodedObject, bool) {
+	ee, ok := c.cache[k]
+	if !ok {
+		return nil, false
+	}
+
+	c.ll.MoveToFront(ee)
+	return ee.Value.(plumbing.EncodedObject), true
+}
+
+// Clear the content of this object cache.
+func (c *ObjectLRU) Clear() {
+	c.ll = nil
+	c.cache = nil
+	c.actualSize = 0
+}
diff --git a/plumbing/cache/object_test.go b/plumbing/cache/object_test.go
index 80cd17b..9359455 100644
--- a/plumbing/cache/object_test.go
+++ b/plumbing/cache/object_test.go
@@ -12,7 +12,7 @@
 func Test(t *testing.T) { TestingT(t) }
 
 type ObjectSuite struct {
-	c       *ObjectFIFO
+	c       Object
 	aObject plumbing.EncodedObject
 	bObject plumbing.EncodedObject
 	cObject plumbing.EncodedObject
@@ -27,44 +27,44 @@
 	s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte)
 	s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte)
 
-	s.c = NewObjectFIFO(2 * Byte)
+	s.c = NewObjectLRU(2 * Byte)
 }
 
-func (s *ObjectSuite) TestAdd_SameObject(c *C) {
-	s.c.Add(s.aObject)
-	c.Assert(s.c.actualSize, Equals, 1*Byte)
-	s.c.Add(s.aObject)
-	c.Assert(s.c.actualSize, Equals, 1*Byte)
+func (s *ObjectSuite) TestPutSameObject(c *C) {
+	s.c.Put(s.aObject)
+	s.c.Put(s.aObject)
+	_, ok := s.c.Get(s.aObject.Hash())
+	c.Assert(ok, Equals, true)
 }
 
-func (s *ObjectSuite) TestAdd_BigObject(c *C) {
-	s.c.Add(s.bObject)
-	c.Assert(s.c.actualSize, Equals, 0*Byte)
-	c.Assert(s.c.actualSize, Equals, 0*KiByte)
-	c.Assert(s.c.actualSize, Equals, 0*MiByte)
-	c.Assert(s.c.actualSize, Equals, 0*GiByte)
-	c.Assert(len(s.c.objects), Equals, 0)
+func (s *ObjectSuite) TestPutBigObject(c *C) {
+	s.c.Put(s.bObject)
+	_, ok := s.c.Get(s.aObject.Hash())
+	c.Assert(ok, Equals, false)
 }
 
-func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) {
-	s.c.Add(s.aObject)
-	c.Assert(s.c.actualSize, Equals, 1*Byte)
-	s.c.Add(s.cObject)
-	c.Assert(len(s.c.objects), Equals, 2)
-	s.c.Add(s.dObject)
-	c.Assert(len(s.c.objects), Equals, 2)
+func (s *ObjectSuite) TestPutCacheOverflow(c *C) {
+	s.c.Put(s.aObject)
+	s.c.Put(s.cObject)
+	s.c.Put(s.dObject)
 
-	c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
-	c.Assert(s.c.Get(s.cObject.Hash()), NotNil)
-	c.Assert(s.c.Get(s.dObject.Hash()), NotNil)
+	obj, ok := s.c.Get(s.aObject.Hash())
+	c.Assert(ok, Equals, false)
+	c.Assert(obj, IsNil)
+	obj, ok = s.c.Get(s.cObject.Hash())
+	c.Assert(ok, Equals, true)
+	c.Assert(obj, NotNil)
+	obj, ok = s.c.Get(s.dObject.Hash())
+	c.Assert(ok, Equals, true)
+	c.Assert(obj, NotNil)
 }
 
 func (s *ObjectSuite) TestClear(c *C) {
-	s.c.Add(s.aObject)
-	c.Assert(s.c.actualSize, Equals, 1*Byte)
+	s.c.Put(s.aObject)
 	s.c.Clear()
-	c.Assert(s.c.actualSize, Equals, 0*Byte)
-	c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
+	obj, ok := s.c.Get(s.aObject.Hash())
+	c.Assert(ok, Equals, false)
+	c.Assert(obj, IsNil)
 }
 
 type dummyObject struct {
diff --git a/plumbing/format/packfile/decoder.go b/plumbing/format/packfile/decoder.go
index 39680a3..e49de51 100644
--- a/plumbing/format/packfile/decoder.go
+++ b/plumbing/format/packfile/decoder.go
@@ -52,6 +52,8 @@
 // is destroyed. The Offsets and CRCs are calculated whether an
 // ObjectStorer was provided or not.
 type Decoder struct {
+	DeltaBaseCache cache.Object
+
 	s  *Scanner
 	o  storer.EncodedObjectStorer
 	tx storer.Transaction
@@ -65,8 +67,6 @@
 
 	offsetToType map[int64]plumbing.ObjectType
 	decoderType  plumbing.ObjectType
-
-	cache cache.Object
 }
 
 // NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -107,8 +107,6 @@
 		idx:          NewIndex(0),
 		offsetToType: make(map[int64]plumbing.ObjectType, 0),
 		decoderType:  t,
-
-		cache: cache.NewObjectFIFO(cache.MaxSize),
 	}, nil
 }
 
@@ -355,9 +353,8 @@
 		return 0, err
 	}
 
-	base := d.cache.Get(ref)
-
-	if base == nil {
+	base, ok := d.cacheGet(ref)
+	if !ok {
 		base, err = d.recallByHash(ref)
 		if err != nil {
 			return 0, err
@@ -366,7 +363,7 @@
 
 	obj.SetType(base.Type())
 	err = ApplyDelta(obj, base, buf.Bytes())
-	d.cache.Add(obj)
+	d.cachePut(obj)
 
 	return crc, err
 }
@@ -381,10 +378,10 @@
 	e, ok := d.idx.LookupOffset(uint64(offset))
 	var base plumbing.EncodedObject
 	if ok {
-		base = d.cache.Get(e.Hash)
+		base, ok = d.cacheGet(e.Hash)
 	}
 
-	if base == nil {
+	if !ok {
 		base, err = d.recallByOffset(offset)
 		if err != nil {
 			return 0, err
@@ -393,11 +390,27 @@
 
 	obj.SetType(base.Type())
 	err = ApplyDelta(obj, base, buf.Bytes())
-	d.cache.Add(obj)
+	d.cachePut(obj)
 
 	return crc, err
 }
 
+func (d *Decoder) cacheGet(h plumbing.Hash) (plumbing.EncodedObject, bool) {
+	if d.DeltaBaseCache == nil {
+		return nil, false
+	}
+
+	return d.DeltaBaseCache.Get(h)
+}
+
+func (d *Decoder) cachePut(obj plumbing.EncodedObject) {
+	if d.DeltaBaseCache == nil {
+		return
+	}
+
+	d.DeltaBaseCache.Put(obj)
+}
+
 func (d *Decoder) recallByOffset(o int64) (plumbing.EncodedObject, error) {
 	if d.s.IsSeekable {
 		return d.DecodeObjectAt(o)
@@ -455,7 +468,5 @@
 // Close closes the Scanner. usually this mean that the whole reader is read and
 // discarded
 func (d *Decoder) Close() error {
-	d.cache.Clear()
-
 	return d.s.Close()
 }
diff --git a/storage/filesystem/object.go b/storage/filesystem/object.go
index e235b33..6dd910b 100644
--- a/storage/filesystem/object.go
+++ b/storage/filesystem/object.go
@@ -5,6 +5,7 @@
 	"os"
 
 	"gopkg.in/src-d/go-git.v4/plumbing"
+	"gopkg.in/src-d/go-git.v4/plumbing/cache"
 	"gopkg.in/src-d/go-git.v4/plumbing/format/idxfile"
 	"gopkg.in/src-d/go-git.v4/plumbing/format/objfile"
 	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
@@ -16,14 +17,20 @@
 	"gopkg.in/src-d/go-billy.v3"
 )
 
+const DefaultMaxDeltaBaseCacheSize = 92 * cache.MiByte
+
 type ObjectStorage struct {
+	// DeltaBaseCache is an object cache used to cache delta bases when
+	// resolving deltas from packfiles.
+	DeltaBaseCache cache.Object
+
 	dir   *dotgit.DotGit
 	index map[plumbing.Hash]*packfile.Index
 }
 
 func newObjectStorage(dir *dotgit.DotGit) (ObjectStorage, error) {
 	s := ObjectStorage{
-		dir: dir,
+		DeltaBaseCache: cache.NewObjectLRU(DefaultMaxDeltaBaseCacheSize),
+		dir:            dir,
 	}
 
 	return s, nil
@@ -198,6 +205,7 @@
 		return nil, err
 	}
 
+	d.DeltaBaseCache = s.DeltaBaseCache
 	d.SetIndex(s.index[pack])
 	obj, err := d.DecodeObjectAt(offset)
 	return obj, err