leveldb/filter/bloom.go - third_party/goleveldb - Git at Google

 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
 // All rights reserved.
 //
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 package filter

 import (
 	"github.com/syndtr/goleveldb/leveldb/util"
 )

 // bloomHash returns the 32-bit hash of key used for every bloom filter
 // probe. It calls util.Hash with a fixed seed.
 func bloomHash(key []byte) uint32 {
 	const seed = 0xbc9f1d34
 	return util.Hash(key, seed)
 }

 // bloomFilter is the bloom filter policy. Its underlying integer is the
 // bits-per-key setting passed to NewBloomFilter.
 type bloomFilter int

 // Name returns the identifier of this filter policy.
 //
 // The bloom filter serializes its parameters and is backward compatible
 // with respect to them. Therefore, its parameters are not added to its
 // name.
 func (bloomFilter) Name() string {
 	const name = "leveldb.BuiltinBloomFilter"
 	return name
 }

 // Contains reports whether key may be present in the given serialized
 // filter. The last byte of filter holds the probe count; the bytes before
 // it are the bitmap. A false result means that at least one probed bit is
 // unset; a true result may be a false positive.
 func (f bloomFilter) Contains(filter, key []byte) bool {
 	// A usable filter has at least one bitmap byte plus the probe-count byte.
 	if len(filter) < 2 {
 		return false
 	}
 	last := len(filter) - 1
 	bitmap, probes := filter[:last], filter[last]

 	// The probe count is read from the filter itself so that filters
 	// generated with different parameters can still be queried.
 	if probes > 30 {
 		// Reserved for potentially new encodings for short bloom filters.
 		// Consider it a match.
 		return true
 	}

 	totalBits := uint32(last * 8)
 	h := bloomHash(key)
 	step := h>>17 | h<<15 // Rotate right 17 bits
 	for i := probes; i > 0; i-- {
 		pos := h % totalBits
 		if bitmap[pos>>3]&(1<<(pos&7)) == 0 {
 			return false
 		}
 		h += step
 	}
 	return true
 }

 // NewGenerator returns a generator that builds filters using f bits per
 // key. The number of probes per key is f*0.69 (0.69 =~ ln(2)), rounded
 // down to reduce probing cost a little bit, and clamped to [1, 30].
 func (f bloomFilter) NewGenerator() FilterGenerator {
 	// Clamp in int before narrowing to uint8. Converting first would wrap
 	// for large bits-per-key values (for example 372 would yield 256,
 	// which truncates to 0) and would turn negative values into large
 	// probe counts.
 	k := int(f) * 69 / 100
 	if k < 1 {
 		k = 1
 	} else if k > 30 {
 		k = 30
 	}
 	return &bloomFilterGenerator{
 		n: int(f),
 		k: uint8(k),
 	}
 }

 // bloomFilterGenerator accumulates key hashes and turns them into a
 // serialized bloom filter.
 type bloomFilterGenerator struct {
 	// n is the bits-per-key setting; Generate multiplies it by the number
 	// of pending keys to size the bitmap.
 	n int
 	// k is the number of probes per key; Generate stores it in the last
 	// byte of each filter.
 	k uint8

 	// keyHashes holds the hashes appended by Add since the last Generate.
 	keyHashes []uint32
 }

 // Add records the hash of key for the next call to Generate.
 func (g *bloomFilterGenerator) Add(key []byte) {
 	// Only one hash is stored per key; the probe sequence is derived from
 	// it by double-hashing. See analysis in [Kirsch,Mitzenmacher 2006].
 	h := bloomHash(key)
 	g.keyHashes = append(g.keyHashes, h)
 }

 // Generate builds a filter from all hashes added since the previous call,
 // writes it into space allocated from b, and clears the pending hashes.
 // The output is the bitmap followed by one byte holding the probe count.
 func (g *bloomFilterGenerator) Generate(b Buffer) {
 	// A tiny bitmap would give a very high false positive rate, so a
 	// minimum length is enforced.
 	const minBits = 64

 	totalBits := uint32(len(g.keyHashes) * g.n)
 	if totalBits < minBits {
 		totalBits = minBits
 	}
 	// Round up to whole bytes and use every bit of the final byte.
 	size := (totalBits + 7) / 8
 	totalBits = size * 8

 	out := b.Alloc(int(size) + 1)
 	out[size] = g.k
 	for _, h := range g.keyHashes {
 		step := h>>17 | h<<15 // Rotate right 17 bits
 		for i := g.k; i > 0; i-- {
 			pos := h % totalBits
 			out[pos>>3] |= 1 << (pos & 7)
 			h += step
 		}
 	}

 	// Keep the backing array for the next batch of keys.
 	g.keyHashes = g.keyHashes[:0]
 }

 // NewBloomFilter returns a bloom filter configured with the given
 // bitsPerKey.
 //
 // Each serialized filter carries the parameters needed to read it, so
 // bloom filters are backwards compatible with respect to changing
 // bitsPerKey. This means that no big performance penalty will be
 // experienced when changing the parameter. See documentation for
 // opt.Options.Filter for more information.
 func NewBloomFilter(bitsPerKey int) Filter {
 	var f Filter = bloomFilter(bitsPerKey)
 	return f
 }
	// Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
	// All rights reserved.
	//
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	package filter

	import (
	"github.com/syndtr/goleveldb/leveldb/util"
	)

	// bloomHash returns the 32-bit hash of key used for every bloom filter
	// probe. It calls util.Hash with a fixed seed.
	func bloomHash(key []byte) uint32 {
		const seed = 0xbc9f1d34
		return util.Hash(key, seed)
	}

	// bloomFilter is the bloom filter policy. Its underlying integer is the
	// bits-per-key setting passed to NewBloomFilter.
	type bloomFilter int

	// Name returns the identifier of this filter policy.
	//
	// The bloom filter serializes its parameters and is backward compatible
	// with respect to them. Therefore, its parameters are not added to its
	// name.
	func (bloomFilter) Name() string {
		const name = "leveldb.BuiltinBloomFilter"
		return name
	}

	func (f bloomFilter) Contains(filter, key []byte) bool {
	nBytes := len(filter) - 1
	if nBytes < 1 {
	return false
	}
	nBits := uint32(nBytes * 8)

	// Use the encoded k so that we can read filters generated by
	// bloom filters created using different parameters.
	k := filter[nBytes]
	if k > 30 {
	// Reserved for potentially new encodings for short bloom filters.
	// Consider it a match.
	return true
	}

	kh := bloomHash(key)
	delta := (kh >> 17) \| (kh << 15) // Rotate right 17 bits
	for j := uint8(0); j < k; j++ {
	bitpos := kh % nBits
	if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 {
	return false
	}
	kh += delta
	}
	return true
	}

	// NewGenerator returns a generator that builds filters using f bits per
	// key. The number of probes per key is f*0.69 (0.69 =~ ln(2)), rounded
	// down to reduce probing cost a little bit, and clamped to [1, 30].
	func (f bloomFilter) NewGenerator() FilterGenerator {
		// Clamp in int before narrowing to uint8. Converting first would wrap
		// for large bits-per-key values (for example 372 would yield 256,
		// which truncates to 0) and would turn negative values into large
		// probe counts.
		k := int(f) * 69 / 100
		if k < 1 {
			k = 1
		} else if k > 30 {
			k = 30
		}
		return &bloomFilterGenerator{
			n: int(f),
			k: uint8(k),
		}
	}

	// bloomFilterGenerator accumulates key hashes and turns them into a
	// serialized bloom filter.
	type bloomFilterGenerator struct {
	// n is the bits-per-key setting; Generate multiplies it by the number
	// of pending keys to size the bitmap.
	n int
	// k is the number of probes per key; Generate stores it in the last
	// byte of each filter.
	k uint8

	// keyHashes holds the hashes appended by Add since the last Generate.
	keyHashes []uint32
	}

	// Add records the hash of key for the next call to Generate.
	func (g *bloomFilterGenerator) Add(key []byte) {
		// Only one hash is stored per key; the probe sequence is derived from
		// it by double-hashing. See analysis in [Kirsch,Mitzenmacher 2006].
		h := bloomHash(key)
		g.keyHashes = append(g.keyHashes, h)
	}

	func (g *bloomFilterGenerator) Generate(b Buffer) {
	// Compute bloom filter size (in both bits and bytes)
	nBits := uint32(len(g.keyHashes) * g.n)
	// For small n, we can see a very high false positive rate. Fix it
	// by enforcing a minimum bloom filter length.
	if nBits < 64 {
	nBits = 64
	}
	nBytes := (nBits + 7) / 8
	nBits = nBytes * 8

	dest := b.Alloc(int(nBytes) + 1)
	dest[nBytes] = g.k
	for _, kh := range g.keyHashes {
	delta := (kh >> 17) \| (kh << 15) // Rotate right 17 bits
	for j := uint8(0); j < g.k; j++ {
	bitpos := kh % nBits
	dest[bitpos/8] \|= (1 << (bitpos % 8))
	kh += delta
	}
	}

	g.keyHashes = g.keyHashes[:0]
	}

	// NewBloomFilter returns a bloom filter configured with the given
	// bitsPerKey.
	//
	// Each serialized filter carries the parameters needed to read it, so
	// bloom filters are backwards compatible with respect to changing
	// bitsPerKey. This means that no big performance penalty will be
	// experienced when changing the parameter. See documentation for
	// opt.Options.Filter for more information.
	func NewBloomFilter(bitsPerKey int) Filter {
		var f Filter = bloomFilter(bitsPerKey)
		return f
	}