| // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> |
| // All rights reserved. |
| // |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| package filter |
| |
| import ( |
| "github.com/syndtr/goleveldb/leveldb/util" |
| ) |
| |
| func bloomHash(key []byte) uint32 { |
| return util.Hash(key, 0xbc9f1d34) |
| } |
| |
| type bloomFilter int |
| |
| // The bloom filter serializes its parameters and is backward compatible |
| // with respect to them. Therefor, its parameters are not added to its |
| // name. |
| func (bloomFilter) Name() string { |
| return "leveldb.BuiltinBloomFilter" |
| } |
| |
| func (f bloomFilter) Contains(filter, key []byte) bool { |
| nBytes := len(filter) - 1 |
| if nBytes < 1 { |
| return false |
| } |
| nBits := uint32(nBytes * 8) |
| |
| // Use the encoded k so that we can read filters generated by |
| // bloom filters created using different parameters. |
| k := filter[nBytes] |
| if k > 30 { |
| // Reserved for potentially new encodings for short bloom filters. |
| // Consider it a match. |
| return true |
| } |
| |
| kh := bloomHash(key) |
| delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits |
| for j := uint8(0); j < k; j++ { |
| bitpos := kh % nBits |
| if (uint32(filter[bitpos/8]) & (1 << (bitpos % 8))) == 0 { |
| return false |
| } |
| kh += delta |
| } |
| return true |
| } |
| |
| func (f bloomFilter) NewGenerator() FilterGenerator { |
| // Round down to reduce probing cost a little bit. |
| k := uint8(f * 69 / 100) // 0.69 =~ ln(2) |
| if k < 1 { |
| k = 1 |
| } else if k > 30 { |
| k = 30 |
| } |
| return &bloomFilterGenerator{ |
| n: int(f), |
| k: k, |
| } |
| } |
| |
| type bloomFilterGenerator struct { |
| n int |
| k uint8 |
| |
| keyHashes []uint32 |
| } |
| |
| func (g *bloomFilterGenerator) Add(key []byte) { |
| // Use double-hashing to generate a sequence of hash values. |
| // See analysis in [Kirsch,Mitzenmacher 2006]. |
| g.keyHashes = append(g.keyHashes, bloomHash(key)) |
| } |
| |
| func (g *bloomFilterGenerator) Generate(b Buffer) { |
| // Compute bloom filter size (in both bits and bytes) |
| nBits := uint32(len(g.keyHashes) * g.n) |
| // For small n, we can see a very high false positive rate. Fix it |
| // by enforcing a minimum bloom filter length. |
| if nBits < 64 { |
| nBits = 64 |
| } |
| nBytes := (nBits + 7) / 8 |
| nBits = nBytes * 8 |
| |
| dest := b.Alloc(int(nBytes) + 1) |
| dest[nBytes] = g.k |
| for _, kh := range g.keyHashes { |
| delta := (kh >> 17) | (kh << 15) // Rotate right 17 bits |
| for j := uint8(0); j < g.k; j++ { |
| bitpos := kh % nBits |
| dest[bitpos/8] |= (1 << (bitpos % 8)) |
| kh += delta |
| } |
| } |
| |
| g.keyHashes = g.keyHashes[:0] |
| } |
| |
| // NewBloomFilter creates a new initialized bloom filter for given |
| // bitsPerKey. |
| // |
| // Since bitsPerKey is persisted individually for each bloom filter |
| // serialization, bloom filters are backwards compatible with respect to |
| // changing bitsPerKey. This means that no big performance penalty will |
| // be experienced when changing the parameter. See documentation for |
| // opt.Options.Filter for more information. |
| func NewBloomFilter(bitsPerKey int) Filter { |
| return bloomFilter(bitsPerKey) |
| } |