blob: f0327893b29c874c8670031fd512d2b0ee703f8d [file] [log] [blame]
// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT
//! A set of usizes, represented as a bit vector
//!
//! In addition to some basic operations like `insert()` and `remove()`, this
//! module also lets you write expressions on sets that are lazily evaluated. To
//! do so, call `.s(..)` on the set to reference the bitset in a
//! lazily-evaluated `BitSetStream`, and then use typical binary operators on
//! the `BitSetStream`s.
//! ```rust
//! let a = BitSet::new();
//! let b = BitSet::new();
//! let c = BitSet::new();
//!
//! c.assign(a.s(..) | b.s(..));
//! c ^= a.s(..);
//! ```
//! Supported binary operations are `&`, `|`, `^`, `-`. Note that there is no
//! unary negation, because that would result in an infinite result set. For
//! patterns like `a & !b`, instead use set subtraction `a - b`.
use std::cmp::{max, min};
use std::marker::PhantomData;
use std::ops::{
BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, RangeFull,
Sub, SubAssign,
};
/// Converts a value into a bit index
///
/// Unlike a hashing algorithm that attempts to scatter the data through
/// the integer range, implementations of IntoBitIndex should attempt to
/// compact the resulting range as much as possible because it will be used
/// to index into an array of bits. The better the compaction, the better
/// the memory efficiency of [BitSet] will be.
///
/// Because the index is used blindly to index bits, implementations must
/// ensure that `a == b` if and only if
/// `a.into_bit_index() == b.into_bit_index()`.
pub trait IntoBitIndex {
/// Converts a self to a bit index
fn into_bit_index(self) -> usize;
}
impl IntoBitIndex for usize {
fn into_bit_index(self) -> usize {
self
}
}
/// Converts a bit index back into a value
///
/// The implementation must ensure that
/// `x.into_bit_index().from_bit_index() == x` and
/// `X::from_bit_index(i).into_bit_index() == i`.
pub trait FromBitIndex: IntoBitIndex {
fn from_bit_index(i: usize) -> Self;
}
impl FromBitIndex for usize {
fn from_bit_index(i: usize) -> Self {
i
}
}
/// A set implemented as an array of bits
///
/// Unlike `HashSet` and similar containers which actually store the provided
/// data, `BitSet` only stores an array of bits with one bit per potential set
/// item. By default, a `BitSet` is a set of `usize`. However, it can be used
/// to store any type which implementss [`IntoBitIndex`].
///
/// Because `BitSet` only stores one bit per item, you can only iterate over a
/// `BitSet<K>` if `K` implements [`FromBitIndex`].
#[derive(Clone)]
pub struct BitSet<K = usize> {
words: Vec<u32>,
phantom: PhantomData<K>,
}
impl<K> BitSet<K> {
pub fn new() -> BitSet<K> {
BitSet {
words: Vec::new(),
phantom: PhantomData,
}
}
fn reserve_words(&mut self, words: usize) {
if self.words.len() < words {
self.words.resize(words, 0);
}
}
pub fn reserve(&mut self, bits: usize) {
self.reserve_words(bits.div_ceil(32));
}
pub fn clear(&mut self) {
for w in self.words.iter_mut() {
*w = 0;
}
}
pub fn is_empty(&self) -> bool {
for w in self.words.iter() {
if *w != 0 {
return false;
}
}
true
}
}
impl<K: IntoBitIndex> BitSet<K> {
pub fn contains(&self, key: K) -> bool {
let idx = key.into_bit_index();
let w = idx / 32;
let b = idx % 32;
if w < self.words.len() {
self.words[w] & (1_u32 << b) != 0
} else {
false
}
}
pub fn insert(&mut self, key: K) -> bool {
let idx = key.into_bit_index();
let w = idx / 32;
let b = idx % 32;
self.reserve_words(w + 1);
let exists = self.words[w] & (1_u32 << b) != 0;
self.words[w] |= 1_u32 << b;
!exists
}
pub fn remove(&mut self, key: K) -> bool {
let idx = key.into_bit_index();
let w = idx / 32;
let b = idx % 32;
self.reserve_words(w + 1);
let exists = self.words[w] & (1_u32 << b) != 0;
self.words[w] &= !(1_u32 << b);
exists
}
}
impl<K: FromBitIndex> BitSet<K> {
pub fn iter(&self) -> impl '_ + Iterator<Item = K> {
BitSetIter::new(self)
}
}
impl BitSet<usize> {
pub fn next_unset(&self, start: usize) -> usize {
if start >= self.words.len() * 32 {
return start;
}
let mut w = start / 32;
let mut mask = !(u32::MAX << (start % 32));
while w < self.words.len() {
let b = (self.words[w] | mask).trailing_ones();
if b < 32 {
return w * 32 + usize::try_from(b).unwrap();
}
mask = 0;
w += 1;
}
self.words.len() * 32
}
/// Search for a set of `count` consecutive elements that are not present in
/// the set. The found set must obey the alignment requirements specified by
/// align_offset and align_mul. All elements in the found set will be >=
/// start_point. Returns the least element of the found set.
///
/// align_mul must be a power of two <= 16
pub fn find_aligned_unset_range(
&self,
start_point: usize,
count: usize,
align_mul: usize,
align_offset: usize,
) -> usize {
assert!(align_mul <= 16);
assert!(align_offset + count <= align_mul);
assert!(count > 0);
let every_n = every_nth_bit(align_mul) << align_offset;
let mut word_idx = start_point / 32;
let mut mask = !(u32::MAX << (start_point % 32));
loop {
let word = mask | self.words.get(word_idx).unwrap_or(&0);
let unset_word = u64::from(!word);
let every_n_64 = u64::from(every_n);
// If every bit in a sequence is set, then adding one to the bottom
// bit will cause it to carry past the top bit. Carry-in for a bit
// is true if the bit in the addition result does not match the same
// bit in a ^ b. We do this in u64 to handle the case where we carry
// past the top bit.
let carry = (unset_word + every_n_64) ^ (unset_word ^ every_n_64);
let found = u32::try_from(carry >> count).unwrap() & every_n;
if found != 0 {
return word_idx * 32
+ usize::try_from(found.trailing_zeros()).unwrap();
}
word_idx += 1;
mask = 0;
}
}
}
fn every_nth_bit(n: usize) -> u32 {
assert!(0 < n && n < 32);
assert!(n.is_power_of_two());
u32::MAX / ((1 << n) - 1)
}
impl<K> BitSet<K> {
/// Evaluate an expression and store its value in self
pub fn assign<B>(&mut self, value: BitSetStream<B, K>)
where
B: BitSetStreamTrait,
{
let mut value = value.0;
let len = value.len();
self.words.clear();
self.words.resize_with(len, || value.next());
for _ in 0..16 {
debug_assert_eq!(value.next(), 0);
}
}
/// Calculate the union of self and an expression, and store the result in
/// self.
///
/// Returns true if the value of self changes, or false otherwise. If you
/// don't need the return value of this function, consider using the `|=`
/// operator instead.
pub fn union_with<B>(&mut self, other: BitSetStream<B, K>) -> bool
where
B: BitSetStreamTrait,
{
let mut other = other.0;
let mut added_bits = false;
let other_len = other.len();
self.reserve_words(other_len);
for w in 0..other_len {
let uw = self.words[w] | other.next();
if uw != self.words[w] {
added_bits = true;
self.words[w] = uw;
}
}
added_bits
}
pub fn s<'a>(
&'a self,
_: RangeFull,
) -> BitSetStream<impl 'a + BitSetStreamTrait, K> {
BitSetStream(
BitSetStreamFromBitSet {
iter: self.words.iter().copied(),
},
PhantomData,
)
}
}
impl<K> Default for BitSet<K> {
fn default() -> BitSet<K> {
BitSet::new()
}
}
impl FromIterator<usize> for BitSet {
fn from_iter<T>(iter: T) -> Self
where
T: IntoIterator<Item = usize>,
{
let mut res = BitSet::new();
for i in iter {
res.insert(i);
}
res
}
}
pub trait BitSetStreamTrait {
/// Get the next word
///
/// Guaranteed to return 0 after len() elements
fn next(&mut self) -> u32;
/// Get the number of output words
fn len(&self) -> usize;
}
struct BitSetStreamFromBitSet<T>
where
T: ExactSizeIterator<Item = u32>,
{
iter: T,
}
impl<T> BitSetStreamTrait for BitSetStreamFromBitSet<T>
where
T: ExactSizeIterator<Item = u32>,
{
fn next(&mut self) -> u32 {
self.iter.next().unwrap_or(0)
}
fn len(&self) -> usize {
self.iter.len()
}
}
pub struct BitSetStream<T, K>(T, PhantomData<K>)
where
T: BitSetStreamTrait;
impl<T, K> From<BitSetStream<T, K>> for BitSet<K>
where
T: BitSetStreamTrait,
{
fn from(value: BitSetStream<T, K>) -> Self {
let mut out = BitSet::new();
out.assign(value);
out
}
}
macro_rules! binop {
(
$BinOp:ident,
$bin_op:ident,
$AssignBinOp:ident,
$assign_bin_op:ident,
$Struct:ident,
|$a:ident, $b:ident| $next_impl:expr,
|$a_len: ident, $b_len: ident| $len_impl:expr,
) => {
pub struct $Struct<A, B>
where
A: BitSetStreamTrait,
B: BitSetStreamTrait,
{
a: A,
b: B,
}
impl<A, B> BitSetStreamTrait for $Struct<A, B>
where
A: BitSetStreamTrait,
B: BitSetStreamTrait,
{
fn next(&mut self) -> u32 {
let $a = self.a.next();
let $b = self.b.next();
$next_impl
}
fn len(&self) -> usize {
let $a_len = self.a.len();
let $b_len = self.b.len();
let new_len = $len_impl;
new_len
}
}
impl<A, B, K> $BinOp<BitSetStream<B, K>> for BitSetStream<A, K>
where
A: BitSetStreamTrait,
B: BitSetStreamTrait,
{
type Output = BitSetStream<$Struct<A, B>, K>;
fn $bin_op(self, rhs: BitSetStream<B, K>) -> Self::Output {
BitSetStream(
$Struct {
a: self.0,
b: rhs.0,
},
PhantomData,
)
}
}
impl<B, K> $AssignBinOp<BitSetStream<B, K>> for BitSet<K>
where
B: BitSetStreamTrait,
{
fn $assign_bin_op(&mut self, rhs: BitSetStream<B, K>) {
let mut rhs = rhs.0;
let $a_len = self.words.len();
let $b_len = rhs.len();
let expected_word_len = $len_impl;
self.words.resize(expected_word_len, 0);
for lhs in &mut self.words {
let $a = *lhs;
let $b = rhs.next();
*lhs = $next_impl;
}
for _ in 0..16 {
debug_assert_eq!(
{
let $a = 0;
let $b = rhs.next();
$next_impl
},
0
);
}
}
}
};
}
binop!(
BitAnd,
bitand,
BitAndAssign,
bitand_assign,
BitSetStreamAnd,
|a, b| a & b,
|a, b| min(a, b),
);
binop!(
BitOr,
bitor,
BitOrAssign,
bitor_assign,
BitSetStreamOr,
|a, b| a | b,
|a, b| max(a, b),
);
binop!(
BitXor,
bitxor,
BitXorAssign,
bitxor_assign,
BitSetStreamXor,
|a, b| a ^ b,
|a, b| max(a, b),
);
binop!(
Sub,
sub,
SubAssign,
sub_assign,
BitSetStreamSub,
|a, b| a & !b,
|a, _b| a,
);
struct BitSetIter<'a, K> {
set: &'a BitSet<K>,
w: usize,
mask: u32,
}
impl<'a, K> BitSetIter<'a, K> {
fn new(set: &'a BitSet<K>) -> Self {
Self {
set,
w: 0,
mask: u32::MAX,
}
}
}
impl<'a, K: FromBitIndex> Iterator for BitSetIter<'a, K> {
type Item = K;
fn next(&mut self) -> Option<K> {
while self.w < self.set.words.len() {
let b = (self.set.words[self.w] & self.mask).trailing_zeros();
if b < 32 {
self.mask &= !(1 << b);
let idx = self.w * 32 + usize::try_from(b).unwrap();
return Some(K::from_bit_index(idx));
}
self.mask = u32::MAX;
self.w += 1;
}
None
}
}
#[cfg(test)]
mod tests {
use super::*;
fn to_vec(set: &BitSet) -> Vec<usize> {
set.iter().collect()
}
#[test]
fn test_basic() {
let mut set = BitSet::new();
assert_eq!(to_vec(&set), &[]);
assert!(set.is_empty());
set.insert(0);
assert_eq!(to_vec(&set), &[0]);
set.insert(73);
set.insert(1);
assert_eq!(to_vec(&set), &[0, 1, 73]);
assert!(!set.is_empty());
assert!(set.contains(73));
assert!(!set.contains(197));
assert!(set.remove(1));
assert!(!set.remove(7));
let mut set2 = set.clone();
assert_eq!(to_vec(&set), &[0, 73]);
assert!(!set.is_empty());
assert!(set.remove(0));
assert!(set.remove(73));
assert!(set.is_empty());
set.clear();
assert!(set.is_empty());
set2.clear();
assert!(set2.is_empty());
}
#[test]
fn test_next_unset() {
for test_range in
&[0..0, 42..1337, 1337..1337, 31..32, 32..33, 63..64, 64..65]
{
let mut set = BitSet::new();
for i in test_range.clone() {
set.insert(i);
}
for extra_bit in [17, 34, 39] {
assert!(test_range.end != extra_bit);
set.insert(extra_bit);
}
assert_eq!(set.next_unset(test_range.start), test_range.end);
}
}
#[test]
fn test_from_iter() {
let vec = vec![0, 1, 99];
let set: BitSet = vec.clone().into_iter().collect();
assert_eq!(to_vec(&set), vec);
}
#[test]
fn test_or() {
let a: BitSet = vec![9, 23, 18, 72].into_iter().collect();
let b: BitSet = vec![7, 23, 1337].into_iter().collect();
let expected = vec![7, 9, 18, 23, 72, 1337];
assert_eq!(to_vec(&(a.s(..) | b.s(..)).into()), &expected[..]);
assert_eq!(to_vec(&(b.s(..) | a.s(..)).into()), &expected[..]);
let mut actual_1 = a.clone();
actual_1 |= b.s(..);
assert_eq!(to_vec(&actual_1), &expected[..]);
let mut actual_2 = b.clone();
actual_2 |= a.s(..);
assert_eq!(to_vec(&actual_2), &expected[..]);
let mut actual_3 = a.clone();
assert_eq!(actual_3.union_with(a.s(..)), false);
assert_eq!(actual_3.union_with(b.s(..)), true);
assert_eq!(to_vec(&actual_3), &expected[..]);
let mut actual_4 = b.clone();
assert_eq!(actual_4.union_with(b.s(..)), false);
assert_eq!(actual_4.union_with(a.s(..)), true);
assert_eq!(to_vec(&actual_4), &expected[..]);
}
#[test]
fn test_and() {
let a: BitSet = vec![1337, 42, 7, 1].into_iter().collect();
let b: BitSet = vec![42, 783, 2, 7].into_iter().collect();
let expected = vec![7, 42];
assert_eq!(to_vec(&(a.s(..) & b.s(..)).into()), &expected[..]);
assert_eq!(to_vec(&(b.s(..) & a.s(..)).into()), &expected[..]);
let mut actual_1 = a.clone();
actual_1 &= b.s(..);
assert_eq!(to_vec(&actual_1), &expected[..]);
let mut actual_2 = b.clone();
actual_2 &= a.s(..);
assert_eq!(to_vec(&actual_2), &expected[..]);
}
#[test]
fn test_xor() {
let a: BitSet = vec![1337, 42, 7, 1].into_iter().collect();
let b: BitSet = vec![42, 127, 2, 7].into_iter().collect();
let expected = vec![1, 2, 127, 1337];
assert_eq!(to_vec(&(a.s(..) ^ b.s(..)).into()), &expected[..]);
assert_eq!(to_vec(&(b.s(..) ^ a.s(..)).into()), &expected[..]);
let mut actual_1 = a.clone();
actual_1 ^= b.s(..);
assert_eq!(to_vec(&actual_1), &expected[..]);
let mut actual_2 = b.clone();
actual_2 ^= a.s(..);
assert_eq!(to_vec(&actual_2), &expected[..]);
}
#[test]
fn test_sub() {
let a: BitSet = vec![1337, 42, 7, 1].into_iter().collect();
let b: BitSet = vec![42, 127, 2, 7].into_iter().collect();
let expected_1 = vec![1, 1337];
let expected_2 = vec![2, 127];
assert_eq!(to_vec(&(a.s(..) - b.s(..)).into()), &expected_1[..]);
assert_eq!(to_vec(&(b.s(..) - a.s(..)).into()), &expected_2[..]);
let mut actual_1 = a.clone();
actual_1 -= b.s(..);
assert_eq!(to_vec(&actual_1), &expected_1[..]);
let mut actual_2 = b.clone();
actual_2 -= a.s(..);
assert_eq!(to_vec(&actual_2), &expected_2[..]);
}
#[test]
fn test_compund() {
let a: BitSet = vec![1337, 42, 7, 1].into_iter().collect();
let b: BitSet = vec![42, 127, 2, 7].into_iter().collect();
let mut c = BitSet::new();
c &= a.s(..) | b.s(..);
assert!(c.is_empty());
}
fn every_nth_bit_naive(n: usize) -> u32 {
assert!(n <= 32);
assert!(n.is_power_of_two());
let mut x = 0;
for i in 0..32 {
if i % n == 0 {
x |= 1 << i;
}
}
x
}
#[test]
fn test_every_nth_bit() {
for i in 1_usize..=16 {
if i.is_power_of_two() {
assert_eq!(every_nth_bit(i), every_nth_bit_naive(i));
}
}
}
#[test]
fn test_find_aligned_unset_range() {
let a: BitSet =
[0, 4, 5, 6, 7, 61, 128, 129, 130].into_iter().collect();
/* (start, count, align_mul, align_offset) */
assert_eq!(a.find_aligned_unset_range(0, 1, 1, 0), 1);
assert_eq!(a.find_aligned_unset_range(4, 1, 1, 0), 8);
assert_eq!(a.find_aligned_unset_range(128, 1, 1, 0), 131);
assert_eq!(a.find_aligned_unset_range(0, 4, 4, 0), 8);
assert_eq!(a.find_aligned_unset_range(128, 4, 4, 0), 132);
assert_eq!(a.find_aligned_unset_range(0, 3, 4, 1), 1);
assert_eq!(a.find_aligned_unset_range(0, 3, 8, 1), 1);
assert_eq!(a.find_aligned_unset_range(0, 4, 8, 1), 9);
assert_eq!(a.find_aligned_unset_range(0, 2, 2, 0), 2);
assert_eq!(a.find_aligned_unset_range(2, 2, 2, 0), 2);
assert_eq!(a.find_aligned_unset_range(3, 2, 2, 0), 8);
assert_eq!(a.find_aligned_unset_range(0, 2, 4, 2), 2);
assert_eq!(a.find_aligned_unset_range(3, 2, 4, 2), 10);
assert_eq!(a.find_aligned_unset_range(40, 16, 16, 0), 64);
assert_eq!(a.find_aligned_unset_range(1337, 1, 1, 0), 1337);
assert_eq!(a.find_aligned_unset_range(161, 1, 2, 0), 162);
}
}