Bloom filters

The purpose of this code is to illustrate the use of Why3's module system, both for modularity and genericity. We use the example of a Bloom filter, which is built incrementally and then instantiated to filter strings. A tiny test client is also provided. Once verified, the code is translated into a C library.

Authors: Jean-Christophe FilliĆ¢tre (CNRS) Andrei Paskevich (Univ Paris-Saclay)

An interface for arrays of Boolean values

module BoolArray

  use int.Int
  use mach.int.Int32
  use mach.int.UInt32
  use seq.Seq

  type t = abstract {
    mutable contents: seq bool;
  }

  meta coercion function contents
  (* this way, we can use `a` instead of `a.contents` in the following *)

  (* needed for `by` later *)
  val ghost make (n: int) : t
    requires { 0 <= n }
    ensures  { length result = n }

  val partial create (size: uint32) : t
    requires { 0 < size < max_int32 }
    ensures  { length result = size }
    ensures  { forall i. 0 <= i < size -> result[i] = false }

  val get (a: t) (i: uint32) : bool
    requires { i < length a }
    ensures  { result = a[i] }

  val set (a: t) (i: uint32) : unit
    requires { i < length a }
    writes   { a }
    ensures  { a = (old a)[i <- true] }

end

An implementation of BoolArray using an array of chars

This is a straightforward, rather naive, implementation using one char per Boolean.

module BAchar

  use int.Int
  use mach.c.UChar
  use mach.int.Int32
  use mach.int.UInt32GMP
  use mach.c.C
  use seq.Seq

  type t = {
    mutable ghost contents: seq bool;
                       arr: ptr uchar;
  }
  invariant { arr.offset = arr.min = 0 }
  invariant { length contents <= arr.max = plength arr }
  invariant { length contents < max_int32 }
  invariant { writable arr }
  invariant { forall i. 0 <= i < length contents ->
              contents[i] <-> pelts arr i <> 0 }
  by { contents = empty; arr = malloc 0 }

  meta coercion function contents

  let create (size: uint32) : t
    requires { 0 < size < max_int32 }
    ensures  { length result = size }
    ensures  { forall i. 0 <= i < size -> result[i] = false }
  = let p = malloc size in
    c_assert (is_not_null p);
    for i = 0 to size - 1 do
      invariant { forall j. 0 <= j < i -> pelts p j = 0 }
      set_ofs p (to_int32 i) 0;
    done;
    { arr = p;
      contents = Seq.create (to_int size) (fun _ -> false) }

  let get (a: t) (i: uint32) : bool
    requires { i < length a }
    ensures  { result = a[i] }
  = get_ofs a.arr (to_int32 i) <> 0

  let set (a: t) (i: uint32) : unit
    requires { i < length a }
    writes   { a.arr.data.Array.elts, a.contents }
    ensures  { a = (old a)[i <- true] }
  = set_ofs a.arr (to_int32 i) 1;
    ghost (a.contents <- a.contents[to_int i <- true])

all this implements interface BoolArray

  clone BoolArray with type t, val create, val get, val set
end

A better implementation of BoolArray using an array of 32-bit integers, each integer storing 32 Boolean values.

module BAint32

  use int.Int
  use int.ComputerDivision
  use mach.int.Int32
  use mach.int.UInt32BV
  use mach.int.UInt32GMP
  use mach.c.C
  use seq.Seq

  type bv32 = BV32.t

  predicate bit_set (x: bv32) (i: int) =
    BV32.nth x i

  let one_bit (i: uint32) : bv32
    requires { 0 <= i < 32 }
    ensures  { bit_set result i }
    ensures  { forall j. 0 <= j < 32 -> j <> i -> not (bit_set result j) }
  = let ibv = to_bv i in
    let b = BV32.lsl_bv BV32.one ibv in
    assert { BV32.nth_bv b ibv};
    assert { forall j. 0 <= BV32.to_int j < 32 -> j <> ibv ->
             not (BV32.nth_bv b j) };
    let lemma tmp (j: int)
      requires { 0 <= j < 32 } requires { j <> i }
      ensures { not (bit_set b j) } =
      let j = of_int j in let j = to_bv j in
      assert { j <> ibv }; assert { not (BV32.nth_bv b j) } in
    b

  let bit_set (x: bv32) (i: uint32) : bool
    requires { 0 <= i < 32 }
    ensures  { result <-> bit_set x i }
  = let bi = one_bit i in
    let ghost ibv = to_bv i in
    let ba = BV32.bw_and x bi in
    assert { BV32.nth_bv ba ibv <-> BV32.nth_bv x ibv };
    assert { forall j. 0 <= BV32.to_int j < 32 -> j <> ibv ->
             not (BV32.nth_bv ba j) };
    assert { BV32.(==) ba BV32.zeros <-> not (BV32.nth_bv ba ibv) };
    not (BV32.eq ba BV32.zeros)

  let set_bit (x: bv32) (i: uint32) : bv32
    requires { 0 <= i < 32 }
    ensures  { bit_set result i }
    ensures  { forall j. 0 <= j < 32 -> j <> i ->
                 bit_set result j <-> bit_set x j }
  = BV32.bw_or x (one_bit i)

  type t = {
    mutable ghost contents: seq bool;
                       arr: ptr bv32;
  }
  invariant { arr.offset = arr.min = 0 }
  invariant { arr.max = plength arr }
  invariant { length contents <= plength arr * 32 }
  invariant { writable arr }
  invariant { forall i. 0 <= i < length contents ->
              contents[i] <-> bit_set (pelts arr (div i 32)) (mod i 32) }
  by { contents = empty; arr = malloc 0 }

  meta coercion function contents

  let create (size: uint32) : t
    requires { 0 < size < max_int32 }
    ensures  { length result = size }
    ensures  { forall i. 0 <= i < size -> result[i] = false }
  = let n = 1 + (size - 1) / 32 in
    let p = malloc n in
    c_assert (is_not_null p);
    for i = 0 to n - 1 do
      invariant { forall j. 0 <= j < i -> pelts p j = BV32.zeros }
      set_ofs p (to_int32 i) 0;
    done;
    { arr = p;
      contents = Seq.create (to_int size) (fun _ -> false) }

  let get (a: t) (i: uint32) : bool
    requires { i < length a }
    ensures  { result = a[i] }
  = bit_set (get_ofs a.arr (to_int32 (i / 32))) (i % 32)

  let set (a: t) (i: uint32) : unit
    requires { i < length a }
    writes   { a.arr.data.Array.elts, a.contents }
    ensures  { a = (old a)[i <- true] }
  = let j = to_int32 (i / 32) in
    let x = get_ofs a.arr j in
    set_ofs a.arr j (set_bit x (i % 32));
    ghost (a.contents <- a.contents[to_int i <- true])

all this implements interface BoolArray

  clone BoolArray with type t, val create, val get, val set
end

The interface of a filter. A filter is a set for which membership is ensured for elements that were previously added.

module Filter

  use int.Int
  use mach.int.Int32
  use mach.int.UInt32
  use set.Fset

  type elt

  type filter = abstract {
    mutable contents: fset elt;
  }

  meta coercion function contents

  val partial create (m: uint32) : filter
    requires { 0 < m < max_int32 }
    ensures  { result == empty }

  val add (x: elt) (s: filter) : unit
    writes  { s }
    ensures { s == add x (old s) }

  val mem (x: elt) (s: filter) : bool
    ensures { mem x s -> result }

end

A Bloom filter implements a filter using an array of bits of size m and k hash functions. When element x is added, we set the bits h_i(x) mod m for each hash function h_i.

module BloomFilter

  use int.Int
  use int.ComputerDivision
  use mach.int.UInt32
  use mach.int.Int32
  use seq.Seq
  use set.Fset
  clone BoolArray as BA

  val constant k: uint32
    ensures { 0 < result }

  type elt

  val function hash (i: uint32) (_: elt) : uint32
    requires { i < k }

  let function bit (i: uint32) (x: elt) (m: uint32) : uint32
    requires { i < k }
    requires { 0 < m }
  = (hash i x) % m

  type filter = {
    mutable ghost contents: fset elt;
                         m: uint32;
                      barr: BA.t;
  }
  invariant { length barr = m > 0 }
  invariant {
    forall x. mem x contents -> forall i:uint32. i < k -> barr[bit i x m]
  }
  by { contents = Fset.empty; m = 1; barr = BA.make 1 }

  meta coercion function contents

  let bloom_filter (m: uint32) : filter
    requires { 0 < m < max_int32 }
    ensures  { result == empty }
  = { contents = Fset.empty;
      m = m;
      barr = BA.create m }

  let add (x: elt) (s: filter) : unit
    writes  { s.contents, s.barr }
    ensures { s == add x (old s) }
  = for i = 0 to k - 1 do
      invariant { length s.barr = s.m }
      invariant { forall y. mem y s.contents ->
                  forall j:uint32. j < k -> s.barr[bit j y s.m] }
      invariant { forall j:uint32. j < i -> s.barr[bit j x s.m] }
      BA.set s.barr (bit i x s.m)
    done;
    ghost (s.contents <- Fset.add x s.contents)

  let mem (x: elt) (s: filter) : bool
    ensures { mem x s -> result }
  =
    for i = 0 to k - 1 do
      invariant { forall j:uint32. j < i -> s.barr[bit j x s.m] }
      if not (BA.get s.barr (bit i x s.m)) then return false
    done;
    return true

all this implements interface Filter

  clone Filter with type elt, type filter, val create = bloom_filter, val add, val mem
end

A generic client that filters strings

module GenericClient

  use mach.c.C
  clone Filter with type elt = string

  let main () =
    let f = Filter.create 0x10000 in
    Filter.add "foo" f;
    Filter.add "bar" f;
    let b = Filter.mem "foo" f in
    assert { b };
    c_assert b;
    ()

end

A Bloom filter for strings, using three FNV hash functions

module BFstring
  use int.Int
  use mach.c.String
  use mach.c.C
  use string.Char
  use bv.BV32 as B
  use mach.int.UInt32GMP
  use mach.int.Int32
  use mach.int.Int32BV as S
  use mach.int.UInt32BV as U

  let constant k: uint32 = 3

  type elt = string

  (* set of Fowler-Noll-Vo hash functions
     (from Real-World Algorithms: A Beginner's Guide, by Panos Louridas) *)

  let function hash (h: uint32) (x: elt) : uint32
    requires { h < k }
  = let offset_basis = U.to_bv 0x811c9dc5 in
    let prime = U.to_bv 0x01000193 in
    let ref hash = B.bw_xor offset_basis (U.to_bv h) in
    let n = String.length x in
    let n = if n > 0x7fff_ffff then 0x7fff_ffff else n in
    let n = to_int32 n in
    for i = 0 to n - 1 do
      let c = code (String.get x i) in
      let c = S.to_bv c in
      hash <- B.bw_xor hash c;
      hash <- B.mul hash prime
    done;
    U.of_bv hash

  use BAint32

  clone export BloomFilter with val k, type elt, val hash,
    type BA.t = BAint32.t, val BA.create = BAint32.create,
    val BA.get = BAint32.get, val BA.set = BAint32.set

end

module Client

  use BFstring

  clone export GenericClient with
    type Filter.filter = filter,
    val Filter.create = bloom_filter,
    val Filter.add = add,
    val Filter.mem = mem

end

Generated by why3doc 1.3.1+git