Library Diqt.Table

From Coq Require Import PArray Nat PeanoNat ZArith Bool Lia List Uint63.
Import ListNotations.

Require Import Int_utils.

Open Scope uint63_scope.

hashtable implantation with PArray

Section Hashtable.
  Context {A B: Set}.
  Variable eq: A A bool.
  Variable eq_spec: x y : A, reflect (x = y) (eq x y).
  Variable hash: A int.

  Inductive bucket : Set :=
    | Empty : bucket
    | Cons : int A B bucket bucket.

  Definition table := PArray.array bucket.

  Record t : Set := hash_tab {
    size : int;
    hashtab : table;
  }.

Hashtable functions


  Definition length t : int :=
    PArray.length (hashtab t).

  Definition create (size: int) : t :=
    let h := make size Empty in
    hash_tab 0 h.

  Definition key_index (s: int) (k: int) : int :=
    k mod s.

  Definition get_bucket (h: t) (k: int) : bucket :=
    if length h =? 0 then Empty
    else (hashtab h).[key_index (length h) k].

  Definition resize_heurisitic (h: t) : bool :=
    (length h << 1 <=? size h) && negb (length h =? PArray.max_length).

  Opaque resize_heurisitic.

  Fixpoint fold_right {T:Type} f (a: T) b :=
    match b with
    | Emptya
    | Cons h k v b'f h k v (fold_right f a b')
    end.

  Definition rehash_bucket (nt lt: table) (ns ls i: int) : table :=
    fold_right (fun h k v a
      if i =? key_index ls h then
        let h_b := key_index ns h in
        a.[h_b <- Cons h k v a.[h_b]]
else a)
    nt lt.[i].

  Definition copy_tab (ht: t) (i: int) : t :=
    let last_size := length ht in
    let new_size := last_size × 2 in
    let a := make new_size Empty in
    let new_tab :=
      fold_int (fun i a
        rehash_bucket a (hashtab ht) (PArray.length a) (length ht) i)
      i a
    in
    hash_tab (size ht) new_tab.

  Definition resize (ht: t): t :=
    if length ht =? 0 then
      hash_tab 0 (make 16 Empty)
    else if resize_heurisitic ht then
      copy_tab ht (length ht)
    else ht.

  Fixpoint bucket_remove (b: bucket) (hash: int) (key: A) :=
    match b with
    | Empty(false, Empty)
    | Cons hash' key' v b
        if if hash =? hash' then eq key key' else false
        then (true, b)
        else
          let new_bucket := bucket_remove b hash key in
          (fst new_bucket, Cons hash' key' v (snd new_bucket))
    end.

  Definition remove (h: t) (key: A) : t :=
    let tab := hashtab h in
    let hash := hash key in
    let i := key_index (length h) hash in
    let b_remove := bucket_remove tab.[i] hash key in
    if fst b_remove
    then hash_tab (size h - 1) (tab.[i <- snd b_remove])
    else h.

  Definition add (h: t) (key: A) (v: B) : t :=
    let h := resize h in
    let tab := hashtab h in
    let hash := hash key in
    let i := key_index (length h) hash in
    let new := Cons hash key v tab.[i] in
    hash_tab (size h + 1) tab.[i<-new].

  Definition replace (h: t) (key: A) (v: B) : t :=
    let h := resize h in
    let tab := hashtab h in
    let hash := hash key in
    let i := key_index (length h) hash in
    let remove_result := bucket_remove tab.[i] hash key in
    let new_size := if fst remove_result then (size h) else (size h) + 1 in
    hash_tab new_size tab.[i <- Cons hash key v (snd remove_result)].

  Fixpoint find_rec (l: bucket) (h: int) (k: A): option B :=
    match l with
    | EmptyNone
    | Cons h' k' v l'
        if if h =? h' then eq k k' else false
        then Some v
        else find_rec l' h k
    end.

  Definition find (h: t) (key: A) : option B :=
    let hash := hash key in
    let bucket := get_bucket h hash in
    find_rec bucket hash key.

  Fixpoint find_all_rec (l: bucket) (h: int) (k: A) (acc: list B): list B :=
    match l with
    | EmptyList.rev' acc
    | Cons h' k' v l'
        if if h =? h' then eq k k' else false
        then find_all_rec l' h k (v :: acc)
        else find_all_rec l' h k acc
    end.

  Definition mem (h: t) (key: A) : bool :=
    match find h key with
    | Some _true
    | Nonefalse
    end.

  Definition find_all (h: t) (key: A) : list B :=
    let hash := hash key in
    find_all_rec (get_bucket h hash) hash key [].

  Fixpoint elements_bucket l b i s :=
    match b with
    | Emptyl
    | Cons h k v b'
        if key_index s h =? i
        then elements_bucket ((k, v) :: l) b' i s
        else elements_bucket l b' i s
    end.

  Definition elements h :=
    let s := length h in
    fold_int (fun i lelements_bucket l (hashtab h).[i] i s) s [].

  Fixpoint fold_bucket {Acc: Type} b f i s (acc: Acc) :=
    match b with
    | Emptyacc
    | Cons h k v b'
        if key_index s h =? i
        then fold_bucket b' f i s (f k v acc)
        else fold_bucket b' f i s acc
    end.

  Definition fold {Acc: Type} h f (acc: Acc) :=
    let length := length h in
    fold_int (fun ifold_bucket (hashtab h).[i] f i length) length acc.

Hashtables facts

Eq
  Lemma eq_true:
     (k1 k2 : A), k1 = k2 eq k1 k2 = true.

  Lemma eq_false:
     (k1 k2 : A), k1 k2 eq k1 k2 = false.

  Lemma eq_refl:
     (k : A), eq k k = true.

  Lemma neq_sym:
     (k k' : A), k k' k' k.

find_all

  Fixpoint find_all_rec' (l: bucket) (h: int) (k: A) : list B :=
    match l with
    | Empty[]
    | Cons h' k' v l'
        if if h =? h' then eq k k' else false
        then v :: find_all_rec' l' h k
        else find_all_rec' l' h k
    end.

  Lemma find_all_rec_correct:
     (l: bucket) (h: int) (key: A) acc,
    find_all_rec l h key acc = rev acc ++ find_all_rec' l h key.

resize

  Lemma rehash_bucket_length:
     (nt lt: table) i,
    PArray.length (rehash_bucket nt lt (PArray.length nt) (PArray.length lt) i)
    = PArray.length nt.

  Lemma copy_tab_lenght:
     (ht: t) (i: int),
    PArray.length (hashtab ht) =? 0 = false
    PArray.length (hashtab (copy_tab ht i)) =? 0 = false.

  Lemma length_resize_non_neg:
     (h: t),
    (length (resize h)) =? 0 = false.

  Lemma fold_right_length:
     nt lt i0 i b,
    (PArray.length nt =? 0) = false
    (i0 mod PArray.length nt <?
      PArray.length
       (fold_right
          (fun (h : int) (k : A) (v : B) (a0 : array bucket) ⇒
           if i =? key_index (length lt) h
           then
            a0.[key_index (PArray.length nt) h <-Cons h k v a0.[key_index (PArray.length nt) h]]
else a0) nt b)) = true.

  Lemma rehash_bucket_correct1:
     (nt: table) (lt: t) (k: A) (i s: int),
    length lt =? 0 = false
    PArray.length nt =? 0 = false
    i = key_index (length lt) (hash k)
    find_all (hash_tab s nt) k = []
    find_all (hash_tab s (rehash_bucket nt (hashtab lt)
                            (PArray.length nt) (length lt) i)) k =
    find_all lt k.

  Lemma rehash_bucket_correct2:
     (nt: table) (lt: t) (k: A) (i s: int),
    length lt =? 0 = false
    PArray.length nt =? 0 = false
    i key_index (length lt) (hash k)
    (to_Z (length lt) to_Z (PArray.length nt))%Z
    find_all (hash_tab s (rehash_bucket nt (hashtab lt)
                            (PArray.length nt) (length lt) i)) k =
    find_all (hash_tab s nt) k.

  Lemma length_rect_non_neg:
     ht n,
    length ht =? 0 = false
    PArray.length
      (nat_rect (fun _ : nattable) (make (length ht × 2) Empty)
        (fun (n0 : nat) (acc : table) ⇒
          rehash_bucket acc (hashtab ht) (PArray.length acc)
          (length ht) (of_Z (Z.of_nat n0))) n) =? 0 = false.

  Lemma length_rect_min:
     ht n,
    length ht <=?
      PArray.length
        (nat_rect (fun _ : nattable) (make (length ht × 2) Empty)
          (fun (n0 : nat) (acc : table) ⇒
            rehash_bucket acc (hashtab ht) (PArray.length acc)
            (length ht) (of_Z (Z.of_nat n0))) n) = true.

  Lemma copy_tab_correct:
     (ht: t) (k: A) (i: int),
      length ht =? 0 = false
      key_index (length ht) (hash k) <? i = true
      find_all (copy_tab ht i) k = find_all ht k.

  Lemma find_all_resize:
     (h: t) (k: A),
    find_all (resize h) k = find_all h k.

empty

  Theorem hempty:
     (k: A) (size: int), find (create size) k = None.

key_index

  Lemma key_index_max:
     (h: t) (k: int),
    (length h) =? 0 = false
    (to_Z (key_index (length h) k) < to_Z (PArray.length (hashtab h)))%Z.

add

  Theorem find_add_same:
     (k: A) (v: B) (h: t),
    find_all (add h k v) k = v :: (find_all h k).

  Theorem find_add_other:
     (k k': A) (v: B) (h: t),
    k' k find_all (add h k v) k' = find_all h k'.

find

  Theorem find_spec:
     ht key,
    find ht key = List.hd_error (find_all ht key).

remove

  Lemma bucket_remove_same:
     (b: bucket) (k: A),
    let h := hash k in
    (fst (bucket_remove b h k) = false snd (bucket_remove b h k) = b)
    find_all_rec (snd (bucket_remove b h k)) h k [] =
    List.tl (find_all_rec b h k []).

  Lemma bucket_remove_other:
     (b: bucket) (k k': A),
    let h := hash k in
    let h' := hash k' in
    k' k
    find_all_rec (snd (bucket_remove b h k)) h' k' [] =
    find_all_rec b h' k' [].

  Theorem remove_same:
     (ht: t) (k: A),
    find_all (remove ht k) k =
    List.tl (find_all ht k).

  Theorem remove_other:
     (ht: t) (k: A) (k': A),
    k' k
    find_all (remove ht k) k' = find_all ht k'.

replace

  Theorem replace_same:
     ht key v,
    find_all (replace ht key v) key = v :: (List.tl (find_all ht key)).

  Theorem replace_other:
     ht k1 k2 v,
    k1 k2
    find_all (replace ht k1 v) k2 = find_all ht k2.

elements


fold

  Lemma fold_correct:
     T h f (acc: T),
    fold h f acc =
    List.fold_right (fun v accf (fst v) (snd v) acc) acc (elements h).

End Hashtable.