Theory LBD

theory LBD
imports IsaSAT_Literals
theory LBD
  imports IsaSAT_Literals
begin

chapter ‹LBD›

text ‹LBD (literal block distance) or glue is a measure of usefulness of clauses: It is the number
of different levels involved in a clause. This measure has been introduced by Glucose in 2009
(Audemart and Simon).

LBD has also another advantage, explaining why we implemented it even before working on restarts: It
can speed the conflict minimisation. Indeed a literal might be redundant only if there is a literal
of the same level in the conflict.

The LBD data structure is well-suited to do so: We mark every level that appears in the conflict in
a hash-table like data structure.

Remark that we combine the LBD with a MTF scheme.
›

section ‹Types and relations›
type_synonym lbd = ‹bool list›
type_synonym lbd_ref = ‹bool list × nat × nat›


text ‹Beside the actual ``lookup'' table, we also keep the highest level marked so far to unmark
all levels faster (but we currently don't save the LBD and have to iterate over the data structure).
We also handle growing of the structure by hand instead of using a proper hash-table. We do so,
because there are much stronger guarantees on the key that there is in a general hash-table
(especially, our numbers are all small).
›
definition lbd_ref where
  ‹lbd_ref = {((lbd, n, m), lbd'). lbd = lbd' ∧ n < length lbd ∧
      (∀k > n. k < length lbd ⟶ ¬lbd!k) ∧
      length lbd ≤ Suc (Suc (uint32_max div 2)) ∧ n < length lbd ∧
      m = length (filter id lbd)}›


section ‹Testing if a level is marked›

definition level_in_lbd :: ‹nat ⇒ lbd ⇒ bool› where
  ‹level_in_lbd i = (λlbd. i < length lbd ∧ lbd!i)›

definition level_in_lbd_ref :: ‹nat ⇒ lbd_ref ⇒ bool› where
  ‹level_in_lbd_ref = (λi (lbd, _). i < length_uint32_nat lbd ∧ lbd!i)›

lemma level_in_lbd_ref_level_in_lbd:
  ‹(uncurry (RETURN oo level_in_lbd_ref), uncurry (RETURN oo level_in_lbd)) ∈
    nat_rel ×r lbd_ref →f ⟨bool_rel⟩nres_rel›
  by (intro frefI nres_relI) (auto simp: level_in_lbd_ref_def level_in_lbd_def lbd_ref_def)

section ‹Marking more levels›
definition list_grow where
  ‹list_grow xs n x = xs @ replicate (n - length xs) x›

definition lbd_write :: ‹lbd ⇒ nat ⇒ lbd› where
  ‹lbd_write = (λlbd i.
    (if i < length lbd then (lbd[i := True])
     else ((list_grow lbd (i + 1) False)[i := True])))›


definition lbd_ref_write :: ‹lbd_ref ⇒ nat ⇒ lbd_ref nres›  where
  ‹lbd_ref_write = (λ(lbd, m, n) i. do {
    ASSERT(length lbd ≤ uint32_max ∧ n + 1 ≤ uint32_max);
    (if i < length_uint32_nat lbd then
       let n = if lbd ! i then n else n+1 in
       RETURN (lbd[i := True], max i m, n)
     else do {
        ASSERT(i + 1 ≤ uint32_max);
        RETURN ((list_grow lbd (i + 1) False)[i := True], max i m, n + 1)
     })
  })›

lemma length_list_grow[simp]:
  ‹length (list_grow xs n a) = max (length xs) n›
  by (auto simp: list_grow_def)

lemma list_update_append2: ‹i ≥ length xs ⟹ (xs @ ys)[i := x] = xs @ ys[i - length xs := x]›
  by (induction xs arbitrary: i) (auto split: nat.splits)

lemma lbd_ref_write_lbd_write:
  ‹(uncurry (lbd_ref_write), uncurry (RETURN oo lbd_write)) ∈
    [λ(lbd, i). i ≤ Suc (uint32_max div 2)]f
     lbd_ref ×f nat_rel → ⟨lbd_ref⟩nres_rel›
  unfolding lbd_ref_write_def lbd_write_def
  by (intro frefI nres_relI)
    (auto simp: level_in_lbd_ref_def level_in_lbd_def lbd_ref_def list_grow_def
        nth_append uint32_max_def length_filter_update_true list_update_append2
        length_filter_update_false
      intro!: ASSERT_leI le_trans[OF length_filter_le])

section ‹Cleaning the marked levels›

definition lbd_emtpy_inv :: ‹nat ⇒ bool list × nat ⇒ bool› where
  ‹lbd_emtpy_inv m = (λ(xs, i). i ≤ Suc m ∧ (∀j < i. xs ! j = False) ∧
    (∀j > m. j < length xs ⟶ xs ! j = False))›

definition lbd_empty_ref where
  ‹lbd_empty_ref = (λ(xs, m, _). do {
    (xs, i) ←
       WHILETlbd_emtpy_inv m
         (λ(xs, i). i ≤ m)
         (λ(xs, i). do {
            ASSERT(i < length xs);
            ASSERT(i + 1 < uint32_max);
            RETURN (xs[i := False], i + 1)})
         (xs, 0);
     RETURN (xs, 0, 0)
  })›

definition lbd_empty where
   ‹lbd_empty xs = RETURN (replicate (length xs) False)›

lemma lbd_empty_ref:
  assumes ‹((xs, m, n), xs) ∈ lbd_ref›
  shows
    ‹lbd_empty_ref (xs, m, n) ≤ ⇓ lbd_ref (RETURN (replicate (length xs) False))›
proof -
  have m_xs: ‹m ≤ length xs› and [simp]: ‹xs ≠ []› and le_xs: ‹length xs ≤ uint32_max div 2 + 2›
    using assms by (auto simp: lbd_ref_def)
  have [iff]: ‹(∀j. ¬ j < (b :: nat)) ⟷ b = 0› for b
    by auto
  have init: ‹lbd_emtpy_inv m (xs, 0)›
    using assms m_xs unfolding lbd_emtpy_inv_def
    by (auto simp: lbd_ref_def)
  have lbd_remove: ‹lbd_emtpy_inv m
       (a[b := False], b + 1)›
    if
      inv: ‹lbd_emtpy_inv m s› and
      ‹case s of (ys, i) ⇒ length ys = length xs› and
      cond: ‹case s of (xs, i) ⇒ i ≤ m› and
      s: ‹s = (a, b)› and
      [simp]: ‹b < length a›
    for s a b
  proof -
    have 1: ‹a[b := False] ! j = False› if ‹j<b› for j
      using inv that unfolding lbd_emtpy_inv_def s
      by auto
    have 2: ‹∀j>m. j < length (a[b := False]) ⟶ a[b := False] ! j = False›
      using inv that unfolding lbd_emtpy_inv_def s
      by auto
    have ‹b + 1 ≤ Suc m›
      using cond unfolding s by simp
    moreover have ‹a[b := False] ! j = False› if ‹j<b + 1› for j
      using 1[of j] that cond by (cases ‹j = b›) auto
    moreover have ‹∀j>m. j < length (a[b := False]) ⟶ a[b := False] ! j = False›
      using 2 by auto
    ultimately show ?thesis
      unfolding lbd_emtpy_inv_def by auto
  qed
  have lbd_final: ‹((a, 0, 0), replicate (length xs) False) ∈ lbd_ref›
    if
      lbd: ‹lbd_emtpy_inv m s› and
      I': ‹case s of (ys, i) ⇒ length ys = length xs› and
      cond: ‹¬ (case s of (xs, i) ⇒ i ≤ m)› and
      s: ‹s = (a, b)›
      for s a b
  proof -
    have 1: ‹a[b := False] ! j = False› if ‹j<b› for j
      using lbd that unfolding lbd_emtpy_inv_def s
      by auto
    have 2: ‹j>m ⟶ j < length a ⟶ a ! j = False› for j
      using lbd that unfolding lbd_emtpy_inv_def s
      by auto
    have [simp]: ‹length a = length xs›
      using I' unfolding s by auto
    have [simp]: ‹b = Suc m›
      using cond lbd unfolding lbd_emtpy_inv_def s
      by auto
    have [dest]: ‹i < length xs ⟹ ¬a ! i› for i
      using 1[of i] 2[of i] by (cases ‹i < Suc m›) auto

    have [simp]: ‹a = replicate (length xs) False›
      unfolding list_eq_iff_nth_eq
      apply (intro conjI)
      subgoal by simp
      subgoal by auto
      done
    show ?thesis
      using le_xs by (auto simp: lbd_ref_def)
  qed
  show ?thesis
    unfolding lbd_empty_ref_def conc_fun_RETURN
    apply clarify
    apply (refine_vcg WHILEIT_rule_stronger_inv[where R = ‹measure (λ(xs, i). Suc m - i)› and
      I' = ‹λ(ys, i). length ys = length xs›])
    subgoal by auto
    subgoal by (rule init)
    subgoal by auto
    subgoal using assms by (auto simp: lbd_ref_def)
    subgoal using m_xs le_xs by (auto simp: uint32_max_def)
    subgoal by (rule lbd_remove)
    subgoal by auto
    subgoal by auto
    subgoal by (rule lbd_final)
    done
qed

lemma lbd_empty_ref_lbd_empty:
  ‹(lbd_empty_ref, lbd_empty) ∈ lbd_ref →f ⟨lbd_ref⟩nres_rel›
  apply (intro frefI nres_relI)
  apply clarify
  subgoal for lbd m lbd'
    using lbd_empty_ref[of lbd m]
    by (auto simp: lbd_empty_def lbd_ref_def)
  done

definition (in -)empty_lbd :: ‹lbd› where
   ‹empty_lbd = (replicate 32 False)›

definition empty_lbd_ref :: ‹lbd_ref› where
   ‹empty_lbd_ref = (replicate 32 False, 0, 0)›

lemma empty_lbd_ref_empty_lbd:
  ‹(λ_. (RETURN empty_lbd_ref), λ_. (RETURN empty_lbd)) ∈ unit_rel →f ⟨lbd_ref⟩nres_rel›
  by (intro frefI nres_relI) (auto simp: empty_lbd_def lbd_ref_def empty_lbd_ref_def
      uint32_max_def nth_Cons split: nat.splits)

section ‹Extracting the LBD›

text ‹We do not prove correctness of our algorithm, as we don't care about the actual returned
value (for correctness).›
definition get_LBD :: ‹lbd ⇒ nat nres› where
  ‹get_LBD lbd = SPEC(λ_. True)›

definition get_LBD_ref :: ‹lbd_ref ⇒ nat nres› where
  ‹get_LBD_ref = (λ(xs, m, n). RETURN n)›

lemma get_LBD_ref:
 ‹((lbd, m), lbd') ∈ lbd_ref ⟹ get_LBD_ref (lbd, m) ≤ ⇓ nat_rel (get_LBD lbd')›
  unfolding get_LBD_ref_def get_LBD_def
  by (auto split:prod.splits)

lemma get_LBD_ref_get_LBD:
  ‹(get_LBD_ref, get_LBD) ∈ lbd_ref →f ⟨nat_rel⟩nres_rel›
  apply (intro frefI nres_relI)
  apply clarify
  subgoal for lbd m n lbd'
    using get_LBD_ref[of lbd]
    by (auto simp: lbd_empty_def lbd_ref_def)
  done

end