Theory BPlusTree_Set
theory BPlusTree_Set
  imports
    BPlusTree_Split
    "HOL-Data_Structures.Set_Specs"
begin
section "Set interpretation"
lemma insert_list_length[simp]:
  assumes "sorted_less ks"
    and "set (insert_list k ks) = set ks ∪ {k}"
    and "sorted_less ks ⟹ sorted_less (insert_list k ks)"
  shows "length (insert_list k ks) = length ks + (if k ∈ set ks then 0 else 1)"
proof -
  have "distinct (insert_list k ks)"
    using assms(1) assms(3) strict_sorted_iff by blast
  then have "length (insert_list k ks) = card (set (insert_list k ks))"
    by (simp add: distinct_card)
  also have "… = card (set ks ∪ {k})"
    using assms(2) by presburger
  also have "… = card (set ks) + (if k ∈ set ks then 0 else 1)"
    by (cases "k ∈ set ks") (auto simp add: insert_absorb)
  also have "… = length ks + (if k ∈ set ks then 0 else 1)"
    using assms(1) distinct_card strict_sorted_iff by auto
  finally show ?thesis.
qed
lemma delete_list_length[simp]:
  assumes "sorted_less ks"
    and "set (delete_list k ks) = set ks - {k}"
    and "sorted_less ks ⟹ sorted_less (delete_list k ks)"
  shows "length (delete_list k ks) = length ks - (if k ∈ set ks then 1 else 0)"
proof -
  have "distinct (delete_list k ks)"
    using assms(1) assms(3) strict_sorted_iff by blast
  then have "length (delete_list k ks) = card (set (delete_list k ks))"
    by (simp add: distinct_card)
  also have "… = card (set ks - {k})"
    using assms(2) by presburger
  also have "… = card (set ks) - (if k ∈ set ks then 1 else 0)"
    by (cases "k ∈ set ks") (auto)
  also have "… = length ks - (if k ∈ set ks then 1 else 0)"
    by (metis assms(1) distinct_card strict_sorted_iff)
  finally show ?thesis.
qed
lemma ins_list_length[simp]:
  assumes "sorted_less ks"
  shows "length (ins_list k ks) = length ks + (if k ∈ set ks then 0 else 1)"
  using insert_list_length[of ks ins_list k]
  by (simp add: assms set_ins_list sorted_ins_list)
lemma del_list_length[simp]:
  assumes "sorted_less ks"
  shows "length (del_list k ks) = length ks - (if k ∈ set ks then 1 else 0)"
  using delete_list_length[of ks ins_list k]
  by (simp add: assms set_del_list sorted_del_list)
locale split_set = split_tree: split_tree split
  for split::
    "('a bplustree × 'a::{linorder,order_top}) list ⇒ 'a
       ⇒ ('a bplustree × 'a) list × ('a bplustree × 'a) list" +
  fixes isin_list ::  "'a ⇒ ('a::{linorder,order_top}) list ⇒ bool"
  and insert_list ::  "'a ⇒ ('a::{linorder,order_top}) list ⇒ 'a list"
  and delete_list ::  "'a ⇒ ('a::{linorder,order_top}) list ⇒ 'a list"
  assumes insert_list_req:
    
    "sorted_less ks ⟹ isin_list x ks = (x ∈ set ks)"
    "sorted_less ks ⟹ insert_list x ks = ins_list x ks"
    "sorted_less ks ⟹ delete_list x ks = del_list x ks"
begin
lemmas split_req = split_tree.split_req
lemmas split_conc = split_tree.split_req(1)
lemmas split_sorted = split_tree.split_req(2,3)
lemma insert_list_length[simp]:
  assumes "sorted_less ks"
  shows "length (insert_list k ks) = length ks + (if k ∈ set ks then 0 else 1)"
  using insert_list_req
  by (simp add: assms)
lemma set_insert_list[simp]:
  "sorted_less ks ⟹ set (insert_list k ks) = set ks ∪ {k}"
  by (simp add: insert_list_req set_ins_list)
lemma sorted_insert_list[simp]:
  "sorted_less ks ⟹ sorted_less (insert_list k ks)"
  by (simp add: insert_list_req sorted_ins_list)
lemma delete_list_length[simp]:
  assumes "sorted_less ks"
  shows "length (delete_list k ks) = length ks - (if k ∈ set ks then 1 else 0)"
  using insert_list_req
  by (simp add: assms)
lemma set_delete_list[simp]:
  "sorted_less ks ⟹ set (delete_list k ks) = set ks - {k}"
  by (simp add: insert_list_req set_del_list)
lemma sorted_delete_list[simp]:
  "sorted_less ks ⟹ sorted_less (delete_list k ks)"
  by (simp add: insert_list_req sorted_del_list)
definition "empty_bplustree = (Leaf [])"
subsection "Membership"
fun isin:: "'a bplustree ⇒ 'a ⇒ bool" where
  "isin (Leaf ks) x = (isin_list x ks)" |
  "isin (Node ts t) x = (
      case split ts x of (_,(sub,sep)#rs) ⇒ (
             isin sub x
      )
   | (_,[]) ⇒ isin t x
  )"
text "Isin proof"
thm isin_simps
  
lemma sorted_ConsD: "sorted_less (y # xs) ⟹ x ≤ y ⟹ x ∉ set xs"
  by (auto simp: sorted_Cons_iff)
lemma sorted_snocD: "sorted_less (xs @ [y]) ⟹ y ≤ x ⟹ x ∉ set xs"
  by (auto simp: sorted_snoc_iff)
lemmas isin_simps2 = sorted_lems sorted_ConsD sorted_snocD
  
lemma isin_sorted: "sorted_less (xs@a#ys) ⟹
  (x ∈ set (xs@a#ys)) = (if x < a then x ∈ set xs else x ∈ set (a#ys))"
  by (auto simp: isin_simps2)
lemma isin_sorted_split:
  assumes "Laligned (Node ts t) u"
    and "sorted_less (leaves (Node ts t))"
    and "split ts x = (ls, rs)"
  shows "x ∈ set (leaves (Node ts t)) = (x ∈ set (leaves_list rs @ leaves t))"
proof (cases ls)
  case Nil
  then have "ts = rs"
    using assms by (auto dest!: split_conc)
  then show ?thesis by simp
next
  case Cons
  then obtain ls' sub sep where ls_tail_split: "ls = ls' @ [(sub,sep)]"
    by (metis list.simps(3) rev_exhaust surj_pair)
  then have x_sm_sep: "sep < x"
    using split_req(2)[of ts x ls' sub sep rs]
    using Laligned_sorted_separators[OF assms(1)]
    using assms sorted_cons sorted_snoc 
    by blast
  moreover have leaves_split: "leaves (Node ts t) = leaves_list ls @ leaves_list rs @ leaves t"
    using assms(3) split_tree.leaves_split by blast
  then show ?thesis
  proof (cases "leaves_list ls")
    case Nil
    then show ?thesis
      using leaves_split by auto
  next
    case Cons
    then obtain leavesls' l' where leaves_tail_split: "leaves_list ls = leavesls' @ [l']"
      by (metis list.simps(3) rev_exhaust)
    then have "l' ≤ sep"
    proof -
      have "l' ∈ set (leaves_list ls)"
        using leaves_tail_split by force
      then have "l' ∈ set (leaves (Node ls' sub))"
        using ls_tail_split
        by auto
      moreover have "Laligned (Node ls' sub) sep" 
        using assms split_conc[OF assms(3)] Cons ls_tail_split
        using Laligned_split_left[of ls' sub sep rs t u]
        by simp
      ultimately show ?thesis
        using Laligned_leaves_inbetween[of "Node ls' sub" sep]
        by blast
    qed
    then show ?thesis
      using assms(2) ls_tail_split leaves_tail_split leaves_split x_sm_sep
      using isin_sorted[of "leavesls'" l' "leaves_list rs @ leaves t" x]
      by auto
  qed
qed
lemma isin_sorted_split_right:
  assumes "split ts x = (ls, (sub,sep)#rs)"
    and "sorted_less (leaves (Node ts t))"
    and "Laligned (Node ts t) u"
  shows "x ∈ set (leaves_list ((sub,sep)#rs) @ leaves t) = (x ∈ set (leaves sub))"
proof -
  from assms have "x ≤ sep"
  proof -
    from assms have "sorted_less (separators ts)"
      by (meson Laligned_sorted_inorder sorted_cons sorted_inorder_separators sorted_snoc)
    then show ?thesis
      using split_req(3)
      using assms
      by fastforce
  qed
  moreover have leaves_split: "leaves (Node ts t) = leaves_list ls @ leaves sub @ leaves_list rs @ leaves t"
    using split_conc[OF assms(1)] by auto
  ultimately show ?thesis
  proof (cases "leaves_list rs @ leaves t")
    case Nil
    then show ?thesis 
      using leaves_split by auto
  next
    case (Cons r' rs')
    then have "sep < r'"
      by (metis Laligned_split_right aligned_leaves_inbetween assms(1) assms(3) leaves.simps(2) list.set_intros(1) split_set.split_conc split_set_axioms)
    then have  "x < r'"
      using ‹x ≤ sep› by auto
    moreover have "sorted_less (leaves_list ((sub,sep)#rs) @ leaves t)"
      using assms sorted_wrt_append split_conc
      by fastforce
    ultimately show ?thesis
      using isin_sorted[of "leaves sub" "r'" "rs'" x] Cons 
      by auto
  qed
qed
theorem isin_set_inorder: 
  assumes "sorted_less (leaves t)"
    and "aligned l t u"
  shows "isin t x = (x ∈ set (leaves t))"
  using assms
proof(induction t x arbitrary: l u rule: isin.induct)
  case (2 ts t x)
  then obtain ls rs where list_split: "split ts x = (ls, rs)"
    by (meson surj_pair)
  then have list_conc: "ts = ls @ rs" 
    using split_conc by auto
  show ?case
  proof (cases rs)
    case Nil
    then have "isin (Node ts t) x = isin t x"
      by (simp add: list_split)
    also have "… = (x ∈ set (leaves t))"
      using "2.IH"(1)[of ls rs] list_split Nil
      using "2.prems" sorted_leaves_induct_last align_last'
      by metis
    also have "… = (x ∈ set (leaves (Node ts t)))"
      using isin_sorted_split
      using "2.prems" list_split list_conc Nil
      by (metis aligned_imp_Laligned leaves.simps(2) leaves_conc same_append_eq self_append_conv)
    finally show ?thesis .
  next
    case (Cons a list)
    then obtain sub sep where a_split: "a = (sub,sep)"
      by (cases a)
      then have "isin (Node ts t) x = isin sub x"
        using list_split Cons a_split
        by auto
      also have "… = (x ∈ set (leaves sub))"
        using "2.IH"(2)[of ls rs "(sub,sep)" list sub sep]
        using "2.prems" a_split list_conc list_split local.Cons sorted_leaves_induct_subtree
              align_sub
        by (metis in_set_conv_decomp)
      also have "… = (x ∈ set (leaves (Node ts t)))"
        using isin_sorted_split
        using isin_sorted_split_right "2.prems" list_split Cons a_split
        using aligned_imp_Laligned by blast
      finally show ?thesis  .
    qed
qed (auto simp add: insert_list_req)
theorem isin_set_Linorder: 
  assumes "sorted_less (leaves t)"
    and "Laligned t u"
  shows "isin t x = (x ∈ set (leaves t))"
  using assms
proof(induction t x arbitrary: u rule: isin.induct)
  case (2 ts t x)
  then obtain ls rs where list_split: "split ts x = (ls, rs)"
    by (meson surj_pair)
  then have list_conc: "ts = ls @ rs" 
    using split_conc by auto
  show ?case
  proof (cases rs)
    case Nil
    then have "isin (Node ts t) x = isin t x"
      by (simp add: list_split)
    also have "… = (x ∈ set (leaves t))"
      by (metis "2.IH"(1) "2.prems"(1) "2.prems"(2) Lalign_Llast list_split local.Nil sorted_leaves_induct_last)
    also have "… = (x ∈ set (leaves (Node ts t)))"
      using isin_sorted_split
      using "2.prems" list_split list_conc Nil
      by simp
    finally show ?thesis .
  next
    case (Cons a list)
    then obtain sub sep where a_split: "a = (sub,sep)"
      by (cases a)
      then have "isin (Node ts t) x = isin sub x"
        using list_split Cons a_split
        by auto
      also have "… = (x ∈ set (leaves sub))"
        using "2.IH"(2)[of ls rs "(sub,sep)" list sub sep]
        using "2.prems" a_split list_conc list_split local.Cons sorted_leaves_induct_subtree
              align_sub
        by (metis Lalign_Llast Laligned_split_left)
      also have "… = (x ∈ set (leaves (Node ts t)))"
        using isin_sorted_split
        using isin_sorted_split_right "2.prems" list_split Cons a_split
        by simp
      finally show ?thesis  .
    qed
qed (auto simp add: insert_list_req)
corollary isin_set_Linorder_top: 
  assumes "sorted_less (leaves t)"
    and "Laligned t top"
  shows "isin t x = (x ∈ set (leaves t))"
  using assms isin_set_Linorder
  by simp
subsection "Insertion"
text "The insert function requires an auxiliary data structure
and auxiliary invariant functions."