Theory Generic_Join
section ‹The algorithm›
theory Generic_Join
  imports MFOTL_Monitor.Table
begin
type_synonym 'a atable = "nat set × 'a table"
type_synonym 'a query = "'a atable set"
type_synonym vertices = "nat set"
subsection ‹Generic algorithm›
locale getIJ =
  fixes getIJ :: "'a query ⇒ 'a query ⇒ vertices ⇒ vertices × vertices"
  assumes coreProperties: "card V ≥ 2 ⟹ getIJ Q_pos Q_neg V = (I, J) ⟹
    card I ≥ 1 ∧ card J ≥ 1 ∧ V = I ∪ J ∧ I ∩ J = {}"
begin
lemma getIJProperties:
  assumes "card V ≥ 2"
  assumes "(I, J) = getIJ Q_pos Q_neg V"
  shows "card I ≥ 1" and "card J ≥ 1" and "card I < card V" and "card J < card V"
    and "V = I ∪ J" and "I ∩ J = {}"
proof -
  show "1 ≤ card I" using coreProperties[of V Q_pos Q_neg I J] assms by auto
  show "1 ≤ card J" using coreProperties[of V Q_pos Q_neg I J] assms by auto
  show "card I < card V" by (metis (no_types, lifting) Int_ac(3) One_nat_def Suc_le_lessD assms(1)
        assms(2) card_gt_0_iff card_seteq dual_order.trans getIJ.coreProperties getIJ_axioms leI
        le_iff_inf one_le_numeral sup_ge1 sup_ge2)
  show "card J < card V" by (metis One_nat_def Suc_1 assms(1) assms(2) card_gt_0_iff card_seteq
        getIJ.coreProperties getIJ_axioms leI le_0_eq le_iff_inf nat.simps(3) sup_ge1 sup_ge2)
  show "V = I ∪ J" by (metis assms(1) assms(2) getIJ.coreProperties getIJ_axioms)
  show "I ∩ J = {}" by (metis assms(1) assms(2) getIJ_axioms getIJ_def)
qed
fun projectTable :: "vertices ⇒ 'a atable ⇒ 'a atable" where
  "projectTable V (s, t) = (s ∩ V, Set.image (restrict V) t)"
fun filterQuery :: "vertices ⇒ 'a query ⇒ 'a query" where
  "filterQuery V Q = Set.filter (λ(s, _). ¬ Set.is_empty (s ∩ V)) Q"
fun filterQueryNeg :: "vertices ⇒ 'a query ⇒ 'a query" where
  "filterQueryNeg V Q = Set.filter (λ(A, _). A ⊆ V) Q"
fun projectQuery :: "vertices ⇒ 'a query ⇒ 'a query" where
  "projectQuery V s = Set.image (projectTable V) s"
fun isSameIntersection :: "'a tuple ⇒ nat set ⇒ 'a tuple ⇒ bool" where
  "isSameIntersection t1 s t2 = (∀i∈s. t1!i = t2!i)"
fun semiJoin :: "'a atable ⇒ (nat set × 'a tuple) ⇒ 'a atable" where
  "semiJoin (s, tab) (st, tup) = (s, Set.filter (isSameIntersection tup (s ∩ st)) tab)"
fun newQuery :: "vertices ⇒ 'a query ⇒ (nat set × 'a tuple) ⇒ 'a query" where
  "newQuery V Q (st, t) = Set.image (λtab. projectTable V (semiJoin tab (st, t))) Q"
fun merge_option :: "'a option × 'a option ⇒ 'a option" where
  "merge_option (None, None) = None"
| "merge_option (Some x, None) = Some x"
| "merge_option (None, Some x) = Some x"
| "merge_option (Some a, Some b) = Some a"
fun merge :: "'a tuple ⇒ 'a tuple ⇒ 'a tuple" where
  "merge t1 t2 = map merge_option (zip t1 t2)"
function (sequential) genericJoin :: "vertices ⇒ 'a query ⇒ 'a query ⇒ 'a table" where
  "genericJoin V Q_pos Q_neg =
    (if card V ≤ 1 then
      (⋂(_, x) ∈ Q_pos. x) - (⋃(_, x) ∈ Q_neg. x)
    else
      let (I, J) = getIJ Q_pos Q_neg V in
      let Q_I_pos = projectQuery I (filterQuery I Q_pos) in
      let Q_I_neg = filterQueryNeg I Q_neg in
      let R_I = genericJoin I Q_I_pos Q_I_neg in
      let Q_J_neg = Q_neg - Q_I_neg in
      let Q_J_pos = filterQuery J Q_pos in
      let X = {(t, genericJoin J (newQuery J Q_J_pos (I, t)) (newQuery J Q_J_neg (I, t))) | t . t ∈ R_I} in
      (⋃(t, x) ∈ X. {merge xx t | xx . xx ∈ x}))"
by pat_completeness auto
termination
  by (relation "measure (λ(V, Q_pos, Q_neg). card V)") (auto simp add: getIJProperties)
fun wrapperGenericJoin :: "'a query ⇒ 'a query ⇒ 'a table" where
  "wrapperGenericJoin Q_pos Q_neg =
    (if ((∃(A, X)∈Q_pos. Set.is_empty X) ∨ (∃(A, X)∈Q_neg. Set.is_empty A ∧ ¬ Set.is_empty X)) then
      {}
    else
      let Q = Set.filter (λ(A, _). ¬ Set.is_empty A) Q_pos in
      if Set.is_empty Q then
        (⋂(A, X)∈Q_pos. X) -  (⋃(A, X)∈Q_neg. X)
      else
        let V = (⋃(A, X)∈Q. A) in
        let Qn = Set.filter (λ(A, _). A ⊆ V ∧ card A ≥ 1) Q_neg in
        genericJoin V Q Qn)"
end
subsection ‹An instantation›
fun score :: "'a query ⇒ nat ⇒ nat" where
  "score Q i = (let relevant = Set.image (λ(_, x). card x) (Set.filter (λ(sign, _). i ∈ sign) Q) in
    let l = sorted_list_of_set relevant in
    foldl (+) 0 l
)"
fun arg_max_list :: "('a ⇒ nat) ⇒ 'a list ⇒ 'a" where
  "arg_max_list f l = (let m = Max (set (map f l)) in arg_min_list (λx. m - f x) l)"
lemma arg_max_list_element:
  assumes "length l ≥ 1" shows "arg_max_list f l ∈ set l"
  by (metis One_nat_def arg_max_list.simps arg_min_list_in assms le_imp_less_Suc less_irrefl list.size(3))
fun max_getIJ :: "'a query ⇒ 'a query ⇒ vertices ⇒ vertices × vertices" where
  "max_getIJ Q_pos Q_neg V = (
  let l = sorted_list_of_set V in
  if Set.is_empty Q_neg then
    let x = arg_max_list (score Q_pos) l in
    ({x}, V - {x})
  else
    let x = arg_max_list (score Q_neg) l in
    (V - {x}, {x}))"
lemma max_getIJ_coreProperties:
  assumes "card V ≥ 2"
  assumes "(I, J) = max_getIJ Q_pos Q_neg V"
  shows "card I ≥ 1 ∧ card J ≥ 1 ∧ V = I ∪ J ∧ I ∩ J = {}"
proof -
  have "finite V" using assms(1) card.infinite by force
  define l where "l = sorted_list_of_set V"
  then have "length l ≥ 1" by (metis Suc_1 Suc_le_lessD ‹finite V› assms(1) distinct_card
        distinct_sorted_list_of_set less_imp_le set_sorted_list_of_set)
  show ?thesis
  proof (cases "Set.is_empty Q_neg")
    case True
    define x where "x = arg_max_list (score Q_pos) l"
    then have "x ∈ (set l)" using ‹1 ≤ length l› arg_max_list_element by blast
    then have "x ∈ V" by (simp add: ‹finite V› l_def)
    moreover have "(I, J) = ({x}, V - {x})" 
    proof -
      have "(I, J) =  (let l = sorted_list_of_set V in
    let x = arg_max_list (score Q_pos) l in
    ({x}, V - {x}))" by (simp add: True assms(2))
      then show ?thesis by (metis l_def x_def)
    qed
    then show ?thesis using Pair_inject ‹finite V› assms(1) calculation by auto
  next
    case False
    define x where "x = arg_max_list (score Q_neg) l"
    then have "x ∈ (set l)" using ‹1 ≤ length l› arg_max_list_element by blast
    then have "x ∈ V" by (simp add: ‹finite V› l_def)
    moreover have "(I, J) = (V - {x}, {x})"
    proof -
      have "(I, J) = (let l = sorted_list_of_set V in
  let x = arg_max_list (score Q_neg) l in (V - {x}, {x}))" by (simp add: False assms(2))
      then show ?thesis by (metis l_def x_def)
    qed
    then show ?thesis using Pair_inject ‹finite V› assms(1) calculation by auto
  qed
qed