Theory Tseytin

subsubsection‹Tseytin transformation›

theory Tseytin
imports Formulas CNF_Formulas
begin

text‹The @{const cnf} transformation clearly has exponential complexity.
If the intention is to use Resolution to decide validity of a formula,
that is clearly a deal-breaker for any practical implementation,
since validity can be decided by brute force in exponential time.
This theory pair shows the Tseytin transformation, a way to transform a formula
while preserving validity.
The @{const cnf} of the transformed formula will have clauses with maximally 3 atoms,
and an amount of clauses linear in the size of the formula,
at the cost of introducing one new atom for each subformula of F› (i.e. @{term "size F"} many).
›
  
definition "pair_fun_upd f p  (case p of (k,v)  fun_upd f k v)"

lemma fold_pair_upd_triv: "A  fst ` set U  foldl pair_fun_upd F U A = F A"
  by(induction U arbitrary: F; simp)
    (metis fun_upd_apply pair_fun_upd_def prod.simps(2) surjective_pairing)

lemma distinct_pair_update_one: "(k,v)  set U  distinct (map fst U)  foldl pair_fun_upd F U k = v"
  by(induction U arbitrary: F; clarsimp simp add: pair_fun_upd_def fold_pair_upd_triv split: prod.splits) 
    (insert fold_pair_upd_triv, fastforce)
    
lemma distinct_zipunzip: "distinct xs  distinct (map fst (zip xs ys))" by (simp add: distinct_conv_nth)
    
(* hm, this foldl pair_fun_upd business might have been a bad idea. *)
lemma foldl_pair_fun_upd_map_of: "distinct (map fst U)  foldl pair_fun_upd F U = (λk. case map_of U k of Some v  v | None  F k)"
  by(unfold fun_eq_iff; induction U arbitrary: F; clarsimp split: option.splits simp: pair_fun_upd_def rev_image_eqI)

lemma map_of_map_apsnd: "map_of (map (apsnd t) M) = map_option t  (map_of M)"
  by(unfold fun_eq_iff comp_def; induction M; simp)
    

definition biimp (infix "" 67) where "F  G  (F  G)  (G  F)"
lemma atoms_biimp[simp]: "atoms (F  G) = atoms F  atoms G"
  unfolding biimp_def by auto
lemma biimp_size[simp]: "size (F  G) = (2 * (size F + size G)) + 3"
  by(simp add: biimp_def)
  
locale freshstuff =
  fixes fresh :: "'a set  'a"
  assumes isfresh: "finite S  fresh S  S"
begin
  
primrec nfresh where
"nfresh S 0 = []" |
"nfresh S (Suc n) = (let f = fresh S in f # nfresh (f  S) n)"

lemma length_nfresh: "length (nfresh S n) = n"
  by(induction n arbitrary: S; simp add: Let_def)
    
lemma nfresh_isfresh: "finite S  set (nfresh S n)  S = {}"
  by(induction n arbitrary: S; auto simp add: Let_def isfresh)
    
lemma nfresh_distinct: "finite S  distinct (nfresh S n)"
  by(induction n arbitrary: S; simp add: Let_def; meson disjoint_iff_not_equal finite_insert insertI1 nfresh_isfresh)

definition "tseytin_assmt F  let SF = remdups (subformulae F) in zip (nfresh (atoms F) (length SF)) SF"

lemma tseytin_assmt_distinct: "distinct (map fst (tseytin_assmt F))"
  unfolding tseytin_assmt_def using nfresh_distinct by (simp add: Let_def length_nfresh)
    
lemma tseytin_assmt_has: "G  set (subformulae F)  n. (n,G)  set (tseytin_assmt F)"
proof -
  assume "G  set (subformulae F)"
  then have "n. G = subformulae F ! n  n < length (subformulae F)"
    by (simp add: set_conv_nth)
  then have "a. (a, G)  set (zip (nfresh (atoms F) (length (subformulae F))) (subformulae F))"
    by (metis (no_types) in_set_zip length_nfresh prod.sel(1) prod.sel(2))
  thus ?thesis by(simp add: tseytin_assmt_def Let_def) (metis fst_conv in_set_conv_nth in_set_zip length_nfresh set_remdups snd_conv)
qed

lemma tseytin_assmt_new_atoms: "(k,l)  set (tseytin_assmt F)  k  atoms F"
  unfolding tseytin_assmt_def Let_def using nfresh_isfresh by (fastforce dest: set_zip_leftD)

primrec tseytin_tran1 where
"tseytin_tran1 S (Atom k) = [Atom k  S (Atom k)]" |
"tseytin_tran1 S  = [  S ]" |
"tseytin_tran1 S (Not F) = [S (Not F)  Not (S F)] @ tseytin_tran1 S F" |
"tseytin_tran1 S (And F G) = [S (And F G)  And (S F) (S G)] @ tseytin_tran1 S F @ tseytin_tran1 S G" |
"tseytin_tran1 S (Or F G) = [S (Or F G)  Or (S F) (S G)] @ tseytin_tran1 S F @ tseytin_tran1 S G" |
"tseytin_tran1 S (Imp F G) = [S (Imp F G)  Imp (S F) (S G)] @ tseytin_tran1 S F @ tseytin_tran1 S G"
definition "tseytin_toatom F  Atom  the  map_of (map (λ(a,b). (b,a)) (tseytin_assmt F))"
definition "tseytin_tran F  (let S = tseytin_toatom F in S F # tseytin_tran1 S F)"
  
lemma distinct_snd_tseytin_assmt: "distinct (map snd (tseytin_assmt F))" 
  unfolding tseytin_assmt_def by(simp add: Let_def length_nfresh)

lemma tseytin_assmt_backlookup: assumes "J  set (subformulae F)" 
  shows "(the (map_of (map (λ(a, b). (b, a)) (tseytin_assmt F)) J), J)  set (tseytin_assmt F)"
proof -
  have 1: "distinct (map snd M)  J  snd ` set M  (the (map_of (map (λ(a, b). (b, a)) M) J), J)  set M" for J M
    by(induction M; clarsimp split: prod.splits)
  have 2: "J  set (subformulae F)  J  snd ` set (tseytin_assmt F)" for J using image_iff tseytin_assmt_has by fastforce
  from 1[OF distinct_snd_tseytin_assmt 2, OF assms] show ?thesis .
qed

lemma tseytin_tran_small_clauses: "C  cnf (nnf (tseytin_tran F)). card C  3"
proof -
  have 3: "card S  2  card (a  S)  3" for a S 
    by(cases "finite S"; simp add: card_insert_le_m1)
  have 2: "card S  1  card (a  S)  2" for a S 
    by(cases "finite S"; simp add: card_insert_le_m1)
  have 1: "card S  0  card (a  S)  1" for a S 
    by(cases "finite S"; simp add: card_insert_le_m1)
  have *: "G  set (tseytin_tran1 (Atom  S) F); C  cnf (nnf G)  card C  3" for G C S
    by(induction F arbitrary: G; simp add: biimp_def; (elim disjE exE conjE | intro 1 2 3 | simp)+)
  show ?thesis
    unfolding tseytin_tran_def  tseytin_toatom_def Let_def
    by(clarsimp simp add: cnf_BigAnd nnf_BigAnd comp_assoc *)
qed
  
lemma tseytin_tran_few_clauses: "card (cnf (nnf (tseytin_tran F)))  3 * size F + 1"
proof -
  have "size Bot = 1" by simp  (* just a thing to keep in mind. *)
  have ws: "{c  D |D. D = {c1}  D = {c2}} = {{c,c1},{c,c2}}" for c1 c2 c by auto
  have grr: "Suc (card S)  c  card (a  S)  c" for a S c
    by(cases "finite S"; simp add: card_insert_le_m1)
  have *: "card (aset (tseytin_tran1 (Atom  S) F). cnf (nnf a))  3 * size F" for S
    by(induction F; simp add: biimp_def; ((intro grr card_Un_le[THEN le_trans] | simp add: ws)+)?) 
  show ?thesis
    unfolding tseytin_tran_def tseytin_toatom_def Let_def
    by(clarsimp simp: nnf_BigAnd cnf_BigAnd; intro grr; simp add: comp_assoc *)
qed
  
lemma tseytin_tran_new_atom_count: "card (atoms (tseytin_tran F))  size F + card (atoms F)"
proof -
  have tseytin_tran1_atoms: "H  set (tseytin_tran1 (tseytin_toatom F) G)  G  set (subformulae F) 
    atoms H  atoms F  (I  set (subformulae F). atoms (tseytin_toatom F I))" for G H
  proof(induction G arbitrary: H)
    case (Atom k)
    hence "k  atoms F"
      by simp (meson formula.set_intros(1) rev_subsetD subformula_atoms)
    with Atom show ?case by simp blast
  next
    case Bot then show ?case by simp blast
  next
    case (Not G)
    show ?case by(insert Not.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim Not.IH | force))
  next
    case (And G1 G2)
    show ?case by(insert And.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim And.IH; simp | force))
  next
    case (Or G1 G2)
    show ?case by(insert Or.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim Or.IH; simp | force))
  next
    case (Imp G1 G2)
    show ?case by(insert Imp.prems(1,2); 
         frule subformulas_in_subformulas; simp; elim disjE; (elim Imp.IH; simp | force))
  qed
  have tseytin_tran1_atoms: "(Gset (tseytin_tran1 (tseytin_toatom F) F). atoms G) 
      atoms F  (Iset (subformulae F). atoms (tseytin_toatom F I))"
    using tseytin_tran1_atoms[OF _ subformulae_self] by blast
  have 1: "card (atoms (tseytin_tran F)) 
  card (atoms (tseytin_toatom F F)  (xset (tseytin_tran1 (tseytin_toatom F) F). atoms x))" 
    unfolding tseytin_tran_def by(simp add: Let_def tseytin_tran1_atoms)
  have 2: "atoms (tseytin_toatom F F)  (xset (tseytin_tran1 (tseytin_toatom F) F). atoms x) 
     (atoms F  (Iset (subformulae F). atoms (tseytin_toatom F I)))"
    using tseytin_tran1_atoms by blast
  have twofin: "finite (atoms F  (Iset (subformulae F). atoms (tseytin_toatom F I)))" by simp
  have card_subformulae: "card (set (subformulae F))  size F" using length_subformulae by (metis card_length)
  have card_singleton_union: "finite S  card (xS. {f x})  card S" for f S 
    by(induction S rule: finite_induct; simp add: card_insert_if)
  have 3: "card (Iset (subformulae F). atoms (tseytin_toatom F I))  size F"
    unfolding tseytin_toatom_def using le_trans[OF card_singleton_union card_subformulae]
    by simp fast
  have 4: "card (atoms (tseytin_tran F))  card (atoms F) + card (fset (subformulae F). atoms (tseytin_toatom F f))"
    using le_trans[OF 1 card_mono[OF twofin 2]] card_Un_le le_trans by blast
  show ?thesis using 3 4 by linarith
qed
    
end

definition "freshnat S   Suc (Max (0  S))"
primrec nfresh_natcode where
"nfresh_natcode S 0 = []" |
"nfresh_natcode S (Suc n) = (let f = freshnat S in f # nfresh_natcode (f  S) n)"
interpretation freshnats: freshstuff freshnat unfolding freshnat_def by standard (meson Max_ge Suc_n_not_le_n finite_insert insertCI)
(* interpreting the locale is easy. making it executable… not so. *)
lemma [code_unfold]: "freshnats.nfresh = nfresh_natcode"
proof -
  have "freshnats.nfresh S n  = nfresh_natcode S n" for S n by(induction n arbitrary: S; simp)
  thus ?thesis by auto
qed
lemmas freshnats_code[code_unfold] = freshnats.tseytin_tran_def freshnats.tseytin_toatom_def freshnats.tseytin_assmt_def freshnats.nfresh.simps
  
lemma "freshnats.tseytin_tran (Atom 0  (¬ (Atom 1))) = [
  Atom 2,
  Atom 2  Atom 3  Atom 4,
  Atom 0  Atom 3,
  Atom 4  ¬ (Atom 5),
  Atom 1  Atom 5
]" (is "?l = ?r")
proof -
  have "cnf (nnf ?r) =
    {{Pos 2},
     {Neg 4, Pos 2}, {Pos 3, Pos 2}, {Neg 2, Neg 3, Pos 4},
     {Neg 3, Pos 0}, {Neg 0, Pos 3},
     {Pos 5, Pos 4}, {Neg 4, Neg 5},
     {Neg 5, Pos 1}, {Neg 1, Pos 5}}" by eval
  have ?thesis by eval
  show ?thesis by code_simp
qed
    
end