Theory Chomsky_Normal_Form
section ‹Conversion to Chomsky Normal Form›
theory Chomsky_Normal_Form
imports
Unit_Elimination
Epsilon_Elimination
Replace_Terminals
begin
text ‹The conversion to Chomsky Normal Form (CNF) is achieved by, in that order,
epsilon and unit elimination, uniformization and binarization.
A production ‹A → α› is
➧ uniform if ‹α› contains no terminal unless \<^prop>‹length α = 1›,
➧ binary if \<^prop>‹length α ≤ 2›.
The start symbol ‹S› is passed around explicitly to avoid generating ‹S› as a fresh name.
Of course the nonterminals in the productions ‹ps› are avoided. However,
if \<^prop>‹S ∉ Nts(set ps)› or \<^prop>‹lang ps S = {[]}›
(in which case epsilon elimination eliminates ‹S›),
‹S› could accidentally be generated as a fresh name.
One could perform the CNF conversion without avoiding ‹S› explicitly.
As a result one would get a CNF that is independent of ‹S› (in contrast to now),
but would need to add the preconditions \<^prop>‹S ∈ Nts(set ps)› and \<^prop>‹lang ps S ≠ {[]}›,
which would also be inherited by any application of the CNF conversion.
›
definition CNF :: "('n, 't) Prods ⇒ bool" where
"CNF P = (∀(A,α) ∈ P. (∃B C. α = [Nt B, Nt C]) ∨ (∃a. α = [Tm a]))"
subsection ‹Uniformization›
definition uniform :: "('n, 't) Prods ⇒ bool" where
"uniform P ≡ ∀(A, α) ∈ P. (∄t. Tm t ∈ set α) ∨ (∃t. α = [Tm t])"
definition Bad_tms :: "('n,'t) Prods ⇒ 't set" where
"Bad_tms P = (⋃(A,α) ∈ P. if length α ≥ 2 then Tms_syms α else {})"
definition bad_tms :: "('n,'t) prods ⇒ 't list" where
"bad_tms ps = remdups(concat ((map tms_syms o filter (λu. length u ≥ 2) o map snd) ps))"
lemma set_bad_tms: "set(bad_tms ps) = Bad_tms (set ps)"
unfolding Bad_tms_def bad_tms_def
by (auto simp: set_tms_syms split: if_splits)
definition replace_Tm_2_syms where
"replace_Tm_2_syms f xs = (if length xs < 2 then xs else map (replace_Tm_sym f) xs)"
definition Replace_Tm_2 :: "('t ⇀ 'n) ⇒ ('n,'t) Prods ⇒ ('n,'t) Prods" where
[code_unfold]: "Replace_Tm_2 f = Replace_Tm f (replace_Tm_2_syms f)"
definition replace_Tm_2 :: "('t × 'n) list ⇒ ('n,'t) prods ⇒ ('n,'t) prods" where
"replace_Tm_2 f = replace_Tm f (replace_Tm_2_syms (map_of f))"
lemma set_replace_Tm_2:
"distinct (map fst f) ⟹ set (replace_Tm_2 f ps) = Replace_Tm_2 (map_of f) (set ps)"
by (auto simp add: replace_Tm_2_def Replace_Tm_2_def set_replace_Tm)
lemma replace_Tm_2_syms_ops:
"replace_Tm_2_syms f α ∈ Replace_Tm_syms_ops f α"
proof (cases "length α < 2")
case False
thus ?thesis
by (simp add: replace_Tm_2_syms_def map_replace_Tm_sym_ops)
next
case True
thus ?thesis
by(cases α)
(auto simp: replace_Tm_2_syms_def Replace_Tm_sym_ops_def)
qed
lemma replace_Tm_2_ops:
"replace_Tm_2_syms f ∈ Replace_Tm_ops f"
by (simp add: Replace_Tm_ops_def replace_Tm_2_syms_ops)
corollary Lang_Replace_Tm_2:
assumes "inj_on f (dom f)" "ran f ∩ Nts P = {}" "A ∉ ran f"
shows "Lang (Replace_Tm_2 f P) A = Lang P A"
using Lang_Replace_Tm[OF replace_Tm_2_ops assms] by (simp add: Replace_Tm_2_def)
corollary lang_replace_Tm_2:
assumes dist: "distinct (map fst f)"
and inj: "inj_on (map_of f) (fst ` (set f))" and disj: "snd ` set f ∩ Nts(set ps) = {}"
and A: "A ∉ snd ` set f"
shows "lang (replace_Tm_2 f ps) A = lang ps A"
apply (unfold set_replace_Tm_2[OF dist])
apply (rule Lang_Replace_Tm_2)
using assms
by(simp_all add: dom_map_of_conv_image_fst ran_distinct)
lemma map_replace_Tm_sym_id: "α = map (replace_Tm_sym f) α ⟷ Tms_syms α ∩ dom f = {}"
by(induction α)(auto simp: replace_Tm_sym_def split: sym.split)
lemma uniform_Replace_Tm_2:
assumes Pf: "Bad_tms P ⊆ dom f" shows "uniform (Replace_Tm_2 f P)"
unfolding uniform_def
proof (safe del: disjCI)
fix A β assume Aβ: "(A,β) ∈ Replace_Tm_2 f P"
show "(∄t. Tm t ∈ set β) ∨ (∃t. β = [Tm t])"
proof(cases "(A,β) ∈ Replace_Tm_new f")
case True
then show ?thesis by (auto simp: Replace_Tm_new_def)
next
case False
with Aβ obtain α where Aα: "(A,α) ∈ P"
and [simp]: "β = (if length α < 2 then α else map (replace_Tm_sym f) α)"
by (auto simp: Replace_Tm_2_def Replace_Tm_def replace_Tm_2_syms_def)
show ?thesis
proof (cases "length α < 2")
case True
then show ?thesis
by (auto simp: numeral_2_eq_2 less_Suc_eq_le le_Suc_eq length_Suc_conv
replace_Tm_sym_def)
next
case False
{ fix a assume "Tm a ∈ set α"
with False Aα have "a ∈ Bad_tms P"
by (auto simp: Bad_tms_def Tms_syms_def split: prod.splits)
with Pf have "a ∈ dom f" by auto
} note * = this
show ?thesis
by (auto simp: False replace_Tm_sym_def dest!: * split: sym.splits)
qed
qed
qed
definition Uniformize :: "('n::fresh0) ⇒ 't list ⇒ ('n, 't) Prods ⇒ ('n, 't) Prods" where
[code_unfold]: "Uniformize S ts P = Replace_Tm_2 (fresh_map (insert S (Nts P)) ts) P"
lemma "Uniformize 0 [1,2] {(0::nat, [Tm 1, Tm (2::int)])} =
{(0, [Nt 1, Nt 2]), (1, [Tm 1]), (2, [Tm 2])}"
by eval
definition uniformize :: "('n::fresh0) ⇒ ('n, 't) prods ⇒ ('n, 't) prods" where
"uniformize S ps =
(let ts = bad_tms ps;
tmap = fresh_assoc (insert S (Nts(set ps))) ts
in replace_Tm_2 tmap ps)"
lemma "uniformize 0 [(0::nat, [Tm 1, Tm (2::int)])] =
[(0, [Nt 1, Nt 2]), (1, [Tm 1]), (2, [Tm 2])]"
by eval
lemma distinct_bad_tms: "distinct (bad_tms ps)"
by (simp add: bad_tms_def)
lemma set_uniformize: "set (uniformize S ps) = Uniformize S (bad_tms ps) (set ps)"
by (simp add: uniformize_def Uniformize_def
set_replace_Tm_2 map_fst_fresh_assoc distinct_bad_tms map_of_fresh_assoc)
lemma uniform_Uniformize: "Bad_tms P ⊆ set ts ⟹ uniform (Uniformize S ts P)"
by (simp add: Uniformize_def uniform_Replace_Tm_2 dom_fresh_map)
lemma uniform_uniformize: "uniform (set (uniformize S ps))"
by (simp add: set_uniformize uniform_Uniformize set_bad_tms)
lemma Lang_Uniformize:
assumes fin: "finite (Nts P)"
shows "A ∈ Nts P ∪ {S} ⟹ Lang (Uniformize S ts P) A = Lang P A"
apply (unfold Uniformize_def)
apply (subst Lang_Replace_Tm_2)
using fresh_map_disj[of "insert S (Nts P)" ts, simplified, OF fin]
by (auto simp: dom_fresh_map fresh_map_inj_on fin)
lemma lang_uniformize: "A ∈ Nts (set ps) ∪ {S} ⟹ lang (uniformize S ps) A = lang ps A"
by (auto simp: set_uniformize Lang_Uniformize finite_Nts)
lemma Eps_free_Uniformize: "Eps_free P ⟹ Eps_free (Uniformize S ts P)"
by (auto simp: Eps_free_def Uniformize_def
Replace_Tm_2_def Replace_Tm_def replace_Tm_2_syms_def Replace_Tm_new_def)
lemma eps_free_uniformize: "eps_free ps ⟹ eps_free (uniformize S ps)"
by (simp add: set_uniformize Eps_free_Uniformize)
lemma Unit_free_Uniformize: "Unit_free P ⟹ Unit_free (Uniformize S ts P)"
apply (unfold Unit_free_def)
by (auto simp add: Uniformize_def Replace_Tm_2_def Replace_Tm_def Replace_Tm_new_def replace_Tm_2_syms_def)
lemma Unit_free_uniformize: "Unit_free (set ps) ⟹ Unit_free (set (uniformize S ps))"
by (simp add: set_uniformize Unit_free_Uniformize)
text ‹The following is used to prove that binarization preserves uniformity.
The latter is characterized in terms of ‹badTmsCount = 0›.›
lemma Nts_correct: "A ∉ Nts P ⟹ (∄S α. (S, α) ∈ P ∧ (Nt A ∈ {Nt S} ∪ set α))"
unfolding Nts_def Nts_syms_def by auto
definition prodTms :: "('n,'t) prod ⇒ nat" where
"prodTms p ≡ (if length (snd p) ≤ 1 then 0 else length (filter (isTm) (snd p)))"
definition prodNts :: "('n,'t) prod ⇒ nat" where
"prodNts p ≡ (if length (snd p) ≤ 2 then 0 else length (filter (isNt) (snd p)))"
fun badTmsCount :: "('n,'t) Prods ⇒ nat" where
"badTmsCount P = sum prodTms P"
lemma uniform_badTmsCount: assumes "finite P"
shows "uniform P ⟷ badTmsCount P = 0"
proof
assume assm: "uniform P"
have "∀p ∈ P. prodTms p = 0"
proof
fix p assume "p ∈ P"
hence "(∄t. Tm t ∈ set (snd p)) ∨ (∃t. snd p = [Tm t])"
using assm unfolding uniform_def by auto
hence "length (snd p) ≤ 1 ∨ (∄t. Tm t ∈ set (snd p))"
by auto
hence "length (snd p) ≤ 1 ∨ length (filter (isTm) (snd p)) = 0"
unfolding isTm_def by (auto simp: filter_empty_conv)
thus "prodTms p = 0"
unfolding prodTms_def by argo
qed
thus "badTmsCount P = 0"
using assms by auto
next
assume assm: "badTmsCount P = 0"
have "∀p ∈ P. ((∄t. Tm t ∈ set (snd p)) ∨ (∃t. snd p = [Tm t]))"
proof
fix p assume "p ∈ P"
hence "prodTms p = 0"
using assm assms by auto
hence "length (snd p) ≤ 1 ∨ length (filter (isTm) (snd p)) = 0"
unfolding prodTms_def by argo
hence "length (snd p) ≤ 1 ∨ (∄t. Tm t ∈ set (snd p))"
by (auto simp: isTm_def filter_empty_conv)
hence "length (snd p) = 0 ∨ length (snd p) = 1 ∨ (∄t. Tm t ∈ set (snd p))"
using order_neq_le_trans by blast
thus "(∄t. Tm t ∈ set (snd p)) ∨ (∃t. snd p = [Tm t])"
by (auto simp: length_Suc_conv)
qed
thus "uniform P"
unfolding uniform_def by auto
qed
subsection ‹Binarization›
text ‹Binarization has two parts: a relational specification of what a single step in
the conversion should do and an executable function that performs the transitive-reflexive
closure of a single step. This way multiple functional implementations can be proved
correct more easily. The relational part is inherited from Aditi Barthwal's work.›
definition binary :: "('n, 't) Prods ⇒ bool" where
"binary P ≡ ∀(A, α) ∈ P. length α ≤ 2"
fun badNtsCount :: "('n,'t) Prods ⇒ nat" where
"badNtsCount P = sum prodNts P"
lemma badNtsCountSet: assumes "finite P"
shows "(∀p ∈ P. prodNts p = 0) ⟷ badNtsCount P = 0"
using assms by simp
lemma binary_badNtsCount:
assumes "finite P" "uniform P" "badNtsCount P = 0"
shows "binary P"
proof -
have "∀p ∈ P. length (snd p) ≤ 2"
proof
fix p assume assm: "p ∈ P"
obtain A α where "(A, α) = p"
using prod.collapse by blast
hence "((∄t. Tm t ∈ set α) ∨ (∃t. α = [Tm t])) ∧ (prodNts (A, α) = 0)"
using assms badNtsCountSet assm unfolding uniform_def by auto
hence "((∄t. Tm t ∈ set α) ∨ (∃t. α = [Tm t])) ∧ (length α ≤ 2 ∨ length (filter (isNt) α) = 0)"
unfolding prodNts_def by force
hence "((∄t. Tm t ∈ set α) ∨ (length α ≤ 1)) ∧ (length α ≤ 2 ∨ (∄N. Nt N ∈ set α))"
by (auto simp: filter_empty_conv[of isNt α] isNt_def)
hence "length α ≤ 2"
by (metis Suc_1 Suc_le_eq in_set_conv_nth le_Suc_eq nat_le_linear sym.exhaust)
thus "length (snd p) ≤ 2"
using ‹(A, α) = p› by auto
qed
thus ?thesis
by (auto simp: binary_def)
qed
subsubsection ‹Specification of a Single Binarization Step›
definition binarizeStep :: "'n::infinite ⇒ 'n ⇒ 'n ⇒ 'n ⇒ ('n,'t)Prods ⇒ ('n,'t)Prods ⇒ bool" where
"binarizeStep A B⇩1 B⇩2 S P P' ≡ (
∃l r p s. (l,r) ∈ P ∧ (r = p@[Nt B⇩1,Nt B⇩2]@s)
∧ (p ≠ [] ∨ s ≠ []) ∧ (A ∉ (Nts P ∪ {S}))
∧ P' = P - {(l,r)} ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)})"
lemma binarizeStep_Eps_free:
assumes "Eps_free P"
and "binarizeStep A B⇩1 B⇩2 S P P'"
shows "Eps_free P'"
using assms unfolding binarizeStep_def Eps_free_def by force
lemma binarizeStep_Unit_free:
assumes "Unit_free P"
and "binarizeStep A B⇩1 B⇩2 S P P'"
shows "Unit_free P'"
proof -
have 1: "(∄l A. (l,[Nt A]) ∈ P)"
using assms(1) unfolding Unit_free_def by simp
obtain l r p s where lrps: "(l,r) ∈ P ∧ (r = p@[Nt B⇩1,Nt B⇩2]@s) ∧ (p ≠ [] ∨ s ≠ [])
∧ (P' = ((P - {(l,r)}) ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)}))"
using assms(2) unfolding binarizeStep_def by blast
hence "∄l' A'. (l,[Nt A']) ∈ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)}"
using Cons_eq_append_conv by fastforce
hence "∄l' A'. (l',[Nt A']) ∈ ((P - {(l,r)}) ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)})"
using 1 by simp
moreover have "P' = ((P - {(l,r)}) ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)})"
using lrps by simp
ultimately show ?thesis unfolding Unit_free_def by simp
qed
lemma cnf_r1Nt:
assumes "binarizeStep A B⇩1 B⇩2 S P P'"
and "P ⊢ lhs ⇒ rhs"
shows "P' ⊢ lhs ⇒* rhs"
proof -
obtain p' s' C v where Cv: "lhs = p'@[Nt C]@s' ∧ rhs = p'@v@s' ∧ (C,v) ∈ P"
using derive.cases[OF assms(2)] by fastforce
obtain l r p s where lrps: "(l,r) ∈ P ∧ (r = p@[Nt B⇩1,Nt B⇩2]@s) ∧ (p ≠ [] ∨ s ≠ []) ∧ (A ∉ Nts P)
∧ (P' = ((P - {(l,r)}) ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)}))"
using assms(1) set_removeAll unfolding binarizeStep_def by fastforce
thus ?thesis
proof (cases "(C, v) ∈ P'")
case True
then show ?thesis
using derive.intros[of C v] Cv by blast
next
case False
hence "C = l ∧ v = p@[Nt B⇩1,Nt B⇩2]@s"
by (simp add: lrps Cv)
have 1: "P' ⊢ [Nt l] ⇒ p@[Nt A]@s"
using lrps by (simp add: derive_singleton)
have "P' ⊢ [Nt A] ⇒ [Nt B⇩1,Nt B⇩2]"
using lrps by (simp add: derive_singleton)
hence "P' ⊢ [Nt l] ⇒* p@[Nt B⇩1,Nt B⇩2]@s"
by (meson 1 converse_rtranclp_into_rtranclp derive_append derive_prepend r_into_rtranclp)
thus ?thesis
using False ‹C = l ∧ v = p@[Nt B⇩1,Nt B⇩2]@s› Cv derives_append derives_prepend by blast
qed
qed
lemma slemma1_1Nt:
assumes "binarizeStep A B⇩1 B⇩2 S P P'"
and "(A, α) ∈ P'"
shows "α = [Nt B⇩1,Nt B⇩2]"
proof -
have "A ∉ Nts P"
using assms(1) unfolding binarizeStep_def by blast
hence "∄α. (A, α) ∈ P"
unfolding Nts_def by auto
hence "∄α. α ≠ [Nt B⇩1,Nt B⇩2] ∧ (A, α) ∈ P'"
using assms(1) unfolding binarizeStep_def by auto
thus ?thesis
using assms(2) by blast
qed
lemma slemma4_4Nt:
assumes "binarizeStep A B⇩1 B⇩2 S P P'"
and "(l,r) ∈ P"
shows "(Nt A) ∉ set r"
proof -
have "A ∉ Nts P"
using assms(1) unfolding binarizeStep_def by blast
hence "∄S α. (S, α) ∈ P ∧ (Nt A ∈ {Nt S} ∪ set α)"
using Nts_correct[of A ‹P›] by blast
thus ?thesis
using assms(2) by blast
qed
lemma lemma1Nt:
assumes "binarizeStep A B⇩1 B⇩2 S P P'"
and "P' ⊢ lhs ⇒ rhs"
shows "(substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] rhs)
∨ (P ⊢ (substsNt A [Nt B⇩1,Nt B⇩2] lhs) ⇒ substsNt A [Nt B⇩1,Nt B⇩2] rhs)"
proof -
obtain l r p s where lrps: "(l,r) ∈ P ∧ (r = p@[Nt B⇩1,Nt B⇩2]@s) ∧ (p ≠ [] ∨ s ≠ []) ∧ (A ∉ Nts P)
∧ (P' = ((P - {(l,r)}) ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)}))"
using assms(1) set_removeAll unfolding binarizeStep_def by fastforce
obtain p' s' u v where uv: "lhs = p'@[Nt u]@s' ∧ rhs = p'@v@s' ∧ (u,v) ∈ P'"
using derive.cases[OF assms(2)] by fastforce
thus ?thesis
proof (cases "u = A")
case True
then show ?thesis
proof (cases "v = [Nt B⇩1,Nt B⇩2]")
case True
have "substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] ([Nt A]@s')"
using uv ‹u = A› by simp
hence 1: "substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ [Nt B⇩1,Nt B⇩2] @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
by simp
have "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] ([Nt B⇩1,Nt B⇩2]@s')"
using uv ‹u = A› True by simp
hence "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ [Nt B⇩1,Nt B⇩2] @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
using assms(1) unfolding binarizeStep_def Nts_def by auto
then show ?thesis
using 1 by simp
next
case False
then show ?thesis
using True uv assms(1) slemma1_1Nt by fastforce
qed
next
case False
then show ?thesis
proof (cases "(Nt A) ∈ set v")
case True
have "Nt A ∉ set p ∧ Nt A ∉ set s"
using lrps assms(1) by (metis UnI1 UnI2 set_append slemma4_4Nt)
hence 1: "v = p@[Nt A]@s ∧ Nt A ∉ set p ∧ Nt A ∉ set s"
using True lrps uv assms slemma4_4Nt[of A B⇩1 B⇩2 S P P'] unfolding binarizeStep_def Nts_def by auto
hence "substsNt A [Nt B⇩1,Nt B⇩2] v = substsNt A [Nt B⇩1,Nt B⇩2] p @ substsNt A [Nt B⇩1,Nt B⇩2] ([Nt A]@s)"
by simp
hence "substsNt A [Nt B⇩1,Nt B⇩2] v = p @ [Nt B⇩1,Nt B⇩2] @ s"
using 1 substs_append by (simp add: substs_skip)
hence 2: "(u, substsNt A [Nt B⇩1,Nt B⇩2] v) ∈ P"
using True lrps uv assms(1) slemma4_4Nt by fastforce
have "substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] ([Nt u]@s')"
using uv by simp
hence 3: "substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ [Nt u] @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
using ‹u ≠ A› by simp
have "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] (v@s')"
using uv by simp
hence "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] v @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
by simp
then show ?thesis
using 2 3 assms(2) uv derive.simps by fast
next
case False
hence 1: "(u, v) ∈ P"
using assms(1) uv ‹u ≠ A› lrps by (simp add: in_set_conv_decomp)
have "substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] ([Nt u]@s')"
using uv by simp
hence 2: "substsNt A [Nt B⇩1,Nt B⇩2] lhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ [Nt u] @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
using ‹u ≠ A› by simp
have "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] (v@s')"
using uv by simp
hence "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ substsNt A [Nt B⇩1,Nt B⇩2] v @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
by simp
hence "substsNt A [Nt B⇩1,Nt B⇩2] rhs = substsNt A [Nt B⇩1,Nt B⇩2] p' @ v @ substsNt A [Nt B⇩1,Nt B⇩2] s'"
using False substs_skip by fastforce
thus ?thesis
using 1 2 assms(2) uv derive.simps by fast
qed
qed
qed
lemma lemma3Nt:
assumes "P' ⊢ lhs ⇒* rhs"
and "binarizeStep A B⇩1 B⇩2 S P P'"
shows "P ⊢ substsNt A [Nt B⇩1, Nt B⇩2] lhs ⇒* substsNt A [Nt B⇩1, Nt B⇩2] rhs"
using assms
proof (induction rhs rule: rtranclp_induct)
case (step y z)
then show ?case
using lemma1Nt[of A B⇩1 B⇩2 S P P' y z] by auto
qed simp
lemma Lang_binarizeStep1:
assumes "binarizeStep A B⇩1 B⇩2 S P P'"
shows "Lang P' S ⊆ Lang P S"
proof
fix w
assume "w ∈ Lang P' S"
hence "P' ⊢ [Nt S] ⇒* map Tm w"
by (simp add: Lang_def)
hence "P' ⊢ [Nt S] ⇒* map Tm w"
using assms unfolding binarizeStep_def by auto
hence "P ⊢ substsNt A [Nt B⇩1, Nt B⇩2] [Nt S] ⇒* substsNt A [Nt B⇩1, Nt B⇩2] (map Tm w)"
using assms lemma3Nt[of P' ‹[Nt S]› ‹map Tm w›] by blast
moreover have "substsNt A [Nt B⇩1, Nt B⇩2] [Nt S] = [Nt S]"
using assms unfolding binarizeStep_def by auto
moreover have "substsNt A [Nt B⇩1, Nt B⇩2] (map Tm w) = map Tm w" by simp
ultimately show "w ∈ Lang P S" using Lang_def
by (metis (no_types, lifting) mem_Collect_eq)
qed
lemma slemma5_1Nt:
assumes "P ⊢ u ⇒* v"
and "binarizeStep A B⇩1 B⇩2 S P P'"
shows "P' ⊢ u ⇒* v"
using assms by (induction v rule: rtranclp_induct) (auto simp: cnf_r1Nt rtranclp_trans)
lemma Lang_binarizeStep2:
assumes "binarizeStep A B⇩1 B⇩2 S P P'"
shows "Lang P S ⊆ Lang P' S"
proof
fix w
assume "w ∈ Lang P S"
hence "P ⊢ [Nt S] ⇒* map Tm w"
using assms unfolding Lang_def binarizeStep_def by auto
thus "w ∈ Lang P' S"
using assms slemma5_1Nt Lang_def by fast
qed
lemma Lang_binarizeStep: "binarizeStep A B⇩1 B⇩2 S P P' ⟹ Lang P S = Lang P' S"
using Lang_binarizeStep1 Lang_binarizeStep2 by fast
lemma Eps_free_binarizeStepRtc:
assumes "(λx y. ∃A t B⇩1 B⇩2. binarizeStep A B⇩1 B⇩2 S x y)^** P P'"
and "Eps_free P"
shows "Eps_free P'"
using assms by (induction rule: rtranclp_induct) (auto simp: binarizeStep_Eps_free)
lemma Unit_free_binarizeStepRtc:
assumes "(λx y. ∃A t B⇩1 B⇩2. binarizeStep A B⇩1 B⇩2 S x y)^** P P'"
and "Unit_free P"
shows "Unit_free P'"
using assms by (induction rule: rtranclp_induct) (auto simp: binarizeStep_Unit_free)
theorem Lang_binarizeStepRtc:
assumes "(λx y. ∃A B⇩1 B⇩2. binarizeStep A B⇩1 B⇩2 S x y)^** P P'"
shows "Lang P S = Lang P' S"
using assms by (induction rule: rtranclp_induct) (fastforce simp: Lang_binarizeStep)+
text ‹Termination›
lemma lemma6_b:
assumes "finite P" "binarizeStep A B⇩1 B⇩2 S P P'" shows "badNtsCount P' < badNtsCount P"
proof -
from assms(2) obtain l r p s where lrps: "(l,r) ∈ P" "r = p@[Nt B⇩1,Nt B⇩2]@s" "p ≠ [] ∨ s ≠ []"
"A ∉ Nts P" "P' = P - {(l,r)} ∪ {(A, [Nt B⇩1,Nt B⇩2]), (l, p@[Nt A]@s)}"
unfolding binarizeStep_def by auto
let ?B12 = "[Nt B⇩1,Nt B⇩2]::('a,'b)syms"
have "prodNts (l,p@?B12@s) = length (filter isNt (p@?B12@s))"
using lrps unfolding prodNts_def by auto
hence 1: "prodNts (l,p@?B12@s) = length (filter isNt (p@s)) + 2"
by (simp add: isNt_def)
have "(A,?B12) ∉ P ∧ (l, p@[Nt A]@s) ∉ P"
using Nts_correct[OF ‹A ∉ Nts P›] by fastforce
then have "badNtsCount P' = badNtsCount (P - {(l,r)}) + badNtsCount {(A,?B12), (l, p@[Nt A]@s)}"
unfolding badTmsCount.simps ‹P' = _› by (simp add: assms(1) sum_Un_eq)
also have "… = badNtsCount (P - {(l,r)}) + badNtsCount {(A,?B12)} + badNtsCount{(l, p@[Nt A]@s)}"
using Nts_correct[OF ‹A ∉ Nts P›] lrps(1) by simp
finally have 2: "badNtsCount P' = …" .
have 3: "badNtsCount (P - {(l,r)}) < badNtsCount P"
using sum.remove[OF assms(1) lrps(1), of prodNts] lrps(2) 1 by (simp)
have "prodNts (l, p@[Nt A]@s) = length (filter isNt (p@[Nt A]@s)) ∨ prodNts (l, p@[Nt A]@s) = 0"
unfolding prodNts_def using lrps by simp
thus ?thesis
proof
assume "prodNts (l, p@[Nt A]@s) = length (filter (isNt) (p@[Nt A]@s))"
hence "badNtsCount P' = badNtsCount (P - {(l,r)}) + badNtsCount {(l, (p@[Nt A]@s))}"
using 2 by (simp add: prodNts_def)
moreover have "prodNts (l, p@[Nt A]@s) < prodNts (l,p@[Nt B⇩1,Nt B⇩2]@s)"
using 1 ‹prodNts (l, p@[Nt A]@s) = length (filter (isNt) (p@[Nt A]@s))› isNt_def by simp
ultimately show ?thesis
by(simp add: sum.remove[OF assms(1) lrps(1)] ‹r = _›)
next
assume "prodNts (l, p@[Nt A]@s) = 0"
hence "badNtsCount P' = badNtsCount (P - {(l,r)})"
using 2 by (simp add: prodNts_def)
thus ?thesis
using 3 by simp
qed
qed
lemma noTms_prodTms0:
assumes "prodTms (l,r) = 0"
shows "length r ≤ 1 ∨ (∀a ∈ set r. isNt a)"
proof -
have "length r ≤ 1 ∨ (∄a. a ∈ set r ∧ isTm a)"
using assms unfolding prodTms_def using empty_filter_conv by fastforce
thus ?thesis
by (metis isNt_def isTm_def sym.exhaust)
qed
lemma badNtsCountNot0:
assumes "finite P" "badNtsCount P > 0"
shows "∃l r. (l, r) ∈ P ∧ length r ≥ 3"
using assms badNtsCountSet not_gr0 unfolding prodNts_def by fastforce
subsubsection ‹Functional Binarization›
definition freshA :: "('n::fresh0,'t) prods ⇒ 'n ⇒ 'n" where
"freshA ps S = fresh0 (Nts (set ps) ∪ {S})"
lemma freshA_notin_set:
shows "freshA ps S ∉ (Nts (set ps) ∪ {S})"
unfolding freshA_def by (metis ID.set_finite finite_Un finite_nts fresh0_notIn)
fun replaceNts :: "'n::fresh0 ⇒ ('n,'t) syms ⇒ ('n × 'n) option × ('n,'t) syms" where
"replaceNts A [] = (None, [])" |
"replaceNts A [s] = (None, [s])" |
"replaceNts A (Nt s⇩1 # Nt s⇩2 # sl) = (Some (s⇩1, s⇩2), Nt A # sl)" |
"replaceNts A (s#sl) = (let (nn_opt, sl') = replaceNts A sl in (nn_opt, s#sl'))"
lemma replaceNts_tm_unchanged_opt:
assumes
"replaceNts A (s0#s1#sl) = (nn_opt, sl')"
"∃t. s0 = Tm t ∨ s1 = Tm t"
obtains sl'' where "replaceNts A (s1#sl) = (nn_opt, sl'')"
proof -
obtain nn_opt' sl'' where "replaceNts A (s1#sl) = (nn_opt', sl'')"
by fastforce
moreover with assms have "nn_opt = nn_opt'" by fastforce
ultimately show thesis using that by blast
qed
lemma replaceNts_id_iff_None:
assumes "replaceNts A sl = (nn_opt, sl')"
shows "nn_opt = None ⟷ sl = sl'"
using assms proof (induction sl arbitrary: nn_opt sl' rule: replaceNts.induct)
case ("4_1" A t s sl)
then obtain sl'' where rec: "replaceNts A (s#sl) = (nn_opt, sl'')"
using replaceNts_tm_unchanged_opt by blast
then show ?case using "4_1" by auto
next
case ("4_2" A s t sl)
then obtain sl'' where rec: "replaceNts A (Tm t#sl) = (nn_opt, sl'')"
using replaceNts_tm_unchanged_opt by blast
then show ?case using "4_2" by auto
qed auto
lemma replaceNts_replaces_pair:
assumes
"replaceNts A sl = (nn_opt, sl')"
"nn_opt ≠ None"
obtains p q B⇩1 B⇩2 where
"nn_opt = Some (B⇩1,B⇩2)"
"sl = p@[Nt B⇩1, Nt B⇩2]@q"
"sl' = p@[Nt A]@q"
using assms proof (induction sl arbitrary: thesis nn_opt sl' rule: replaceNts.induct)
case ("4_1" A t s sl)
then obtain sl'' where
"replaceNts A (s#sl) = (nn_opt, sl'')"
and sl'_def: "sl' = Tm t # sl''"
using replaceNts_tm_unchanged_opt
by (metis (lifting) case_prod_conv prod.inject replaceNts.simps(4))
with "4_1"(1,4) obtain p q B⇩1 B⇩2 where
"nn_opt = Some (B⇩1,B⇩2)" "s#sl = p@[Nt B⇩1,Nt B⇩2]@q" "sl'' = p@[Nt A]@q"
by blast
moreover with sl'_def have "Tm t #s#sl = (Tm t#p)@[Nt B⇩1,Nt B⇩2]@q" "sl' = (Tm t#p)@[Nt A]@q"
by auto
ultimately show ?case using "4_1"(2) by blast
next
case ("4_2" A s t sl)
then obtain sl'' where
"replaceNts A (Tm t#sl) = (nn_opt, sl'')"
and sl'_def: "sl' = s # sl''"
using replaceNts_tm_unchanged_opt
by (metis (lifting) old.prod.case prod.inject replaceNts.simps(5))
with "4_2"(1,4) obtain p q B⇩1 B⇩2 where
"nn_opt = Some (B⇩1,B⇩2)" "Tm t#sl = p@[Nt B⇩1,Nt B⇩2]@q" "sl'' = p@[Nt A]@q"
by blast
moreover with sl'_def have "s#Tm t#sl = (s#p)@[Nt B⇩1,Nt B⇩2]@q" "sl' = (s#p)@[Nt A]@q"
by auto
ultimately show ?case using "4_2"(2) by blast
qed fastforce+
corollary replaceNts_replaces_pair_Some:
assumes "replaceNts A sl = (Some (B⇩1,B⇩2), sl')"
obtains p q where
"sl = p@[Nt B⇩1, Nt B⇩2]@q"
"sl' = p@[Nt A]@q"
using replaceNts_replaces_pair
by (smt (verit) assms option.distinct(1) option.inject prod.inject)
fun binarize1 :: "'n::fresh0 ⇒ ('n,'t) prods ⇒ ('n,'t) prods ⇒ ('n,'t) prods" where
"binarize1 A ps0 [] = ps0" |
"binarize1 A ps0 ((l,r)#ps) =
(case replaceNts A r of
(None, _) ⇒ binarize1 A ps0 ps|
(Some (B⇩1,B⇩2), r') ⇒
if length r < 3 then binarize1 A ps0 ps
else (A, [Nt B⇩1,Nt B⇩2]) # (l, r') # removeAll (l,r) ps0)"
lemma binarize1_rec_if_id_or_lt3:
assumes
"replaceNts A r = (nn_opt, r')"
"r = r' ∨ length r < 3"
shows "binarize1 A ps0 ((l,r)#ps) = binarize1 A ps0 ps"
using assms replaceNts_id_iff_None by (cases nn_opt) auto
lemma binarize1_binarizes:
assumes "binarize1 A ps0 ps ≠ ps0"
obtains l r r' B⇩1 B⇩2 where
"(l,r) ∈ set ps"
"length r > 2"
"replaceNts A r = (Some (B⇩1,B⇩2), r')"
"binarize1 A ps0 ps = (A, [Nt B⇩1,Nt B⇩2]) # (l, r') # removeAll (l,r) ps0"
using assms proof (induction ps arbitrary: thesis)
case (Cons p ps)
obtain l r r' nn_opt where lr_defs: "p = (l,r)" "replaceNts A r = (nn_opt,r')"
by fastforce
consider (hd) "r ≠ r' ∧ length r > 2" | (tl) "r = r' ∨ length r < 3" by fastforce
then show ?case
proof cases
case hd
with replaceNts_id_iff_None lr_defs obtain B⇩1 B⇩2 where "nn_opt = Some (B⇩1,B⇩2)"
by fast
moreover from this hd have
"binarize1 A ps0 (p#ps) = (A, [Nt B⇩1,Nt B⇩2]) # (l, r') # removeAll (l,r) ps0"
using lr_defs by auto
ultimately show ?thesis using Cons(2) lr_defs hd by fastforce
next
case tl
with lr_defs binarize1_rec_if_id_or_lt3
have "binarize1 A ps0 (p#ps) = binarize1 A ps0 ps" by blast
with Cons show ?thesis using lr_defs by (metis list.set_intros(2))
qed
qed simp
lemma binarizeStep_binarize1:
assumes
"A ∉ Nts (set ps) ∪ {S}"
"binarize1 A ps ps ≠ ps"
obtains B⇩1 B⇩2 where "binarizeStep A B⇩1 B⇩2 S (set ps) (set (binarize1 A ps ps))"
proof -
from binarize1_binarizes[OF assms(2)] obtain l r r' B⇩1 B⇩2 where
binarize1_defs:
"(l,r) ∈ set ps"
"length r > 2"
"replaceNts A r = (Some (B⇩1,B⇩2), r')"
"binarize1 A ps ps = (A, [Nt B⇩1,Nt B⇩2]) # (l, r') # removeAll (l,r) ps"
by metis
moreover from this obtain p s where "r = p@[Nt B⇩1, Nt B⇩2]@s" "r' = p@[Nt A]@s"
using replaceNts_replaces_pair by blast
ultimately have "binarizeStep A B⇩1 B⇩2 S (set ps) (set (binarize1 A ps ps))"
unfolding binarizeStep_def using assms(1) by auto
then show thesis using that by simp
qed
lemma binarize1_dec_badNtsCount:
assumes "binarize1 A ps ps ≠ ps"
"A ∉ Nts (set ps) ∪ {S}"
shows "badNtsCount (set (binarize1 A ps ps)) < badNtsCount (set ps)"
using lemma6_b assms binarizeStep_binarize1
by (metis list.set_finite)
lemma badNts_impl_binarize1_not_id_unif:
assumes "badNtsCount (set ps) = Suc n"
"uniform (set ps)"
shows "binarize1 A ps0 ps ≠ ps0"
using assms proof (induction ps arbitrary: n)
case (Cons p ps)
obtain l r where lr_def: "(l,r) = p" using old.prod.exhaust by metis
consider (no_badNts_hd) "badNtsCount (set [p]) = 0" |
(Suc_badNts_hd) m where "badNtsCount (set [p]) = Suc m"
using not0_implies_Suc by blast
then show ?case
proof cases
case no_badNts_hd
from Cons(3) have only_Nts: "length r = 1 ∨ (∀s∈(set r). ∃n. s = Nt n)"
unfolding uniform_def using lr_def
by (smt (verit, best) One_nat_def case_prodD destTm.cases length_Cons list.set_intros(1)
list.size(3))
have "length r < 3"
proof (rule ccontr)
assume "¬?thesis"
hence "length r ≥ 2" by simp
moreover with only_Nts have "∀s∈set r. ∃n. s = Nt n" by presburger
ultimately have "prodNts p ≠ 0" unfolding prodNts_def using lr_def
by (metis ‹¬ length r < 3› filter_True isNt_def le_imp_less_Suc not_numeral_le_zero numeral_2_eq_2
numeral_3_eq_3 snd_conv)
with no_badNts_hd show False by simp
qed
with lr_def have "binarize1 A ps0 (p#ps) = binarize1 A ps0 ps"
using binarize1_rec_if_id_or_lt3 by (metis old.prod.exhaust)
with Cons show ?thesis
by (metis (no_types, lifting) badNtsCountSet bot_nat_0.not_eq_extremum gr0_conv_Suc
list.set_finite list.set_intros(1,2) no_badNts_hd set_ConsD uniform_def)
next
case Suc_badNts_hd
with lr_def have all_Nts: "length r > 2 ∧ (∀s∈set r. ∃n. s = Nt n)"
using Cons.prems(2) uniform_badTmsCount[of "set (p # ps)"] noTms_prodTms0[of l r]
by(auto simp: prodNts_def length_Suc_conv isNt_def split: if_splits)
moreover obtain r' B⇩1 B⇩2 where replace_defs: "replaceNts A r = (Some (B⇩1,B⇩2), r')" "r' ≠ r"
proof -
from all_Nts obtain ns B⇩1 B⇩2 where "r = [Nt B⇩1, Nt B⇩2] @ ns"
by (metis (no_types, lifting) append_Cons append_Nil le_imp_less_Suc length_Suc_conv
linorder_not_less list.exhaust list.set_intros(1,2) list.size(3) not_less_iff_gr_or_eq
numeral_2_eq_2)
thus thesis using that by simp
qed
ultimately have "binarize1 A ps0 (p#ps) = (A, [Nt B⇩1, Nt B⇩2]) # (l,r') # removeAll (l,r) ps0"
(is "_ = ?rem")
using lr_def by fastforce
also have "... ≠ ps0"
proof
assume rem_eq: "... = ps0"
then obtain xs where "ps0 = (A, [Nt B⇩1, Nt B⇩2]) # (l,r') # xs" by metis
with rem_eq have "(l,r) = (l,r')" using set_removeAll
by (smt (verit, ccfv_SIG) Diff_disjoint insert_disjoint(2) length_Cons lessI not_add_less2
plus_1_eq_Suc removeAll.simps(2) removeAll_id)
with replace_defs show False by blast
qed
finally show ?thesis .
qed
qed simp
lemma uniform_binarize1:
fixes ps :: "('n::fresh0, 't) prods"
assumes ps_uniform: "uniform (set ps)"
shows "uniform (set( binarize1 A ps ps))"
proof -
consider (id) "binarize1 A ps ps = ps" | (not_id) "binarize1 A ps ps ≠ ps" by blast
then show ?thesis
proof cases
case not_id
from binarize1_binarizes[OF not_id] obtain l r r' B⇩1 B⇩2 where lr_defs:
"(l,r) ∈ set ps" "length r > 2" "replaceNts A r = (Some (B⇩1,B⇩2), r')"
"binarize1 A ps ps = (A,[Nt B⇩1, Nt B⇩2]) # (l,r') # removeAll (l,r) ps" by metis
moreover from ps_uniform have "uniform (set (removeAll (l,r) ps))"
unfolding uniform_def by simp
moreover have "uniform (set [(l,r')])"
proof -
from replaceNts_replaces_pair_Some[OF lr_defs(3)] obtain p q where
"r = p@[Nt B⇩1,Nt B⇩2]@q" "r' = p@[Nt A]@q" .
with lr_defs ps_uniform show ?thesis unfolding uniform_def by fastforce
qed
ultimately show ?thesis unfolding uniform_def by auto
qed (use assms in simp)
qed
function binarize :: "'n::fresh0 ⇒ ('n,'t) prods ⇒ ('n,'t) prods" where
"binarize S ps =
(let ps' = binarize1 (freshA ps S) ps ps in
if ps = ps' then ps else binarize S ps')"
by auto
termination
proof
let ?R = "measure (λ(S,ps). badNtsCount (set ps))"
show "wf ?R" by fast
fix S :: "'n::fresh0"
and ps ps' :: "('n,'t) prods"
let ?A = "freshA ps S"
assume ps'_def: "ps' = binarize1 ?A ps ps"
and neq: "ps ≠ ps'"
moreover with freshA_notin_set have "?A ∉ Nts (set ps) ∪ {S}" by blast
ultimately show "((S,ps'),(S,ps)) ∈ ?R"
using binarize1_dec_badNtsCount by force
qed
lemma binarize_binarizeStep:
"(λx y. ∃A B⇩1 B⇩2. binarizeStep A B⇩1 B⇩2 S x y)⇧*⇧* (set ps) (set (binarize S ps))"
proof (induction "badNtsCount (set ps)" arbitrary: ps rule: less_induct)
case less
let ?A = "freshA ps S"
have A_notin_nts: "?A ∉ Nts (set ps) ∪ {S}"
using freshA_notin_set by metis
consider (eq) "binarize1 ?A ps ps = ps" |
(neq) "binarize1 ?A ps ps ≠ ps" by blast
then show ?case
proof cases
case neq
let ?ps' = "binarize1 ?A ps ps"
from binarize1_dec_badNtsCount[OF neq A_notin_nts] have
"badNtsCount (set ?ps') < badNtsCount (set ps)" .
with less have "(λx y. ∃A B⇩1 B⇩2. binarizeStep A B⇩1 B⇩2 S x y)⇧*⇧* (set ?ps') (set (binarize S ?ps'))"
by blast
moreover from neq A_notin_nts obtain B⇩1 B⇩2 where "binarizeStep ?A B⇩1 B⇩2 S (set ps) (set ?ps')"
using binarizeStep_binarize1 by blast
ultimately show ?thesis
by (smt (verit, best) binarize.simps
converse_rtranclp_into_rtranclp)
qed simp
qed
lemma uniform_binarize:
fixes ps :: "('n::fresh0, 't) prods"
assumes ps_uniform: "uniform (set ps)"
shows "uniform (set (binarize S ps))"
using assms proof (induction "badNtsCount (set ps)" arbitrary: ps rule: less_induct)
case less
let ?A = "freshA ps S"
consider (rec) "binarize1 ?A ps ps ≠ ps" | (no_rec) "binarize1 ?A ps ps = ps" by blast
then show ?case
proof cases
case rec
let ?ps' = "binarize1 ?A ps ps"
from rec have "binarize S ps = binarize S ?ps'"
by (smt (verit) binarize.elims)
with less binarize1_dec_badNtsCount[OF rec] freshA_notin_set
uniform_binarize1
show ?thesis by metis
qed (use less in simp)
qed
lemma binary_binarize:
assumes binary: "binary (set ps)"
shows "binary (set (binarize S ps))"
proof -
from binary have "badNtsCount (set ps) = 0"
by (metis badNtsCountNot0 binary_def bot_nat_0.not_eq_extremum leD le_imp_less_Suc numeral_2_eq_2
numeral_3_eq_3 split_conv list.set_finite)
hence "binarize S ps = ps" using binarize1_dec_badNtsCount freshA_notin_set
by (smt (verit, best) binarize.simps bot_nat_0.extremum_strict)
with assms show ?thesis by argo
qed
lemma binarize_binary_if_uniform:
fixes ps :: "('n::fresh0, 't) prods"
assumes uniform: "uniform (set ps)"
shows "binary (set (binarize S ps))"
proof -
consider (bin) "binary (set ps)" | (not_bin) "¬binary (set ps)" by blast
then show ?thesis
proof cases
case bin
then show ?thesis using binary_binarize by blast
next
case not_bin
with uniform binary_badNtsCount obtain n where Suc_badNts: "badNtsCount (set ps) = Suc n"
using not0_implies_Suc by blast
with uniform show ?thesis
proof (induction "badNtsCount (set ps)" arbitrary: ps n rule: less_induct)
case less
let ?A = "freshA ps S"
from less badNts_impl_binarize1_not_id_unif have "binarize1 ?A ps ps ≠ ps"
by fastforce
hence badNtsCount_dec: "badNtsCount (set (binarize1 ?A ps ps)) < badNtsCount (set ps)"
(is "badNtsCount ?ps' < _")
using freshA_notin_set binarize1_dec_badNtsCount by metis
consider (zero_badNts) "badNtsCount ?ps' = 0" | (Suc_badNts) m where "badNtsCount ?ps' = Suc m"
using not0_implies_Suc by blast
then show ?case
proof cases
case zero_badNts
moreover from less.prems(1) uniform_binarize1 have "uniform ?ps'"
by blast
ultimately show ?thesis using binary_badNtsCount
by (smt (verit, ccfv_threshold) List.finite_set binarize.elims binary_binarize
freshA_def less.prems(2))
next
case Suc_badNts
moreover from less.prems(1) uniform_binarize1 have unif: "uniform ?ps'"
by blast
ultimately show ?thesis using less(1)[OF badNtsCount_dec _ Suc_badNts]
by (smt (verit, best) binarize.simps freshA_def less.prems(2))
qed
qed
qed
qed
subsection ‹Conversion to CNF›
text ‹Alternative form more similar to the one Jana Hofmann used:›
lemma CNF_eq: "CNF P ⟷ (uniform P ∧ binary P ∧ Eps_free P ∧ Unit_free P)"
proof
assume "CNF P"
hence "Eps_free P"
unfolding CNF_def Eps_free_def by fastforce
moreover have "Unit_free P"
using ‹CNF P› unfolding CNF_def Unit_free_def isNt_def isTm_def by fastforce
moreover have "uniform P"
proof -
have "∀(A,α) ∈ P. (∃B C. α = [Nt B, Nt C]) ∨ (∃t. α = [Tm t])"
using ‹CNF P› unfolding CNF_def.
hence "∀(A, α) ∈ P. (∀N ∈ set α. isNt N) ∨ (∃t. α = [Tm t])"
unfolding isNt_def by fastforce
hence "∀(A, α) ∈ P. (∄t. Tm t ∈ set α) ∨ (∃t. α = [Tm t])"
by (auto simp: isNt_def)
thus "uniform P"
unfolding uniform_def.
qed
moreover have "binary P"
using ‹CNF P› unfolding binary_def CNF_def by auto
ultimately show "uniform P ∧ binary P ∧ Eps_free P ∧ Unit_free P"
by blast
next
assume assm: "uniform P ∧ binary P ∧ Eps_free P ∧ Unit_free P"
have"∀p ∈ P. (∃B C. (snd p) = [Nt B, Nt C]) ∨ (∃t. (snd p) = [Tm t])"
proof
fix p assume "p ∈ P"
obtain A α where Aα: "(A, α) = p"
by (metis prod.exhaust_sel)
hence "length α = 1 ∨ length α = 2"
using assm ‹p ∈ P› order_neq_le_trans unfolding binary_def Eps_free_def by fastforce
hence "(∃B C. (snd p) = [Nt B, Nt C]) ∨ (∃t. α = [Tm t])"
proof
assume "length α = 1"
hence "∃S. α = [S]"
by (simp add: length_Suc_conv)
moreover have "∄N. α = [Nt N]"
using assm Aα ‹p ∈ P› unfolding Unit_free_def by blast
ultimately show ?thesis by (metis sym.exhaust)
next
assume "length α = 2"
obtain B C where BC: "α = [B, C]"
using ‹length α = 2› by (metis One_nat_def Suc_1 diff_Suc_1 length_0_conv length_Cons neq_Nil_conv)
have "(∄t. Tm t ∈ set α)"
using ‹length α = 2› assm Aα ‹p ∈ P› unfolding uniform_def by auto
hence "(∀N ∈ set α. ∃A. N = Nt A)"
by (metis sym.exhaust)
hence "∃B' C'. B = Nt B' ∧ C = Nt C'"
using BC by simp
thus ?thesis using Aα BC by auto
qed
thus "(∃B C. (snd p) = [Nt B, Nt C]) ∨ (∃t. (snd p) = [Tm t])" using Aα by auto
qed
thus "CNF P" by (auto simp: CNF_def)
qed
definition cnf_of :: "('n::fresh0, 't) prods ⇒ 'n ⇒ ('n,'t) prods" where
"cnf_of ps S = (binarize S ∘ uniformize S ∘ unit_elim ∘ eps_elim) ps"
theorem cnf_of_CNF_Lang:
fixes ps :: "('n::fresh0, 't) prods"
shows "CNF (set(cnf_of ps S))" "lang (cnf_of ps S) S = lang ps S - {[]}"
proof -
let ?ps1 = "eps_elim ps" let ?ps2 = "unit_elim ?ps1"
let ?ps3 = "uniformize S ?ps2" let ?ps4 = "binarize S ?ps3"
have "eps_free ?ps1" by (rule eps_free_eps_elim)
hence "eps_free ?ps2" by (meson unit_elim_correct Unit_elim_rel_Eps_free)
have "Unit_free(set ?ps2)" by (metis unit_elim_correct Unit_free_if_Unit_elim_rel)
have "eps_free ?ps3" by(rule eps_free_uniformize[OF ‹eps_free ?ps2›])
have "Unit_free(set ?ps3)" by (rule Unit_free_uniformize[OF ‹Unit_free(set ?ps2)›])
have "uniform (set ?ps3)" by (rule uniform_uniformize)
have "eps_free ?ps4"
using binarize_binarizeStep Eps_free_binarizeStepRtc[OF _ ‹eps_free ?ps3›] by meson
moreover have "Unit_free(set ?ps4)"
using binarize_binarizeStep Unit_free_binarizeStepRtc[OF _ ‹Unit_free(set ?ps3)›] by meson
moreover have "uniform (set ?ps4)"
by(rule uniform_binarize[OF ‹uniform (set ?ps3)›])
moreover have "binary (set ?ps4)"
by (rule binarize_binary_if_uniform[OF ‹uniform (set ?ps3)›])
ultimately show "CNF (set(cnf_of ps S))" unfolding CNF_eq cnf_of_def
by(simp only: Let_def comp_def)
have "lang ?ps4 S = lang ?ps3 S" using Lang_binarizeStepRtc[OF binarize_binarizeStep, symmetric] .
also have "… = lang ?ps2 S" by (simp add: lang_uniformize)
also have "… = lang ?ps1 S" by (rule lang_unit_elim)
also have "… = lang ps S - {[]}" by (rule lang_eps_elim)
finally show "lang (cnf_of ps S) S = lang ps S - {[]}"
by (simp add: cnf_of_def)
qed
corollary cnf_exists:
fixes P :: "('n::fresh0,'t) Prods"
assumes "finite P"
shows "∃P'::('n,'t)Prods. finite P' ∧ CNF P' ∧ Lang P' S = Lang P S - {[]}"
proof -
obtain ps where "P = set ps" by (metis finite_list[OF assms])
with cnf_of_CNF_Lang[of ps] show ?thesis by blast
qed
text ‹Some helpful properties:›
lemma cnf_length_derive:
assumes "CNF P" "P ⊢ [Nt S] ⇒* α"
shows "length α ≥ 1"
using assms CNF_eq Eps_free_derives_Nil length_greater_0_conv less_eq_Suc_le by auto
lemma cnf_length_derive2:
assumes "CNF P" "P ⊢ [Nt A, Nt B] ⇒* α"
shows "length α ≥ 2"
proof -
obtain u v where uv: "P ⊢ [Nt A] ⇒* u ∧ P ⊢ [Nt B] ⇒* v ∧ α = u @ v"
using assms(2) derives_append_decomp[of P ‹[Nt A]› ‹[Nt B]› α] by auto
hence "length u ≥ 1 ∧ length v ≥ 1"
using cnf_length_derive[OF assms(1)] by blast
thus ?thesis
using uv by simp
qed
lemma cnf_single_derive:
assumes "CNF P" "P ⊢ [Nt S] ⇒* [Tm t]"
shows "(S, [Tm t]) ∈ P"
proof -
obtain α where α: "P ⊢ [Nt S] ⇒ α ∧ P ⊢ α ⇒* [Tm t]"
using converse_rtranclpE[OF assms(2)] by auto
hence 1: "(S, α) ∈ P"
by (simp add: derive_singleton)
have "∄A B. α = [Nt A, Nt B]"
proof (rule ccontr)
assume "¬ (∄A B. α = [Nt A, Nt B])"
from this obtain A B where AB: "α = [Nt A, Nt B]"
by blast
have "∀w. P ⊢ [Nt A, Nt B] ⇒* w ⟶ length w ≥ 2"
using cnf_length_derive2[OF assms(1)] by simp
moreover have "length [Tm t] = 1"
by simp
ultimately show False
using α AB by auto
qed
from this obtain a where "α = [Tm a]"
using 1 assms(1) unfolding CNF_def by auto
hence "t = a"
using α by (simp add: derives_Tm_Cons)
thus ?thesis
using 1 ‹α = [Tm a]› by blast
qed
lemma cnf_word:
assumes "CNF P" "P ⊢ [Nt S] ⇒* map Tm w"
and "length w ≥ 2"
shows "∃A B u v. (S, [Nt A, Nt B]) ∈ P ∧ P ⊢ [Nt A] ⇒* map Tm u ∧ P ⊢ [Nt B] ⇒* map Tm v ∧ u@v = w ∧ u ≠ [] ∧ v ≠ []"
proof -
have 1: "(S, map Tm w) ∉ P"
using assms(1) assms(3) unfolding CNF_def by auto
have "∃α. P ⊢ [Nt S] ⇒ α ∧ P ⊢ α ⇒* map Tm w"
using converse_rtranclpE[OF assms(2)] by auto
from this obtain α where α: "(S, α) ∈ P ∧ P ⊢ α ⇒* map Tm w"
by (auto simp: derive_singleton)
hence "(∄t. α = [Tm t])"
using 1 derives_Tm_Cons[of P] derives_from_empty by auto
hence "∃A B. P ⊢ [Nt S] ⇒ [Nt A, Nt B] ∧ P ⊢ [Nt A, Nt B] ⇒* map Tm w"
using assms(1) α derive_singleton[of P ‹Nt S› α] unfolding CNF_def by fast
from this obtain A B where AB: "(S, [Nt A, Nt B]) ∈ P ∧ P ⊢ [Nt A, Nt B] ⇒* map Tm w"
using derive_singleton[of P ‹Nt S›] by blast
hence "¬(P ⊢ [Nt A] ⇒* []) ∧ ¬(P ⊢ [Nt B] ⇒* [])"
using assms(1) CNF_eq Eps_free_derives_Nil by blast
from this obtain u v where uv: "P ⊢ [Nt A] ⇒* u ∧ P ⊢ [Nt B] ⇒* v ∧ u@v = map Tm w ∧ u ≠ [] ∧ v ≠ []"
using AB derives_append_decomp[of P ‹[Nt A]› ‹[Nt B]› ‹map Tm w›] by force
moreover have "∃u' v'. u = map Tm u' ∧ v = map Tm v'"
using uv map_eq_append_conv[of Tm w u v] by auto
ultimately show ?thesis
using AB append_eq_map_conv[of u v Tm w] list.simps(8)[of Tm] by fastforce
qed
end