Theory Context_Free_Language
section "Context-Free Languages"
theory Context_Free_Language
imports
"Regular-Sets.Regular_Set"
Renaming_CFG
Disjoint_Union_CFG
begin
subsection ‹Auxiliary: ‹lfp› as Kleene Fixpoint›
definition omega_chain :: "(nat ⇒ ('a::complete_lattice)) ⇒ bool" where
"omega_chain C = (∀i. C i ≤ C(Suc i))"
definition omega_cont :: "(('a::complete_lattice) ⇒ ('b::complete_lattice)) ⇒ bool" where
"omega_cont f = (∀C. omega_chain C ⟶ f(SUP n. C n) = (SUP n. f(C n)))"
lemma omega_chain_mono: "omega_chain C ⟹ i ≤ j ⟹ C i ≤ C j"
unfolding omega_chain_def using lift_Suc_mono_le[of C]
by(induction "j-i" arbitrary: i j)auto
lemma mono_if_omega_cont: fixes f :: "('a::complete_lattice) ⇒ ('b::complete_lattice)"
assumes "omega_cont f" shows "mono f"
proof
fix a b :: "'a" assume "a ≤ b"
let ?C = "λn::nat. if n=0 then a else b"
have *: "omega_chain ?C" using ‹a ≤ b› by(auto simp: omega_chain_def)
have "f a ≤ sup (f a) (SUP n. f(?C n))" by(rule sup.cobounded1)
also have "… = sup (f(?C 0)) (SUP n. f(?C n))" by (simp)
also have "… = (SUP n. f (?C n))" using SUP_absorb[OF UNIV_I] .
also have "… = f (SUP n. ?C n)"
using assms * by (simp add: omega_cont_def del: if_image_distrib)
also have "f (SUP n. ?C n) = f b"
using ‹a ≤ b› by (auto simp add: gt_ex sup.absorb2 split: if_splits)
finally show "f a ≤ f b" .
qed
lemma omega_chain_iterates: fixes f :: "('a::complete_lattice) ⇒ 'a"
assumes "mono f" shows "omega_chain(λn. (f^^n) bot)"
proof-
have "(f ^^ n) bot ≤ (f ^^ Suc n) bot" for n
proof (induction n)
case 0 show ?case by simp
next
case (Suc n) thus ?case using assms by (auto simp: mono_def)
qed
thus ?thesis by(auto simp: omega_chain_def assms)
qed
theorem Kleene_lfp:
assumes "omega_cont f" shows "lfp f = (SUP n. (f^^n) bot)" (is "_ = ?U")
proof(rule Orderings.antisym)
from assms mono_if_omega_cont
have mono: "(f ^^ n) bot ≤ (f ^^ Suc n) bot" for n
using funpow_decreasing [of n "Suc n"] by auto
show "lfp f ≤ ?U"
proof (rule lfp_lowerbound)
have "f ?U = (SUP n. (f^^Suc n) bot)"
using omega_chain_iterates[OF mono_if_omega_cont[OF assms]] assms
by(simp add: omega_cont_def)
also have "… = ?U" using mono by(blast intro: SUP_eq)
finally show "f ?U ≤ ?U" by simp
qed
next
have "(f^^n) bot ≤ p" if "f p ≤ p" for n p
proof -
show ?thesis
proof(induction n)
case 0 show ?case by simp
next
case Suc
from monoD[OF mono_if_omega_cont[OF assms] Suc] ‹f p ≤ p›
show ?case by simp
qed
qed
thus "?U ≤ lfp f"
using lfp_unfold[OF mono_if_omega_cont[OF assms]]
by (simp add: SUP_le_iff)
qed
subsection ‹Basic Definitions›
text ‹This definition depends on the type of nonterminals of the grammar.›
definition CFL :: "'n itself ⇒ 't list set ⇒ bool" where
"CFL (TYPE('n)) L = (∃P S::'n. L = Lang P S ∧ finite P)"
text ‹Ideally one would existentially quantify over ‹'n› on the right-hand side, but we cannot
quantify over types in HOL. But we can prove that the type is irrelevant because we can always
use another type via renaming.›
lemma arb_inj_on_finite_infinite: "finite(A :: 'a set) ⟹ ∃f :: 'a ⇒ 'b::infinite. inj_on f A"
by (meson arb_finite_subset card_le_inj infinite_imp_nonempty)
lemma CFL_change_Nt_type: assumes "CFL TYPE('t1::infinite) L" shows "CFL TYPE('t2::infinite) L"
proof -
from assms obtain P and S :: 't1 where "L = Lang P S" and "finite P"
unfolding CFL_def by blast
have fin: "finite(Nts P ∪ {S})" using ‹finite P›
by(simp add: finite_Nts)
obtain f :: "'t1 ⇒ 't2" where "inj_on f (Nts P ∪ {S})"
using arb_inj_on_finite_infinite[OF fin] by blast
from Lang_rename_Prods[OF this] ‹L = _› have "Lang (rename_Prods f P) (f S) = L"
by blast
moreover have "finite(rename_Prods f P)" using ‹finite P›
by blast
ultimately show ?thesis unfolding CFL_def by blast
qed
text ‹For hiding the infinite type of nonterminals:›
abbreviation cfl :: "'a lang ⇒ bool" where
"cfl L ≡ CFL (TYPE(nat)) L"
subsection ‹Closure Properties›
lemma CFL_Un_closed:
assumes "CFL TYPE('n1) L1" "CFL TYPE('n2) L2"
shows "CFL TYPE(('n1+'n2)option) (L1 ∪ L2)"
proof -
from assms obtain P1 P2 and S1 :: 'n1 and S2 :: 'n2
where L: "L1 = Lang P1 S1" "L2 = Lang P2 S2" and fin: "finite P1" "finite P2"
unfolding CFL_def by blast
let ?f1 = "Some o (Inl:: 'n1 ⇒ 'n1 + 'n2)"
let ?f2 = "Some o (Inr:: 'n2 ⇒ 'n1 + 'n2)"
let ?P1 = "rename_Prods ?f1 P1"
let ?P2 = "rename_Prods ?f2 P2"
let ?S1 = "?f1 S1"
let ?S2 = "?f2 S2"
let ?P = "{(None, [Nt ?S1]), (None, [Nt ?S2])} ∪ (?P1 ∪ ?P2)"
have "Lang ?P None = Lang ?P1 ?S1 ∪ Lang ?P2 ?S2"
by (rule Lang_disj_Un2)(auto simp: Nts_Un in_Nts_rename_Prods)
moreover have "… = Lang P1 S1 ∪ Lang P2 S2"
proof -
have "Lang ?P1 ?S1 = L1" unfolding ‹L1 = _›
by (meson Lang_rename_Prods comp_inj_on inj_Inl inj_Some)
moreover have "Lang ?P2 ?S2 = L2" unfolding ‹L2 = _›
by (meson Lang_rename_Prods comp_inj_on inj_Inr inj_Some)
ultimately show ?thesis using L by argo
qed
moreover have "finite ?P" using fin by auto
ultimately show ?thesis
unfolding CFL_def using L by blast
qed
lemma CFL_concat_closed:
assumes "CFL TYPE('n1) L1" and "CFL TYPE('n2) L2"
shows "CFL TYPE(('n1 + 'n2) option) (L1 @@ L2)"
proof -
obtain P1 S1 where L1_def: "L1 = Lang P1 (S1::'n1)" "finite P1"
using assms(1) CFL_def[of L1] by auto
obtain P2 S2 where L2_def: "L2 = Lang P2 (S2::'n2)" "finite P2"
using assms(2) CFL_def[of L2] by auto
let ?f1 = "Some o (Inl:: 'n1 ⇒ 'n1 + 'n2)"
let ?f2 = "Some o (Inr:: 'n2 ⇒ 'n1 + 'n2)"
let ?P1 = "rename_Prods ?f1 P1"
let ?P2 = "rename_Prods ?f2 P2"
let ?S1 = "?f1 S1"
let ?S2 = "?f2 S2"
let ?S = "None :: ('n1+'n2)option"
let ?P = "{(None, [Nt ?S1, Nt ?S2])} ∪ (?P1 ∪ ?P2)"
have "inj ?f1" by (simp add: inj_on_def)
then have L1r_def: "L1 = Lang ?P1 ?S1"
using L1_def Lang_rename_Prods[of ?f1 P1 S1] inj_on_def by force
have "inj ?f2" by (simp add: inj_on_def)
then have L2r_def: "L2 = Lang ?P2 ?S2"
using L2_def Lang_rename_Prods[of ?f2 P2 S2] inj_on_def by force
have "Lang ?P ?S = L1 @@ L2" unfolding L1r_def L2r_def
by(rule Lang_concat_disj) (auto simp add: disjoint_iff in_Nts_rename_Prods)
moreover have "finite ?P" using ‹finite P1› ‹finite P2› by auto
ultimately show ?thesis unfolding CFL_def by blast
qed
subsection ‹CFG as an Equation System›
text ‹A CFG can be viewed as a system of equations. The least solution is denoted by ‹Lang_lfp›.›
definition inst_sym :: "('n ⇒ 't lang) ⇒ ('n, 't) sym ⇒ 't lang" where
"inst_sym L s = (case s of Tm a ⇒ {[a]} | Nt A ⇒ L A)"
definition concats :: "'a lang list ⇒ 'a lang" where
"concats Ls = foldr (@@) Ls {[]}"
definition inst_syms :: "('n ⇒ 't lang) ⇒ ('n, 't) syms ⇒ 't lang" where
"inst_syms L w = concats (map (inst_sym L) w)"
definition subst_lang :: "('n,'t)Prods ⇒ ('n ⇒ 't lang) ⇒ ('n ⇒ 't lang)" where
"subst_lang P L = (λA. ⋃w ∈ Rhss P A. inst_syms L w)"
definition Lang_lfp :: "('n, 't) Prods ⇒ 'n ⇒ 't lang" where
"Lang_lfp P = lfp (subst_lang P)"
text ‹Now we show that this ‹lfp› is a Kleene fixpoint.›
lemma inst_sym_Sup_range: "inst_sym (Sup(range F)) = (λs. UN i. inst_sym (F i) s)"
by(auto simp: inst_sym_def fun_eq_iff split: sym.splits)
lemma foldr_map_mono: "F ≤ G ⟹ foldr (@@) (map F xs) Ls ⊆ foldr (@@) (map G xs) Ls"
by(induction xs)(auto simp add: le_fun_def subset_eq)
lemma inst_sym_mono: "F ≤ G ⟹ inst_sym F s ⊆ inst_sym G s"
by (auto simp add: inst_sym_def le_fun_def subset_iff split: sym.splits)
lemma foldr_conc_map_inst_sym:
assumes "omega_chain L"
shows "foldr (@@) (map (λs. ⋃i. inst_sym (L i) s) xs) Ls = (⋃i. foldr (@@) (map (inst_sym (L i)) xs) Ls)"
proof(induction xs)
case Nil
then show ?case by simp
next
case (Cons a xs)
show ?case (is "?l = ?r")
proof
show "?l ⊆ ?r"
proof
fix w assume "w ∈ ?l"
with Cons obtain u v i j
where "w = u @ v" "u ∈ inst_sym (L i) a" "v ∈ foldr (@@) (map (inst_sym (L j)) xs) Ls" by(auto)
then show "w ∈ ?r"
using omega_chain_mono[OF assms, of i "max i j"] omega_chain_mono[OF assms, of j "max i j"]
inst_sym_mono foldr_map_mono[of "inst_sym (L j)" "inst_sym (L (max i j))" xs Ls] concI
unfolding le_fun_def by(simp) blast
qed
next
show "?r ⊆ ?l" using Cons by(fastforce)
qed
qed
lemma omega_cont_Lang_lfp: "omega_cont (subst_lang P)"
unfolding omega_cont_def subst_lang_def
proof (safe)
fix L :: "nat ⇒ 'a ⇒ 'b lang"
assume o: "omega_chain L"
show "(λA. ⋃ (inst_syms (Sup (range L)) ` Rhss P A)) = (SUP i. (λA. ⋃ (inst_syms (L i) ` Rhss P A)))"
(is "(λA. ?l A) = (λA. ?r A)")
proof
fix A :: 'a
have "?l A = ⋃(⋃i. (inst_syms (L i) ` Rhss P A))"
by(auto simp: inst_syms_def inst_sym_Sup_range concats_def foldr_conc_map_inst_sym[OF o])
also have "… = ?r A"
by(auto)
finally show "?l A = ?r A" .
qed
qed
theorem Lang_lfp_SUP: "Lang_lfp P = (SUP n. ((subst_lang P)^^n) (λA. {}))"
using Kleene_lfp[OF omega_cont_Lang_lfp] unfolding Lang_lfp_def bot_fun_def by blast
subsection ‹‹Lang_lfp = Lang››
text ‹We prove that the fixpoint characterization of the language defined by a CFG is equivalent
to the standard language definition via derivations. Both directions are proved separately›
lemma inst_syms_mono: "(⋀A. R A ⊆ R' A) ⟹ w ∈ inst_syms R α ⟹ w ∈ inst_syms R' α"
unfolding inst_syms_def concats_def
by (metis (no_types, lifting) foldr_map_mono in_mono inst_sym_mono le_fun_def)
lemma omega_cont_Lang_lfp_iterates: "omega_chain (λn. ((subst_lang P)^^n) (λA. {}))"
using omega_chain_iterates[OF mono_if_omega_cont, OF omega_cont_Lang_lfp]
unfolding bot_fun_def by blast
lemma in_subst_langD_inst_syms: "w ∈ subst_lang P L A ⟹ ∃α. (A,α)∈P ∧ w ∈ inst_syms L α"
unfolding subst_lang_def inst_syms_def Rhss_def by (auto split: prod.splits)
lemma foldr_conc_conc: "foldr (@@) xs {[]} @@ A = foldr (@@) xs A"
by (induction xs)(auto simp: conc_assoc)
lemma derives_if_inst_syms:
"w ∈ inst_syms (λA. {w. P ⊢ [Nt A] ⇒* map Tm w}) α ⟹ P ⊢ α ⇒* map Tm w"
proof (induction α arbitrary: w)
case Nil
then show ?case unfolding inst_syms_def concats_def by(auto)
next
case (Cons s α)
show ?case
proof (cases s)
case (Nt A)
then show ?thesis using Cons
unfolding inst_syms_def concats_def inst_sym_def by(fastforce simp: derives_Cons_decomp)
next
case (Tm a)
then show ?thesis using Cons
unfolding inst_syms_def concats_def inst_sym_def by(auto simp:derives_Tm_Cons)
qed
qed
lemma derives_if_in_subst_lang: "w ∈ ((subst_lang P)^^n) (λA. {}) A ⟹ P ⊢ [Nt A] ⇒* map Tm w"
proof(induction n arbitrary: w A)
case 0
then show ?case by simp
next
case (Suc n)
let ?L = "((subst_lang P)^^n) (λA. {})"
have *: "?L A ⊆ {w. P ⊢ [Nt A] ⇒* map Tm w}" for A
using Suc.IH by blast
obtain α where α: "(A,α) ∈ P" "w ∈ inst_syms ?L α"
using in_subst_langD_inst_syms[OF Suc.prems[simplified]] by blast
show ?case using α(1) derives_if_inst_syms[OF inst_syms_mono[OF *, of _ "λA. A", OF α(2)]]
by (simp add: derives_Cons_rule)
qed
lemma derives_if_Lang_lfp: "w ∈ Lang_lfp P A ⟹ P ⊢ [Nt A] ⇒* map Tm w"
unfolding Lang_lfp_SUP using derives_if_in_subst_lang
by (metis (mono_tags, lifting) SUP_apply UN_E)
lemma Lang_lfp_subset_Lang: "Lang_lfp P A ⊆ Lang P A"
unfolding Lang_def by(blast intro:derives_if_Lang_lfp)
text ‹The other direction:›
lemma inst_syms_decomp:
"⟦ ∀i < length ws. ws ! i ∈ inst_sym L (α ! i); length α = length ws ⟧
⟹ concat ws ∈ inst_syms L α"
proof (induction ws arbitrary: α)
case Nil
then show ?case unfolding inst_syms_def concats_def by simp
next
case (Cons w ws)
then obtain α1 αr where *: "α = α1 # αr" by (metis Suc_length_conv)
with Cons.prems(2) have "length αr = length ws" by simp
moreover from Cons.prems * have "∀i<length ws. ws ! i ∈ inst_sym L (αr ! i)" by auto
ultimately have "concat ws ∈ inst_syms L αr" using Cons.IH by blast
moreover from Cons.prems * have "w ∈ inst_sym L α1" by fastforce
ultimately show ?case unfolding inst_syms_def concats_def using * by force
qed
lemma Lang_lfp_if_derives_aux: "P ⊢ [Nt A] ⇒(n) map Tm w ⟹ w ∈ ((subst_lang P)^^n) (λA. {}) A"
proof(induction n arbitrary: w A rule: less_induct)
case (less n)
show ?case
proof (cases n)
case 0 then show ?thesis using less.prems by auto
next
case (Suc m)
then obtain α where α_intro: "(A,α) ∈ P" "P ⊢ α ⇒(m) map Tm w"
by (metis deriven_start1 less.prems nat.inject)
then obtain ws ms where *:
"w = concat ws ∧ length α = length ws ∧ length α = length ms
∧ sum_list ms = m ∧ (∀i < length ws. P ⊢ [α ! i] ⇒(ms ! i) map Tm (ws ! i))"
using derive_decomp_Tm by metis
have "∀i < length ws. ws ! i ∈ inst_sym (λA. ((subst_lang P)^^m) (λA. {}) A) (α ! i)"
proof (rule allI | rule impI)+
fix i
show "i < length ws ⟹ ws ! i ∈ inst_sym ((subst_lang P ^^ m) (λA. {})) (α ! i)"
unfolding inst_sym_def
proof (induction "α ! i")
case (Nt B)
with * have **: "ms ! i ≤ m"
by (metis elem_le_sum_list)
with Suc have "ms ! i < n" by force
from less.IH[OF this, of B "ws ! i"] Nt *
have "ws ! i ∈ (subst_lang P ^^ (ms ! i)) (λA. {}) B" by fastforce
with omega_chain_mono[OF omega_cont_Lang_lfp_iterates, OF **]
have "ws ! i ∈ (subst_lang P ^^ m) (λA. {}) B" by (metis le_funD subset_iff)
with Nt show ?case by (metis sym.simps(5))
next
case (Tm a)
with * have "P ⊢ map Tm [a] ⇒(ms ! i) map Tm (ws ! i)" by fastforce
then have "ws ! i ∈ {[a]}" using deriven_from_TmsD by fastforce
with Tm show ?case by (metis sym.simps(6))
qed
qed
from inst_syms_decomp[OF this] * have "w ∈ inst_syms ((subst_lang P ^^ m) (λA. {})) α" by argo
with α_intro have "w ∈ (subst_lang P) (λA. (subst_lang P ^^ m) (λA. {}) A) A"
unfolding subst_lang_def Rhss_def by force
with Suc show ?thesis by force
qed
qed
lemma Lang_lfp_if_derives: "P ⊢ [Nt A] ⇒* map Tm w ⟹ w ∈ Lang_lfp P A"
proof -
assume "P ⊢ [Nt A] ⇒* map Tm w"
then obtain n where "P ⊢ [Nt A] ⇒(n) map Tm w" by (meson rtranclp_power)
from Lang_lfp_if_derives_aux[OF this] have "w ∈ ((subst_lang P)^^n) (λA. {}) A" by argo
with Lang_lfp_SUP show "w ∈ Lang_lfp P A" by (metis (mono_tags, lifting) SUP_apply UNIV_I UN_iff)
qed
theorem Lang_lfp_eq_Lang: "Lang_lfp P A = Lang P A"
unfolding Lang_def by(blast intro: Lang_lfp_if_derives derives_if_Lang_lfp)
end