Theory Finiteness
section‹Finiteness of Context-Free Languages›
theory Finiteness
imports Applications
begin
text‹
Another interesting application, particularly for context-free grammars in
chomsky normal-form (CNF), is the detection of ``cyclic'' non-terminals.
›
text‹
Particularly, if all non-terminals are reachable (can be reached from the starting symbol)
and productive (i.e., a terminal word can be derived from each symbol), the following holds:
›
text‹‹L(C) = ∞ ⟷ ∃X α β. X ⇒⇧* αXβ ∧ aβ ≠ ε››
text‹
Since we have a decision-procedure for derivability, we can work towards also automating this process.
However, to keep proofs simple, this theory only focuses on grammars in CNF, meaning a conversion
is required a priori.
›
subsection‹Preliminaries and Assumptions›
locale CFG =
fixes P :: "('n, 't) Prods" and S :: 'n
assumes cnf: "⋀p. p ∈ P ⟹ (∃A a. p = (A, [Tm a]) ∨ (∃A B C. p = (A, [Nt B, Nt C])))"
begin
definition is_useful_all :: "bool" where
"is_useful_all ≡ (∀X::'n. is_useful P S X)"
definition is_non_nullable_all :: "bool" where
"is_non_nullable_all ≡ (∀X::'n. ¬ is_nullable P X)"
lemma derives_concat:
assumes "P ⊢ X⇩1 ⇒* w⇩1" and "P ⊢ X⇩2 ⇒* w⇩2"
shows "P ⊢ (X⇩1@X⇩2) ⇒* (w⇩1@w⇩2)"
using assms derives_append_decomp by blast
lemma derives_split:
assumes "P ⊢ X ⇒* w"
shows "∃X⇩1 X⇩2 w⇩1 w⇩2. X = X⇩1@X⇩2 ∧ w = w⇩1@w⇩2 ∧ P ⊢ X⇩1 ⇒* w⇩1 ∧ P ⊢ X⇩2 ⇒* w⇩2"
using assms by blast
lemma derives_step:
assumes "P ⊢ X ⇒* (α@w⇩1@β)" and "P ⊢ w⇩1 ⇒* w⇩2"
shows "P ⊢ X ⇒* (α@w⇩2@β)"
proof -
have "P ⊢ w⇩1@β ⇒* w⇩2@β"
using assms(2) by (simp add: derives_concat)
then have "P ⊢ α@w⇩1@β ⇒* α@w⇩2@β"
by (simp add: derives_concat)
then show ?thesis
using assms(1) by simp
qed
lemma is_useful_all_derive:
assumes "is_useful_all"
shows "∃w. P ⊢ xs ⇒* map Tm w"
using assms proof (induction xs)
case Nil
moreover have "P ⊢ [] ⇒* map Tm []"
by simp
ultimately show ?case
by (elim exI)
next
case (Cons a xs)
then obtain w' where w'_def: "P ⊢ xs ⇒* map Tm w'"
by blast
have "∃w. P ⊢ [a] ⇒* map Tm w"
proof (cases a)
case (Nt X)
then have "Lang P X ≠ {}"
using Cons(2) by (simp add: is_useful_all_def is_useful_def)
then show ?thesis
by (simp add: Nt Lang_def)
next
case (Tm c)
then have "P ⊢ [Tm c] ⇒* map Tm [c]"
by simp
then show ?thesis
using Tm by blast
qed
then obtain w where w_def: "P ⊢ [a] ⇒* map Tm w"
by blast
from w_def w'_def have "P ⊢ (a#xs) ⇒* map Tm (w@w')"
using derives_concat by fastforce
then show ?case
by blast
qed
lemma is_non_nullable_all_derive:
assumes "is_non_nullable_all" and "P ⊢ xs ⇒* w"
shows "xs = [] ⟷ w = []"
proof -
have "⋀X. ¬ P ⊢ [Nt X] ⇒* []"
using assms(1) by (simp add: is_non_nullable_all_def is_nullable_def)
moreover have "⋀c. ¬ P ⊢ [Tm c] ⇒* []"
by simp
ultimately have nonNullAll: "⋀x. ¬ P ⊢ [x] ⇒* []"
using sym.exhaust by metis
have thm1: "xs = [] ⟹ w = []"
using assms(2) derives_from_empty by blast
have thm2: "xs ≠ [] ⟹ w ≠ []"
proof
assume "xs ≠ []"
then obtain x xs' where "xs = x#xs'"
using list.exhaust by blast
moreover have "P ⊢ ([x]@xs') ⇒* [] ⟹ (P ⊢ [x] ⇒* [] ∧ P ⊢ xs' ⇒* [])"
using derives_split by (metis Nil_is_append_conv derives_append_decomp)
moreover have "¬ P ⊢ [x] ⇒* []"
by (simp add: nonNullAll)
ultimately show "w = [] ⟹ False"
using assms(2) by simp
qed
show ?thesis
using thm1 thm2 by blast
qed
subsection‹Criterion of Finiteness›
text‹
Finally, we introduce the definition @{term is_infinite}, which instead of making use
of the language set, uses the criterion introduced above.
›
definition is_reachable_step :: "'n ⇒ 'n ⇒ bool" (infix "→⇧?" 80) where
"(X →⇧? Y) ≡ (∃α β. P ⊢ [Nt X] ⇒* (α@[Nt Y]@β) ∧ α@β ≠ [])"
definition is_infinite :: "bool" where
"is_infinite ≡ (∃X. X →⇧? X)"
fun is_infinite_derives :: "'n ⇒ ('n, 't) sym list ⇒ ('n, 't) sym list ⇒ nat ⇒ ('n, 't) sym list" where
"is_infinite_derives X α β (Suc n) = α@(is_infinite_derives X α β n)@β" |
"is_infinite_derives X α β 0 = [Nt X]"
fun is_infinite_words :: "'t list ⇒ 't list ⇒ 't list ⇒ nat ⇒ 't list" where
"is_infinite_words w⇩X w⇩α w⇩β (Suc n) = w⇩α@(is_infinite_words w⇩X w⇩α w⇩β n)@w⇩β" |
"is_infinite_words w⇩X w⇩α w⇩β 0 = w⇩X"
definition reachable_rel :: "('n × 'n) set" where
"reachable_rel ≡ {(X⇩2, X⇩1). ∃α β. (X⇩1, α@[Nt X⇩2]@β) ∈ P}"
lemma cnf_implies_pumping:
assumes "(Y, α@[Nt X]@β) ∈ P"
shows "Y →⇧? X"
proof -
consider "∃a. (α@[Nt X]@β) = [Tm a]" | "∃B C. (α@[Nt X]@β) = [Nt B, Nt C]"
using assms cnf by blast
then show ?thesis
proof (cases)
case 1
then have "False"
by (simp add: append_eq_Cons_conv)
then show ?thesis
by simp
next
case 2
then obtain B C where BC_def: "(α@[Nt X]@β) = [Nt B, Nt C]"
by blast
then have "X = B ∨ X = C"
by (metis Nil_is_append_conv append_Cons in_set_conv_decomp in_set_conv_decomp_first set_ConsD sym.inject(1))
then have "P ⊢ [Nt Y] ⇒ []@[Nt X]@[Nt C] | P ⊢ [Nt Y] ⇒ [Nt B]@[Nt X]@[]"
using BC_def assms(1) derive_singleton by force
then show ?thesis
unfolding is_reachable_step_def by (rule disjE) blast+
qed
qed
lemma reachable_rel_tran: "(X, Y) ∈ reachable_rel⇧+ ⟹ Y →⇧? X"
proof (induction rule: trancl.induct)
case (r_into_trancl X Y)
then show "Y →⇧? X"
using cnf cnf_implies_pumping by (auto simp: reachable_rel_def)
next
case (trancl_into_trancl X Y Z)
then have "Z →⇧? Y"
using cnf cnf_implies_pumping by (auto simp: reachable_rel_def)
with trancl_into_trancl(3) have "Z →⇧? X"
proof -
assume "Z →⇧? Y" and "Y →⇧? X"
obtain α⇩Z β⇩Z where z_der: "P ⊢ [Nt Z] ⇒* (α⇩Z@[Nt Y]@β⇩Z)" and "α⇩Z@β⇩Z ≠ []"
using ‹Z →⇧? Y›[unfolded is_reachable_step_def] by blast
obtain α⇩Y β⇩Y where y_der: "P ⊢ [Nt Y] ⇒* (α⇩Y@[Nt X]@β⇩Y)" and "α⇩Y@β⇩Y ≠ []"
using ‹Y →⇧? X›[unfolded is_reachable_step_def] by blast
have "P ⊢ [Nt Z] ⇒* (α⇩Z@α⇩Y@[Nt X]@β⇩Y@β⇩Z)"
using z_der y_der by (metis append.assoc derives_step)
moreover have "α⇩Z@α⇩Y@β⇩Y@β⇩Z ≠ []"
using ‹α⇩Z@β⇩Z ≠ []› ‹α⇩Y@β⇩Y ≠ []› by simp
ultimately show "Z →⇧? X"
unfolding is_reachable_step_def by (metis append.assoc)
qed
then show ?case
by simp
qed
lemma reachable_rel_wf:
assumes "finite P"
and cnf: "⋀p. p ∈ P ⟹ (∃A a. p = (A, [Tm a]) ∨ (∃A B C. p = (A, [Nt B, Nt C])))"
and loopfree: "⋀X. ¬ X →⇧? X"
shows "wf reachable_rel"
proof -
define Nt2 :: "'n × 'n ⇒ ('n, 't) sym × ('n, 't) sym"
where "Nt2 ≡ (λ(a,b). (Nt a, Nt b))"
define S :: "(('n, 't) sym × ('n, 't) sym) set"
where "S ≡ ⋃(set ` snd ` P) × (Nt ` fst ` P)"
have "finite (⋃(set ` snd ` P))"
by (rule finite_Union; use assms(1) in blast)
moreover have "finite (fst ` P)"
using assms(1) by simp
ultimately have "finite S"
unfolding S_def by blast
moreover have "(Nt2 ` reachable_rel) ⊆ S"
unfolding reachable_rel_def Nt2_def S_def by (auto split: prod.splits sym.splits, force)
ultimately have "finite (Nt2 ` reachable_rel)"
using finite_subset by blast
moreover have "inj_on Nt2 reachable_rel"
unfolding inj_on_def Nt2_def by fast
ultimately have finite: "finite reachable_rel"
using finite_image_iff by blast
have "acyclic reachable_rel"
unfolding acyclic_def using loopfree reachable_rel_tran by blast
from finite_acyclic_wf[OF finite this] show "wf reachable_rel" .
qed
lemma is_infinite_implies_finite:
assumes "finite P"
and loopfree: "⋀X. ¬ X →⇧? X"
shows "finite {w. P ⊢ [Nt X] ⇒* w}"
proof -
have "wf reachable_rel"
using assms cnf by (simp add: reachable_rel_wf)
then show ?thesis
proof (induction)
case (less X)
have "{w. ∃a. (X, [Tm a]) ∈ P ∧ P ⊢ [Tm a] ⇒* w} = snd ` {(Y, β) ∈ P. X = Y ∧ (∃a. β = [Tm a])}"
by force
then have finA: "finite {w. ∃a. (X, [Tm a]) ∈ P ∧ P ⊢ [Tm a] ⇒* w}"
using assms(1) by (metis (no_types, lifting) case_prod_conv finite_imageI mem_Collect_eq old.prod.exhaust rev_finite_subset subsetI)
have "⋀B C. (X, [Nt B, Nt C]) ∈ P ⟹ finite {w. P ⊢ [Nt B, Nt C] ⇒* w}"
proof -
fix B and C
assume "(X, [Nt B, Nt C]) ∈ P"
then have "(X, []@[Nt B]@[Nt C]) ∈ P" and "(X, [Nt B]@[Nt C]@[]) ∈ P"
by simp+
then have "(B, X) ∈ reachable_rel" and "(C, X) ∈ reachable_rel"
unfolding reachable_rel_def by blast+
then have "finite {w. P ⊢ [Nt B] ⇒* w}" and "finite {w. P ⊢ [Nt C] ⇒* w}"
using less by simp+
moreover have "{w. P ⊢ [Nt B, Nt C] ⇒* w} = (λ(b,c). b@c) ` ({w. P ⊢ [Nt B] ⇒* w} × {w. P ⊢ [Nt C] ⇒* w})"
proof (standard; standard)
fix w
assume "w ∈ {w. P ⊢ [Nt B, Nt C] ⇒* w}"
then have "P ⊢ [Nt B]@[Nt C] ⇒* w"
by simp
then obtain b c where "P ⊢ [Nt B] ⇒* b" and "P ⊢ [Nt C] ⇒* c" and "w = b@c"
using derives_append_decomp by blast
then show "w ∈ (λ(b,c). b@c) ` ({w. P ⊢ [Nt B] ⇒* w} × {w. P ⊢ [Nt C] ⇒* w})"
by blast
next
fix w
assume "w ∈ (λ(b,c). b@c) ` ({w. P ⊢ [Nt B] ⇒* w} × {w. P ⊢ [Nt C] ⇒* w})"
then obtain b c where "P ⊢ [Nt B] ⇒* b" and "P ⊢ [Nt C] ⇒* c" and "w = b@c"
by fast
then have "P ⊢ [Nt B]@[Nt C] ⇒* w"
using derives_concat by blast
then show "w ∈ {w. P ⊢ [Nt B, Nt C] ⇒* w}"
by simp
qed
ultimately show "finite {w. P ⊢ [Nt B, Nt C] ⇒* w}"
by simp
qed
moreover have "finite {(B, C). (X, [Nt B, Nt C]) ∈ P}"
proof -
define S :: "('n × ('n, 't) sym list) set" where
"S ≡ ((λ(B,C). (X, [Nt B, Nt C])) ` {(B, C). (X, [Nt B, Nt C]) ∈ P})"
have subP: "S ⊆ P"
unfolding S_def by fast
with assms(1) have "finite S"
by (elim finite_subset)
then show ?thesis
unfolding S_def by (rule finite_imageD, simp add: inj_on_def)
qed
ultimately have "finite (⋃((λ(B,C). {w. P ⊢ [Nt B, Nt C] ⇒* w}) ` {(B,C). (X, [Nt B, Nt C]) ∈ P}))"
by (intro finite_Union; fast)
moreover have "{w. ∃B C. (X, [Nt B, Nt C]) ∈ P ∧ P ⊢ [Nt B, Nt C] ⇒* w}
= (⋃((λ(B,C). {w. P ⊢ [Nt B, Nt C] ⇒* w}) ` {(B,C). (X, [Nt B, Nt C]) ∈ P}))"
by blast
ultimately have finB: "finite {w. ∃B C. (X, [Nt B, Nt C]) ∈ P ∧ P ⊢ [Nt B, Nt C] ⇒* w}"
by simp
let ?P = "λw β. (X, β) ∈ P ∧ P ⊢ β ⇒* w"
have un: "{w. ∃β. ?P w β} = {w. ∃a. ?P w [Tm a]} ∪ {w. ∃B C. ?P w [Nt B, Nt C]}"
using cnf by blast
have "finite {w. ∃β. (X, β) ∈ P ∧ P ⊢ β ⇒* w}"
unfolding un by (intro finite_UnI; use finA finB in simp)
moreover have "⋀X. {w. P ⊢ [Nt X] ⇒* w} = {[Nt X]} ∪ {w. ∃β. (X, β) ∈ P ∧ P ⊢ β ⇒* w}"
by (auto split: prod.splits simp: derives_Cons_decomp)
ultimately show ?case
by simp
qed
qed
theorem is_infinite_correct:
assumes "is_useful_all" and "is_non_nullable_all" and "finite P"
shows "¬ finite (Lang P S) ⟷ is_infinite"
proof (standard, erule contrapos_pp)
assume "¬ is_infinite"
then have finA: "finite {w. P ⊢ [Nt S] ⇒* w}"
using is_infinite_implies_finite assms(3) by (simp add: is_infinite_def)
have "finite (map Tm ` {w. P ⊢ [Nt S] ⇒* map Tm w}::('n, 't) sym list set)"
by (rule finite_subset[where B="{w. P ⊢ [Nt S] ⇒* w}"]; use finA in blast)
moreover have "inj_on (map Tm) {w. P ⊢ [Nt S] ⇒* map Tm w}"
by (simp add: inj_on_def)
ultimately have "finite {w. P ⊢ [Nt S] ⇒* map Tm w}"
using finite_image_iff[where f="map Tm"] by blast
then show "¬ infinite (Lang P S)"
by (simp add: Lang_def)
next
assume "is_infinite"
then obtain X where "X →⇧? X"
unfolding is_infinite_def by blast
then obtain α β where deriveX: "P ⊢ [Nt X] ⇒* (α@[Nt X]@β)" and "α@β ≠ []"
unfolding is_reachable_step_def by blast
obtain w⇩X where w⇩X_def: "P ⊢ [Nt X] ⇒* map Tm w⇩X"
using assms(1) is_useful_all_derive by blast
obtain w⇩α w⇩β where w⇩α_def: "P ⊢ α ⇒* map Tm w⇩α" and w⇩β_def: "P ⊢ β ⇒* map Tm w⇩β"
using assms(1) is_useful_all_derive by blast+
then have "w⇩α@w⇩β ≠ []"
using ‹α@β ≠ []› by (simp add: assms(2) is_non_nullable_all_derive)
define f⇩d where "f⇩d ≡ is_infinite_derives X α β"
define f⇩w where "f⇩w ≡ is_infinite_words w⇩X w⇩α w⇩β"
have "P ⊢ S ⇒⇧? X"
using assms(1) by (simp add: is_useful_all_def is_useful_def)
then obtain p s where "P ⊢ [Nt S] ⇒* (p@[Nt X]@s)"
unfolding is_reachable_from_def by blast
moreover obtain w⇩p where w⇩p_def: "P ⊢ p ⇒* map Tm w⇩p"
using assms(1) is_useful_all_derive by blast
moreover obtain w⇩s where w⇩s_def: "P ⊢ s ⇒* map Tm w⇩s"
using assms(1) is_useful_all_derive by blast
ultimately have fromS: "P ⊢ [Nt S] ⇒* (map Tm w⇩p@[Nt X]@map Tm w⇩s)"
by (meson local.derives_concat rtranclp.rtrancl_refl rtranclp_trans)
have "⋀i. P ⊢ [Nt X] ⇒* f⇩d i"
subgoal for i
apply (induction i; simp_all add: f⇩d_def)
apply (meson deriveX local.derives_concat rtranclp.rtrancl_refl rtranclp_trans)
done
done
moreover have "⋀i. P ⊢ f⇩d i ⇒* map Tm (f⇩w i)"
subgoal for i
by (induction i; simp add: f⇩d_def f⇩w_def w⇩X_def w⇩α_def w⇩β_def derives_concat)
done
ultimately have "⋀i. P ⊢ [Nt X] ⇒* map Tm (f⇩w i)"
using rtranclp_trans by fast
then have "⋀i. P ⊢ [Nt S] ⇒* (map Tm w⇩p@map Tm (f⇩w i)@map Tm w⇩s)"
using fromS derives_step by presburger
then have "⋀i. P ⊢ [Nt S] ⇒* (map Tm (w⇩p@(f⇩w i)@w⇩s))"
by simp
moreover define f⇩w' where f⇩w'_def: "f⇩w' = (λi. w⇩p @ (f⇩w i) @ w⇩s)"
ultimately have "⋀i. P ⊢ [Nt S] ⇒* map Tm (f⇩w' i)"
by simp
then have "⋀i. f⇩w' i ∈ Lang P S"
by (simp add: Lang_def)
then have "range f⇩w' ⊆ Lang P S"
by blast
have "⋀i. length (f⇩w i) < length (f⇩w (i+1))"
subgoal for i
by (induction i; use f⇩w_def ‹w⇩α@w⇩β ≠ []› in simp)
done
then have x: "⋀i. length (f⇩w' i) < length (f⇩w' (i+1))"
by (simp add: f⇩w'_def)
then have "⋀i n. 0 < n ⟹ length (f⇩w' i) < length (f⇩w' (i+n))"
subgoal for i n
apply (induction n, auto)
apply (metis Suc_lessD add_cancel_left_right gr_zeroI less_trans_Suc)
done
done
then have f⇩w'_order: "⋀i⇩1 i⇩2. i⇩1 < i⇩2 ⟹ length (f⇩w' i⇩1) < length (f⇩w' i⇩2)"
using less_imp_add_positive by blast
then have "inj f⇩w'"
unfolding inj_def by (metis nat_neq_iff)
have "infinite (Lang P S)"
using ‹range f⇩w' ⊆ Lang P S› ‹inj f⇩w'› infinite_iff_countable_subset by blast
then show "¬ finite (Lang P S)"
by simp
qed
no_notation is_reachable_step (infix "→⇧?" 80)
subsection‹Finiteness Problem›
lemma is_infinite_check:
"is_infinite ⟷ (∃X. [Nt X] ∈ pre_star P { α@[Nt X]@β | α β. α@β ≠ [] })"
unfolding is_infinite_def is_reachable_step_def by (auto simp: pre_star_term)
theorem is_infinite_by_prestar:
assumes "is_useful_all" and "is_non_nullable_all" and "finite P"
shows "finite (Lang P S) ⟷ (∀X. [Nt X] ∉ pre_star P { α@[Nt X]@β | α β. α@β ≠ [] })"
using assms is_infinite_correct is_infinite_check by blast
end
end