Theory Pumping_Lemma_Regular
section ‹Pumping Lemma for Strongly Right-Linear Grammars›
theory Pumping_Lemma_Regular
imports NDA_rlin2 "List_Power.List_Power"
begin
text ‹The proof is on the level of strongly right-linear grammars.
Currently there is no proof on the automaton level but now it would be easy to obtain one.›
lemma not_distinct:
assumes "m = card P"
and "m ≥ 1"
and "∀i < length w. w ! i ∈ P"
and "length w ≥ Suc m"
shows "∃xs ys zs y. w = xs @ [y] @ ys @ [y] @ zs ∧ length (xs @ [y] @ ys @ [y]) ≤ Suc m"
using assms proof (induction w arbitrary: P m rule: length_induct)
case (1 aw)
from "1.prems"(4) obtain a w where aw_cons[simp]: "aw = a#w" and w_len: "m ≤ length w"
using Suc_le_length_iff[of m aw] by blast
show ?case proof (cases "a ∈ set w")
case True
hence "¬ distinct aw" by simp
then obtain xs ys zs y where aw_split: "aw = xs @ [y] @ ys @ [y] @ zs"
using not_distinct_decomp by blast
show ?thesis proof (cases "length (xs @ [y] @ ys @ [y]) ≤ Suc m")
case True
with aw_split show ?thesis by blast
next
case False
let ?xsyys = "xs @ [y] @ ys"
from False have a4: "length ?xsyys ≥ Suc m" by simp
from aw_split have a5: "length ?xsyys < length aw" by simp
with "1.prems"(3) have "∀i<length ?xsyys. aw ! i ∈ P" by simp
with aw_split have a3: "∀i < length ?xsyys. ?xsyys ! i ∈ P"
by (metis append_assoc nth_append)
from "1.prems"(2) "1.prems"(1) a3 a4 a5 have "∃xs' ys' zs' y'. ?xsyys = xs' @ [y'] @ ys' @ [y'] @ zs' ∧ length (xs' @ [y'] @ ys' @ [y']) ≤ Suc m"
using "1.IH" by simp
then obtain xs' ys' zs' y' where xsyys_split: "?xsyys = xs' @ [y'] @ ys' @ [y'] @ zs'" and xsyys'_len: "length (xs' @ [y'] @ ys' @ [y']) ≤ Suc m" by blast
let ?xs = xs' let ?y = y' let ?ys = ys' let ?zs = "zs' @ [y] @ zs"
from xsyys_split aw_split have *: "aw = ?xs @ [?y] @ ?ys @ [?y] @ ?zs" by simp
from xsyys'_len have **: "length (?xs @ [?y] @ ?ys @ [?y]) ≤ Suc m" by simp
from * ** show ?thesis by blast
qed
next
case False
let ?P' = "P - {a}"
from "1.prems"(3) have a_in: "a ∈ P" by auto
with "1.prems"(1) have a1: "m-1 = card ?P'" by simp
from "1.prems"(2) w_len have "w ≠ []" by auto
with "1.prems"(3) False have b_in: "∃b≠a. b ∈ P" by force
from a_in b_in "1.prems"(2) "1.prems"(1) have "m ≥ 2"
by (metis Suc_1 card_1_singletonE not_less_eq_eq singletonD verit_la_disequality)
hence a2: "m-1 ≥ 1" by simp
from False "1.prems"(3) have a3: "∀i<length w. w ! i ∈ ?P'"
using DiffD2 by auto
from "1.prems"(2) w_len have a4: "Suc (m-1) ≤ length w" by simp
from a1 a2 a3 a4 have "∃xs ys zs y. w = xs @ [y] @ ys @ [y] @ zs ∧ length (xs @ [y] @ ys @ [y]) ≤ Suc (m - 1)"
using "1.IH" by simp
then obtain xs ys zs y where w_split: "w = xs @ [y] @ ys @ [y] @ zs" and xsys_len: "length (xs @ [y] @ ys @ [y]) ≤ m" by auto
from w_split have *: "a#w = (a#xs) @ [y] @ ys @ [y] @ zs" by simp
from xsys_len have **: "length ((a#xs) @ [y] @ ys @ [y]) ≤ Suc m" by simp
from * ** aw_cons show ?thesis by blast
qed
qed
text
‹We define the function ‹nxts_nts P a w› that collects all paths traversing the word ‹w› starting from the non-terminal ‹A› in the
production set ‹P›. ‹nxts_nts0› appends the non-terminal ‹A› in front of every list produced by ‹nxts_nts››
fun nxts_nts :: "('n,'t)Prods ⇒ 'n ⇒ 't list ⇒ 'n list set" where
"nxts_nts P A [] = {[]}"
| "nxts_nts P A (a#w) = (⋃B∈nxt_rlin2 P A a. (Cons B)`nxts_nts P B w)"
definition nxts_nts0 where
"nxts_nts0 P A w ≡ ((#) A) ` nxts_nts P A w"
subsection ‹Properties of ‹nxts_nts› and ‹nxts_nts0››
lemma nxts_nts0_i0:
"∀e ∈ nxts_nts0 P A w. e!0 = A"
unfolding nxts_nts0_def by auto
lemma nxts_nts0_shift:
assumes "i < length w"
shows "∀e ∈ nxts_nts0 P A w. ∃e' ∈ nxts_nts P A w. e ! (Suc i) = e' ! i"
unfolding nxts_nts0_def by auto
lemma nxts_nts_pick_nt:
assumes "e ∈ nxts_nts P A (a#w)"
shows "∃C∈nxt_rlin2 P A a. ∃e' ∈ nxts_nts P C w. e = C#e'"
using assms by auto
lemma nxts_nts0_len:
"∀e ∈ nxts_nts0 P A w. length e = Suc (length w)"
unfolding nxts_nts0_def
by (induction P A w rule: nxts_nts.induct) auto
lemma nxts_nts0_nxt:
assumes "i < length w"
shows "∀e ∈ nxts_nts0 P A w. e!(Suc i) ∈ nxt_rlin2 P (e!i) (w!i)"
unfolding nxts_nts0_def using assms proof (induction P A w arbitrary: i rule: nxts_nts.induct)
case (1 P A)
thus ?case by simp
next
case (2 P A a w)
thus ?case
using less_Suc_eq_0_disj by auto
qed
lemma nxts_nts0_path:
assumes "i1 ≤ length w"
and "i2 ≤ length w"
and "i1 ≤ i2"
shows "∀e ∈ nxts_nts0 P A w. e!i2 ∈ nxts_rlin2_set P {e!i1} (drop i1 (take i2 w))"
proof
fix e
assume "e ∈ nxts_nts0 P A w"
with assms show "e ! i2 ∈ nxts_rlin2_set P {e ! i1} (drop i1 (take i2 w))" proof (induction "i2-i1" arbitrary: i2)
case 0
thus ?case
by (simp add: nxts_rlin2_set_def)
next
case (Suc x)
let ?i2' = "i2 - 1"
from Suc.hyps(2) have x_def: "x = ?i2' - i1" by simp
from Suc.prems(2) have i2'_len: "?i2' ≤ length w" by simp
from Suc.prems(3) Suc.hyps(2) have i1_i2': "i1 ≤ ?i2'" by simp
have IH: "e ! ?i2' ∈ nxts_rlin2_set P {e ! i1} (drop i1 (take ?i2' w))"
using Suc.hyps(1)[of ?i2', OF x_def Suc.prems(1) i2'_len i1_i2' Suc.prems(4)] .
from Suc.hyps(2) Suc.prems(2) Suc.prems(4) have "e ! i2 ∈ nxt_rlin2 P (e!(i2-1)) (w!(i2-1))"
using nxts_nts0_nxt[of ?i2' w P A] by simp
hence e_i2: "e ! i2 ∈ nxts_rlin2_set P {e!(i2-1)} [w!(i2-1)]"
unfolding nxts_rlin2_set_def nxt_rlin2_set_def by simp
have "drop i1 (take (i2 - 1) w) @ [w ! (i2 - 1)] = drop i1 (take i2 w)"
by (smt (verit) Cons_nth_drop_Suc Suc.hyps(2) Suc.prems(2) Suc.prems(3) add_Suc drop_drop drop_eq_Nil drop_take i1_i2' i2'_len le_add_diff_inverse2 le_less_Suc_eq nle_le nth_via_drop order.strict_iff_not take_Suc_conv_app_nth x_def)
thus ?case
using nxts_trans2[of "e ! (i2 - 1)" P "e ! i1" "drop i1 (take (i2 - 1) w)" "e ! i2" "[w!(i2-1)]", OF IH e_i2] by argo
qed
qed
lemma nxts_nts0_path_start:
assumes "i ≤ length w"
shows "∀e ∈ nxts_nts0 P A w. e ! i ∈ nxts_rlin2_set P {A} (take i w)"
using assms nxts_nts0_path[of 0 w i P A] by (simp add: nxts_nts0_def)
lemma nxts_nts_elem:
assumes "i < length w"
shows "∀e ∈ nxts_nts P A w. e ! i ∈ Nts P"
proof
fix e
assume "e ∈ nxts_nts P A w"
with assms show "e ! i ∈ Nts P" proof (induction P A w arbitrary: i e rule: nxts_nts.induct)
case (1 P A)
thus ?case by simp
next
case (2 P A a w)
from 2(3) obtain C e' where C_def: "C ∈ nxt_rlin2 P A a" and e'_def: "e' ∈ nxts_nts P C w" and e_app: "e = C#e'"
using nxts_nts_pick_nt[of e P A a w] by blast
show ?case proof (cases "i = 0")
case True
with e_app C_def show ?thesis
using nxt_rlin2_nts by simp
next
case False
from False 2(2) have i_len: "i - 1 < length w" by simp
have "e' ! (i - 1) ∈ Nts P"
using "2.IH"[of C "i-1" e', OF C_def i_len e'_def] .
with e_app False have "e ! i ∈ Nts P" by simp
thus ?thesis .
qed
qed
qed
lemma nxts_nts0_elem:
assumes "A ∈ Nts P"
and "i ≤ length w"
shows "∀e ∈ nxts_nts0 P A w. e ! i ∈ Nts P"
proof (cases "i = 0")
case True
thus ?thesis
by (simp add: assms(1) nxts_nts0_i0)
next
case False
show ?thesis proof
fix e
assume e_def: "e ∈ nxts_nts0 P A w"
from False e_def assms(2) have "∃e' ∈ nxts_nts P A w. e ! i = e' ! (i-1)"
using nxts_nts0_shift[of "i-1" w P A] by simp
then obtain e' where e'_def: "e' ∈ nxts_nts P A w" and e_ind: "e ! i = e' ! (i-1)"
by blast
from False e'_def assms(2) have "e' ! (i-1) ∈ Nts P"
using nxts_nts_elem[of "i-1" w P A] by simp
with e_ind show "e ! i ∈ Nts P" by simp
qed
qed
lemma nxts_nts0_pick:
assumes "B ∈ nxts_rlin2_set P {A} w"
shows "∃e ∈ nxts_nts0 P A w. last e = B"
unfolding nxts_nts0_def using assms proof (induction P A w arbitrary: B rule: nxts_nts.induct)
case (1 P A)
thus ?case
by (simp add: nxts_rlin2_set_def)
next
case (2 P A a w)
from 2(2) obtain C where C_def: "C ∈ nxt_rlin2 P A a" and C_path: "B ∈ nxts_rlin2_set P {C} w"
using nxts_rlin2_set_first_step[of B P A a w] by blast
have "∃e ∈ nxts_nts0 P C w. last e = B"
using "2.IH"[of C B, OF C_def C_path] by (simp add: nxts_nts0_def)
then obtain e where e_def: "e ∈ nxts_nts0 P C w" and e_last: "last e = B"
by blast
from e_def C_def have *: "A#e ∈ nxts_nts0 P A (a#w)"
unfolding nxts_nts0_def by auto
from e_last e_def have **: "last (A#e) = B"
using nxts_nts0_len[of P C w] by auto
from * ** show ?case
unfolding nxts_nts0_def by blast
qed
subsection ‹Pumping Lemma›
text
‹The following lemma states that in the automata level there exists a cycle occurring in the first ‹m› symbols where ‹m› is the cardinality
of the non-terminals set, under the following assumptions›
lemma nxts_split_cycle:
assumes "finite P"
and "A ∈ Nts P"
and "m = card (Nts P)"
and "B ∈ nxts_rlin2_set P {A} w"
and "length w ≥ m"
shows "∃x y z C. w = x@y@z ∧ length y ≥ 1 ∧ length (x@y) ≤ m ∧
C ∈ nxts_rlin2_set P {A} x ∧ C ∈ nxts_rlin2_set P {C} y ∧ B ∈ nxts_rlin2_set P {C} z"
proof -
let ?nts = "nxts_nts0 P A w"
obtain e where e_def: "e ∈ ?nts" and e_last: "last e = B"
using nxts_nts0_pick[of B P A w, OF assms(4)] by auto
from e_def have e_len: "length e = Suc (length w)"
using nxts_nts0_len[of P A w] by simp
from e_len e_def have e_elem: "∀i < length e. e!i ∈ Nts P"
using nxts_nts0_elem[OF assms(2)] by (auto simp: less_Suc_eq_le)
have "finite (Nts P)"
using finite_Nts[of P, OF assms(1)] .
with assms(2) assms(3) have m_geq_1: "m ≥ 1"
using less_eq_Suc_le by fastforce
from assms(5) e_len have "∃xs ys zs y. e = xs @ [y] @ ys @ [y] @ zs ∧ length (xs @ [y] @ ys @ [y]) ≤ Suc m"
using not_distinct[OF assms(3) m_geq_1 e_elem] by simp
then obtain xs ys zs C where e_split: "e = xs @ [C] @ ys @ [C] @ zs" and xy_len: "length (xs @ [C] @ ys @ [C]) ≤ Suc m"
by blast
let ?e1 = "xs @ [C]" let ?e2 = "ys @ [C]" let ?e3 = zs
let ?x = "take (length ?e1 - 1) w" let ?y = "drop (length ?e1 - 1) (take (length ?e1+length ?e2 - 1) w)"
let ?z = "drop (length ?e1+length ?e2 - 1) w"
have *: "w = ?x@?y@?z"
by (metis Nat.add_diff_assoc2 append_assoc append_take_drop_id diff_add_inverse drop_take le_add1 length_append_singleton plus_1_eq_Suc take_add)
from e_len e_split have **: "length ?y ≥ 1" by simp
from xy_len have ***: "length (?x@?y) ≤ m" by simp
have x_fac: "?x = take (length xs) w" by simp
from ** have x_fac2: "length xs ≤ length w" by simp
from e_split have x_fac3: "e ! length xs = C" by simp
from e_def x_fac x_fac3 have ****: "C ∈ nxts_rlin2_set P {A} ?x"
using nxts_nts0_path_start[of "length xs" w P A, OF x_fac2] by auto
have y_fac: "?y = drop (length xs) (take (length xs + length ys + 1) w)" by simp
from e_len e_split have y_fac2: "length xs + length ys + 1 ≤ length w" by simp
have y_fac3: "length xs ≤ length xs + length ys + 1" by simp
have y_fac4: "e ! (length xs + length ys + 1) = C"
by (metis add.right_neutral add_Suc_right append.assoc append_Cons e_split length_Cons length_append list.size(3) nth_append_length plus_1_eq_Suc)
from e_def y_fac x_fac3 y_fac4 have *****: "C ∈ nxts_rlin2_set P {C} ?y"
using nxts_nts0_path[of "length xs" w "length xs + length ys + 1" P A, OF x_fac2 y_fac2 y_fac3] by auto
have z_fac: "?z = drop (length xs + length ys + 1) (take (length w) w)" by simp
from e_last e_len have z_fac2: "e ! (length w) = B"
by (metis Zero_not_Suc diff_Suc_1 last_conv_nth list.size(3))
from e_def z_fac y_fac2 y_fac4 z_fac2 have ******: "B ∈ nxts_rlin2_set P {C} ?z"
using nxts_nts0_path[of "length xs + length ys + 1" w "length w" P A] by auto
from * ** *** **** ***** ****** show ?thesis by blast
qed
text
‹We also show that a cycle can be pumped in the automata level›
lemma pump_cycle:
assumes "B ∈ nxts_rlin2_set P {A} x"
and "B ∈ nxts_rlin2_set P {B} y"
shows "B ∈ nxts_rlin2_set P {A} (x@(y^^i))"
using assms proof (induction i)
case 0
thus ?case by (simp add: assms(1))
next
case (Suc i)
have "B ∈ nxts_rlin2_set P {A} (x@(y^^i))"
using Suc.IH[OF assms] .
with assms(2) have "B ∈ nxts_rlin2_set P {A} (x@(y^^i)@y)"
using nxts_trans2[of B P A "x@(y^^i)" B y] by simp
thus ?case
by (simp add: pow_list_comm)
qed
text
‹Combining the previous lemmas we can prove the pumping lemma where the starting non-terminal is in the production set. We simply extend the
lemma for non-terminals that are not part of the production set, as these non-terminals will produce the empty language›
lemma pumping_re_aux:
assumes "finite P"
and "A ∈ Nts P"
and "m = card (Nts P)"
and "accepted P A w"
and "length w ≥ m"
shows "∃x y z. w = x@y@z ∧ length y ≥ 1 ∧ length (x@y) ≤ m ∧ (∀i. accepted P A (x@(y^^i)@z))"
proof -
from assms(4) obtain Z where Z_in:"Z ∈ nxts_rlin2_set P {A} w" and Z_eps:"(Z,[])∈P"
by (auto simp: accepted_def)
obtain x y z C where *: "w = x@y@z" and **: "length y ≥ 1" and ***: "length (x@y) ≤ m" and
1: "C ∈ nxts_rlin2_set P {A} x" and 2: "C ∈ nxts_rlin2_set P {C} y" and 3: "Z ∈ nxts_rlin2_set P {C} z"
using nxts_split_cycle[OF assms(1) assms(2) assms(3) Z_in assms(5)] by auto
have "∀i. C ∈ nxts_rlin2_set P {A} (x@(y^^i))"
using pump_cycle[OF 1 2] by simp
with 3 have "∀i. Z ∈ nxts_rlin2_set P {A} (x@(y^^i)@z)"
using nxts_trans2[of C P A] by fastforce
with Z_eps have ****: "(∀i. accepted P A (x@(y^^i)@z))"
by (auto simp: accepted_def)
from * ** *** **** show ?thesis by auto
qed
theorem pumping_lemma_re_nts:
assumes "rlin2 P"
and "finite P"
and "A ∈ Nts P"
shows "∃n. ∀w ∈ Lang P A. length w ≥ n ⟶
(∃x y z. w = x@y@z ∧ length y ≥ 1 ∧ length (x@y) ≤ n ∧ (∀i. x@(y^^i)@z ∈ Lang P A))"
using assms pumping_re_aux[of P A "card (Nts P)"] Lang_iff_accepted_if_rlin2[OF assms(1)] by metis
theorem pumping_lemma_regular:
assumes "rlin2 P" and "finite P"
shows "∃n. ∀w ∈ Lang P A. length w ≥ n ⟶
(∃x y z. w = x@y@z ∧ length y ≥ 1 ∧ length (x@y) ≤ n ∧ (∀i. x@(y^^i)@z ∈ Lang P A))"
proof (cases "A ∈ Nts P")
case True
thus ?thesis
using pumping_lemma_re_nts[OF assms True] by simp
next
case False
hence "Lang P A = {}"
by (auto intro!: Lang_empty_if_notin_Lhss simp add: Lhss_def Nts_def)
thus ?thesis by simp
qed
text ‹Most of the time pumping lemma is used in the contrapositive form
to prove that no right-linear set of productions exists.›
corollary pumping_lemma_regular_contr:
assumes "finite P"
and "∀n. ∃w ∈ Lang P A. length w ≥ n ∧ (∀x y z. w = x@y@z ∧ length y ≥ 1 ∧ length (x@y) ≤ n ⟶ (∃i. x@(y^^i)@z ∉ Lang P A))"
shows "¬rlin2 P"
using assms pumping_lemma_regular[of P A] by metis
end