Theory Pumping_Lemma_Regular

(*
Authors: Kaan Taskin
*)

section ‹Pumping Lemma for Strongly Right-Linear Grammars›

theory Pumping_Lemma_Regular
imports NDA_rlin2 "List_Power.List_Power"
begin

text ‹The proof is on the level of strongly right-linear grammars.
Currently there is no proof on the automaton level but now it would be easy to obtain one.›

lemma not_distinct:
  assumes "m = card P"
      and "m  1"
      and "i < length w. w ! i  P"
      and "length w  Suc m"
    shows "xs ys zs y. w = xs @ [y] @ ys @ [y] @ zs  length (xs @ [y] @ ys @ [y])  Suc m"
using assms proof (induction w arbitrary: P m rule: length_induct)
  case (1 aw)
  from "1.prems"(4) obtain a w where aw_cons[simp]: "aw = a#w" and w_len: "m  length w"
    using Suc_le_length_iff[of m aw] by blast
  show ?case proof (cases "a  set w")
    case True
    hence "¬ distinct aw" by simp
    then obtain xs ys zs y where aw_split: "aw = xs @ [y] @ ys @ [y] @ zs"
      using not_distinct_decomp by blast
    show ?thesis proof (cases "length (xs @ [y] @ ys @ [y])  Suc m")
      case True
      with aw_split show ?thesis by blast
    next
      case False
      let ?xsyys = "xs @ [y] @ ys"
      from False have a4: "length ?xsyys  Suc m" by simp
      from aw_split have a5: "length ?xsyys < length aw" by simp
      with "1.prems"(3) have "i<length ?xsyys. aw ! i  P" by simp
      with aw_split have a3: "i < length ?xsyys. ?xsyys ! i  P"
        by (metis append_assoc nth_append)
      from "1.prems"(2) "1.prems"(1) a3 a4 a5 have "xs' ys' zs' y'. ?xsyys = xs' @ [y'] @ ys' @ [y'] @ zs'  length (xs' @ [y'] @ ys' @ [y'])  Suc m"
        using "1.IH" by simp
      then obtain xs' ys' zs' y' where xsyys_split: "?xsyys = xs' @ [y'] @ ys' @ [y'] @ zs'" and xsyys'_len: "length (xs' @ [y'] @ ys' @ [y'])  Suc m" by blast
      let ?xs = xs' let ?y = y' let ?ys = ys' let ?zs = "zs' @ [y] @ zs" 
      from xsyys_split aw_split have *: "aw = ?xs @ [?y] @ ?ys @ [?y] @ ?zs" by simp
      from xsyys'_len have **: "length (?xs @ [?y] @ ?ys @ [?y])  Suc m" by simp
      from * ** show ?thesis by blast
    qed
  next
    case False
    let ?P' = "P - {a}"
    from "1.prems"(3) have a_in: "a  P" by auto
    with "1.prems"(1) have a1: "m-1 = card ?P'" by simp
    from "1.prems"(2) w_len have "w  []" by auto
    with "1.prems"(3) False have b_in: "ba. b  P" by force
    from a_in b_in "1.prems"(2) "1.prems"(1) have "m  2"
      by (metis Suc_1 card_1_singletonE not_less_eq_eq singletonD verit_la_disequality)
    hence a2: "m-1  1" by simp
    from False "1.prems"(3) have a3: "i<length w. w ! i  ?P'"
      using DiffD2 by auto
    from "1.prems"(2) w_len have a4: "Suc (m-1)  length w" by simp
    from a1 a2 a3 a4 have "xs ys zs y. w = xs @ [y] @ ys @ [y] @ zs  length (xs @ [y] @ ys @ [y])  Suc (m - 1)"
      using "1.IH" by simp
    then obtain xs ys zs y where w_split: "w = xs @ [y] @ ys @ [y] @ zs" and xsys_len: "length (xs @ [y] @ ys @ [y])  m" by auto
    from w_split have *: "a#w = (a#xs) @ [y] @ ys @ [y] @ zs" by simp
    from xsys_len have **: "length ((a#xs) @ [y] @ ys @ [y])  Suc m" by simp
    from * ** aw_cons show ?thesis by blast
  qed
qed

text
‹We define the function nxts_nts P a w› that collects all paths traversing the word w› starting from the non-terminal A› in the 
 production set P›. nxts_nts0› appends the non-terminal A› in front of every list produced by nxts_nts›

fun nxts_nts :: "('n,'t)Prods  'n  't list  'n list set" where
  "nxts_nts P A [] = {[]}"
| "nxts_nts P A (a#w) = (Bnxt_rlin2 P A a. (Cons B)`nxts_nts P B w)"

definition nxts_nts0 where
"nxts_nts0 P A w  ((#) A) ` nxts_nts P A w"

subsection ‹Properties of nxts_nts› and nxts_nts0›

lemma nxts_nts0_i0:
  "e  nxts_nts0 P A w. e!0 = A"
  unfolding nxts_nts0_def by auto

lemma nxts_nts0_shift:
  assumes "i < length w"
  shows "e  nxts_nts0 P A w. e'  nxts_nts P A w. e ! (Suc i) = e' ! i"
  unfolding nxts_nts0_def by auto

lemma nxts_nts_pick_nt:
  assumes "e  nxts_nts P A (a#w)"
  shows "Cnxt_rlin2 P A a. e'  nxts_nts P C w. e = C#e'"
  using assms by auto

lemma nxts_nts0_len:
  "e  nxts_nts0 P A w. length e = Suc (length w)"
  unfolding nxts_nts0_def
  by (induction P A w rule: nxts_nts.induct) auto

lemma nxts_nts0_nxt: 
  assumes "i < length w"
  shows "e  nxts_nts0 P A w. e!(Suc i)  nxt_rlin2 P (e!i) (w!i)"
  unfolding nxts_nts0_def using assms proof (induction P A w arbitrary: i rule: nxts_nts.induct)
  case (1 P A)
  thus ?case by simp
next
  case (2 P A a w)
  thus ?case
    using less_Suc_eq_0_disj by auto
qed

lemma nxts_nts0_path:
  assumes "i1  length w"
      and "i2  length w"
      and "i1  i2"
    shows "e  nxts_nts0 P A w. e!i2  nxts_rlin2_set P {e!i1} (drop i1 (take i2 w))"
proof
  fix e
  assume "e  nxts_nts0 P A w"
  with assms show "e ! i2  nxts_rlin2_set P {e ! i1} (drop i1 (take i2 w))" proof (induction "i2-i1" arbitrary: i2)
    case 0
    thus ?case
      by (simp add: nxts_rlin2_set_def)
  next
    case (Suc x)
      let ?i2' = "i2 - 1"
      from Suc.hyps(2) have x_def: "x = ?i2' - i1" by simp
      from Suc.prems(2) have i2'_len: "?i2'  length w" by simp
      from Suc.prems(3) Suc.hyps(2) have i1_i2': "i1  ?i2'" by simp
      have IH: "e ! ?i2'  nxts_rlin2_set P {e ! i1} (drop i1 (take ?i2' w))"
        using Suc.hyps(1)[of ?i2', OF x_def Suc.prems(1) i2'_len i1_i2' Suc.prems(4)] .
      from Suc.hyps(2) Suc.prems(2) Suc.prems(4) have "e ! i2  nxt_rlin2 P (e!(i2-1)) (w!(i2-1))"
        using nxts_nts0_nxt[of ?i2' w P A] by simp
      hence e_i2: "e ! i2  nxts_rlin2_set P {e!(i2-1)} [w!(i2-1)]"
        unfolding nxts_rlin2_set_def nxt_rlin2_set_def by simp
      have "drop i1 (take (i2 - 1) w) @ [w ! (i2 - 1)] = drop i1 (take i2 w)"
        by (smt (verit) Cons_nth_drop_Suc Suc.hyps(2) Suc.prems(2) Suc.prems(3) add_Suc drop_drop drop_eq_Nil drop_take i1_i2' i2'_len le_add_diff_inverse2 le_less_Suc_eq nle_le nth_via_drop order.strict_iff_not take_Suc_conv_app_nth x_def)
      thus ?case 
        using nxts_trans2[of "e ! (i2 - 1)" P "e ! i1" "drop i1 (take (i2 - 1) w)" "e ! i2" "[w!(i2-1)]", OF IH e_i2] by argo
  qed
qed

lemma nxts_nts0_path_start:
  assumes "i  length w"
  shows "e  nxts_nts0 P A w. e ! i  nxts_rlin2_set P {A} (take i w)"
  using assms nxts_nts0_path[of 0 w i P A] by (simp add: nxts_nts0_def)

lemma nxts_nts_elem:
  assumes "i < length w"
  shows "e  nxts_nts P A w. e ! i  Nts P"
proof
  fix e
  assume "e  nxts_nts P A w"
  with assms show "e ! i  Nts P" proof (induction P A w arbitrary: i e rule: nxts_nts.induct)
    case (1 P A)
    thus ?case by simp
  next
    case (2 P A a w)
    from 2(3) obtain C e' where C_def: "C  nxt_rlin2 P A a" and e'_def: "e'  nxts_nts P C w" and e_app: "e = C#e'"
      using nxts_nts_pick_nt[of e P A a w] by blast
    show ?case proof (cases "i = 0")
      case True
      with e_app C_def show ?thesis
        using nxt_rlin2_nts by simp
    next
      case False
      from False 2(2) have i_len: "i - 1 < length w" by simp
      have "e' ! (i - 1)  Nts P" 
        using "2.IH"[of C "i-1" e', OF C_def i_len e'_def] .
      with e_app False have "e ! i  Nts P" by simp
      thus ?thesis .
    qed
  qed
qed

lemma nxts_nts0_elem:
  assumes "A  Nts P"
      and "i  length w"
  shows "e  nxts_nts0 P A w. e ! i  Nts P"
proof (cases "i = 0")
  case True
  thus ?thesis
    by (simp add: assms(1) nxts_nts0_i0)
next
  case False
  show ?thesis proof
    fix e
    assume e_def: "e  nxts_nts0 P A w"
    from False e_def assms(2) have "e'  nxts_nts P A w. e ! i = e' ! (i-1)"
      using nxts_nts0_shift[of "i-1" w P A] by simp
    then obtain e' where e'_def: "e'  nxts_nts P A w" and e_ind: "e ! i = e' ! (i-1)"
      by blast
    from False e'_def assms(2) have "e' ! (i-1)  Nts P"
      using nxts_nts_elem[of "i-1" w P A] by simp
    with e_ind show "e ! i  Nts P" by simp
  qed
qed

lemma nxts_nts0_pick:
  assumes "B  nxts_rlin2_set P {A} w"
  shows "e  nxts_nts0 P A w. last e = B"
unfolding nxts_nts0_def using assms proof (induction P A w arbitrary: B rule: nxts_nts.induct)
  case (1 P A)
  thus ?case
    by (simp add: nxts_rlin2_set_def)
next
  case (2 P A a w)
  from 2(2) obtain C where C_def: "C  nxt_rlin2 P A a" and C_path: "B  nxts_rlin2_set P {C} w"
    using nxts_rlin2_set_first_step[of B P A a w] by blast
  have "e  nxts_nts0 P C w. last e = B"
    using "2.IH"[of C B, OF C_def C_path] by (simp add: nxts_nts0_def)
  then obtain e where e_def: "e  nxts_nts0 P C w" and e_last: "last e = B" 
    by blast
  from e_def C_def have *: "A#e  nxts_nts0 P A (a#w)"
    unfolding nxts_nts0_def by auto
  from e_last e_def have **: "last (A#e) = B"
    using nxts_nts0_len[of P C w] by auto
  from * ** show ?case
    unfolding nxts_nts0_def by blast
qed

subsection ‹Pumping Lemma›

text
‹The following lemma states that in the automata level there exists a cycle occurring in the first m› symbols where m› is the cardinality 
 of the non-terminals set, under the following assumptions›

lemma nxts_split_cycle:
  assumes "finite P"
      and "A  Nts P"
      and "m = card (Nts P)"
      and "B  nxts_rlin2_set P {A} w"
      and "length w  m"
    shows "x y z C. w = x@y@z  length y  1  length (x@y)  m 
              C  nxts_rlin2_set P {A} x  C  nxts_rlin2_set P {C} y  B  nxts_rlin2_set P {C} z"
proof -
  let ?nts = "nxts_nts0 P A w"
  obtain e where e_def: "e  ?nts" and e_last: "last e = B"
    using nxts_nts0_pick[of B P A w, OF assms(4)] by auto
  from e_def have e_len: "length e = Suc (length w)"
    using nxts_nts0_len[of P A w] by simp
  from e_len e_def have e_elem: "i < length e. e!i  Nts P"
    using nxts_nts0_elem[OF assms(2)] by (auto simp: less_Suc_eq_le)
  have "finite (Nts P)"
    using finite_Nts[of P, OF assms(1)] .
  with assms(2) assms(3) have m_geq_1: "m  1"
    using less_eq_Suc_le by fastforce
  from assms(5) e_len have "xs ys zs y. e = xs @ [y] @ ys @ [y] @ zs  length (xs @ [y] @ ys @ [y])  Suc m"
    using not_distinct[OF assms(3) m_geq_1 e_elem] by simp
  then obtain xs ys zs C where e_split: "e = xs @ [C] @ ys @ [C] @ zs" and xy_len: "length (xs @ [C] @ ys @ [C])  Suc m"
    by blast
  let ?e1 = "xs @ [C]" let ?e2 = "ys @ [C]" let ?e3 = zs
  let ?x = "take (length ?e1 - 1) w" let ?y = "drop (length ?e1 - 1) (take (length ?e1+length ?e2 - 1) w)"
  let ?z = "drop (length ?e1+length ?e2 - 1) w"
  have *: "w = ?x@?y@?z"
    by (metis Nat.add_diff_assoc2 append_assoc append_take_drop_id diff_add_inverse drop_take le_add1 length_append_singleton plus_1_eq_Suc take_add)
  from e_len e_split have **: "length ?y  1" by simp
  from xy_len have ***: "length (?x@?y)  m" by simp
  have x_fac: "?x = take (length xs) w" by simp
  from ** have x_fac2: "length xs  length w"  by simp
  from e_split have x_fac3: "e ! length xs = C" by simp
  from e_def x_fac x_fac3 have ****: "C  nxts_rlin2_set P {A} ?x"
    using nxts_nts0_path_start[of "length xs" w P A, OF x_fac2] by auto
  have y_fac: "?y = drop (length xs) (take (length xs + length ys + 1) w)" by simp
  from e_len e_split have y_fac2: "length xs + length ys + 1  length w" by simp
  have y_fac3: "length xs  length xs + length ys + 1" by simp
  have y_fac4: "e ! (length xs + length ys + 1) = C"
    by (metis add.right_neutral add_Suc_right append.assoc append_Cons e_split length_Cons length_append list.size(3) nth_append_length plus_1_eq_Suc)
  from e_def y_fac x_fac3 y_fac4 have *****: "C  nxts_rlin2_set P {C} ?y"
    using nxts_nts0_path[of "length xs" w "length xs + length ys + 1" P A, OF x_fac2 y_fac2 y_fac3] by auto
  have z_fac: "?z = drop (length xs + length ys + 1) (take (length w) w)" by simp
  from e_last e_len have z_fac2: "e ! (length w) = B"
    by (metis Zero_not_Suc diff_Suc_1 last_conv_nth list.size(3))
  from e_def z_fac y_fac2 y_fac4 z_fac2 have ******: "B  nxts_rlin2_set P {C} ?z"
    using nxts_nts0_path[of "length xs + length ys + 1" w "length w" P A] by auto
  from * ** *** **** ***** ****** show ?thesis by blast
qed

text
‹We also show that a cycle can be pumped in the automata level›

lemma pump_cycle:
  assumes "B  nxts_rlin2_set P {A} x"
      and "B  nxts_rlin2_set P {B} y"
    shows "B  nxts_rlin2_set P {A} (x@(y^^i))"
using assms proof (induction i)
  case 0
  thus ?case by (simp add: assms(1))
next
  case (Suc i)
  have "B  nxts_rlin2_set P {A} (x@(y^^i))"
    using Suc.IH[OF assms] .
  with assms(2) have "B  nxts_rlin2_set P {A} (x@(y^^i)@y)"
    using nxts_trans2[of B P A "x@(y^^i)" B y] by simp
  thus ?case
    by (simp add: pow_list_comm)
qed

text
‹Combining the previous lemmas we can prove the pumping lemma where the starting non-terminal is in the production set. We simply extend the
 lemma for non-terminals that are not part of the production set, as these non-terminals will produce the empty language›

lemma pumping_re_aux:
  assumes "finite P"
      and "A  Nts P"
      and "m = card (Nts P)"
      and "accepted P A w"
      and "length w  m"
    shows "x y z. w = x@y@z  length y  1  length (x@y)  m  (i. accepted P A (x@(y^^i)@z))"
proof -
  from assms(4) obtain Z where Z_in:"Z  nxts_rlin2_set P {A} w" and Z_eps:"(Z,[])P"
    by (auto simp: accepted_def)
  obtain x y z C where *: "w = x@y@z" and **: "length y  1" and ***: "length (x@y)  m" and
              1: "C  nxts_rlin2_set P {A} x" and 2: "C  nxts_rlin2_set P {C} y" and 3: "Z  nxts_rlin2_set P {C} z"
    using nxts_split_cycle[OF assms(1) assms(2) assms(3) Z_in assms(5)] by auto
  have "i. C  nxts_rlin2_set P {A} (x@(y^^i))"
    using pump_cycle[OF 1 2] by simp
  with 3 have "i. Z  nxts_rlin2_set P {A} (x@(y^^i)@z)"
    using nxts_trans2[of C P A] by fastforce
  with Z_eps have ****: "(i. accepted P A (x@(y^^i)@z))"
    by (auto simp: accepted_def)
  from * ** *** **** show ?thesis by auto
qed

theorem pumping_lemma_re_nts:
  assumes "rlin2 P"
      and "finite P"
      and "A  Nts P"
  shows "n. w  Lang P A. length w  n 
          (x y z. w = x@y@z  length y  1  length (x@y)  n  (i. x@(y^^i)@z  Lang P A))" 
  using assms pumping_re_aux[of P A "card (Nts P)"] Lang_iff_accepted_if_rlin2[OF assms(1)] by metis

theorem pumping_lemma_regular:
  assumes "rlin2 P" and "finite P"
  shows "n. w  Lang P A. length w  n 
          (x y z. w = x@y@z  length y  1  length (x@y)  n  (i. x@(y^^i)@z  Lang P A))" 
proof (cases "A  Nts P")
  case True
  thus ?thesis
    using pumping_lemma_re_nts[OF assms True] by simp
next
  case False
  hence "Lang P A = {}"
    by (auto intro!: Lang_empty_if_notin_Lhss simp add: Lhss_def Nts_def)
  thus ?thesis by simp
qed

text ‹Most of the time pumping lemma is used in the contrapositive form
to prove that no right-linear set of productions exists.›

corollary pumping_lemma_regular_contr:
  assumes "finite P"
      and "n. w  Lang P A. length w  n  (x y z. w = x@y@z  length y  1  length (x@y)  n  (i. x@(y^^i)@z  Lang P A))" 
    shows "¬rlin2 P"
using assms pumping_lemma_regular[of P A] by metis

end