Theory Pumping_Lemma_CFG

(* Author: August Martin Stimpfle, Tobias Nipkow
   Original theory by Thomas Ammer
*)

section "Pumping Lemma for Context Free Grammars"

theory Pumping_Lemma_CFG
imports
  "List_Power.List_Power"
  Chomsky_Normal_Form
begin

text ‹Paths in the (implicit) parse tree of the derivation of some terminal word;
specialized for productions in CNF.›

inductive path :: "('n, 't) Prods  'n  'n list  't list  bool"
   ("(2_ / (_/ ⇒⟨_/ _))" [50, 0, 0, 50] 50) where
terminal:  "(A, [Tm a])  P  P  A ⇒⟨[A] [a]" |
left:  "(A, [Nt B, Nt C])  P  (P  B ⇒⟨p w)  (P  C ⇒⟨q v)  P  A ⇒⟨A#p (w@v)" |
right:  "(A, [Nt B, Nt C])  P  (P  B ⇒⟨p w)  (P  C ⇒⟨q v)  P  A ⇒⟨A#q (w@v)"

(* rules for derivations of words if the grammar is in cnf, resembles binary syntax trees *)
inductive cnf_derives :: "('n, 't) Prods  'n  't list  bool"
  ("(2_ / (_/ ⇒cnf/ _))" [50, 0, 50] 50) where
step:  "(A, [Tm a])  P  P  A ⇒cnf [a]"|
trans:  "(A, [Nt B, Nt C])  P  P  B ⇒cnf w  P  C ⇒cnf v  P  A ⇒cnf (w@v)"

lemma path_if_cnf_derives: "P  S ⇒cnf w  p. P  S ⇒⟨p w"
  by (induction rule: cnf_derives.induct) (auto intro: path.intros)

lemma cnf_derives_if_path: "P  S ⇒⟨p w  P  S ⇒cnf w"
  by (induction rule: path.induct) (auto intro: cnf_derives.intros)

corollary cnf_path: "P  S ⇒cnf w  (p. P  S ⇒⟨p w)"
  using path_if_cnf_derives[of P] cnf_derives_if_path[of P] by blast

lemma cnf_der: 
  assumes "P  [Nt S] ⇒* map Tm w" "CNF P" 
  shows "P  S ⇒cnf w"
using assms proof (induction w arbitrary: S rule: length_induct)
  case (1 w)
  have "Eps_free P"
    using assms(2) CNF_eq by blast
  hence "¬(P  [Nt S] ⇒* [])"
    by (simp add: Eps_free_derives_Nil)
  hence 2: "length w  0" 
    using 1 by auto
  thus ?case
  proof (cases "length w  1")
    case True
    hence "length w = 1"
      using 2 by linarith
    then obtain t where "w = [t]"
      using length_Suc_conv[of w 0] by auto 
    hence "(S, [Tm t])  P"
      using 1 assms(2) cnf_single_derive[of P S t] by simp
    thus ?thesis
      by (simp add: w = _ cnf_derives.intros(1))
  next
    case False 
    obtain A B u v where ABuv: "(S, [Nt A, Nt B])  P  P  [Nt A] ⇒* map Tm u  P  [Nt B] ⇒* map Tm v  u@v = w  u  []  v  []"
      using False assms(2) 1 cnf_word[of P S w] by auto
    have "length u < length w  length v < length w"
      using ABuv by auto
    hence "cnf_derives P A u  cnf_derives P B v"
      using 1 ABuv by blast
    thus ?thesis
      using ABuv cnf_derives.intros(2)[of S A B P u v] by blast
  qed
qed

lemma der_cnf:
  assumes "P  S ⇒cnf w" "CNF P"
  shows "P  [Nt S] ⇒* map Tm w"
using assms proof (induction rule: cnf_derives.induct)
  case (step A a P)
  then show ?case 
    by (simp add: derive_singleton r_into_rtranclp)
next
  case (trans A B C P w v)
  hence "P  [Nt A]  [Nt B, Nt C]"
    by (simp add: derive_singleton)
  moreover have "P  [Nt B] ⇒* map Tm w  P  [Nt C] ⇒* map Tm v"
    using trans by blast
  ultimately show ?case 
    using derives_append_decomp[of P [Nt B] [Nt C] map Tm (w @ v)] by auto
qed

corollary cnf_der_eq: "CNF P  (P  [Nt S] ⇒* map Tm w  P  S ⇒cnf w)"
  using cnf_der[of P S w] der_cnf[of P S w] by blast

lemma path_if_derives: 
  assumes "P  [Nt S] ⇒* map Tm w" "CNF P" 
  shows "p. P  S ⇒⟨p w"
  using assms cnf_der[of P S w] path_if_cnf_derives[of P S w] by blast

lemma derives_if_path:
  assumes "P  S ⇒⟨p w" "CNF P"
  shows "P  [Nt S] ⇒* map Tm w"
  using assms der_cnf[of P S w] cnf_derives_if_path[of P S p w] by blast

text lpath› = longest path, similar to path›; lpath› always chooses the longest path in a syntax tree›
inductive lpath :: "('n, 't) Prods  'n  'n list  't list  bool" 
   ("(2_ / (_/ ⇒⟪_/ _))" [50, 0, 0, 50] 50) where
terminal:  "(A, [Tm a])  P  P  A ⇒⟪[A] [a]" |
nonTerminals:  "(A, [Nt B, Nt C])  P  (P  B ⇒⟪p w)  (P  C ⇒⟪q v)  
                P  A ⇒⟪A#(if length p  length q then p else q) (w@v)" 

lemma path_lpath: "P  S ⇒⟨p w  p'. (P  S ⇒⟪p' w)  length p'  length p"
  by (induction rule: path.induct) (auto intro: lpath.intros)

lemma lpath_path: "(P  S ⇒⟪p w)  P  S ⇒⟨p w"
  by (induction rule: lpath.induct) (auto intro: path.intros)

(*Property of the a binary tree: Since the tree is a binary tree, word lengths are always ≤ 2^(longest path-1).
However, this version is easier to use/proof and suffices *)
lemma lpath_length: "(P  S ⇒⟪p w)  length w  2^length p"
proof (induction rule: lpath.induct)
  case (terminal A a P)
  then show ?case by simp
next
  case (nonTerminals A B C P p w q v)
  then show ?case 
  proof (cases "length p  length q")
    case True
    hence "length v  2^length p"
      using nonTerminals order_subst1[of length v λx. 2^x length q length p] by simp
    hence "length w +length v  2*2^length p"
      by (simp add: nonTerminals.IH(1) add_le_mono mult_2)
    then show ?thesis
      by (simp add: True)
  next
    case False
    hence "length w  2^length q"
      using nonTerminals order_subst1[of length w λx. 2^x length p length q] by simp 
    hence "length w +length v  2*2^length q"
      by (simp add: nonTerminals.IH add_le_mono mult_2)
    then show ?thesis
      by (simp add: False)
  qed
qed

lemma step_decomp:
  assumes "P  A ⇒⟨[A]@p w" " length p  1" 
  shows "B C p' a b. (A, [Nt B, Nt C])  P  w =a@b 
        ((P  B ⇒⟨p a  P  C ⇒⟨p' b)  (P  B ⇒⟨p' a  P  C ⇒⟨p b))"
  using assms by (cases) fastforce+

lemma steplp_decomp:
  assumes "P  A ⇒⟪[A]@p w" " length p  1" 
  shows "B C p' a b. (A, [Nt B, Nt C])  P  w =a@b 
      ((P  B ⇒⟪p a  P  C ⇒⟪p' b  length p  length p') 
       (P  B ⇒⟪p' a  P  C ⇒⟪p b  length p  length p'))"
  using assms by (cases) fastforce+

lemma path_first_step: "P  A ⇒⟨p w  q. p = [A]@q "
  by (induction rule: path.induct) simp_all

lemma no_empty: "P  A ⇒⟨p w  length w > 0"
  by (induction rule: path.induct) simp_all

lemma substitution: 
  assumes "P  A ⇒⟨p1@[X]@p2 z"
  shows "v w x. P  X ⇒⟨[X]@p2 w  z = v@w@x 
        (w' p'. P  X ⇒⟨[X]@p' w'  P  A ⇒⟨p1@[X]@p' v@w'@x) 
        (length p1 > 0  length (v@x) > 0)"
using assms proof (induction p1 arbitrary: P A z)
  case Nil
  hence "w' p'. ((P  X ⇒⟨[X]@p2 z)  z = []@z@[] 
        (P  X ⇒⟨[X]@p' w'  P  A ⇒⟨[]@[X]@p' ([]@w'@[])) 
        (length [] > 0  length ([]@[]) > 0))"
    using path_first_step[of P A] by auto
  then show ?case 
    by blast
next
  case (Cons A p1 P Y)
  hence 0: "A = Y"
    using path_first_step[of P Y] by fastforce 
  have "length (p1@[X]@p2)  1"
    by simp
  hence "B C a b q. (A, [Nt B, Nt C])  P  a@b = z  
        ((P  B ⇒⟨q a  P  C ⇒⟨p1@[X]@p2 b)  (P  B ⇒⟨p1@[X]@p2 a  P  C ⇒⟨q b))"
    using Cons.prems path_first_step step_decomp by fastforce
  then obtain B C a b q where BC: "(A, [Nt B, Nt C])  P  a@b = z  
        ((P  B ⇒⟨q a  P  C ⇒⟨p1@[X]@p2 b)  (P  B ⇒⟨p1@[X]@p2 a  P  C ⇒⟨q b))"
    by blast
  then show ?case
  proof (cases "P  B ⇒⟨q a  P  C ⇒⟨p1@[X]@p2 b")
    case True
    then obtain v w x where vwx: "P  X ⇒⟨[X]@p2 w  b = v@w@x  
          (w' p'. P  X ⇒⟨[X]@p' w'  P  C ⇒⟨p1@[X]@p' (v@w'@x))"
      using Cons.IH by blast
    hence 1: "w' p'. (P  X ⇒⟨[X]@p' w')  P  A ⇒⟨A#p1@[X]@p' (a@v@w'@x)"
      using BC by (auto intro: path.intros(3))
    obtain v' where "v' = a@v"
      by simp
    hence "length (v'@x) > 0"
      using True no_empty by fast
    hence "P  X ⇒⟨[X]@p2 w  z = v'@w@x  (w' p'.
           P  X ⇒⟨[X]@p' w'  P  A ⇒⟨A#p1@[X]@p' (v'@w'@x)) 
          (length (A#p1) > 0  length (v'@x) >0)"
      using vwx 1 BC v' = _ by simp
    thus ?thesis
      using 0 by auto
  next
    case False
    then obtain v w x where vwx: "(P  X ⇒⟨[X]@p2 w)  a = v@w@x  
          (w' p'. P  X ⇒⟨[X]@p' w'  P  B ⇒⟨p1@[X]@p' (v@w'@x))"
      using Cons.IH BC by blast
    hence 1: "w' p'. P  X ⇒⟨[X]@p' w'  P  A ⇒⟨A#p1@[X]@p' (v@w'@x@b)"
      using BC left[of A B C P] by fastforce
    hence "P  X ⇒⟨[X]@p2 w  z = v@w@x@b  (w' p'.
           P  X ⇒⟨[X]@p' w'  P  A ⇒⟨A#p1@[X]@p' (v@w'@x@b)) 
          (length (A#p1) > 0  length (a@v@x) >0)"
      using vwx BC no_empty by fastforce
    moreover have "length (v@x@b) > 0"
      using no_empty BC by fast
    ultimately show ?thesis
    using 0 by auto
  qed
qed

lemma substitution_lp: 
  assumes "P  A ⇒⟪p1@[X]@p2 z"
  shows "v w x. P  X ⇒⟪[X]@p2 w  z = v@w@x 
        (w' p'. P  X ⇒⟨[X]@p' w'  P  A ⇒⟨p1@[X]@p' v@w'@x)"
using assms proof (induction p1 arbitrary: P A z)
  case Nil
  hence "w' p'. P  X ⇒⟨[X]@p' w'  P  A ⇒⟨[]@[X]@p' ([]@w'@[])"
    using path_first_step lpath_path by fastforce
  moreover have "P  X ⇒⟪[X]@p2 z  z = []@z@[]"
    using Nil lpath.cases[of P A [X] @ p2 z] by auto
  ultimately show ?case 
    by blast
next
  case (Cons A p1 P Y)
  hence 0: "A = Y"
    using path_first_step[of P Y] lpath_path by fastforce 
  have "length (p1@[X]@p2)  1"
    by simp
  hence "B C p' a b. (A, [Nt B, Nt C])  P  z = a@b  
        ((P  B ⇒⟪p1@[X]@p2 a  P  C ⇒⟪p' b  length (p1@[X]@p2)  length p')  
         (P  B ⇒⟪p' a  P  C ⇒⟪p1@[X]@p2 b  length (p1@[X]@p2)  length p'))"
    using steplp_decomp[of P A p1@[X]@p2 z] 0 Cons by simp
  then obtain B C p' a b where BC: "(A, [Nt B, Nt C])  P  z = a@b  
        ((P  B ⇒⟪p1@[X]@p2 a  P  C ⇒⟪p' b  length (p1@[X]@p2)  length p')  
         (P  B ⇒⟪p' a  P  C ⇒⟪p1@[X]@p2 b  length (p1@[X]@p2)  length p'))"
    by blast
  then show ?case
  proof (cases "P  B ⇒⟪p1@[X]@p2 a  P  C ⇒⟪p' b  length (p1@[X]@p2)  length p'")
    case True
    then obtain v w x where vwx: "P  X ⇒⟪[X]@p2 w  a = v@w@x  
          (w' p'. P  X ⇒⟨[X]@p' w'  P  B ⇒⟨p1@[X]@p' v@w'@x)"
      using Cons.IH by blast
    hence "P  X ⇒⟪[X]@p2 w  z = v@w@x@b 
       (w' p'. P  X ⇒⟨[X]@p' w'  P  A ⇒⟨A#p1@[X]@p' v@w'@x@b)"
      using BC lpath_path[of P] path.intros(2)[of A B C P] by fastforce
    then show ?thesis
      using 0 by auto
  next
    case False
    hence "(P  B ⇒⟪p' a  P  C ⇒⟪p1@[X]@p2 b  length (p1@[X]@p2)  length p')"
      using BC by blast
    then obtain v w x where vwx: "P  X ⇒⟪[X]@p2 w  b = v@w@x  
          (w' p'. P  X ⇒⟨[X]@p' w'  P  C ⇒⟨p1@[X]@p' v@w'@x)"
      using Cons.IH by blast
    then obtain v' where "v' = a@v"
      by simp
    hence "P  X ⇒⟪[X]@p2 w  z = v'@w@x 
       (w' p'. P  X ⇒⟨[X]@p' w'  P  A ⇒⟨A#p1@[X]@p' v'@w'@x)"
      using BC lpath_path[of P] path.intros(3)[of A B C P] vwx by fastforce
    then show ?thesis
      using 0 by auto
  qed
qed

lemma path_nts: "P  S ⇒⟨p w  set p  Nts P"
  unfolding Nts_def by (induction rule: path.induct) auto

lemma inner_aux: 
  assumes "w' p'. P  A ⇒⟨[A]@p3 w  (P  A ⇒⟨[A]@p' w' 
          P  A ⇒⟨[A]@p2@[A]@p' v@w'@x)"
  shows "P  A ⇒⟨(([A]@p2)^^(Suc i)) @ [A]@p3 v^^(Suc i) @ w @ x^^(Suc i)"
  using assms proof (induction i)
  case 0
  then show ?case by simp
next
  case (Suc i)
  hence 1: "P  A ⇒⟨[A]@p2@ ([A] @ p2)^^i @ [A]@p3 v^^(Suc i) @ w @ x^^(Suc i)"
    by simp
  hence "P  A ⇒⟨[A] @ p2 @ [A] @ p2@ ([A] @ p2)^^i @ [A]@p3 v @ v^^(Suc i) @ w @ x^^(Suc i) @ x"
    using assms by fastforce
  thus ?case 
    using pow_list_Suc2[of Suc i x] by simp
qed

lemma inner_pumping: 
  assumes "CNF P" "finite P"
    and "m = card (Nts P)"
    and "z  Lang P S"
    and "length z  2^(m+1)"
  shows "u v w x y . z = u@v@w@x@y  length (v@w@x)  2^(m+1)  length (v@x)  1  (i. u@(v^^i)@w@(x^^i)@y  Lang P S)"
proof -
  obtain p' where p': "P  S ⇒⟨p' z"
    using assms Lang_def[of P S] path_if_derives[of P S] by blast
  then obtain lp where lp: "P  S ⇒⟪lp z"
    using path_lpath[of P] by blast
  hence 1: "set lp  Nts P"
    using lpath_path[of P] path_nts[of P] by blast
  have "length lp > m"
  proof -
    have "(2^(m+1)::nat)  2^length lp"
      using lp lpath_length[of P S lp z] assms(5) le_trans by blast
    hence "m+1  length lp" 
      using power_le_imp_le_exp[of 2 m+1 length lp] by auto
    thus ?thesis
      by simp
  qed
  then obtain l p where p: "lp = l@p  length p = m+1"
    using less_Suc_eq by (induction lp) fastforce+
  hence "set l  Nts P  set p  Nts P  finite (Nts P)"
    using finite P 1 finite_Nts[of P] assms(1) by auto
  hence "card (set p) < length p"
    using p assms(3) card_mono[of Nts P set p] by simp
  then obtain A p1 p2 p3 where "p = p1@[A]@p2@[A]@p3"
    by (metis distinct_card nat_neq_iff not_distinct_decomp)
  then obtain u vwx y where uy: "(P  A ⇒⟪[A]@p2@[A]@p3 vwx  z = u@vwx@y 
        (w' p'. (P  A ⇒⟨[A]@p' w'  P  S ⇒⟨l@p1@[A]@p' u@w'@y)))"
    using substitution_lp[of P S l@p1 A p2@[A]@p3 z] lp p by auto
  hence "length vwx  2^(m+1)"
    using p = _ p lpath_length[of P A [A] @ p2 @ [A] @ p3 vwx] order_subst1 by fastforce
  then obtain v w x where vwx: "P  A ⇒⟨[A]@p3 w  vwx = v@w@x 
        (w' p'. (P  A ⇒⟨[A]@p' w'  P  A ⇒⟨[A]@p2@[A]@p' v@w'@x)) 
        (length ([A]@p2) > 0  length (v@x) > 0)"
    using substitution[of P A [A]@p2 A p3 vwx] uy lpath_path[of P A] by auto
  have "P  S ⇒⟨l@p1@ (([A]@p2)^^i) @[A]@p3 u@(v^^i)@w@(x^^i)@y" for i
  proof -
    have "i. P  A ⇒⟨([A]@p2)^^(Suc i) @ [A]@p3 v^^(Suc i) @ w @ x^^(Suc i)"
      using vwx inner_aux[of P A] by blast
    hence "i. P  S ⇒⟨l@p1@(([A]@p2)^^(Suc i)) @[A]@p3 u@ (v^^(Suc i)) @ w @ (x^^(Suc i)) @y"
      using uy by fastforce
    moreover have "P  S ⇒⟨l@p1@(([A]@p2)^^0) @[A]@p3 u@ (v^^0) @ w @ (x^^0) @y"
      using vwx uy by auto
    ultimately show "P  S ⇒⟨l@p1@ (([A]@p2)^^i) @[A]@p3 u@(v^^i)@w@(x^^i)@y"
      by (induction i) simp_all
  qed
  hence "i. u@(v^^i)@w@(x^^i)@y  Lang P S"
    unfolding Lang_def using assms(1) assms(2) derives_if_path[of P S] by blast
  hence "z = u@v@w@x@y  length (v@w@x)  2^(m+1)  1  length (v@x)  ( i. u@(v^^i)@w@(x^^i)@ y  Lang P S)"
    using vwx uy length vwx  2 ^ (m + 1) by (simp add: Suc_leI)
  then show ?thesis
    by blast
qed

abbreviation "pumping_property L n  z  L. length z  n 
  (u v w x y. z = u @ v @ w @ x @ y  length (v@w@x)  n  length (v@x)  1
         (i. u @ v^^i @ w @ x^^i @ y  L))"

theorem Pumping_Lemma_CNF:
  assumes "CNF P" "finite P"
  shows "n. pumping_property (Lang P S) n"
using inner_pumping[OF assms, of card (Nts P)] by blast

theorem Pumping_Lemma:
  assumes "finite (P :: ('n::infinite,'t)Prods)"
  shows "n. pumping_property (Lang P S) n"
proof -
  obtain ps where "set ps = P" using finite_list[OF assms] by blast
  obtain ps' :: "('n,'t)prods" where ps': "CNF(set ps')" "lang ps' S= lang ps S - {[]}"
    using cnf_exists[of S ps] by auto
  let ?P' = "set ps'"
  have P': "CNF ?P'" "finite ?P'" using ps'(1) by auto
  from Pumping_Lemma_CNF[OF P', of S] obtain n where
    pump: "pumping_property (Lang ?P' S) n" by blast
  then have "pumping_property (Lang ?P' S) (Suc n)"
    by (metis Suc_leD nle_le)
  then have "pumping_property (Lang P S) (Suc n)"
    using ps'(2) set ps = P by (metis Diff_iff list.size(3) not_less_eq_eq singletonD zero_le)
  then show ?thesis by blast
qed

end