Theory AnBnCn_not_CFL

(* 
Author:     Bruno Philipp, TU München
Author:     Tobias Nipkow
*)

section anbncn is Not Context-Free›

theory AnBnCn_not_CFL
imports Pumping_Lemma_CFG
begin                           

text ‹This theory proves that the language @{term "n. {[a]^^n @ [b]^^n @ [c]^^n}"}
is not context-free using the Pumping lemma.

The formal proof follows the textbook proof (e.g.\ citeHopcroftMU) closely.
The Isabelle proof is about 10\% of the length of the Coq proof by Ramos \emph{et al.} citeRamosAMQ and RamosGithub.
The latter proof suffers from excessive case analyses.›

declare count_list_pow_list[simp]

context
  fixes a b c
  assumes neq: "ab" "bc" "ca"
begin

lemma  c_greater_count0:
  assumes "x@y = [a]^^n @ [b]^^n @ [c]^^n" "length yn"
  shows "count_list x c = 0"
  using assms proof -
   have "drop (2*n) (x@y) = [c]^^n" using assms
     by simp
   then have count_c_end: "count_list (drop (2*n) (x@y)) c = n"
     by (simp)
   have "count_list (x@y) c= n" using assms neq
     by (simp)
   then have count_c_front: "count_list (take (2*n) (x@y)) c = 0"
     using count_c_end by (metis add_cancel_left_left append_take_drop_id count_list_append)
   have  "i. length y = n+i" using assms
     by presburger
   then obtain i where i: "length y= n+i"
     by blast
   then have "x = take (3*n-n-i) (x@y)"
   proof -
     have "x = take (2 * n - i) x @ take (2 * n - (i + length x)) y"
       using i by (metis add.commute add_diff_cancel_left' append_eq_conv_conj assms(1) diff_diff_left
         length_append length_pow_list_single mult_2 take_append)
     then show ?thesis
       by (simp)
   qed
   then have "x = take (3*n-n-i) (take (3*n-n) (x@y))"
     by (metis diff_le_self min_def take_take)
   then have "x = take (3*n-n-i) (take (2*n) (x@y))" 
     by fastforce  
  then show ?thesis using count_c_front count_list_0_iff in_set_takeD
    by metis
qed

lemma  a_greater_count0:
  assumes "x@y = [a]^^n @ [b]^^n @ [c]^^n" "length xn"
  shows "count_list y a = 0"
  text ‹this prof is easier than @{thm c_greater_count0} since a is at the start of the word rather than at the end ›
proof -
  have count_whole: "count_list (x@y) a = n"
    using assms neq by auto
  have take_n: "take n (x@y) = [a]^^n"
    using assms by simp
  then have count_take_n: "count_list (take n (x@y)) a = n"
    by (simp)
  have " z. x = take n (x@y) @ z" 
    by (metis append_eq_conv_conj assms(2) nat_le_iff_add take_add)
  then have  count_a_x:"count_list x a = n" using count_take_n take_n count_whole 
    by (metis add_diff_cancel_left' append.right_neutral count_list_append diff_add_zero)
  have "count_list (x@y) a = n"
    using assms neq by simp
   then have "count_list y a = 0"
    using count_a_x by simp
  then show ?thesis
    by presburger
qed

lemma a_or_b_zero:
  assumes "u@w@y = [a]^^n @ [b]^^n @ [c]^^n" "length w  n" (* neq not used *)
  shows "count_list w a = 0  count_list w c = 0"
  text ‹This lemma uses @{term "count_list w a = 0  count_list w c = 0"} similar to all following proofs, focusing on the number of a› and c› found in w› rather than the concrete structure.
        It is also the merge of the two previous lemmas to make the final proof shorter›
proof-
  show ?thesis proof (cases "length u <n")
    case True 
    have "length (u@w@y) = 3*n"  using assms 
      by simp  
    then have "length u + length w + length y = 3*n" 
      by simp
    then have "length u + length y  2*n" using assms 
      by linarith
    then have u_or_y: "length u  n  length y  n" 
      by linarith
    then  have "length yn" using True  
      by simp
    then show ?thesis using c_greater_count0[of "u@w" y n] neq assms 
      by simp
  next
    case False
    then have "length u  n" 
      by simp
    then show ?thesis using a_greater_count0[of u "w@y" n ] neq assms 
      by auto
  qed
qed

lemma count_vx_not_zero:
  assumes "u@v@w@x@y = [a]^^n @ [b]^^n @ [c]^^n" "v@x  []"
  shows "count_list (v@x) a  0  count_list (v@x) b  0  count_list (v@x) c  0"
proof -
  have set: "set ([a]^^n @ [b]^^n @ [c]^^n) = {a,b,c}" using assms pow_list_single_Nil_iff
    by (fastforce simp add: pow_list_single)
  show ?thesis proof (cases  "v[]")
    case True
    then have "dset([a]^^n @ [b]^^n @ [c]^^n). count_list v d  0"
      using assms(1)
      by (metis append_Cons count_list_0_iff in_set_conv_decomp list.exhaust list.set_intros(1))
    then have "count_list v a  0  count_list v b  0  count_list v c 0"
      using set by simp
    then show ?thesis 
      by force
  next
    case False
    then have "x[]" using assms 
      by fast
    then have "dset ([a]^^n @ [b]^^n @ [c]^^n). count_list x d  0"
    proof -
      have "dset ([a] ^^ n)  (set ([b] ^^ n)  set ([c] ^^ n)). ya  d  0 < count_list ys d"
        if "u @ v @ w @ ya # ys @ y = [a] ^^ n @ [b] ^^ n @ [c] ^^ n"
          and "x = ya # ys"
        for ya :: 'a
          and ys :: "'a list"
        using that by (metis Un_iff in_set_conv_decomp set_append)
      then show ?thesis
        using assms(1) x  [] by (auto simp: neq_Nil_conv)
    qed
    then have "count_list x a  0  count_list x b  0  count_list x c  0"
       using set by simp
    then show ?thesis 
       by force
   qed
qed

lemma not_ex_y_count:
  assumes "ik  kj  ij" "count_list w a = i" "count_list w b = k" "count_list w c = j"
  shows "¬(EX y. w = [a]^^y @ [b]^^y @ [c]^^y)"
 proof 
   assume "EX y. w = [a]^^y @ [b]^^y @ [c]^^y"
   then obtain y where y: "w = [a]^^y @ [b]^^y @ [c]^^y" 
     by blast
   then have "count_list w a = y" using neq 
     by simp
   then have i_eq_y: "i=y" using assms 
     by argo
   then have "count_list w b = y"
     using neq assms(2) y by (auto)
   then have k_eq_y: "k=y" using assms 
     by argo
   have "count_list w c = y" using neq y
     by simp
   then have j_eq_y: "j=y" using assms  
     by argo
   show False  using i_eq_y k_eq_y j_eq_y assms 
      by presburger
 qed

lemma not_in_count:
  assumes (* no neq *) "count_list w a  count_list w b  count_list w b  count_list w c  count_list w c  count_list w a"
  shows "w  {word.  n.  word = [a]^^n @ [b]^^n @ [c]^^n}"
  text ‹This definition of a word not in the language is useful as it allows us to prove a word is not in the language just by knowing the number of each letter in a word›
  using assms not_ex_y_count
  by (smt (verit, del_insts) mem_Collect_eq)

text ‹This is the central and only case analysis, which follows the textbook proofs.
The Coq proof by Ramos is considerably more involved and ends up with a total of 24 cases›

lemma pumping_application:
  assumes "u@v@w@x@y = [a]^^n @ [b]^^n @ [c]^^n"
  and "count_list (v@w@x) a = 0  count_list (v@w@x) c = 0" and "v@x[]"
  shows "u@w@y  (n. {[a]^^n @ [b]^^n @ [c]^^n})"
  text ‹In this lemma it is proven that a word @{term "u @ v^^0 @ w @ x^^0 @ y"}
        is not in the language @{term "n. {[a]^^n @ [b]^^n @ [c]^^n}"}
        as this is the easiest counterexample useful for the Pumping lemma›
proof-
  have count_word_a: "count_list (u@v@w@x@y) a = n"
    using neq assms(1) by simp
  have count_word_b: "count_list (u@v@w@x@y) b = n"
    using neq assms(1) by simp
  have count_word_c: "count_list (u@v@w@x@y) c = n"
    using neq assms(1) by simp
  have count_non_zero: "((count_list (v@x) a) 0)  (count_list (v@x) b0)  (count_list (v@x) c  0)"
    using count_vx_not_zero[of u v w x y n] assms(1,3) by simp  
  from assms(2) show ?thesis proof
    assume *: "count_list (v @ w @ x) a = 0"
    then have vx_b_or_c_not0: "count_list (v@x) b  0  count_list (v@x) c  0" using count_non_zero
      by simp
    have uwy_count_a: "count_list (u@w@y) a = n" using * count_word_a 
      by simp 
    have "count_list (u@w@y) b  n  count_list (u@w@y) c  n" using vx_b_or_c_not0 count_word_b count_word_c 
      by auto
    then have "(count_list (u@w@y) a  count_list (u@w@y) b)  (count_list (u@w@y) c  count_list (u@w@y) a)" using uwy_count_a
      by argo
    then show ?thesis using not_in_count[of "u@w@y"] 
      by blast
  next
    assume *: "count_list (v @ w @ x) c = 0"
    then have vx_a_or_b_not0: "(count_list (v@x) a0)  (count_list (v@x) b  0)" using count_non_zero
      by fastforce
    have uwy_count_c: "count_list (u@w@y) c=n" using * count_word_c 
      by auto 
    have "count_list (u@w@y) a n  count_list (u@w@y) b n" using vx_a_or_b_not0 count_word_a count_word_b
      by force
    then have "(count_list (u@w@y) a  count_list (u@w@y) b)  (count_list (u@w@y) c  count_list (u@w@y) a)" using uwy_count_c
      by argo
    then show ?thesis using not_in_count[of "u@w@y"] 
      by blast  
  qed
qed

theorem anbncn_not_cfl:
  assumes "finite (P :: ('n::infinite,'a)Prods)"
  shows "Lang P S  (n. {[a]^^n @ [b]^^n @ [c]^^n})" (is "¬ ?E")
proof
  assume "?E"
  from Pumping_Lemma[OF finite P, of S] obtain n where
    pump: "word  Lang P S. length word  n 
     (u v w x y. word = u@v@w@x@y  length (v@w@x)  n  length (v@x)  1  (i. u@(v^^i)@w@(x^^i)@y  Lang P S))" 
    by blast
  let ?word = "[a]^^n @ [b]^^n @ [c]^^n"
  have wInLg: "?word  Lang P S"
    using ?E by blast
  have "length ?word  n"
    by simp
  then obtain u v w x y where uvwxy: "?word = u@v@w@x@y  length (v@w@x)  n  length (v@x)  1  (i. u@(v^^i)@w@(x^^i)@y  Lang P S)"
    using pump wInLg by metis
  let ?vwx= "v@w@x"
  have "(count_list ?vwx  a=0 )  (count_list ?vwx c=0)" using  uvwxy a_or_b_zero assms 
    by (metis (no_types, lifting) append.assoc)
  then show False using assms uvwxy pumping_application[of u v w x y n]
    by (metis ?E append_Nil length_0_conv not_one_le_zero pow_list.simps(1))
qed

end

end