Theory Chomsky_Normal_Form

(*
Authors: August Martin Stimpfle, Tobias Nipkow
Based on HOL4 theories by Aditi Barthwal
*)

section ‹Conversion to Chomsky Normal Form›

theory Chomsky_Normal_Form
imports Unit_Elimination Epsilon_Elimination
begin

definition CNF :: "('n, 't) Prods  bool" where
"CNF P  ((A,α)  P. (B C. α = [Nt B, Nt C])  (t. α = [Tm t]))"

lemma Nts_correct: "A  Nts P  (S α. (S, α)  P  (Nt A  {Nt S}  set α))"
unfolding Nts_def nts_syms_def by auto

(* Chomsky Normal Form *)

definition uniformize :: "'n::infinite  't  'n  ('n,'t)prods  ('n,'t) prods  bool" where 
      "uniformize A t S ps ps'  (
     l r p s. (l,r)  set ps  (r = p@[Tm t]@s) 
     (p  []  s  [])  A = fresh(nts ps  {S})
     ps' = ((removeAll (l,r) ps) @ [(A,[Tm t]), (l, p@[Nt A]@s)]))"

lemma uniformize_Eps_free:
  assumes "Eps_free (set ps)"
    and "uniformize A t S ps ps'"
  shows "Eps_free (set ps')"
  using assms unfolding uniformize_def Eps_free_def by force

lemma uniformize_Unit_free:
  assumes "Unit_free (set ps)"
    and "uniformize A t S ps ps'"
  shows "Unit_free (set ps')"
proof -
  have 1: "(l A. (l,[Nt A])  (set ps))"
    using assms(1) unfolding Unit_free_def by simp
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Tm t]@s)  (p  []  s  []) 
       set ps' = ((set ps - {(l,r)})  {(A,[Tm t]), (l, p@[Nt A]@s)})"
    using assms(2) set_removeAll unfolding uniformize_def by force
  hence "l' A'. (l,[Nt A'])  {(A,[Tm t]), (l, p@[Nt A]@s)}" 
    using Cons_eq_append_conv by fastforce
  hence "l' A'. (l',[Nt A'])  ((set ps - {(l,r)})  {(A,[Tm t]), (l, p@[Nt A]@s)})"
    using 1 by simp
  moreover have "set ps' = ((set ps - {(l,r)})  {(A,[Tm t]), (l, p@[Nt A]@s)})"
    using lrps by simp
  ultimately show ?thesis unfolding Unit_free_def by simp
qed

definition prodTms :: "('n,'t) prod  nat" where
"prodTms p  (if length (snd p)  1 then 0 else length (filter (isTm) (snd p)))"

definition prodNts :: "('n,'t) prod  nat" where
"prodNts p  (if length (snd p)  2 then 0 else length (filter (isNt) (snd p)))"

(* This definition can be reduced to badTmsCount ps ≡ fold (+) (map prodTms ps) 0. 
   However, the proofs turned out to be much nicer with this version  *)
fun badTmsCount :: "('n,'t) prods  nat" where
  "badTmsCount ps = sum_list(map prodTms ps)"

lemma badTmsCountSet: "(p  set ps. prodTms p = 0)  badTmsCount ps = 0"
by auto

fun badNtsCount :: "('n,'t) prods  nat" where
  "badNtsCount ps = sum_list(map prodNts ps)"

lemma badNtsCountSet: "(p  set ps. prodNts p = 0)  badNtsCount ps = 0"
  by auto

definition uniform :: "('n, 't) Prods  bool" where
  "uniform P  (A, α)  P. (t. Tm t  set α)  (t. α = [Tm t])"

lemma uniform_badTmsCount: 
  "uniform (set ps)  badTmsCount ps = 0"
proof 
  assume assm: "uniform (set ps)"
  have "p  set ps. prodTms p = 0"
  proof 
    fix p assume "p  set ps"
    hence "(t. Tm t  set (snd p))  (t. snd p = [Tm t])"
      using assm unfolding uniform_def by auto
    hence "length (snd p)  1  (t. Tm t  set (snd p))"
      by auto
    hence "length (snd p)  1  length (filter (isTm) (snd p)) = 0"
      unfolding isTm_def by (auto simp: filter_empty_conv)
    thus "prodTms p = 0"
      unfolding prodTms_def by argo
   qed
   thus "badTmsCount ps = 0"
     using badTmsCountSet by blast
next 
  assume assm: "badTmsCount ps = 0"
  have "p  set ps. ((t. Tm t  set (snd p))  (t. snd p = [Tm t]))"
  proof 
    fix p assume "p  set ps"
    hence "prodTms p = 0"
      using assm badTmsCountSet by blast
    hence "length (snd p)  1  length (filter (isTm) (snd p)) = 0"
      unfolding prodTms_def by argo
    hence "length (snd p)  1  (t. Tm t  set (snd p))"
      by (auto simp: isTm_def filter_empty_conv)
    hence "length (snd p) = 0  length (snd p) = 1  (t. Tm t  set (snd p))"
      using order_neq_le_trans by blast
    thus "(t. Tm t  set (snd p))  (t. snd p = [Tm t])"
      by (auto simp: length_Suc_conv)
  qed
  thus "uniform (set ps)"
    unfolding uniform_def by auto
qed

definition binary :: "('n, 't) Prods  bool" where
  "binary P  (A, α)  P. length α  2"

lemma binary_badNtsCount:
  assumes "uniform (set ps)" "badNtsCount ps = 0"
  shows "binary (set ps)"
proof -
  have "p  set ps. length (snd p)  2"
  proof 
    fix p assume assm: "p  set ps"
    obtain A α where "(A, α) = p"
      using prod.collapse by blast
    hence "((t. Tm t  set α)  (t. α = [Tm t]))  (prodNts (A, α) = 0)"
      using assms badNtsCountSet assm unfolding uniform_def by auto
    hence "((t. Tm t  set α)  (t. α = [Tm t]))  (length α  2  length (filter (isNt) α) = 0)"
      unfolding prodNts_def by force
    hence "((t. Tm t  set α)  (length α  1))  (length α  2  (N. Nt N  set α))"
      by (auto simp: filter_empty_conv[of isNt α] isNt_def)
    hence "length α  2"
      by (metis Suc_1 Suc_le_eq in_set_conv_nth le_Suc_eq nat_le_linear sym.exhaust)
    thus "length (snd p)  2"
      using (A, α) = p by auto
  qed
  thus ?thesis
    by (auto simp: binary_def)
qed

lemma count_bin_un: "(binary (set ps)  uniform (set ps))  (badTmsCount ps = 0  badNtsCount ps = 0)"
proof 
  assume "binary (set ps)  uniform (set ps)"
  hence "badTmsCount ps = 0  ((A, α)  set ps. length α  2)"
    unfolding binary_def using uniform_badTmsCount by blast
  thus "badTmsCount ps = 0  badNtsCount ps = 0"
    by (metis badNtsCountSet case_prodE prod.sel(2) prodNts_def)
next
  assume "badTmsCount ps = 0  badNtsCount ps = 0"
  thus "binary (set ps)  uniform (set ps)"
    using binary_badNtsCount uniform_badTmsCount by blast 
qed


definition binarizeNt :: "'n::infinite  'n  'n  'n  ('n,'t)prods  ('n,'t)prods  bool" where
"binarizeNt A B1 B2 S ps ps'  (
    l r p s. (l,r)  set ps  (r = p@[Nt B1,Nt B2]@s)
     (p  []  s  [])  (A = fresh(nts ps  {S}))
     ps' = ((removeAll (l,r) ps) @ [(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)]))"

lemma binarizeNt_Eps_free:
  assumes "Eps_free (set ps)"
    and "binarizeNt A B1 B2 S ps ps'"
  shows "Eps_free (set ps')"
  using assms unfolding binarizeNt_def Eps_free_def by force

lemma binarizeNt_Unit_free:
  assumes "Unit_free (set ps)"
    and "binarizeNt A B1 B2 S ps ps'"
  shows "Unit_free (set ps')"
  proof -
  have 1: "(l A. (l,[Nt A])  (set ps))"
    using assms(1) unfolding Unit_free_def by simp
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Nt B1,Nt B2]@s)  (p  []  s  []) 
       (set ps' = ((set ps - {(l,r)})  {(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)}))"
    using assms(2) set_removeAll unfolding binarizeNt_def by force
  hence "l' A'. (l,[Nt A'])  {(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)}" 
    using Cons_eq_append_conv by fastforce
  hence "l' A'. (l',[Nt A'])  ((set ps - {(l,r)})  {(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)})"
    using 1 by simp
  moreover have "set ps' = ((set ps - {(l,r)})  {(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)})"
    using lrps by simp
  ultimately show ?thesis unfolding Unit_free_def by simp
qed

lemma fresh_nts_single: "fresh(nts ps  {S})  nts ps  {S}"
by(rule fresh_finite) (simp add: finite_nts)

lemma binarizeNt_aux1:
  assumes "binarizeNt A B1 B2 S ps ps'"
  shows "A  B1  A  B2"
using assms fresh_nts_single unfolding binarizeNt_def Nts_def nts_syms_def by fastforce

lemma derives_sub:
  assumes "P  [Nt A]  u" and "P  xs  p @ [Nt A] @ s"
  shows "P  xs ⇒* p @ u @ s"
proof -
  have "P  p @ [Nt A] @ s ⇒* p @ u @ s"
    using assms derive_append derive_prepend by blast
  thus ?thesis
    using assms(2) by simp
qed

lemma cnf_r1Tm: 
  assumes "uniformize A t S ps ps'"
    and "set ps  lhs  rhs"
  shows "set ps'  lhs ⇒* rhs"
proof -
  obtain p' s' B v where Bv: "lhs = p'@[Nt B]@s'  rhs = p'@v@s'  (B,v)  set ps"
    using derive.cases[OF assms(2)] by fastforce
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Tm t]@s)  (p  []  s  [])  (A  Nts (set ps))
       set ps' = ((set ps - {(l,r)})  {(A,[Tm t]), (l, p@[Nt A]@s)})"
    using assms(1) set_removeAll fresh_nts_single[of ps S] unfolding uniformize_def by fastforce
  thus ?thesis
  proof (cases "(B, v)  set ps'")
    case True
    then show ?thesis
      using derive.intros[of B v] Bv by blast
  next
    case False
    hence "B = l  v = p@[Tm t]@s"
      by (simp add: lrps Bv) 
    have 1: "set ps'  [Nt l]  p@[Nt A]@s"
      using lrps by (simp add: derive_singleton)
    have "set ps'  [Nt A]  [Tm t]"
      using lrps by (simp add: derive_singleton)
    hence "set ps'  [Nt l] ⇒* p@[Tm t]@s"
      using 1 derives_sub[of set ps'] by blast
    then show ?thesis 
      using False B = l  v = p@[Tm t]@s Bv derives_append derives_prepend by blast
  qed
qed

lemma cnf_r1Nt:
  assumes "binarizeNt A B1 B2 S ps ps'"
    and "set ps  lhs  rhs"
  shows "set ps'  lhs ⇒* rhs"
proof -
  obtain p' s' C v where Cv: "lhs = p'@[Nt C]@s'  rhs = p'@v@s'  (C,v)  set ps"
    using derive.cases[OF assms(2)] by fastforce
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Nt B1,Nt B2]@s)  (p  []  s  [])  (A  Nts (set ps))
     (set ps' = ((set ps - {(l,r)})  {(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)}))"
    using assms(1) set_removeAll fresh_nts_single[of ps S] unfolding binarizeNt_def by fastforce
  thus ?thesis
  proof (cases "(C, v)  set ps'")
    case True
    then show ?thesis
      using derive.intros[of C v] Cv by blast
  next
    case False
    hence "C = l  v = p@[Nt B1,Nt B2]@s"
      by (simp add: lrps Cv)
    have 1: "set ps'  [Nt l]  p@[Nt A]@s"
      using lrps by (simp add: derive_singleton)
    have "set ps'  [Nt A]  [Nt B1,Nt B2]"
      using lrps by (simp add: derive_singleton)
    hence "set ps'  [Nt l] ⇒* p@[Nt B1,Nt B2]@s" 
      using 1 derives_sub[of set ps'] by blast
    thus ?thesis 
      using False C = l  v = p@[Nt B1,Nt B2]@s Cv derives_append derives_prepend by blast
  qed
qed

lemma slemma1_1: 
  assumes "uniformize A t S ps ps'"
    and "(A, α)  set ps'"
  shows "α = [Tm t]"
proof -
  have "A  Nts (set ps)"
    using assms(1) fresh_nts_single unfolding uniformize_def by blast
  hence "α. (A, α)  set ps"
    unfolding Nts_def by auto
  hence "α. α  [Tm t]  (A, α)  set ps'"
    using assms(1) unfolding uniformize_def by auto
  thus ?thesis 
    using assms(2) by blast
qed

lemma slemma1_1Nt:
  assumes "binarizeNt A B1 B2 S ps ps'"
    and "(A, α)  set ps'"
  shows "α = [Nt B1,Nt B2]"
proof -
  have "A  Nts (set ps)"
    using assms(1) fresh_nts_single unfolding binarizeNt_def by blast
  hence "α. (A, α)  set ps"
    unfolding Nts_def  by auto
  hence "α. α  [Nt B1,Nt B2]  (A, α)  set ps'"
    using assms(1) unfolding binarizeNt_def by auto
  thus ?thesis 
    using assms(2) by blast
qed

lemma slemma4_1:
  assumes "Nt A  set rhs"
  shows "α. rhs = substsNt A α rhs"
  using assms by (simp add: substs_skip)

lemma slemma4_3_1:
  assumes "lhs = A"
  shows "α = substsNt A α [Nt lhs]"
  using assms by simp

lemma slemma4_4:
  assumes "uniformize A t S ps ps'"
    and "(l,r)  set ps"
  shows "Nt A  set r"
proof -
  have "A  Nts (set ps)"
    using assms(1) fresh_nts_single unfolding uniformize_def by blast
  hence "S α. (S, α)  set ps  (Nt A  {Nt S}  set α)"
    using Nts_correct[of A set ps] by blast
  thus ?thesis 
    using assms(2) by blast
qed

lemma slemma4_4Nt:
  assumes "binarizeNt A B1 B2 S ps ps'"
    and "(l,r)  set ps"
  shows "(Nt A)  set r"
proof -
  have "A  Nts (set ps)"
    using assms(1) fresh_nts_single unfolding binarizeNt_def by blast
  hence "S α. (S, α)  set ps  (Nt A  {Nt S}  set α)"
    using Nts_correct[of A set ps] by blast
  thus ?thesis 
    using assms(2) by blast
qed


lemma lemma1:
  assumes "uniformize A t S ps ps'"
    and "set ps'  lhs  rhs"
  shows "substsNt A [Tm t] lhs = substsNt A [Tm t] rhs
     set ps  substsNt A [Tm t] lhs  substsNt A [Tm t] rhs"
proof -
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Tm t]@s)  (p  []  s  [])  (A  Nts (set ps)) 
       set ps' = ((set ps - {(l,r)})  {(A,[Tm t]), (l, p@[Nt A]@s)})"
    using assms(1) set_removeAll fresh_nts_single[of ps S] unfolding uniformize_def by fastforce
  obtain p' s' u v where uv: "lhs = p'@[Nt u]@s'  rhs = p'@v@s'  (u,v)  set ps'"
    using derive.cases[OF assms(2)] by fastforce
  thus ?thesis
  proof (cases "u = A")
    case True
    then show ?thesis 
    proof (cases "v = [Tm t]")
      case True
      have "substsNt A [Tm t] lhs = substsNt A [Tm t] p' @ substsNt A [Tm t] ([Nt A]@s')"
        using uv u = A by simp
      hence "substsNt A [Tm t] lhs = substsNt A [Tm t] p' @ [Tm t] @ substsNt A [Tm t] s'"
        by simp
      then show ?thesis
        by (simp add: True uv) 
    next
      case False
      then show ?thesis 
        using True uv assms(1) slemma1_1 by fastforce 
    qed
  next
    case False
    then show ?thesis 
    proof (cases "(Nt A)  set v")
      case True
      hence 1: "v = p@[Nt A]@s  Nt A  set p  Nt A  set s" 
        using lrps uv assms slemma4_4 by fastforce
      hence "substsNt A [Tm t] v = substsNt A [Tm t] p @ substsNt A [Tm t] ([Nt A]@s)"
        by simp
      hence "substsNt A [Tm t] v = p @ [Tm t] @ s"
        using 1 substs_append slemma4_1 slemma4_3_1 by metis
      hence 2: "(u, substsNt A [Tm t] v)  set ps" using lrps
        using True uv assms(1) slemma4_4 by fastforce
      have "substsNt A [Tm t] lhs = substsNt A [Tm t] p' @ substsNt A [Tm t] ([Nt u]@s')"
        using uv by simp
      hence 3: "substsNt A [Tm t] lhs = substsNt A [Tm t] p' @ [Nt u] @ substsNt A [Tm t] s'" 
        using u  A by simp
      have "substsNt A [Tm t] rhs = substsNt A [Tm t] p' @ substsNt A [Tm t] (v@s')"
        using uv by simp
      hence "substsNt A [Tm t] rhs = substsNt A [Tm t] p' @ substsNt A [Tm t] v @ substsNt A [Tm t] s'"
        by simp
      then show ?thesis 
        using 2 3 assms(2) uv derive.simps by fast
    next
      case False
      hence 1: "(u, v)  set ps" 
        using assms(1) uv u  A lrps by (simp add: in_set_conv_decomp)
       have "substsNt A [Tm t] lhs = substsNt A [Tm t] p' @ substsNt A [Tm t] ([Nt u]@s')"
         using uv by simp
       hence 2: "substsNt A [Tm t] lhs = substsNt A [Tm t] p' @ [Nt u] @ substsNt A [Tm t] s'"
         using u  A by simp
       have "substsNt A [Tm t] rhs = substsNt A [Tm t] p' @ substsNt A [Tm t] (v@s')"
         using uv by simp
       hence "substsNt A [Tm t] rhs = substsNt A [Tm t] p' @ substsNt A [Tm t] v @ substsNt A [Tm t] s'"
         by simp
       hence "substsNt A [Tm t] rhs = substsNt A [Tm t] p' @ v @ substsNt A [Tm t] s'"
         using False slemma4_1 by fastforce
       thus ?thesis 
         using 1 2 assms(2) uv derive.simps by fast
    qed
  qed
qed

lemma lemma1Nt: 
  assumes "binarizeNt A B1 B2 S ps ps'"
    and "set ps'  lhs  rhs"
  shows "(substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] rhs) 
           ((set ps)  (substsNt A [Nt B1,Nt B2] lhs)  substsNt A [Nt B1,Nt B2] rhs)"
proof -
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Nt B1,Nt B2]@s)  (p  []  s  [])  (A  Nts (set ps))
     (set ps' = ((set ps - {(l,r)})  {(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)}))"
    using assms(1) set_removeAll fresh_nts_single[of ps S] unfolding binarizeNt_def by fastforce
  obtain p' s' u v where uv: "lhs = p'@[Nt u]@s'  rhs = p'@v@s'  (u,v)  set ps'"
    using derive.cases[OF assms(2)] by fastforce
  thus ?thesis
  proof (cases "u = A")
    case True
    then show ?thesis 
    proof (cases "v = [Nt B1,Nt B2]")
      case True
      have "substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] ([Nt A]@s')"
        using uv u = A by simp
      hence 1: "substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] p' @ [Nt B1,Nt B2] @ substsNt A [Nt B1,Nt B2] s'"
        by simp
      have "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] ([Nt B1,Nt B2]@s')"
        using uv u = A True by simp
      hence "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ [Nt B1,Nt B2] @ substsNt A [Nt B1,Nt B2] s'"
        using assms(1) binarizeNt_aux1[of A B1 B2 S ps ps'] by auto 
      then show ?thesis
        using 1 by simp
    next
      case False
      then show ?thesis
        using True uv assms(1) slemma1_1Nt by fastforce
    qed
  next
    case False
    then show ?thesis 
    proof (cases "(Nt A)  set v")
      case True
      have "Nt A  set p  Nt A  set s" 
        using lrps assms(1) by (metis UnI1 UnI2 set_append slemma4_4Nt)
      hence 1: "v = p@[Nt A]@s  Nt A  set p  Nt A  set s" 
        using True lrps uv assms slemma4_4Nt[of A B1 B2 S ps ps'] binarizeNt_aux1[of A B1 B2 S ps ps'] by auto
      hence "substsNt A [Nt B1,Nt B2] v = substsNt A [Nt B1,Nt B2] p @ substsNt A [Nt B1,Nt B2] ([Nt A]@s)"
        by simp
      hence "substsNt A [Nt B1,Nt B2] v = p @ [Nt B1,Nt B2] @ s"
        using 1 substs_append slemma4_1 slemma4_3_1 by metis
      hence 2: "(u, substsNt A [Nt B1,Nt B2] v)  set ps" 
        using True lrps uv assms(1) slemma4_4Nt by fastforce
      have "substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] ([Nt u]@s')"
        using uv by simp
      hence 3: "substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] p' @ [Nt u] @ substsNt A [Nt B1,Nt B2] s'" 
        using u  A by simp
      have "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] (v@s')"
        using uv by simp
      hence "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] v @ substsNt A [Nt B1,Nt B2] s'"
        by simp
      then show ?thesis 
        using 2 3 assms(2) uv derive.simps by fast
    next
      case False
      hence 1: "(u, v)  set ps" 
        using assms(1) uv u  A lrps by (simp add: in_set_conv_decomp)
       have "substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] ([Nt u]@s')"
         using uv by simp
       hence 2: "substsNt A [Nt B1,Nt B2] lhs = substsNt A [Nt B1,Nt B2] p' @ [Nt u] @ substsNt A [Nt B1,Nt B2] s'"
         using u  A by simp
       have "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] (v@s')"
         using uv by simp
       hence "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ substsNt A [Nt B1,Nt B2] v @ substsNt A [Nt B1,Nt B2] s'"
         by simp
       hence "substsNt A [Nt B1,Nt B2] rhs = substsNt A [Nt B1,Nt B2] p' @ v @ substsNt A [Nt B1,Nt B2] s'"
         using False slemma4_1 by fastforce
       thus ?thesis 
         using 1 2 assms(2) uv derive.simps by fast
    qed
  qed
qed

lemma lemma3:
  assumes "set ps'  lhs ⇒* rhs"
    and "uniformize A t S ps ps'"
  shows "set ps  substsNt A [Tm t] lhs ⇒* substsNt A [Tm t] rhs"
  using assms
proof (induction rhs rule: rtranclp_induct)
  case (step y z)
  then show ?case 
    using lemma1[of A t S ps ps' y z] by auto 
qed simp

lemma lemma3Nt:
  assumes "set ps'  lhs ⇒* rhs"
    and "binarizeNt A B1 B2 S ps ps'"
  shows "set ps  substsNt A [Nt B1, Nt B2] lhs ⇒* substsNt A [Nt B1, Nt B2] rhs"
  using assms 
proof (induction rhs rule: rtranclp_induct)
  case (step y z)
  then show ?case 
    using lemma1Nt[of A B1 B2 S ps ps' y z] by auto
qed simp

lemma lemma4:
  assumes "uniformize A t S ps ps'" 
  shows "lang ps' S  lang ps S"
proof 
  fix w
  assume "w  lang ps' S"
  hence "set ps'  [Nt S] ⇒* map Tm w"
    unfolding Lang_def by simp
  hence "set ps'  [Nt S] ⇒* map Tm w"
    using assms unfolding uniformize_def by auto
  hence "set ps  substsNt A [Tm t] [Nt S] ⇒* substsNt A [Tm t] (map Tm w)"
    using assms lemma3[of ps' [Nt S] map Tm w] by blast
  moreover have "substsNt A [Tm t] [Nt S] = [Nt S]"
    using assms fresh_nts_single[of ps S] unfolding uniformize_def by auto
  moreover have "substsNt A [Tm t] (map Tm w) = map Tm w" 
    by simp
  ultimately show "w  lang ps S" 
    by (simp add: Lang_def)
qed

lemma lemma4Nt:
  assumes "binarizeNt A B1 B2 S ps ps'"
  shows "lang ps' S  lang ps S"
proof
  fix w
  assume "w  lang ps' S"
  hence "set ps'  [Nt S] ⇒* map Tm w"
    by (simp add: Lang_def)
  hence "set ps'  [Nt S] ⇒* map Tm w"
    using assms unfolding binarizeNt_def by auto
  hence "set ps  substsNt A [Nt B1, Nt B2] [Nt S] ⇒*  substsNt A [Nt B1, Nt B2] (map Tm w)"
    using assms lemma3Nt[of ps' [Nt S] map Tm w] by blast
  moreover have "substsNt A [Nt B1, Nt B2] [Nt S] = [Nt S]"
    using assms fresh_nts_single[of ps S] unfolding binarizeNt_def by auto
  moreover have "substsNt A [Nt B1, Nt B2] (map Tm w) = map Tm w" by simp
  ultimately show "w  lang ps S" using Lang_def
    by (metis (no_types, lifting) mem_Collect_eq)
qed

lemma slemma5_1:
  assumes "set ps  u ⇒* v"
    and "uniformize A t S ps ps'"
  shows "set ps'  u ⇒* v"
  using assms by (induction v rule: rtranclp_induct) (auto simp: cnf_r1Tm rtranclp_trans)

lemma slemma5_1Nt:
  assumes "set ps  u ⇒* v"
    and "binarizeNt A B1 B2 S ps ps'"
  shows "set ps'  u ⇒* v"
  using assms by (induction v rule: rtranclp_induct) (auto simp: cnf_r1Nt rtranclp_trans)

lemma lemma5: 
  assumes "uniformize A t S ps ps'"
  shows "lang ps S  lang ps' S"
proof 
  fix w
  assume "w  lang ps S"
  hence "set ps  [Nt S] ⇒* map Tm w"
    using assms unfolding Lang_def uniformize_def by auto 
  thus "w  lang ps' S" 
    using assms slemma5_1 Lang_def by fastforce
qed 

lemma lemma5Nt: 
  assumes "binarizeNt A B1 B2 S ps ps'"
  shows "lang ps S  lang ps' S"
proof 
  fix w
  assume "w  lang ps S"
  hence "set ps  [Nt S] ⇒* map Tm w"
    using assms unfolding Lang_def binarizeNt_def by auto 
  thus "w  lang ps' S" 
    using assms slemma5_1Nt Lang_def by fast
qed 

lemma cnf_lemma1: "uniformize A t S ps ps'  lang ps S = lang ps' S"
  using lemma4 lemma5 by fast

lemma cnf_lemma1Nt: "binarizeNt A B1 B2 S ps ps'  lang ps S = lang ps' S"
  using lemma4Nt lemma5Nt by fast

lemma uniformizeRtc_Eps_free: 
  assumes "(λx y. A t. uniformize A t S x y)^** ps ps'"
    and "Eps_free (set ps)"
  shows "Eps_free (set ps')"
  using assms by (induction rule: rtranclp_induct) (auto simp: uniformize_Eps_free)

lemma binarizeNtRtc_Eps_free:
  assumes "(λx y. A t B1 B2. binarizeNt A B1 B2 S x y)^** ps ps'"
    and "Eps_free (set ps)"
  shows "Eps_free (set ps')"
  using assms by (induction rule: rtranclp_induct) (auto simp: binarizeNt_Eps_free)

lemma uniformizeRtc_Unit_free: 
  assumes "(λx y. A t. uniformize A t S x y)^** ps ps'"
    and "Unit_free (set ps)"
  shows "Unit_free (set ps')"
  using assms by (induction rule: rtranclp_induct) (auto simp: uniformize_Unit_free)

lemma binarizeNtRtc_Unit_free:
  assumes "(λx y. A t B1 B2. binarizeNt A B1 B2 S x y)^** ps ps'"
    and "Unit_free (set ps)"
  shows "Unit_free (set ps')"
  using assms by (induction rule: rtranclp_induct) (auto simp: binarizeNt_Unit_free)

(* proofs about Nts *)

lemma uniformize_Nts: 
  assumes "uniformize A t S ps ps'" "S  Nts (set ps)"
  shows "S  Nts (set ps')"
proof -
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Tm t]@s)  (p  []  s  [])  (A  Nts (set ps)) 
       set ps' = ((set ps - {(l,r)})  {(A,[Tm t]), (l, p@[Nt A]@s)})"
    using assms(1) set_removeAll fresh_nts_single[of ps S] unfolding uniformize_def by fastforce
  thus ?thesis
  proof (cases "S  Nts {(l,r)}")
    case True
    hence "S  Nts {(A,[Tm t]), (l, p@[Nt A]@s)}"
      unfolding Nts_def nts_syms_def using lrps by auto
    then show ?thesis using  lrps Nts_Un by (metis UnCI)
  next
    case False
    hence "S  Nts (set ps - {(l,r)})"
      unfolding Nts_def using lrps 
      by (metis UnCI UnE Un_Diff_cancel2 assms(2) Nts_Un Nts_def)
    then show ?thesis 
      by (simp add: lrps Nts_def)
  qed
qed  

lemma uniformizeRtc_Nts: 
  assumes "(λx y. A t. uniformize A t S x y)^** ps ps'" "S  Nts (set ps)"
  shows "S  Nts (set ps')"
  using assms by (induction rule: rtranclp_induct) (auto simp: uniformize_Nts)

(* Termination *)

theorem cnf_lemma2: 
  assumes "(λx y. A t. uniformize A t S x y)^** ps ps'"
  shows "lang ps S = lang ps' S"
  using assms by (induction rule: rtranclp_induct) (fastforce simp: cnf_lemma1)+ 

theorem cnf_lemma2Nt: 
  assumes "(λx y. A t B1 B2. binarizeNt A B1 B2 S x y)^** ps ps'"
  shows "lang ps S = lang ps' S"
  using assms by (induction rule: rtranclp_induct) (fastforce simp: cnf_lemma1Nt)+

theorem cnf_lemma: 
  assumes "(λx y. A t. uniformize A t S x y)^** ps ps'"
    and "(λx y. A B1 B2. binarizeNt A B1 B2 S x y)^** ps' ps''"
  shows "lang ps S = lang ps'' S"
  using assms cnf_lemma2 cnf_lemma2Nt uniformizeRtc_Nts by fastforce

(* Part 2 *)
lemma badTmsCount_append: "badTmsCount (ps@ps') = badTmsCount ps + badTmsCount ps'"
by auto

lemma badNtsCount_append: "badNtsCount (ps@ps') = badNtsCount ps + badNtsCount ps'"
by auto

lemma badTmsCount_removeAll: 
  assumes "prodTms p > 0" "p  set ps"
  shows "badTmsCount (removeAll p ps) < badTmsCount ps"
  using assms by (induction ps) fastforce+

lemma badNtsCount_removeAll: 
  assumes "prodNts p > 0" "p  set ps"
  shows "badNtsCount (removeAll p ps) < badNtsCount ps"
  using assms by (induction ps) fastforce+

lemma badTmsCount_removeAll2:
  assumes "prodTms p > 0" "p  set ps" "prodTms p' < prodTms p"
  shows "badTmsCount (removeAll p ps) + prodTms p' < badTmsCount ps"
  using assms by (induction ps) fastforce+

lemma badNtsCount_removeAll2:
  assumes "prodNts p > 0" "p  set ps" "prodNts p' < prodNts p"
  shows "badNtsCount (removeAll p ps) + prodNts p' < badNtsCount ps"
  using assms by (induction ps) fastforce+

lemma lemma6_a:
  assumes "uniformize A t S ps ps'" shows "badTmsCount (ps') < badTmsCount ps"
proof -
  from assms obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Tm t]@s)  (p  []  s  [])  (A  Nts (set ps)) 
       ps' = ((removeAll (l,r) ps) @ [(A,[Tm t]), (l, p@[Nt A]@s)])"
    using fresh_nts_single[of ps S] unfolding uniformize_def by auto
  hence "prodTms (l,p@[Tm t]@s) = length (filter (isTm) (p@[Tm t]@s))"
    unfolding prodTms_def by auto
  hence 1: "prodTms (l,p@[Tm t]@s) = Suc (length (filter (isTm) (p@s)))"
    by (simp add: isTm_def)
  have 2: "badTmsCount ps' = badTmsCount (removeAll (l,r) ps) + badTmsCount [(A,[Tm t])] + badTmsCount [(l, p@[Nt A]@s)]"
    using lrps by (auto simp: badTmsCount_append)
  have 3: "badTmsCount (removeAll (l,r) ps) < badTmsCount ps"
    using 1 badTmsCount_removeAll lrps gr0_conv_Suc by blast
  have "prodTms (l, p@[Nt A]@s) = (length (filter (isTm) (p@[Nt A]@s)))  prodTms (l, p@[Nt A]@s) = 0"
    unfolding prodTms_def using lrps by simp
  thus ?thesis
  proof 
    assume "prodTms (l, p@[Nt A]@s) = (length (filter (isTm) (p@[Nt A]@s)))"
    hence "badTmsCount ps' = badTmsCount (removeAll (l,r) ps) + prodTms (l, p@[Nt A]@s)"
      using 2 by (simp add: prodTms_def)
    moreover have "prodTms (l,p@[Nt A]@s) < prodTms (l,p@[Tm t]@s)"
      using 1 prodTms (l, p @ [Nt A] @ s) = length (filter isTm (p @ [Nt A] @ s)) isTm_def by force 
    ultimately show "badTmsCount ps' < badTmsCount ps" 
      using badTmsCount_removeAll2[of "(l,r)" ps "(l,p @[Nt A]@s)"] lrps 1 by auto
  next 
    assume "prodTms (l, p@[Nt A]@s) = 0"
    hence "badTmsCount ps' = badTmsCount (removeAll (l,r) ps)"
      using 2 by (simp add: prodTms_def)
    thus "badTmsCount ps' < badTmsCount ps" 
      using 3 by simp
  qed
qed

lemma lemma6_b:
  assumes "binarizeNt A B1 B2 S ps ps'" shows "badNtsCount ps' < badNtsCount ps"
proof -
  from assms obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Nt B1,Nt B2]@s)  (p  []  s  [])  (A  Nts (set ps))
     ps' = ((removeAll (l,r) ps) @ [(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)])"
    using fresh_nts_single[of ps S] unfolding binarizeNt_def by auto
  hence "prodNts (l,p@[Nt B1,Nt B2]@s) = length (filter (isNt) (p@[Nt B1,Nt B2]@s))"
    unfolding prodNts_def by auto
  hence 1: "prodNts (l,p@[Nt B1,Nt B2]@s) = Suc (Suc (length (filter (isNt) (p@s))))"
    by (simp add: isNt_def)
  have 2: "badNtsCount ps' = badNtsCount (removeAll (l,r) ps) + badNtsCount [(A, [Nt B1,Nt B2])] + badNtsCount [(l, (p@[Nt A]@s))]"
    using lrps by (auto simp: badNtsCount_append prodNts_def)
  have 3: "badNtsCount (removeAll (l,r) ps) < badNtsCount ps"
    using lrps badNtsCount_removeAll 1 by force
  have "prodNts (l, p@[Nt A]@s) = length (filter (isNt) (p@[Nt A]@s))  prodNts (l, p@[Nt A]@s) = 0"
    unfolding prodNts_def using lrps by simp
  thus ?thesis 
  proof 
    assume "prodNts (l, p@[Nt A]@s) = length (filter (isNt) (p@[Nt A]@s))"
    hence "badNtsCount ps' = badNtsCount (removeAll (l,r) ps) + badNtsCount [(l, (p@[Nt A]@s))]"
      using 2 by (simp add: prodNts_def)
    moreover have "prodNts (l, p@[Nt A]@s) < prodNts (l,p@[Nt B1,Nt B2]@s)"
      using 1 prodNts (l, p@[Nt A]@s) = length (filter (isNt) (p@[Nt A]@s)) isNt_def by simp
    ultimately show ?thesis 
      using badNtsCount_removeAll2[of "(l,r)" ps "(l, (p@[Nt A]@s))"] 1 lrps by auto
  next 
    assume "prodNts (l, p@[Nt A]@s) = 0"
    hence "badNtsCount ps' = badNtsCount (removeAll (l,r) ps)"
      using 2 by (simp add: prodNts_def)
    thus ?thesis 
      using 3 by simp
  qed
qed

lemma badTmsCount0_removeAll: "badTmsCount ps = 0  badTmsCount (removeAll (l,r) ps) = 0" 
by auto 

lemma slemma15_a:
  assumes "binarizeNt A B1 B2 S ps ps'"
    and "badTmsCount ps = 0"
  shows "badTmsCount ps' = 0"
proof -
  obtain l r p s where lrps: "(l,r)  set ps  (r = p@[Nt B1,Nt B2]@s)  (p  []  s  [])  (A  Nts (set ps))
     (ps' = ((removeAll (l,r) ps) @ [(A, [Nt B1,Nt B2]), (l, p@[Nt A]@s)]))"
    using assms(1) fresh_nts_single[of ps S] unfolding binarizeNt_def by auto
  hence "badTmsCount ps' = badTmsCount (removeAll (l,r) ps) + badTmsCount [(l, (p@[Nt A]@s))]"
    by (auto simp: badTmsCount_append prodTms_def isTm_def)
  moreover have "badTmsCount (removeAll (l,r) ps) = 0"
    using badTmsCount0_removeAll[of ps l r] assms(2) by simp
  moreover have "badTmsCount [(l, (p@[Nt A]@s))] = 0" 
  proof -
    have "prodTms (l,p@[Nt B1,Nt B2]@s) = 0"
      using lrps assms(2) badTmsCountSet by auto
    thus "badTmsCount [(l, (p@[Nt A]@s))] = 0"
      by (auto simp: isTm_def prodTms_def)
  qed
  ultimately show ?thesis 
    by simp
qed

lemma lemma15_a:
  assumes "(λx y. A B1 B2. binarizeNt A B1 B2 S x y)^** ps ps'"
    and "badTmsCount ps = 0"
  shows "badTmsCount ps' = 0"
  using assms by (induction) (auto simp: slemma15_a simp del: badTmsCount.simps)

lemma noTms_prodTms0:
  assumes "prodTms (l,r) = 0"
  shows "length r  1  (a  set r. isNt a)"
proof -
  have "length r  1  (a. a  set r  isTm a)"
    using assms unfolding prodTms_def using empty_filter_conv by fastforce
  thus ?thesis 
    by (metis isNt_def isTm_def sym.exhaust)
qed

lemma badTmsCountNot0:
  assumes "badTmsCount ps > 0"
  shows "l r t. (l,r)  set ps  length r  2  Tm t  set r"
proof -
  have "p  set ps. prodTms p > 0"
    using assms badTmsCountSet not_gr0 by blast
  from this obtain l r where lr: "(l, r)  set ps  prodTms (l,r) > 0"
    by auto
  hence 1: "length r  2"
    unfolding prodTms_def using not_le_imp_less by fastforce
  hence "prodTms (l,r) = length (filter (isTm) r)"
    unfolding prodTms_def by simp
  hence "t. Tm t  set r"
    by (metis lr empty_filter_conv isTm_def length_greater_0_conv)
  thus ?thesis using lr 1 by blast
qed

lemma badNtsCountNot0: 
  assumes "badNtsCount ps > 0" 
  shows "l r. (l, r)  set ps  length r  3"
proof -
  have "p  set ps. prodNts p > 0"
    using assms badNtsCountSet not_gr0 by blast
  from this obtain l r where lr: "(l, r)  set ps  prodNts (l,r) > 0"
    by auto
  hence "length r  3"
    unfolding prodNts_def using not_le_imp_less by fastforce
  thus ?thesis using lr by auto
qed

lemma list_longer2: "length l  2  x  set l  (hd tl . l = hd@[x]@tl  (hd  []  tl  []))"
  using split_list_last by fastforce 

lemma list_longer3: "length l  3  (hd tl x y. l = hd@[x]@[y]@tl  (hd  []  tl  []))"
  by (metis Suc_le_length_iff append.left_neutral append_Cons neq_Nil_conv numeral_3_eq_3)

lemma lemma8_a: "badTmsCount ps > 0  ps' A t. uniformize A t S ps ps'"
proof -
  assume "badTmsCount ps > 0"
  then obtain l r t where lr: "(l,r)  set ps  length r  2  Tm t  set r"
    using badTmsCountNot0 by blast
  from this obtain p s where ps: "r = p@[Tm t]@s  (p  []  s  [])"
    unfolding isTm_def using lr list_longer2[of r] by blast
  from this obtain A ps' where "A = fresh(nts ps  {S})  ps' = removeAll (l, r) ps @ [(A, [Tm t]), (l, p @ [Nt A] @ s)]" 
    by auto
  hence "uniformize A t S ps ps'"
    unfolding uniformize_def using lr ps by auto
  thus ?thesis by blast
qed

lemma lemma8_b:
  assumes "badTmsCount ps = 0" and "badNtsCount ps > 0"
  shows "ps' A B1 B2. binarizeNt A B1 B2 S ps ps'"
proof -
  obtain l r where lr: "(l, r)  set ps  length r  3"
    using assms(2) badNtsCountNot0 by blast
  obtain p s X Y where psXY: "r = p@[X]@[Y]@s  (p  []  s  [])"
    using lr list_longer3[of r] by blast
  have "(a  set r. isNt a)"
    using lr assms(1) badTmsCountSet[of ps] noTms_prodTms0[of l r] by fastforce
  from this obtain B1 B2 where "X = Nt B1  Y = Nt B2"
    using isNt_def psXY by fastforce
  hence B: "(r = p@[Nt B1,Nt B2]@s)  (p  []  s  [])"
    using psXY by auto
  from this obtain A ps' where "A = fresh(nts ps  {S})  ps' = removeAll (l, r) ps @ [(A, [Nt B1, Nt B2]), (l, p @ [Nt A] @ s)]"
    by simp
  hence "binarizeNt A B1 B2 S ps ps'"
    unfolding binarizeNt_def using lr B by auto
  thus ?thesis by blast
qed

lemma uniformize_2: "ps'. (λx y. A t. uniformize A t S x y)^** ps ps'  (badTmsCount ps' = 0)"
proof (induction "badTmsCount ps" arbitrary: ps rule: less_induct)
  case less
  then show ?case
  proof (cases "badTmsCount ps = 0")
    case False
    from this obtain ps' A t where g': "uniformize A t S ps ps'"
      using lemma8_a by blast
    hence "badTmsCount ps' < badTmsCount ps"
      using lemma6_a[of A t] by blast
    from this obtain ps'' where "(λx y. A t. uniformize A t S x y)** ps' ps''  badTmsCount ps'' = 0"
      using less by blast
    thus ?thesis 
      using g' converse_rtranclp_into_rtranclp[of "(λx y. A t. uniformize A t S x y)" ps ps' ps''] by blast
  qed blast
qed

lemma binarizeNt_2: 
  assumes "badTmsCount ps = 0"
    shows "ps'. (λx y. A B1 B2. binarizeNt A B1 B2 S x y)^** ps ps'  (badNtsCount ps' = 0)"
using assms proof (induction "badNtsCount ps" arbitrary: ps rule: less_induct)
  case less
  then show ?case 
  proof (cases "badNtsCount ps = 0")
    case False
    from this obtain ps' A B1 B2 where g': "binarizeNt A B1 B2 S ps ps'"
      using assms lemma8_b less(2) by blast
    hence "badNtsCount ps' < badNtsCount ps"
      using lemma6_b by blast
    from this obtain ps'' where "(λx y. A B1 B2. binarizeNt A B1 B2 S x y)** ps' ps''  badNtsCount ps'' = 0"
      using less slemma15_a[of A B1 B2 S ps ps'] g' by blast
    then show ?thesis 
      using g' converse_rtranclp_into_rtranclp[of "(λx y. A B1 B2. binarizeNt A B1 B2 S x y)" ps ps' ps''] by blast
  qed blast
qed

theorem cnf_noe_nou:  fixes ps :: "('n::infinite,'t)prods"
  assumes "Eps_free (set ps)" and "Unit_free (set ps)"
  shows "ps'::('n,'t)prods. uniform (set ps')  binary (set ps')  lang ps S = lang ps' S  Eps_free (set ps')  Unit_free (set ps')"
proof -
  obtain ps' where g': "(λx y. A t. uniformize A t S x y)^** ps ps'  badTmsCount ps' = 0  Eps_free (set ps')  Unit_free (set ps')"
    using assms uniformize_2 uniformizeRtc_Eps_free uniformizeRtc_Unit_free by blast
  obtain ps'' where g'': "(λx y. A B1 B2. binarizeNt A B1 B2 S x y)^** ps' ps''  (badNtsCount ps'' = 0)  (badTmsCount ps'' = 0)"
    using g' binarizeNt_2 lemma15_a by blast
  hence "uniform (set ps'')  binary (set ps'')  Eps_free (set ps'')  Unit_free (set ps'')"
    using g' count_bin_un binarizeNtRtc_Eps_free binarizeNtRtc_Unit_free by fastforce
  moreover have "lang ps S = lang ps'' S"
    using g' g'' cnf_lemma by blast
  ultimately show ?thesis by blast
qed

text ‹Alternative form more similar to the one Jana Hofmann used:›

lemma CNF_eq: "CNF P  (uniform P  binary P  Eps_free P  Unit_free P)"
proof 
  assume "CNF P"
  hence "Eps_free P"
    unfolding CNF_def Eps_free_def by fastforce
  moreover have "Unit_free P"
    using CNF P unfolding CNF_def Unit_free_def isNt_def isTm_def by fastforce
  moreover have "uniform P"
  proof -
    have "(A,α)  P. (B C. α = [Nt B, Nt C])  (t. α = [Tm t])"
      using CNF P unfolding CNF_def.
    hence "(A, α)  P. (N  set α. isNt N)  (t. α = [Tm t])"
      unfolding isNt_def by fastforce
    hence "(A, α)  P. (t. Tm t  set α)  (t. α = [Tm t])"
      by (auto simp: isNt_def)
    thus "uniform P"
      unfolding uniform_def.
  qed
  moreover have "binary P"
    using CNF P unfolding binary_def CNF_def by auto
  ultimately show "uniform P  binary P  Eps_free P  Unit_free P"
    by blast
next 
  assume assm: "uniform P  binary P  Eps_free P  Unit_free P"
  have"p  P. (B C. (snd p) = [Nt B, Nt C])  (t. (snd p) = [Tm t])"
  proof
    fix p assume "p  P"
    obtain A α where : "(A, α) = p"
      by (metis prod.exhaust_sel)
    hence "length α = 1  length α = 2"
      using assm p  P order_neq_le_trans unfolding binary_def Eps_free_def by fastforce
    hence "(B C. (snd p) = [Nt B, Nt C])  (t. α = [Tm t])"
    proof 
      assume "length α = 1"
      hence "S. α = [S]"
        by (simp add: length_Suc_conv)
      moreover have "N. α = [Nt N]"
        using assm  p  P unfolding Unit_free_def by blast
      ultimately show ?thesis by (metis sym.exhaust)
    next
      assume "length α = 2"
      obtain B C where BC: "α = [B, C]"
        using length α = 2 by (metis One_nat_def Suc_1 diff_Suc_1 length_0_conv length_Cons neq_Nil_conv)
      have "(t. Tm t  set α)"
        using length α = 2 assm  p  P unfolding uniform_def by auto
      hence "(N  set α. A. N = Nt A)"
        by (metis sym.exhaust)
      hence "B' C'. B = Nt B'  C = Nt C'" 
        using BC by simp
      thus ?thesis using  BC by auto
    qed
    thus "(B C. (snd p) = [Nt B, Nt C])  (t. (snd p) = [Tm t])" using  by auto
  qed
  thus "CNF P" by (auto simp: CNF_def)
qed

text ‹Main Theorem: existence of CNF with the same language except for the empty word []›:›

theorem cnf_exists:
  fixes ps :: "('n::infinite,'t) prods"
  shows "ps'::('n,'t)prods. CNF(set ps')  lang ps' S = lang ps S - {[]}"
proof -
  obtain ps0 where ps0: "eps_elim_rel ps ps0"
    using eps_elim_rel_exists by blast
  obtain psu where psu: "unit_elim_rel ps0 psu"
    using unit_elim_rel_exists by blast
  hence 1: "Eps_free (set psu)  Unit_free (set psu)"
    using ps0 psu Eps_free_if_eps_elim_rel Unit_free_if_unit_elim_rel unit_elim_rel_Eps_free by fastforce
  have 2: "lang psu S = lang ps S - {[]}"
    using psu eps_elim_rel_lang_eq[OF ps0] unit_elim_rel_lang_eq[OF psu] by (simp add: eps_elim_rel_lang_eq)
  obtain ps'::"('n,'t)prods" where g': "uniform (set ps')  binary (set ps')  lang psu S = lang ps' S  Eps_free (set ps')  Unit_free (set ps')"
    using 1 cnf_noe_nou by blast
  hence "CNF (set ps')" 
    using g' CNF_eq by blast
  moreover have "lang ps' S = lang ps S - {[]}"
    using 2 g' by blast
  ultimately show ?thesis by blast
qed


text ‹Some helpful properties:›

lemma cnf_length_derive: 
  assumes "CNF P" "P  [Nt S] ⇒* α"
  shows "length α  1"
  using assms CNF_eq Eps_free_derives_Nil length_greater_0_conv less_eq_Suc_le by auto

lemma cnf_length_derive2: 
  assumes "CNF P" "P  [Nt A, Nt B] ⇒* α"
  shows "length α  2"
proof -
  obtain u v where uv: "P  [Nt A] ⇒* u  P  [Nt B] ⇒* v  α = u @ v"
    using assms(2) derives_append_decomp[of P [Nt A] [Nt B] α] by auto
  hence "length u  1  length v  1" 
    using cnf_length_derive[OF assms(1)] by blast
  thus ?thesis
    using uv by simp
qed

lemma cnf_single_derive:
  assumes "CNF P" "P  [Nt S] ⇒* [Tm t]"
  shows "(S, [Tm t])  P"
proof -
  obtain α where α: "P  [Nt S]  α  P  α ⇒* [Tm t]"
    using converse_rtranclpE[OF assms(2)] by auto
  hence 1: "(S, α)  P" 
    by (simp add: derive_singleton)
  have "A B. α = [Nt A, Nt B]"
  proof (rule ccontr)
    assume "¬ (A B. α = [Nt A, Nt B])"
    from this obtain A B where AB: "α = [Nt A, Nt B]"
      by blast
    have "w. P  [Nt A, Nt B] ⇒* w  length w  2"
      using cnf_length_derive2[OF assms(1)] by simp
    moreover have "length [Tm t] = 1"
      by simp
    ultimately show False
      using α AB by auto
  qed
  from this obtain a where "α = [Tm a]"
    using 1 assms(1) unfolding CNF_def by auto
  hence "t = a"
    using α by (simp add: derives_Tm_Cons)
  thus ?thesis 
    using 1 α = [Tm a] by blast
qed

lemma cnf_word:
  assumes "CNF P" "P  [Nt S] ⇒* map Tm w"
    and "length w  2"
  shows "A B u v. (S, [Nt A, Nt B])  P  P  [Nt A] ⇒* map Tm u  P  [Nt B] ⇒* map Tm v  u@v = w  u  []  v  []"
proof -
  have 1: "(S, map Tm w)  P"
    using assms(1) assms(3) unfolding CNF_def by auto
  have "α. P  [Nt S]  α  P  α ⇒* map Tm w"
    using converse_rtranclpE[OF assms(2)] by auto
  from this obtain α where α: "(S, α)  P  P  α ⇒* map Tm w"
    by (auto simp: derive_singleton)
  hence "(t. α = [Tm t])"
    using 1 derives_Tm_Cons[of P] derives_from_empty by auto
  hence "A B. P  [Nt S]  [Nt A, Nt B]  P  [Nt A, Nt B] ⇒* map Tm w"
    using assms(1) α derive_singleton[of P Nt S α] unfolding CNF_def by fast
  from this obtain A B where AB: "(S, [Nt A, Nt B])  P  P  [Nt A, Nt B] ⇒* map Tm w"
    using derive_singleton[of P Nt S] by blast
  hence "¬(P  [Nt A] ⇒* [])  ¬(P  [Nt B] ⇒* [])"
    using assms(1) CNF_eq Eps_free_derives_Nil by blast
  from this obtain u v where uv: "P  [Nt A] ⇒* u  P  [Nt B] ⇒* v  u@v = map Tm w  u  []  v  []"
    using AB derives_append_decomp[of P [Nt A] [Nt B] map Tm w] by force
  moreover have "u' v'. u = map Tm u'  v = map Tm v'"
    using uv map_eq_append_conv[of Tm w u v] by auto
  ultimately show ?thesis
    using AB append_eq_map_conv[of u v Tm w] list.simps(8)[of Tm] by fastforce
qed

end