Theory Epsilon_Elimination

(*
Author: August Martin Stimpfle
Based on HOL4 theories by Aditi Barthwal
*)

section ‹Elimination of Epsilon Productions›

theory Epsilon_Elimination
imports Context_Free_Grammar
begin

inductive nullable :: "('n,'t) prods  ('n,'t) sym  bool"
for ps where
  NullableSym:
  " (A, w)  set ps; s  set w. nullable ps s
   nullable ps (Nt A)"

abbreviation "nullables ps w  (s  set w. nullable ps s)"

lemma nullables_if: 
  assumes "set ps  u ⇒* v" 
    and "u=[a]" "nullables ps v"
  shows "nullables ps u"
  using assms
proof(induction arbitrary: a rule: rtranclp.induct)
  case (rtrancl_refl a)
  then show ?case by simp
next
  case (rtrancl_into_rtrancl u v w)
  from set ps  v  w obtain A α l r where : "v = l @ [Nt A] @ r  w = l @ α @ r  (A, α)  set ps"
    by (auto simp: derive.simps)
  from this nullables ps w have "nullables ps α  nullables ps l  nullables ps r"
    by simp
  hence "nullables ps [Nt A]"
    using  nullable.simps by auto
  from this nullables ps α  nullables ps l  nullables ps r have "nullables ps v"
    using  by auto
  thus ?case
    using rtrancl_into_rtrancl.IH rtrancl_into_rtrancl.prems(1) by blast
qed

lemma nullable_if: "set ps  [a] ⇒* []  nullable ps a"
  using nullables_if[of ps "[a]" "[]" a] by simp

lemma nullable_aux: "sset gamma. nullable ps s  set ps  [s] ⇒* []  set ps  gamma ⇒* []"
proof (induction gamma)
  case (Cons a list)
  hence "set ps  list ⇒* []"
    by simp
  moreover have "set ps  [a] ⇒* []"
    using Cons by simp
  ultimately show ?case 
    using derives_Cons[of set ps list [] a] by simp
qed simp

lemma if_nullable: "nullable ps a  set ps  [a] ⇒* []"
proof (induction rule: nullable.induct)
  case (NullableSym x gamma)
    hence "set ps  [Nt x] ⇒* gamma" 
      using derive_singleton by blast
    also have "set ps  gamma ⇒* []" 
      using NullableSym nullable_aux by blast
  finally show ?case .
qed

corollary nullable_iff: "nullable ps a  set ps  [a] ⇒* []"
  by (auto simp: nullable_if if_nullable)

fun eps_closure :: "('n, 't) prods  ('n, 't) syms  ('n, 't) syms list" where
  "eps_closure ps [] = [[]]" |
  "eps_closure ps (s#sl) = (
    if nullable ps s then (map ((#) s) (eps_closure ps sl)) @ eps_closure ps sl 
    else map ((#) s) (eps_closure ps sl))"

definition eps_elim :: "('n, 't) prods  ('n, 't) Prods" where
"eps_elim ps  {(l,r'). r. (l,r)  set ps  r'  set (eps_closure ps r)  (r'  [])}"

definition eps_elim_rel :: "('n,'t) prods  ('n,'t) prods  bool" where 
  "eps_elim_rel ps ps' set ps'= eps_elim ps"

lemma Eps_free_if_eps_elim_rel: "eps_elim_rel ps ps'  Eps_free (set ps')"
unfolding eps_elim_rel_def eps_elim_def Eps_free_def by blast

(* auxiliary function to prove finiteness *)
definition eps_elim_fun :: "('n, 't) prods  ('n, 't) prod  ('n, 't) Prods" where 
  "eps_elim_fun ps p = {(l',r'). l' = fst p  r'  set (eps_closure ps (snd p))  (r'  [])}"

lemma eps_elim_fun_eq: "eps_elim ps = ((eps_elim_fun ps) ` set ps)"
proof 
  show "eps_elim ps  ( (eps_elim_fun ps ` set ps))" 
   unfolding eps_elim_def eps_elim_fun_def by auto
next
  show "((eps_elim_fun ps) ` set ps)  eps_elim ps"
  proof
    fix x
    assume "x  ((eps_elim_fun ps) ` set ps)"
    obtain l r' where "x = (l,r')" by fastforce
    hence "(l,r')  ((eps_elim_fun ps) ` set ps)" 
      using x  ((eps_elim_fun ps) ` set ps) by simp
    hence 1: "r. r'  set (eps_closure ps r)  (r'  [])  (l,r)  set ps" 
      using eps_elim_fun_def by fastforce
    from this  obtain r where "r'  set (eps_closure ps r)  (l,r)  set ps" 
      by blast
    thus "x  eps_elim ps" unfolding eps_elim_fun_def eps_elim_def
      using 1 x = (l, r') by blast 
  qed
qed

lemma finite_eps_elim: "finite (eps_elim ps)" 
proof -
  have "p  set ps. finite (eps_elim_fun ps p)"
    unfolding eps_elim_fun_def by auto
  hence "finite (((eps_elim_fun ps) ` set ps))"
    using finite_UN by simp
  thus ?thesis using eps_elim_fun_eq by metis
qed

lemma eps_elim_rel_exists: "ps. ps'. eps_elim_rel ps ps'"
  unfolding eps_elim_rel_def by (simp add: finite_list finite_eps_elim)

lemma eps_closure_nullable:  "[]  set (eps_closure ps w)  nullables ps w"
proof (induction w)
  case Nil
  then show ?case by simp
next
  case (Cons a r)
  hence "nullable ps a"
    using image_iff[of [] eps_closure ps {a#r}] by auto
  then show ?case 
    using Cons Un_iff by auto
qed

lemma eps_elim_rel_1: "r'  set (eps_closure ps r)  set ps  r ⇒* r'"
proof (induction r arbitrary: r')
  case (Cons a r)
  then show ?case 
  proof (cases "nullable ps a")
    case True
    obtain e where e: "e  set (eps_closure ps r)  (r' = (a#e)  r' = e)"
      using Cons.prems True by auto
    hence 1: "set ps  r ⇒* e" 
      using Cons.IH by blast
    hence 2: "set ps  [a]@r ⇒* [a]@e" 
      using e derives_prepend by blast
    have "set ps  [a] ⇒* []" 
      using True if_nullable by blast
    hence "set ps  [a]@r ⇒* r" 
      using derives_append by fastforce
    thus ?thesis 
      using 1 2 e by force 
  next
    case False
    obtain e where e: "e  set (eps_closure ps r)  (r' = (a#e))"
      using Cons.prems False by auto
    hence "set ps  r ⇒* e" 
      using Cons.IH by simp
    hence "set ps  [a]@r ⇒* [a]@e" 
      using derives_prepend by blast
    thus ?thesis
      using e by simp
  qed
qed simp

lemma eps_elim_rel_r2: 
  assumes "set ps'  u  v" and "eps_elim_rel ps ps'" 
  shows "set ps  u ⇒* v"
  using assms 
proof -
  obtain A α x y where A: "(A, α)  set ps' u = x @ [Nt A] @ y  v = x @ α @ y"
    using assms derive.cases by meson
  hence 1: "(A, α)  {(l,r'). r. (l,r)  set ps  r'  set (eps_closure ps r)  (r'  [])}"
    using assms(2) unfolding eps_elim_rel_def eps_elim_def by simp
  obtain r where r: "(A, r)  set ps  α  set (eps_closure ps r)"
    using 1 by blast
  hence "set ps  r ⇒* α" 
    using eps_elim_rel_1 by blast
  hence 2: "set ps  x @ r @ y ⇒* x @ α @ y"
    using r derives_prepend derives_append by blast
  hence "set ps  x @ [Nt A] @ y  x @ r @ y" 
    using r derive.simps by fast
  thus ?thesis 
    using 2 by (simp add: A)
qed

lemma eps_elim_rel_r3:
  assumes "set ps'  u ⇒* v" and "eps_elim_rel ps ps'" 
  shows "set ps  u ⇒* v"
    using assms by (induction v rule: rtranclp_induct) (auto simp: eps_elim_rel_r2 rtranclp_trans)

lemma eps_elim_rel_r5: "r  set (eps_closure ps r)" 
  by (induction r) auto

lemma eps_elim_rel_r4:
  assumes "(l,r)  set ps"
    and "eps_elim_rel ps ps'"
    and "(r'  [])" 
    and "r'  set (eps_closure ps r)"
  shows "(l,r')  set ps'"
  using assms unfolding eps_elim_rel_def eps_elim_def by blast

lemma eps_elim_rel_r7: 
  assumes "eps_elim_rel ps ps'"
    and "set ps  [Nt A]  v"
    and "v'  set (eps_closure ps v)  (v'  [])"
  shows "set ps'  [Nt A]  v'"
proof -
  have "(A,v)  set ps" 
    using assms(2) by (simp add: derive_singleton)
  hence "(A,v')  set ps'" 
    using assms eps_elim_rel_r4 conjE by fastforce
  thus ?thesis 
    using derive_singleton by fast
qed

lemma eps_elim_rel_r12a: 
  assumes "x'  set (eps_closure ps x)"
    and "y'  set (eps_closure ps y)"
  shows "(x'@y')  set (eps_closure ps (x@y))"
  using assms by (induction x arbitrary: x' y y' rule: eps_closure.induct) auto

lemma eps_elim_rel_r12b:
  assumes "x'  set (eps_closure ps x)"
    and "y'  set (eps_closure ps y)"
    and "z'  set (eps_closure ps z)"
  shows "(x'@y'@z')  set (eps_closure ps (x@y@z))"
  using assms 
  by (induction x arbitrary: x' y y' z z' rule: eps_closure.induct) (auto simp: eps_elim_rel_r12a)

lemma eps_elim_rel_r14:
  assumes "r'  set (eps_closure ps (x@y))"
  shows "x' y'. (r'=x'@y')  x'  set (eps_closure ps x)  y'  set (eps_closure ps y)"
  using assms
proof (induction x arbitrary: y r' rule: eps_closure.induct)
  case (2 ps s sl)
  then show ?case
  proof -
    have "x' y'. s # x = x' @ y'  (x'  (#) s ` set (eps_closure ps sl)  x'  set (eps_closure ps sl))  y'  set (eps_closure ps y)"
      if "r' y. r'  set (eps_closure ps (sl @ y))  x' y'. r' = x' @ y'  x'  set (eps_closure ps sl)  y'  set (eps_closure ps y)"
        and "nullable ps s"
        and "x  set (eps_closure ps (sl @ y))"
        and "r' = s # x"
      for x :: "('a, 'b) sym list"
      using that by (metis append_Cons imageI)
    moreover have "x' y'. r' = x' @ y'  (x'  (#) s ` set (eps_closure ps sl)  x'  set (eps_closure ps sl))  y'  set (eps_closure ps y)"
      if "r' y. r'  set (eps_closure ps (sl @ y))  x' y'. r' = x' @ y'  x'  set (eps_closure ps sl)  y'  set (eps_closure ps y)"
        and "nullable ps s"
        and "r'  set (eps_closure ps (sl @ y))"
      using that by metis
    moreover have "x' y'. s # x = x' @ y'  x'  (#) s ` set (eps_closure ps sl)  y'  set (eps_closure ps y)"
      if "r' y. r'  set (eps_closure ps (sl @ y))  x' y'. r' = x' @ y'  x'  set (eps_closure ps sl)  y'  set (eps_closure ps y)"
        and "¬ nullable ps s"
        and "x  set (eps_closure ps (sl @ y))"
        and "r' = s # x"
      for x :: "('a, 'b) sym list"
      using that by (metis append_Cons imageI)
    ultimately show ?thesis
      using 2 by auto
  qed
qed simp

lemma eps_elim_rel_r15:
  assumes "set ps  [Nt S] ⇒* u"
    and "eps_elim_rel ps ps'"
    and "v  set (eps_closure ps u)  (v  [])"
  shows "set ps'  [Nt S] ⇒* v"
  using assms
proof (induction u arbitrary: v rule: derives_induct)
  case base
  then show ?case
    by (cases "nullable ps (Nt S)") auto
next
  case (step x A y w)
  then obtain x' w' y' where 
    v: "(v = (x'@w'@y'))  x'  set (eps_closure ps x)  w'  set (eps_closure ps w)  y'  set (eps_closure ps y)"
    using step eps_elim_rel_r14 by metis
  then show ?case
  proof (cases "w' = []")
    case True
      hence "v = x'@y'" 
        using v by simp 
      have "[]  set (eps_closure ps w)"
        using True v by simp
      hence "nullables ps w"
        using eps_closure_nullable by blast
      hence "[]  set (eps_closure ps [Nt A])" 
        using step(2) NullableSym by fastforce
      hence "(x'@y')  set (eps_closure ps (x@[Nt A]@y))"
        using eps_elim_rel_r12b[of x' ps x [] [Nt A] y' y] v by simp
      then show ?thesis 
        using v = x' @ y' step by blast
  next
    case False
      have "(x'@[Nt A]@y')  set (eps_closure ps (x@[Nt A]@y)) "
        using eps_elim_rel_r12b[of x' ps x [Nt A] [Nt A] y' y] eps_elim_rel_r5[of [Nt A] ps] v by blast
      hence 1: "set ps'  [Nt S] ⇒* (x'@[Nt A]@y')" 
        using step by blast
      have "set ps  [Nt A]  w" 
        using step(2) derive_singleton by blast
      hence "set ps'  [Nt A]  w'"
        using eps_elim_rel_r7[of ps ps' A w w'] False step v by blast
      hence "set ps'  (x'@[Nt A]@y')  (x'@w'@y')" 
        using derive_append derive_prepend by blast
      thus ?thesis using 1
      by (simp add: v step.prems(2))
  qed
qed

theorem eps_elim_rel_eq_if_noe:
  assumes "eps_elim_rel ps ps'"
    and "[]  lang ps S"
  shows "lang ps S = lang ps' S"
proof 
  show "lang ps S  lang ps' S"
  proof 
    fix x
    assume "x  lang ps S"
    have "x. set ps  [Nt S] ⇒* x  x  []"
      using assms Lang_def by fastforce
    hence "(map Tm x)  set (eps_closure ps (map Tm x))" 
      using eps_elim_rel_r5 by auto
    hence "set ps'  [Nt S] ⇒* (map Tm x)"
      using assms x  lang ps S Lang_def eps_elim_rel_r15[of ps S map Tm x] by fast
    thus "x  lang ps' S"
      using Lang_def x  lang ps S by fast 
  qed
next
  show "lang ps' S  lang ps S"
  proof
    fix x'
    assume "x'  lang ps' S"
    show "x'  lang ps S" 
      using assms Lang_def x'  lang ps' S eps_elim_rel_r3[of ps' [Nt S] map Tm x' ps] by fast
  qed
qed

(* correctness *)
lemma noe_lang_eps_elim_rel_aux: 
  assumes "ps  [Nt S] ⇒* w" "w = []"  
  shows "A. ps  [Nt S] ⇒* [Nt A]  (A, w)  ps"
  using assms by (induction w rule: rtranclp_induct) (auto simp: derive.simps)

lemma noe_lang_eps_elim_rel: "eps_elim_rel ps ps'  []  lang ps' S"
proof (rule ccontr)
  assume "eps_elim_rel ps ps'" "¬([]  lang ps' S)"
  hence "set ps'  [Nt S] ⇒* map Tm []"
    using Lang_def by fast
  hence "set ps'  [Nt S] ⇒* []"
    by simp
  hence "A. set ps'  [Nt S] ⇒* [Nt A]  (A, [])  set ps'"
    using noe_lang_eps_elim_rel_aux[of set ps'] by blast
  thus False 
    using eps_elim_rel ps ps' unfolding eps_elim_rel_def eps_elim_def by blast
qed

theorem eps_elim_rel_lang_eq: "eps_elim_rel ps ps' lang ps' S = lang ps S - {[]}"
proof 
  assume "eps_elim_rel ps ps'"
  show "lang ps' S  lang ps S - {[]}"
  proof 
    fix w
    assume "w  lang ps' S"
    hence "w  lang ps' S - {[]}"
      using noe_lang_eps_elim_rel[of ps] eps_elim_rel ps ps' by simp
    thus "w  lang ps S - {[]}"
      using eps_elim_rel ps ps' by (auto simp: Lang_def eps_elim_rel_r3)
  qed
next
  assume "eps_elim_rel ps ps'"
  show "lang ps S - {[]}  lang ps' S"
  proof 
    fix w
    assume "w  lang ps S - {[]}"
    hence 1: "(map Tm w)  []" 
      by simp
    have 2: "set ps  [Nt S] ⇒* (map Tm w)"
      using w  lang ps S - {[]} Lang_def by fast
    have "(map Tm w)  set (eps_closure ps (map Tm w)) "
      using w  lang ps S - {[]} eps_elim_rel_r5 by blast
    hence "set ps'  [Nt S] ⇒* (map Tm w)"
      using 1 2 eps_elim_rel_r15[of ps] eps_elim_rel ps ps' by simp
    thus "w  lang ps' S"
      by (simp add: Lang_def)
  qed
qed

end