Theory Unit_Elimination

(*
Author: August Martin Stimpfle
Based on HOL4 theories by Aditi Barthwal
*)

section ‹Elimination of Unit Productions›

theory Unit_Elimination
imports Context_Free_Grammar
begin

(* Rules of the form A→B, where A and B are in nonterminals ps *)
definition unit_prods :: "('n,'t) prods  ('n,'t) Prods" where
"unit_prods ps = {(l,r)  set ps. A. r = [Nt A]}"

(* A ⇒* B where A and B are in nonTerminals g *)
definition unit_rtc :: "('n, 't) Prods  ('n × 'n) set" where
"unit_rtc Ps = {(A,B). Ps  [Nt A] ⇒* [Nt B]  {A,B}  Nts Ps}"

definition unit_rm :: "('n, 't) prods  ('n, 't) Prods" where
"unit_rm ps = (set ps - unit_prods ps)"

definition new_prods :: "('n, 't) prods  ('n, 't) Prods" where 
"new_prods ps = {(A,r). B. (B,r)  (unit_rm ps)  (A, B)  unit_rtc (unit_prods ps)}"

definition unit_elim_rel :: "('n, 't) prods  ('n, 't) prods  bool" where
"unit_elim_rel ps ps'  set ps' = (unit_rm ps  new_prods ps)"

definition Unit_free :: "('n, 't) Prods  bool" where
"Unit_free P = (A B. (A,[Nt B])  P)"

lemma Unit_free_if_unit_elim_rel: "unit_elim_rel ps ps'  Unit_free (set ps')" 
unfolding unit_elim_rel_def unit_rm_def new_prods_def unit_prods_def Unit_free_def by simp

lemma unit_elim_rel_Eps_free:
  assumes "Eps_free (set ps)" and "unit_elim_rel ps ps'"
  shows "Eps_free (set ps')"
  using assms 
  unfolding unit_elim_rel_def Eps_free_def unit_rm_def unit_prods_def new_prods_def by auto

(* Finiteness & Existence *)

(* finiteness unit_prods which also implies finiteness for unit_rm *)
fun uprods :: "('n,'t) prods  ('n,'t) prods" where
"uprods [] = []" |
"uprods (p#ps) = (if A. (snd p) = [Nt A] then p#uprods ps else uprods ps)"

lemma unit_prods_uprods: "set (uprods ps) = unit_prods ps"
unfolding unit_prods_def by (induction ps) auto

lemma finiteunit_prods: "finite (unit_prods ps)"
using unit_prods_uprods by (metis List.finite_set)

(* finiteness for unit_rtc *)
definition NtsCross :: "('n, 't) Prods   ('n × 'n) set" where
"NtsCross Ps = {(A, B). A  Nts Ps  B  Nts Ps }"

lemma finite_unit_rtc: 
  assumes "finite ps" 
  shows  "finite (unit_rtc ps)"
proof -
  have "finite (Nts ps)"
    unfolding Nts_def using assms finite_nts_syms by auto
  hence "finite (NtsCross ps)"
    unfolding NtsCross_def by auto
  moreover have "unit_rtc ps  NtsCross ps"
    unfolding unit_rtc_def NtsCross_def by blast
  ultimately show ?thesis
    using assms infinite_super by fastforce 
qed

(* finiteness for new_prods *)
definition nPSlambda :: "('n, 't) Prods  ('n × 'n)  ('n, 't) Prods" where
"nPSlambda Ps d = {fst d} × {r. (snd d, r)  Ps}"

lemma npsImage: "((nPSlambda (unit_rm ps)) ` (unit_rtc (unit_prods ps))) = new_prods ps"
  unfolding new_prods_def nPSlambda_def by fastforce

lemma finite_nPSlambda:
  assumes "finite Ps" 
  shows "finite (nPSlambda Ps d)"
proof -
  have "{(B, r). (B, r)  Ps  B = snd d}  Ps" 
    by blast
  hence "finite {(B, r). (B, r)  Ps  B = snd d}"
    using assms finite_subset by blast
  hence "finite (snd ` {(B, r). (B, r)  Ps  B = snd d})" 
    by simp
  moreover have "{r. (snd d, r)  Ps} = (snd ` {(B, r). (B, r)  Ps  B = snd d})"
    by force
  ultimately show ?thesis
    using assms unfolding nPSlambda_def by simp
qed

lemma finite_new_prods: "finite (new_prods ps)"
proof -
  have "finite (unit_rm ps)"
    unfolding unit_rm_def using finiteunit_prods by blast
  moreover have "finite (unit_rtc (unit_prods ps))"
    using finiteunit_prods finite_unit_rtc by blast
  ultimately show ?thesis
    using npsImage finite_nPSlambda finite_UN by metis
qed

lemma finiteunit_elim_relRules: "finite (unit_rm ps  new_prods ps)"
proof -
  have "finite (unit_rm ps)"
    unfolding unit_rm_def using finiteunit_prods by blast
  moreover have "finite (new_prods ps)"
    using finite_new_prods by blast
  ultimately show ?thesis by blast
qed

lemma unit_elim_rel_exists: "ps. ps'. unit_elim_rel ps ps'"
unfolding unit_elim_rel_def using finite_list[OF finiteunit_elim_relRules] by blast

definition unit_elim where
"unit_elim ps = (SOME ps'. unit_elim_rel ps ps')"

lemma unit_elim_rel_unit_elim: "unit_elim_rel ps (unit_elim ps)"
by (simp add: someI_ex unit_elim_def unit_elim_rel_exists)

(* towards theorem 4.4 *)

lemma inNonUnitProds:
  "p  unit_rm ps  p  set ps"
  unfolding unit_rm_def by blast

lemma psubDeriv:
  assumes "ps  u  v"
    and "p  ps. p  ps'"
  shows "ps'  u  v"
  using assms by (meson derive_iff)

lemma psubRtcDeriv:
  assumes "ps  u ⇒* v"
    and "p  ps. p  ps'"
  shows "ps'  u ⇒* v"
  using assms by (induction rule: rtranclp.induct) (auto simp: psubDeriv rtranclp.rtrancl_into_rtrancl)

lemma unit_prods_deriv: 
  assumes "unit_prods ps  u ⇒* v"
  shows "set ps  u ⇒* v"
proof -
  have "p  unit_prods ps. p  set ps"
    unfolding unit_prods_def by blast
  thus ?thesis 
    using assms psubRtcDeriv by blast
qed

lemma unit_elim_rel_r3:
  assumes "unit_elim_rel ps ps'" and "set ps'  u  v"
  shows "set ps  u ⇒* v"
proof -
  obtain A α r1 r2 where A: "(A, α)  set ps'  u = r1 @ [Nt A] @ r2  v = r1 @ α @ r2"
    using assms derive.cases by meson
  hence "(A, α)  unit_rm ps  (A, α)  new_prods ps"
    using assms(1) unfolding unit_elim_rel_def by simp
  thus ?thesis
  proof
    assume "(A, α)  unit_rm ps"
    hence "(A, α)  set ps"
      using inNonUnitProds by blast
    hence "set ps  r1 @ [Nt A] @ r2  r1 @ α @ r2"
      by (auto simp: derive.simps)
    thus ?thesis using A by simp
  next 
    assume "(A, α)  new_prods ps"
    from this obtain B where B: "(B, α)  unit_rm ps  (A, B)  unit_rtc (unit_prods ps)"
      unfolding new_prods_def by blast
    hence "unit_prods ps  [Nt A] ⇒* [Nt B]"
      unfolding unit_rtc_def by simp
    hence "set ps  [Nt A] ⇒* [Nt B]"
      using unit_prods_deriv by blast
    hence 1: "set ps  r1 @ [Nt A] @ r2 ⇒* r1 @ [Nt B] @ r2"
      using derives_append derives_prepend by blast
    have "(B, α)  set ps"
      using B inNonUnitProds by blast
    hence "set ps  r1 @ [Nt B] @ r2  r1 @ α @ r2"
      by (auto simp: derive.simps)
    thus ?thesis 
      using 1 A by simp
  qed
qed

lemma unit_elim_rel_r4: 
  assumes "set ps'  u ⇒* v"
    and "unit_elim_rel ps ps'"
  shows "set ps  u ⇒* v"
  using assms by (induction rule: rtranclp.induct) (auto simp: unit_elim_rel_r3 rtranclp_trans)

lemma deriv_unit_rtc:
  assumes "set ps  [Nt A]  [Nt B]"
  shows "(A, B)  unit_rtc (unit_prods ps)"
proof -
  have "(A, [Nt B])  set ps"
    using assms by (simp add: derive_singleton)
  hence "(A, [Nt B])  unit_prods ps"
    unfolding unit_prods_def by blast
  hence "unit_prods ps  [Nt A]  [Nt B]"
    by (simp add: derive_singleton)
  moreover have "B  Nts (unit_prods ps)  A  Nts (unit_prods ps)"
    using (A, [Nt B])  unit_prods ps Nts_def nts_syms_def by fastforce
  ultimately show ?thesis
    unfolding unit_rtc_def by blast
qed

lemma unit_elim_rel_r12: 
  assumes "unit_elim_rel ps ps'" "(A, α)  set ps'"
  shows "(A, α)  unit_prods ps"
  using assms unfolding unit_elim_rel_def unit_rm_def unit_prods_def new_prods_def by blast

lemma unit_elim_rel_r14: 
  assumes "unit_elim_rel ps ps'" 
    and "set ps  [Nt A]  [Nt B]" "set ps'  [Nt B]  v"
  shows "set ps'  [Nt A]  v"
proof -
  have 1: "(A, B)  unit_rtc (unit_prods ps)"
    using deriv_unit_rtc assms(2) by fast
  have 2: "(B, v)  set ps'"
    using assms(3) by (simp add: derive_singleton)
  thus ?thesis
  proof (cases "(B, v)  set ps")
    case True
    hence "(B, v)  unit_rm ps"
      unfolding unit_rm_def using assms(1) assms(3) unit_elim_rel_r12[of ps ps' B v] by (simp add: derive_singleton)
    then show ?thesis
    using 1 assms(1) unfolding unit_elim_rel_def new_prods_def derive_singleton by blast
  next
    case False
    hence "(B, v)  new_prods ps"
      using assms(1) 2 unfolding unit_rm_def unit_elim_rel_def  by simp
    from this obtain C where C: "(C, v)  unit_rm ps  (B, C)  unit_rtc (unit_prods ps)"
      unfolding new_prods_def by blast
    hence "unit_prods ps  [Nt A] ⇒* [Nt C]"
      using 1 unfolding unit_rtc_def by auto
    hence "(A, C)  unit_rtc (unit_prods ps)"
      unfolding unit_rtc_def using 1 C unit_rtc_def by fastforce
    hence "(A, v)  new_prods ps"
      unfolding new_prods_def using C by blast
    hence "(A, v)  set ps'"
      using assms(1) unfolding unit_elim_rel_def  by blast
    thus ?thesis by (simp add: derive_singleton)
  qed
qed

lemma unit_elim_rel_r20_aux:
  assumes "set ps  l @ [Nt A] @ r ⇒* map Tm v" 
  shows "α. set ps  l @ [Nt A] @ r  l @ α @ r  set ps  l @ α @ r ⇒* map Tm v  (A, α)  set ps"
proof -
  obtain l' w r' where w: "set ps  l ⇒* l'   set ps  [Nt A] ⇒* w   set ps  r ⇒* r'  map Tm v = l' @ w @ r'"
    using assms(1) by (metis derives_append_decomp)
  have "Nt A  set (map Tm v)" 
    using assms(1) by auto
  hence "[Nt A]  w" 
    using w by auto
  from this obtain α where α: "set ps  [Nt A]  α  set ps  α ⇒* w"
    by (metis w converse_rtranclpE)
  hence "(A, α)  set ps" 
    by (simp add: derive_singleton)
  thus ?thesis by (metis α w derive.intros derives_append_decomp) 
qed

lemma unit_elim_rel_r20: 
  assumes "set ps  u ⇒* map Tm v" "unit_elim_rel ps ps'"
  shows "set ps'  u ⇒* map Tm v"
  using assms proof (induction rule: converse_derives_induct)
  case base
  then show ?case by blast
next
  case (step l A r w)
  then show ?case 
  proof (cases "(A, w)  unit_prods ps")
    case True
    from this obtain B where "w = [Nt B]"
      unfolding unit_prods_def by blast
    have "set ps'  l @ w @ r ⇒* map Tm v  Nt B  set (map Tm v)"
      using step.IH assms(2) by auto
    obtain α where α: "set ps'  l @ [Nt B] @ r  l @ α @ r  set ps'  l @ α @ r ⇒* map Tm v  (B, α)  set ps'"
      using assms(2) step.IH w=_  unit_elim_rel_r20_aux[of ps' l B r v] by blast
    hence "(A, α)  set ps'"
      using assms(2) step.hyps(2) w=_ unit_elim_rel_r14[of ps ps' A B α] by (simp add: derive_singleton)
    hence "set ps'  l @ [Nt A] @ r ⇒* l @ α @ r"
      using derive.simps by fastforce
    then show ?thesis 
      using α by auto
  next
    case False
    hence "(A, w)  unit_rm ps"
      unfolding unit_rm_def using step.hyps(2) by blast
    hence "(A, w)  set ps'"
      using assms(2) unfolding unit_elim_rel_def  by simp
    hence "set ps'  l @ [Nt A] @ r  l @ w @ r"
      by (auto simp: derive.simps)
    then show ?thesis
      using step by simp
  qed
qed

theorem unit_elim_rel_lang_eq: "unit_elim_rel ps ps'  lang ps' S = lang ps S"
  unfolding Lang_def using unit_elim_rel_r4 unit_elim_rel_r20 by blast

corollary lang_unit_elim: "lang (unit_elim ps) A = lang ps A"
by (rule unit_elim_rel_lang_eq[OF unit_elim_rel_unit_elim])

end