Theory CFG_To_PDA

section ‹Equivalence of CFG and PDA›

subsection ‹CFG to PDA›

text ‹Starting from a CFG, we construct an equivalent single-state PDA.
The formalization is based on the Lean formalization by Leichtfried\cite{lean}.›

theory CFG_To_PDA
imports
  Pushdown_Automata
  Context_Free_Grammar.Context_Free_Grammar
begin

datatype sing_st = Q_loop

instance sing_st :: finite
proof
  have *: "UNIV = {Q_loop}"
    by (auto intro: sing_st.exhaust)
  show "finite (UNIV :: sing_st set)"
    by (simp add: *)
qed

instance sym :: (finite, finite) finite
proof
  have *: "UNIV = {t. s. t = Nt s}  {t. s. t = Tm s}"
    by (auto intro: sym.exhaust)
  show "finite (UNIV :: ('a, 'b) sym set)"
    by (simp add: * full_SetCompr_eq)
qed

locale cfg_to_pda =
  fixes G :: "('n :: finite, 't :: finite) Cfg"
  assumes finite_G: "finite (Prods G)"
begin

fun pda_of_cfg :: "sing_st  't  ('n,'t) sym  (sing_st × ('n,'t) syms) set" where
  "pda_of_cfg Q_loop a (Tm b) = (if a = b then {(Q_loop, [])} else {})"
| "pda_of_cfg _ _ _ = {}"

fun pda_eps_of_cfg :: "sing_st  ('n,'t) sym  (sing_st × ('n,'t) syms) set" where
  "pda_eps_of_cfg Q_loop (Nt A) = {(Q_loop, α)| α. (A, α)  Prods G}"
| "pda_eps_of_cfg _ _ = {}"

definition cfg_to_pda_pda :: "(sing_st, 't, ('n,'t) sym) pda" where
  "cfg_to_pda_pda   init_state = Q_loop, init_symbol = Nt (Start G), final_states = {}, 
                    delta = pda_of_cfg, delta_eps = pda_eps_of_cfg "

lemma pda_cfg_to_pda: "pda cfg_to_pda_pda"
proof (standard, goal_cases)
  case (1 p x z)
  have "finite (pda_of_cfg p x z)"
    by (induction p x z rule: pda_of_cfg.induct) auto
  then show ?case
    by (simp add: cfg_to_pda_pda_def)
next
  case (2 p z)
  let ?h = "λ(A,α). (Q_loop, α)"
  have *: "A. {(Q_loop, α) |α. (A, α)  Prods G}  (?h ` Prods G)" by auto
  have **: "finite (?h ` Prods G)"
    by (simp add: finite_G)
  have "A. finite {(Q_loop, α) |α. (A, α)  Prods G}"
    using finite_subset[OF * **] by simp
  hence "finite (pda_eps_of_cfg p z)"
    by (induction p z rule: pda_eps_of_cfg.induct) auto
  then show ?case
    by (simp add: cfg_to_pda_pda_def)
qed

lemma cfg_to_pda_cons_tm:
  "pda.step1 cfg_to_pda_pda (Q_loop, a#w, Tm a#γ) (Q_loop, w, γ)"
using pda.step1_rule[OF pda_cfg_to_pda] by (simp add: cfg_to_pda_pda_def)

lemma cfg_to_pda_cons_nt:
  assumes "(A, α)  Prods G"
  shows "pda.step1 cfg_to_pda_pda (Q_loop, w, Nt A#γ) (Q_loop, w, α@γ)"
using assms pda.step1_rule[OF pda_cfg_to_pda] by (simp add: cfg_to_pda_pda_def)

lemma cfg_to_pda_cons_tms:
  "pda.steps cfg_to_pda_pda (Q_loop, w@w', map Tm w @ γ) (Q_loop, w', γ)"
proof (induction w)
  case (Cons a w)
  have "pda.step1 cfg_to_pda_pda (Q_loop, (a # w) @ w', map Tm (a # w) @ γ) (Q_loop, w @ w', map Tm w @ γ)"
    using cfg_to_pda_cons_tm by simp
  with Cons.IH show ?case
    using pda.step1_steps[OF pda_cfg_to_pda] pda.steps_trans[OF pda_cfg_to_pda] by blast
qed (simp add: pda.steps_refl[OF pda_cfg_to_pda])

lemma cfg_to_pda_nt_cons:
  assumes "pda.step1 cfg_to_pda_pda (Q_loop, w, Nt A#γ) (Q_loop, w', β)"
  shows "α. (A, α)  Prods G  β = α @ γ  w' = w"
proof -
  from assms have "(β0. w' = w  β = β0@γ  (Q_loop, β0)  pda_eps_of_cfg Q_loop (Nt A)) 
                      (a β0. w = a # w'  β = β0@γ  (Q_loop,β0)  pda_of_cfg Q_loop a (Nt A))"
    using pda.step1_rule[OF pda_cfg_to_pda] by (simp add: cfg_to_pda_pda_def)
  thus ?thesis
    by (induction Q_loop "Nt A :: ('n, 't) sym" rule: pda_eps_of_cfg.induct) auto
qed

lemma cfg_to_pda_tm_stack_cons:
  assumes "pda.step1 cfg_to_pda_pda (Q_loop, w, Tm a # β) (Q_loop, w', β')"
  shows "w = a # w'  β = β'"
proof -
  from assms have "(β0. w' = w  β' = β0@β  (Q_loop, β0)  pda_eps_of_cfg Q_loop (Tm a)) 
                    (a0 β0. w = a0#w'  β' = β0@β  (Q_loop, β0)  pda_of_cfg Q_loop a0 (Tm a))"
    using pda.step1_rule[OF pda_cfg_to_pda] by (simp add: cfg_to_pda_pda_def)
  thus ?thesis
    by (induction Q_loop "Tm a :: ('n, 't) sym" rule: pda_eps_of_cfg.induct, auto) (metis emptyE, metis empty_iff prod.inject singletonD)
qed

lemma cfg_to_pda_tm_stack_path:
  assumes "pda.steps cfg_to_pda_pda (Q_loop, w, Tm a # α) (Q_loop, [], [])"
  shows "w'. w = a#w'  pda.steps cfg_to_pda_pda (Q_loop, w', α) (Q_loop, [], [])"
proof -
  from assms obtain q' w' α' where step1: "pda.step1 cfg_to_pda_pda (Q_loop, w, Tm a # α) (q', w', α')" and 
                                   steps: "pda.steps cfg_to_pda_pda (q', w', α') (Q_loop, [], [])"
    using pda.steps_not_refl_split_first[OF pda_cfg_to_pda] by blast
  have q'_def: "q' = Q_loop"
    using sing_st.exhaust by blast
  from step1[unfolded q'_def] have "(β0. w' = w  α' = β0@α  (Q_loop, β0)  pda_eps_of_cfg Q_loop (Tm a)) 
                     (a0 β0. w = a0#w'  α' = β0@α  (Q_loop, β0)  pda_of_cfg Q_loop a0 (Tm a))"
    using pda.step1_rule[OF pda_cfg_to_pda] by (simp add: cfg_to_pda_pda_def)
  hence "w = a # w'  α' = α"
    by (induction Q_loop "Tm a :: ('n, 't) sym" rule: pda_eps_of_cfg.induct, auto) (metis empty_iff, metis empty_iff prod.inject singletonD)
  with steps q'_def show ?thesis by simp
qed

lemma cfg_to_pda_tms_stack_path:
  assumes "pda.steps cfg_to_pda_pda (Q_loop, w, map Tm v @ α) (Q_loop, [], [])"
  shows "w'. w = v @ w'  pda.steps cfg_to_pda_pda (Q_loop, w', α) (Q_loop, [], [])"
using assms cfg_to_pda_tm_stack_path by (induction v arbitrary: w) fastforce+

lemma cfg_to_pda_accepts_if_G_derives:
  assumes "Prods G  α ⇒l* map Tm w"
  shows "pda.steps cfg_to_pda_pda (Q_loop, w, α) (Q_loop, [], [])"
using assms proof (induction rule: converse_rtranclp_induct)
  case base
  then show ?case
    using cfg_to_pda_cons_tms[where ?w' = "[]" and  = "[]"] by simp 
next
  case (step y z)
  from step(1) obtain A α u1 u2 where prod: "(A,α)  Prods G" and y_def: "y = map Tm u1 @ Nt A # u2" and z_def: "z = map Tm u1 @ α @ u2"
    using derivel_iff[of "Prods G" y z] by blast
  from step(3) z_def obtain w' where w_def: "w = u1 @ w'" and 
                                     *: "pda.steps cfg_to_pda_pda (Q_loop, w', α @ u2) (Q_loop, [], [])"
    using cfg_to_pda_tms_stack_path by blast

  from w_def y_def have "pda.steps cfg_to_pda_pda (Q_loop, w, y) (Q_loop, w', Nt A # u2)"
    using cfg_to_pda_cons_tms by simp

  moreover from prod have "pda.steps cfg_to_pda_pda (Q_loop, w', Nt A # u2) (Q_loop, w', α @ u2)"
    using cfg_to_pda_cons_nt pda.step1_steps[OF pda_cfg_to_pda] by simp

  ultimately show ?case
    using * pda.steps_trans[OF pda_cfg_to_pda] by blast
qed

lemma G_derives_if_cfg_to_pda_accepts:
  assumes "pda.steps cfg_to_pda_pda (Q_loop, w, α) (Q_loop, [], [])"
  shows "Prods G  α ⇒* map Tm w"
using assms proof (induction "(Q_loop, w, α)" "(Q_loop, [] :: 't list, [] :: ('n, 't) syms)" 
                      arbitrary: w α rule: pda.steps_induct2[OF pda_cfg_to_pda])
  case (3 w1 α1 p2 w2 α2)
  then show ?case proof (cases α1)
    case (Cons Z' α')
    have p2_def: "p2 = Q_loop"
      using sing_st.exhaust by blast
    with 3(2,3) have IH: "Prods G  α2 ⇒* map Tm w2" by simp
    show ?thesis proof (cases Z')
      case (Nt A)
      with Cons p2_def 3(1) obtain α where prod: "(A, α)  Prods G" and α2_def: "α2 = α @ α'" and w2_def: "w2 = w1"
        using cfg_to_pda_nt_cons by blast
      from Cons Nt prod α2_def have "Prods G  α1  α2"
        using derive_iff by fast
      with IH w2_def show ?thesis by simp
    next
      case (Tm a)
      with Cons p2_def 3(1) have w_α_def: "w1 = a # w2  α' = α2"
        using cfg_to_pda_tm_stack_cons by simp
      from IH have "Prods G  Tm a # α2 ⇒* Tm a # map Tm w2"
        using derives_Cons by auto
      with Cons Tm w_α_def show ?thesis by simp
    qed
  qed (simp add: 3(1) pda.step1_empty_stack[OF pda_cfg_to_pda])
qed (simp_all add: assms)

lemma cfg_to_pda: "LangS G = pda.accept_stack cfg_to_pda_pda" (is "?L = ?P")
proof
  show "?L  ?P"
  proof
    fix x
    assume "x  ?L"
    hence "Prods G  [Nt (Start G)] ⇒* map Tm x"
      by (simp add: Lang_def)
    hence "Prods G  [Nt (Start G)] ⇒l* map Tm x"
      using derivels_iff_derives by auto
    hence "pda.steps cfg_to_pda_pda (Q_loop, x, [Nt (Start G)]) (Q_loop, [], [])"
      using cfg_to_pda_accepts_if_G_derives by simp
    thus "x  ?P"
      unfolding pda.accept_stack_def[OF pda_cfg_to_pda] by (auto simp: cfg_to_pda_pda_def)
  qed
next
  show "?P  ?L"
  proof
    fix x
    assume "x  ?P"
    then obtain q where steps: "pda.steps cfg_to_pda_pda (Q_loop, x, [Nt (Start G)]) (q, [], [])"
      unfolding pda.accept_stack_def[OF pda_cfg_to_pda] by (auto simp: cfg_to_pda_pda_def)
    have "q = Q_loop"
      using sing_st.exhaust by blast
    with steps have "Prods G  [Nt (Start G)] ⇒* map Tm x"
      using G_derives_if_cfg_to_pda_accepts by simp
    thus "x  ?L"
      by (simp add: Lang_def)
  qed
qed

end
end