Theory Binarize

(* Authors: Kaan Taskin *)

section ‹Transforming Long Productions Into a Binary Cascade›

theory Binarize
imports Inlining1Prod
begin

(* FFPI *) 
lemma funpow_fix: fixes f :: "'a  'a" and m :: "'a  nat"
assumes "x. m(f x) < m x  f x = x"
shows "f((f ^^ m x) x) = (f ^^ m x) x"
proof -
 have "n + m ((f^^n) x)  m x  f((f^^n) x) = (f^^n) x" for n
 proof(induction n)
   case 0
   then show ?case by simp
 next
   case (Suc n)
   then show ?case
   proof
     assume a1: "n + m ((f ^^ n) x)  m x"
     then show ?thesis
     proof (cases "m ((f ^^ Suc n) x) < m ((f ^^ n) x)")
       case True
       then show ?thesis using a1 by auto
     next
       case False
       with assms have "(f ^^ Suc n) x = (f ^^ n) x" by auto
       then show ?thesis by simp
     qed
   next
     assume "f ((f ^^ n) x) = (f ^^ n) x"
     then show ?thesis by simp
   qed
 qed
  from this[of "m x"] show ?thesis using assms[of "(f ^^ m x) x"] by auto
qed

text
‹In a binary grammar, every right-hand side consists of at most two symbols. The binarize› function should convert an
 arbitrary production list into a binary production list, without changing the language of the grammar. For this we make use
 of fixpoint iteration and define the function binarize1› for splitting a production, whose right-hand side exceeds the
 maximum number of symbols 2, into two productions. The step function is then defined as the auxiliary function binarize'›.
 We also define the count function count› that counts the right-hand sides whose length is more than or equal to 2›

fun binarize1 :: "('n :: infinite, 't) prods  ('n, 't) prods  ('n, 't) prods" where
  "binarize1 ps' [] = []"
| "binarize1 ps' ((A, []) # ps) = (A, []) # binarize1 ps' ps"
| "binarize1 ps' ((A, s0 # u) # ps) =
 (if length u  1 then (A, s0 # u) # binarize1 ps' ps
  else let B = fresh (nts ps') in (A,[s0, Nt B]) # (B, u) # ps)"

definition binarize' :: "('n::infinite, 't) prods  ('n, 't) prods" where
  "binarize' ps = binarize1 ps ps"

fun count :: "('n::infinite, 't) prods  nat" where
  "count [] = 0"
| "count ((A,u) # ps) = (if length u  2 then count ps else length u + count ps)"

definition binarize :: "('n::infinite, 't) prods  ('n, 't) prods" where
  "binarize ps = (binarize' ^^ (count ps)) ps"

text
‹Firstly we show that the binarize› function transforms a production list into a binary production list›

lemma count_dec1:
  assumes "binarize1 ps' ps  ps" 
  shows "count ps > count (binarize1 ps' ps)"
using assms proof (induction ps' ps rule: binarize1.induct)
  case (3 ps' A s0 u ps)
  show ?case proof (cases "length u  1")
    case True
    with "3.prems" have "binarize1 ps' ps  ps" by simp
    with True have "count (binarize1 ps' ps) < count ps"
      using "3.IH" by simp
    with True show ?thesis by simp
  next
    case False
    let ?B = "fresh(nts ps')"
    from False have "count (binarize1 ps' ((A, s0 # u) # ps)) = count ((A,[s0, Nt ?B]) # (?B, u) # ps)"
      by (metis binarize1.simps(3))
    also have "... = count ((?B, u) # ps)" by simp
    also from False have "... < count ((A, s0 # u) # ps)" by simp
    finally have "count (binarize1 ps' ((A, s0 # u) # ps)) < count ((A, s0 # u) # ps)" by simp
    thus ?thesis .
  qed
qed simp_all

lemma count_dec':
  assumes "binarize' ps  ps" 
  shows "count ps > count (binarize' ps)"
 using binarize'_def assms count_dec1 by metis

lemma binarize_ffpi:
  "binarize'((binarize' ^^ count x) x) = (binarize' ^^ count x) x"
proof -
  have "x. count(binarize' x) < count x  binarize' x = x"
    using count_dec' by blast
  thus ?thesis using funpow_fix by metis
qed

lemma binarize_binary1: 
  assumes "ps = binarize1 ps' ps"
  shows "(A,w)  set(binarize1 ps' ps)  length w  2"
  using assms proof (induction ps' ps rule: binarize1.induct)
  case (3 ps' C s0 u ps)
  show ?case proof (cases "length u  1")
    case True
    with 3 show ?thesis by auto
  next
    case False
    hence "(C, s0 # u) # ps  binarize1 ps' ((C, s0 # u) # ps)"
      by (metis binarize1.simps(3) list.sel(3) not_Cons_self2)
    with "3.prems"(2) show ?thesis by blast
  qed
qed auto

lemma binarize_binary':
  assumes "ps = binarize' ps"
  shows "(A,w)  set(binarize' ps)  length w  2"
  using binarize'_def assms binarize_binary1 by metis

lemma binarize_binary: "(A,w)  set(binarize ps)  length w  2"
  unfolding binarize_def using binarize_ffpi binarize_binary' by metis

text
‹Now we prove the property of language preservation›

lemma binarize1_cases:
  "binarize1 ps' ps = ps  (A ps'' B u s. set ps = {(A, s#u)}  set ps''  set (binarize1 ps' ps) = {(A,[s,Nt B]),(B,u)}  set ps''  Nt B  syms ps')"
proof (induction ps' ps rule: binarize1.induct)
  case (2 ps' C ps)
  then show ?case
  proof (cases "binarize1 ps' ps = ps")
    case True
    then show ?thesis by simp
  next
    case False
    then obtain A ps'' B u s where defs: "set ps = {(A, s # u)}  set ps''  set (binarize1 ps' ps) = {(A, [s, Nt B]), (B, u)}  set ps''  Nt B  syms ps'"
      using 2 by blast
    from defs have wit: "set ((C, []) # ps) = {(A, s # u)}  set ((C, []) # ps'')" by simp
    from defs have wit2: "set (binarize1 ps' ((C, []) # ps)) = {(A, [s, Nt B]), (B, u)}  set ((C, []) # ps'')" by simp
    from defs have wit3: "Nt B  syms ps'" by simp
    from wit wit2 wit3 show ?thesis by blast
  qed
next
  case (3 ps' C s0 u ps)
  show ?case proof (cases "length u  1")
    case T1: True
    then show ?thesis proof (cases "binarize1 ps' ps = ps")
      case T2: True
      with T1 show ?thesis by simp
    next
      case False
      with T1 obtain A ps'' B v s where defs: "set ps = {(A, s # v)}  set ps''  set (binarize1 ps' ps) = {(A, [s, Nt B]), (B, v)}  set ps''  Nt B  syms ps'"
        using 3 by blast
      from defs have wit: "set ((C, s0 # u) # ps) = {(A, s # v)}  set ((C, s0 # u) # ps'')" by simp
      from defs T1 have wit2: "set (binarize1 ps' ((C, s0 # u) # ps)) = {(A, [s, Nt B]), (B, v)}  set ((C, s0 # u) # ps'')" by simp
      from defs have wit3: "Nt B  syms ps'" by simp
      from wit wit2 wit3 show ?thesis by blast
    qed
   next
    case False
    then show ?thesis
      using fresh_nts in_Nts_iff_in_Syms[of "fresh (nts ps')" "set ps'"]
      by (fastforce simp add: Let_def)
  qed
qed simp

text
‹We show that a list of terminals map Tm x› can be derived from the original production set ps› if and only if map Tm x›
 can be derived after the transformation of the step function binarize'›, under the assumption that the starting symbol A›
 occurs in a left-hand side of at least one production in ps›. We can then extend this property to the binarize› function›

lemma binarize_der':
  assumes "A  lhss ps"
  shows "set ps  [Nt A] ⇒* map Tm x  set (binarize' ps)  [Nt A] ⇒* map Tm x"
  unfolding binarize'_def proof (cases "binarize1 ps ps = ps")
  case False
  then obtain C ps'' B u s where defs: "set ps = {(C, s # u)}  set ps''  set (binarize1 ps ps) = {(C, [s, Nt B]), (B, u)}  set ps''  Nt B  syms ps"
    by (meson binarize1_cases)
  from defs have a_not_b: "C  B" unfolding Syms_def by fast
  from defs assms have a1: "A  B" unfolding Lhss_def Syms_def by auto
  from defs have a2: "Nt B  set (map Tm x)" by auto
  from defs have a3: "Nt B  set u" unfolding Syms_def by fastforce
  from defs have "set ps = set ((C, s # u) # ps'')" by simp
  with defs a_not_b have a4: "B  lhss ((C, [s, Nt B]) # ps'')" unfolding Lhss_def Syms_def by auto
  from defs have notB: "Nt B  syms ps''" by fastforce
  then have 1: "set ps = set (substP (Nt B) u ((C, [s, Nt B]) # ps''))" proof -
    from defs have "set ps = {(C, s # u)}  set ps''" by simp
    also have "... = set ((C, s#u) # ps'')" by simp
    also have "... = set ([(C, s#u)] @ ps'')" by simp
    also from defs have "... = set ([(C,substs (Nt B) u [s, Nt B])] @ ps'')" unfolding Syms_def by fastforce
    also have "... = set ((substP (Nt B) u [(C, [s, Nt B])]) @ ps'')" by (simp add: substP_def)
    also have "... = set ((substP (Nt B) u [(C, [s, Nt B])]) @ substP (Nt B) u ps'')" using notB by (simp add: substP_skip2)
    also have "... = set (substP (Nt B) u ((C, [s, Nt B]) # ps''))" by (simp add: substP_def)
    finally show ?thesis .
  qed
  from defs have 2: "set (binarize1 ps ps) = set ((C, [s, Nt B]) # (B, u) # ps'')" by auto
  with 1 2 a1 a2 a3 a4 show "(set ps  [Nt A] ⇒* map Tm x) = (set (binarize1 ps ps)  [Nt A] ⇒* map Tm x)"
    by (simp add: derives_inlining insert_commute)
qed simp

lemma lhss_binarize1:
  "lhss ps  lhss (binarize1 ps' ps)"
proof (induction ps' ps rule: binarize1.induct)
  case (3 ps' A s0 u ps)
  then show ?case proof (cases "length u  1")
    case True
    with 3 show ?thesis by auto
  next
    case False
    let ?B = "fresh(nts ps')"
    have "lhss ((A, s0 # u) # ps) = {A}  lhss ps" by simp
    also have "...  {A}  {?B}  lhss ps" by blast
    also have "... = lhss ((A,[s0, Nt ?B]) # (?B, u) # ps)" by simp
    also from False have "... = lhss (binarize1 ps' ((A, s0 # u) # ps))"
      by (metis binarize1.simps(3))
    finally show ?thesis .
  qed
qed auto

lemma lhss_binarize'n:
  "lhss ps  lhss ((binarize'^^n) ps)"
proof (induction n)
  case (Suc n)
  thus ?case unfolding binarize'_def using lhss_binarize1 by auto
qed simp

lemma binarize_der'n: 
  assumes "A  lhss ps"
  shows "set ps  [Nt A] ⇒* map Tm x  set ((binarize'^^n) ps)  [Nt A] ⇒* map Tm x"
using assms proof (induction n)
  case (Suc n)
  hence "A  lhss ((binarize' ^^ n) ps)"
    using lhss_binarize'n by blast
  hence "set ((binarize' ^^ n) ps)  [Nt A] ⇒* map Tm x  set (binarize' ((binarize' ^^ n) ps))  [Nt A] ⇒* map Tm x"
    using binarize_der' by blast
  hence "set ((binarize' ^^ n) ps)  [Nt A] ⇒* map Tm x  set ((binarize' ^^ Suc n) ps)  [Nt A] ⇒* map Tm x"
    by simp
  with Suc show ?case by blast
qed simp

lemma binarize_der: 
  assumes "A  lhss ps"
  shows "set ps  [Nt A] ⇒* map Tm x  set (binarize ps)  [Nt A] ⇒* map Tm x"
  unfolding binarize_def using binarize_der'n[OF assms] by simp

lemma lang_binarize_lhss: 
  assumes "A  lhss ps"
  shows "lang ps A = lang (binarize ps) A"
  using binarize_der[OF assms] Lang_eqI_derives by metis

text
‹As a last step, we generalize the language preservation property to also include non-terminals which only occur at right-hand
 sides of the production set›

lemma binarize_syms1:
  assumes  "Nt A  syms ps"
    shows  "Nt A  syms (binarize1 ps' ps)"
using assms proof (induction ps' ps rule: binarize1.induct)
  case (3 ps' A s0 u ps)
  show ?case proof (cases "length u  1")
    case True
    with 3 show ?thesis by auto
  next
    case False
    with 3 show ?thesis by (auto simp: Syms_def Let_def)
  qed
qed auto

lemma binarize_lhss_nts1:
  assumes "A  lhss ps"
      and "A  nts ps'"
    shows "A  lhss (binarize1 ps' ps)"
  using assms proof (induction ps' ps rule: binarize1.induct)
  case (3 ps' A s0 u ps)
  thus ?case proof (cases "length u  1")
    case True
    with 3 show ?thesis by auto
  next
    case False
    with 3 show ?thesis by (auto simp add: Let_def fresh_nts)
  qed
qed simp_all

lemma binarize_lhss_nts'n:
  assumes "A  lhss ps"
      and "A  nts ps"
    shows "A  lhss ((binarize'^^n) ps)  A  nts ((binarize'^^n) ps)"
using assms proof (induction n)
  case (Suc n)
  thus ?case 
    unfolding binarize'_def by (simp add: binarize_lhss_nts1 binarize_syms1 in_Nts_iff_in_Syms)
qed simp

lemma binarize_lhss_nts:
   assumes "A  lhss ps"
      and  "A  nts ps"
    shows "A  lhss (binarize ps)  A  nts (binarize ps)"
  unfolding binarize_def using binarize_lhss_nts'n[OF assms] by simp

lemma binarize_nts'n:
  assumes "A  nts ps"
  shows   "A  nts ((binarize' ^^ n) ps)"
using assms proof (induction n)
  case (Suc n)
  thus ?case 
    unfolding binarize'_def by (simp add: binarize_syms1 in_Nts_iff_in_Syms)
qed simp

lemma binarize_nts:
  assumes "A  nts ps"
  shows   "A  nts (binarize ps)"
  unfolding binarize_def using assms binarize_nts'n by blast

lemma lang_binarize: 
  assumes "A  nts ps"
  shows "lang (binarize ps) A = lang ps A"
proof (cases "A  lhss ps")
  case True
  thus ?thesis
    using lang_binarize_lhss by blast
next
  case False
  thus ?thesis
    using assms binarize_lhss_nts Lang_empty_if_notin_Lhss by fast
qed

end