Theory Pi_Derivatives

(* Author: Dmitriy Traytel *)

section ‹Derivatives of $\Pi$-Extended Regular Expressions›
theory Pi_Derivatives
imports Pi_Regular_Exp

locale embed = project Σ wf_atom project lookup
  for Σ :: "nat  'a set"
  and wf_atom :: "nat  'b :: linorder  bool"
  and project :: "'a  'a"
  and lookup :: "'b  'a  bool" +
fixes embed :: "'a  'a list"
assumes embed: "a. a  Σ n  b  set (embed a) = (b  Σ (Suc n)  project b = a)"

subsection ‹Syntactic Derivatives›

primrec lderiv :: "'a  'b rexp  'b rexp" where
  "lderiv _ Zero = Zero"
| "lderiv _ Full = Full"
| "lderiv _ One = Zero"
| "lderiv a (Atom b) = (if lookup b a then One else Zero)"
| "lderiv a (Plus r s) = Plus (lderiv a r) (lderiv a s)"
| "lderiv a (Times r s) =
    (let r's = Times (lderiv a r) s
     in if final r then Plus r's (lderiv a s) else r's)"
| "lderiv a (Star r) = Times (lderiv a r) (Star r)"
| "lderiv a (Not r) = Not (lderiv a r)"
| "lderiv a (Inter r s) = Inter (lderiv a r) (lderiv a s)"
| "lderiv a (Pr r) = Pr (PLUS (map (λa'. lderiv a' r) (embed a)))"

primrec lderivs where
  "lderivs [] r = r"
| "lderivs (w#ws) r = lderivs ws (lderiv w r)"

subsection ‹Finiteness of ACI-Equivalent Derivatives›

lemma toplevel_summands_lderiv:
  "toplevel_summands (lderiv as r) = (stoplevel_summands r. toplevel_summands (lderiv as s))"
  by (induct r) (auto simp: Let_def)

lemma lderivs_Zero[simp]: "lderivs xs Zero = Zero"
  by (induct xs) auto

lemma lderivs_Full[simp]: "lderivs xs Full = Full"
  by (induct xs) auto

lemma lderivs_One: "lderivs xs One  {Zero, One}"
  by (induct xs) auto

lemma lderivs_Atom: "lderivs xs (Atom as)  {Zero, One, Atom as}"
proof (induct xs)
  case (Cons x xs) thus ?case by (auto intro: insertE[OF lderivs_One])
qed simp

lemma lderivs_Plus: "lderivs xs (Plus r s) = Plus (lderivs xs r) (lderivs xs s)"
  by (induct xs arbitrary: r s) auto

lemma lderivs_PLUS: "lderivs xs (PLUS ys) = PLUS (map (lderivs xs) ys)"
  by (induct ys rule: list_singleton_induct) (auto simp: lderivs_Plus)

lemma toplevel_summands_lderivs_Times: "toplevel_summands (lderivs xs (Times r s)) 
  {Times (lderivs xs r) s} 
  {r'. ys zs. r'  toplevel_summands (lderivs ys s)  ys  []  zs @ ys = xs}"
proof (induct xs arbitrary: r s)
  case (Cons x xs)
  thus ?case by (auto simp: Let_def lderivs_Plus) (fastforce intro: exI[of _ "x#xs"])+
qed simp

lemma toplevel_summands_lderivs_Star_nonempty:
  "xs  []  toplevel_summands (lderivs xs (Star r)) 
    {Times (lderivs ys r) (Star r) | ys. zs. ys  []  zs @ ys = xs}"
proof (induct xs rule: length_induct)
  case (1 xs)
  then obtain y ys where "xs = y # ys" by (cases xs) auto
  thus ?case using spec[OF 1(1)]
    by (auto dest!: subsetD[OF toplevel_summands_lderivs_Times] intro: exI[of _ "y#ys"])
       (auto elim!: impE dest!: meta_spec subsetD)

lemma toplevel_summands_lderivs_Star:
  "toplevel_summands (lderivs xs (Star r)) 
    {Star r}  {Times (lderivs ys r) (Star r) | ys. zs. ys  []  zs @ ys = xs}"
  by (cases "xs = []") (auto dest!: toplevel_summands_lderivs_Star_nonempty)

lemma ex_lderivs_Pr: "s. lderivs ass (Pr r) = Pr s"
  by (induct ass arbitrary: r) auto

lemma toplevel_summands_PLUS:
 "xs  []  toplevel_summands (PLUS (map f xs)) = (r  set xs. toplevel_summands (f r))"
  by (induct xs rule: list_singleton_induct) simp_all

lemma lderiv_toplevel_summands_Zero:
  "lderivs xs (Pr r) = Pr s; toplevel_summands r = {Zero}  toplevel_summands s = {Zero}"
proof (induct xs arbitrary: r s)
  case (Cons y ys)
  from Cons.prems(1) have "toplevel_summands (PLUS (map (λa. lderiv a r) (embed y))) = {Zero}"
  proof (cases "embed y = []")
    case False
    show ?thesis using Cons.prems(2) unfolding toplevel_summands_PLUS[OF False]
      by (subst toplevel_summands_lderiv) (simp add: False)
  qed simp
  with Cons show ?case by simp
qed simp

lemma toplevel_summands_lderivs_Pr:
  "xs  []; lderivs xs (Pr r) = Pr s 
    toplevel_summands s  {Zero}  toplevel_summands s  (xs. toplevel_summands (lderivs xs r))"
proof (induct xs arbitrary: r s)
  case (Cons y ys) note * = this
  show ?case
  proof (cases "embed y = []")
    case True with Cons show ?thesis by (cases "ys = []") (auto dest: lderiv_toplevel_summands_Zero)
    case False 
    show ?thesis
    proof (cases ys)
      case Nil with * show ?thesis
        by (auto simp: toplevel_summands_PLUS[OF False]) (metis lderivs.simps)
      case (Cons z zs)
      have "toplevel_summands s  {Zero}  toplevel_summands s 
       (xs. toplevel_summands (lderivs xs (PLUS (map (λa. lderiv a r) (embed y)))))" (is "_  ?B")
        by (rule *(1)) (auto simp: Cons *(3)[symmetric])
      thus ?thesis
        assume ?B
        also have "  (xs. toplevel_summands (lderivs xs r))"
          by (auto simp: lderivs_PLUS toplevel_summands_PLUS[OF False]) (metis lderivs.simps(2))
        finally show ?thesis ..
      qed blast
qed simp

lemma lderivs_Pr:
  "{lderivs xs (Pr r) | xs. True} 
    {Pr s | s. toplevel_summands s  {Zero} 
               toplevel_summands s  (xs. toplevel_summands (lderivs xs r))}"
  (is "?L ?R")
proof (rule subsetI)
  fix s assume "s  ?L"
  then obtain xs where "s = lderivs xs (Pr r)" by blast
  moreover obtain t where "lderivs xs (Pr r) = Pr t" using ex_lderivs_Pr by blast
  ultimately show "s  ?R"
    by (cases "xs = []") (auto dest!: toplevel_summands_lderivs_Pr elim!: allE[of _ "[]"])

lemma ACI_norm_toplevel_summands_Zero: "toplevel_summands r  {Zero}  «r» = Zero"
  by (subst ACI_norm_flatten) (auto dest: subset_singletonD)

lemma ACI_norm_lderivs_Pr:
  "ACI_norm ` {lderivs xs (Pr r) | xs. True} 
    {Pr Zero}  {Pr «s» | s. toplevel_summands s  (xs. toplevel_summands «lderivs xs r»)}"
proof (intro subset_trans[OF image_mono[OF lderivs_Pr]] subsetI,
       elim imageE CollectE exE conjE disjE)
  fix x x' s :: "'b rexp"
  assume *: "x = «x'»" "x' = Pr s" and "toplevel_summands s  {Zero}"
  hence "«Pr s» = Pr Zero" using ACI_norm_toplevel_summands_Zero by simp
  thus "x  {Pr Zero} 
    {Pr «s» |s. toplevel_summands s  (xs. toplevel_summands «lderivs xs r»)}"
    unfolding * by blast
  fix x x' s :: "'b rexp"
  assume *: "x = «x'»" "x' = Pr s" and "toplevel_summands s  (xs. toplevel_summands (lderivs xs r))"
  hence "toplevel_summands «s»  (xs. toplevel_summands «lderivs xs r»)"
    by (fastforce simp: toplevel_summands_ACI_norm)
  moreover have "x = Pr ««s»»" unfolding * ACI_norm_idem ACI_norm.simps(10) ..
  ultimately show "x  {Pr Zero} 
    {Pr «s» |s. toplevel_summands s  (xs. toplevel_summands «lderivs xs r»)}"
    by blast

lemma finite_ACI_norm_toplevel_summands: "finite B  finite {f «s» |s. toplevel_summands s  B}"
  apply (elim finite_surj [OF iffD2 [OF finite_Pow_iff], of _ _ "f o flatten PLUS o image ACI_norm"])
  apply (subst ACI_norm_flatten)
  apply auto

lemma lderivs_Not: "lderivs xs (Not r) = Not (lderivs xs r)"
  by (induct xs arbitrary: r) auto

lemma lderivs_Inter: "lderivs xs (Inter r s) = Inter (lderivs xs r) (lderivs xs s)"
  by (induct xs arbitrary: r s) auto

theorem finite_lderivs: "finite {«lderivs xs r» | xs . True}"
proof (induct r)
  case Zero show ?case by simp
  case Full show ?case by simp
  case One show ?case
   by (rule finite_surj[of "{Zero, One}"]) (blast intro: insertE[OF lderivs_One])+
  case (Atom as) show ?case
    by (rule finite_surj[of "{Zero, One, Atom as}"]) (blast intro: insertE[OF lderivs_Atom])+
  case (Plus r s)
  show ?case by (auto simp: lderivs_Plus intro!: finite_surj[OF finite_cartesian_product[OF Plus]])
  case (Times r s)
  hence "finite ( (toplevel_summands ` {«lderivs xs s» | xs . True}))" by auto
  moreover have "{«r'» |r'. ys. r'  toplevel_summands (lderivs ys s)} =
    {r'. ys. r'  toplevel_summands «lderivs ys s»}"
    unfolding toplevel_summands_ACI_norm by auto
  ultimately have fin: "finite {«r'» |r'. ys. r'  toplevel_summands (lderivs ys s)}"
    by (fastforce intro: finite_subset[of _ " (toplevel_summands ` {«lderivs xs s» | xs . True})"])
  let ?X = "λxs. {Times (lderivs ys r) s | ys. True}  {r'. r'  (ys. toplevel_summands (lderivs ys s))}"
  show ?case
  proof (simp only: ACI_norm_flatten,
      rule finite_surj[of "{X. xs. X  ACI_norm ` ?X xs}" _ "flatten PLUS"])
    show "finite {X. xs. X  ACI_norm ` ?X xs}"
      using fin by (fastforce simp: image_Un elim: finite_subset[rotated] intro: finite_surj[OF Times(1), of _ "λr. Times r «s»"])
  qed (fastforce dest!: subsetD[OF toplevel_summands_lderivs_Times] intro!: imageI)
  case (Star r)
  let ?f = "λf r'. Times r' (Star (f r))"
  let ?X = "{Star r}  ?f id ` {r'. r'  {lderivs ys r|ys. True}}"
  show ?case
  proof (simp only: ACI_norm_flatten,
      rule finite_surj[of "{X. X  ACI_norm ` ?X}" _ "flatten PLUS"])
    have *: "X. ACI_norm ` ?f (λx. x) ` X = ?f ACI_norm ` ACI_norm ` X" by (auto simp: image_def)
    show "finite {X. X  ACI_norm ` ?X}"
      by (rule finite_Collect_subsets)
         (auto simp: * intro!: finite_imageI[of _ "?f ACI_norm"] intro: finite_subset[OF _ Star])
  qed (fastforce dest!: subsetD[OF toplevel_summands_lderivs_Star] intro!: imageI)
  case (Not r) thus ?case by (auto simp: lderivs_Not) (blast intro: finite_surj)
  case (Inter r s)
  show ?case by (auto simp: lderivs_Inter intro!: finite_surj[OF finite_cartesian_product[OF Inter]])
  case (Pr r)
  hence *: "finite ( (toplevel_summands ` {«lderivs xs r» | xs . True}))" by auto
  have "finite (xs. toplevel_summands «lderivs xs r»)" by (rule finite_subset[OF _ *]) auto
  hence fin: "finite {Pr «s» |s. toplevel_summands s  (xs. toplevel_summands «lderivs xs r»)}"
    by (intro finite_ACI_norm_toplevel_summands)
  have "{s. xs. s = «lderivs xs (Pr r)»} = {«s»| s. xs. s = lderivs xs (Pr r)}" by auto
  thus ?case using finite_subset[OF ACI_norm_lderivs_Pr, of r] fin unfolding image_Collect by auto

subsection ‹Wellformedness and language of derivatives›

lemma wf_lderiv[simp]: "wf n r  wf n (lderiv w r)"
  by (induct r arbitrary: n w) (auto simp add: Let_def)

lemma wf_lderivs[simp]: "wf n r  wf n (lderivs ws r)"
  by (induct ws arbitrary: r) (auto intro: wf_lderiv)

lemma lQuot_map_project:
assumes "as  Σ n" "A  lists (Σ (Suc n))"
shows "lQuot as (map project ` A) = map project ` (a  set (embed as). lQuot a A)" (is "?L = ?R")
proof (intro equalityI image_subsetI subsetI)
  fix xss assume "xss  ?L"
  with assms obtain zss
    where zss: "zss  A" "as # xss = map project zss"
    unfolding lQuot_def by fastforce
  hence "xss = map project (tl zss)" by auto
  with zss assms(2) show "xss  ?R" using embed[OF project, of _ n] unfolding lQuot_def by fastforce
  fix xss assume "xss  (a  set (embed as). lQuot a A)"
  with assms(1) show "map project xss  lQuot as (map project ` A)" unfolding lQuot_def
    by (fastforce intro!: rev_image_eqI simp: embed)

lemma lang_lderiv: "wf n r; w  Σ n  lang n (lderiv w r) = lQuot w (lang n r)"
proof (induct r arbitrary: n w)
  case (Pr r)
  hence *: "wf (Suc n) r" "w'. w'  set (embed w)   w'  Σ (Suc n)" by (auto simp: embed)
  from Pr(1)[OF *] lQuot_map_project[OF Pr(3) lang_subset_lists[OF *(1)]] show ?case
    by (auto simp: wf_lderiv[OF *(1)])
qed (auto simp: Let_def lang_final[symmetric])

lemma lang_lderivs: "wf n r; wf_word n ws  lang n (lderivs ws r) = lQuots ws (lang n r)"
  by (induct ws arbitrary: r) (auto simp: lang_lderiv)

corollary lderivs_final:
assumes "wf n r" "wf_word n ws"
shows "final (lderivs ws r)  ws  lang n r"
  using lang_lderivs[OF assms] lang_final[of "lderivs ws r" n] by auto

abbreviation "lderivs_set n r s  {(«lderivs w r», «lderivs w s») | w. wf_word n w}"

subsection ‹Deriving preserves ACI-equivalence›

lemma ACI_norm_PLUS:
  "list_all2 (λr s. «r» = «s») xs ys  «PLUS xs» = «PLUS ys»"
proof (induct rule: list_all2_induct)
  case (Cons x xs y ys)
  hence "length xs = length ys" by (elim list_all2_lengthD)
  thus ?case using Cons by (induct xs ys rule: list_induct2) auto
qed simp

lemma toplevel_summands_ACI_norm_lderiv:
  "(atoplevel_summands r. toplevel_summands «lderiv as «a»») = toplevel_summands «lderiv as «r»»"
proof (induct r)
  case (Plus r1 r2) thus ?case
   unfolding toplevel_summands.simps toplevel_summands_ACI_norm
     toplevel_summands_lderiv[of as "«Plus r1 r2»"] image_Un Union_Un_distrib
   by (simp add: image_UN)
qed (auto simp: Let_def)

theorem ACI_norm_lderiv:
  "«lderiv as «r»» = «lderiv as r»"
proof (induct r arbitrary: as)
  case (Plus r1 r2) thus ?case
   unfolding lderiv.simps ACI_norm_flatten[of "lderiv as «Plus r1 r2»"]
     toplevel_summands_lderiv[of as "«Plus r1 r2»"] image_Un image_UN
   by (auto simp: toplevel_summands_ACI_norm toplevel_summands_flatten_ACI_norm_image_Union)
     (auto simp: toplevel_summands_ACI_norm[symmetric] toplevel_summands_ACI_norm_lderiv)
  case (Pr r)
  hence "list_all2 (λr s. «r» = «s»)
    (map (λa. lderiv a «r») (embed as)) (map (λa. lderiv a r) (embed as))"
    unfolding list_all2_map1 list_all2_map2 by (intro list_all2_refl)
  thus ?case unfolding lderiv.simps ACI_norm.simps by (blast intro: ACI_norm_PLUS)
qed (simp_all add: Let_def)

corollary lderiv_preserves: "«r» = «s»  «lderiv as r» = «lderiv as s»"
  by (rule box_equals[OF _ ACI_norm_lderiv ACI_norm_lderiv]) (erule arg_cong)

lemma lderivs_snoc[simp]: "lderivs (ws @ [w]) r = (lderiv w (lderivs ws r))"
  by (induct ws arbitrary: r) auto

theorem ACI_norm_lderivs:
  "«lderivs ws «r»» = «lderivs ws r»"
proof (induct ws arbitrary: r rule: rev_induct)
  case (snoc w ws) thus ?case
    using ACI_norm_lderiv[of w "lderivs ws r"] ACI_norm_lderiv[of w "lderivs ws «r»"] by auto
qed simp

lemma lderivs_alt: "«lderivs w r» = fold (λa r. «lderiv a r») w «r»"
  by (induct w arbitrary: r) (auto simp: ACI_norm_lderiv)

lemma finite_fold_lderiv: "finite {fold (λa r. «lderiv a r») w «s» |w. True}"
  using finite_lderivs unfolding lderivs_alt .