Theory Reg_Lang_Exp

(* Authors: Fabian Lehr *)

section ‹Regular language expressions›

theory Reg_Lang_Exp
  imports 
    "Regular-Sets.Regular_Exp"
begin


subsection ‹Definition›

text ‹We introduce regular language expressions which will be the building blocks of the systems of
equations defined later. Regular language expressions can contain both constant languages and
variable languages where variables are natural numbers for simplicity. Given a valuation, i.e.\ an
instantiation of each variable with a language, the regular language expression can be evaluated,
yielding a language.›
datatype 'a rlexp = Var nat                          (* Variable *)
                  | Const "'a lang"                  (* Constant *)
                  | Union "'a rlexp" "'a rlexp"
                  | Concat "'a rlexp" "'a rlexp"     (* Concatenation *)
                  | Star "'a rlexp"                  (* Kleene star*)

type_synonym 'a valuation = "nat  'a lang"

primrec eval :: "'a rlexp  'a valuation  'a lang" where
  "eval (Var n) v = v n" |
  "eval (Const l) _ = l" |
  "eval (Union f g) v = eval f v  eval g v" |
  "eval (Concat f g) v = eval f v @@ eval g v" |
  "eval (Star f) v = star (eval f v)"

primrec vars :: "'a rlexp  nat set" where
  "vars (Var n) = {n}" |
  "vars (Const _) = {}" |
  "vars (Union f g) = vars f  vars g" |
  "vars (Concat f g) = vars f  vars g" |
  "vars (Star f) = vars f"

text ‹Given some regular language expression, substituting each occurrence of a variable i› by
the regular language expression s i› yields the following regular language expression:›
primrec subst :: "(nat  'a rlexp)  'a rlexp  'a rlexp" where
  "subst s (Var n) = s n" |
  "subst _ (Const l) = Const l" |
  "subst s (Union f g) = Union (subst s f) (subst s g)" |
  "subst s (Concat f g) = Concat (subst s f) (subst s g)" |
  "subst s (Star f) = Star (subst s f)"



subsection ‹Basic lemmas›

lemma substitution_lemma:
  assumes "i. v' i = eval (upd i) v"
  shows "eval (subst upd f) v = eval f v'"
  by (induction f rule: rlexp.induct) (use assms in auto)

lemma substitution_lemma_upd:
  "eval (subst (Var(x := f')) f) v = eval f (v(x := eval f' v))"
  using substitution_lemma[of "v(x := eval f' v)"] by force

lemma subst_id: "eval (subst Var f) v = eval f v"
  using substitution_lemma[of v] by simp

lemma vars_subst: "vars (subst upd f) = (x  vars f. vars (upd x))"
  by (induction f) auto

lemma vars_subst_upd_upper: "vars (subst (Var(x := fx)) f)  vars f - {x}  vars fx"
proof
  fix y
  let ?upd = "Var(x := fx)"
  assume "y  vars (subst ?upd f)"
  then obtain y' where "y'  vars f  y  vars (?upd y')" using vars_subst by blast
  then show "y  vars f - {x}  vars fx" by (cases "x = y'") auto
qed


lemma eval_vars:
  assumes "i  vars f. s i = s' i"
  shows "eval f s = eval f s'"
  using assms by (induction f) auto

lemma eval_vars_subst:
  assumes "i  vars f. v i = eval (upd i) v"
  shows "eval (subst upd f) v = eval f v"
proof -
  let ?v' = "λi. if i  vars f then v i else eval (upd i) v"
  let ?v'' = "λi. eval (upd i) v"
  have v'_v'': "?v' i = ?v'' i" for i using assms by simp
  then have v_v'': "i. ?v'' i = eval (upd i) v" by simp
  from assms have "eval f v = eval f ?v'" using eval_vars[of f] by simp
  also have " = eval (subst upd f) v"
    using assms substitution_lemma[OF v_v'', of f] by (simp add: eval_vars)
  finally show ?thesis by simp
qed


text termeval f is monotone:›
lemma rlexp_mono:
  assumes "i  vars f. v i  v' i"
  shows "eval f v  eval f v'"
using assms proof (induction f rule: rlexp.induct)
  case (Star x)
  then show ?case
    by (smt (verit, best) eval.simps(5) in_star_iff_concat order_trans subsetI vars.simps(5))
qed fastforce+



subsection ‹Continuity›

(* TODO: rm, is in devel *)
lemma lang_pow_mono:
  fixes A :: "'a lang"
  assumes "A  B"
  shows "A ^^ n  B ^^ n"
  by (induction n) (use assms conc_mono[of A B] in auto)

lemma rlexp_cont_aux1:
  assumes "i. v i  v (Suc i)"
      and "w  (i. eval f (v i))"
    shows "w  eval f (λx. i. v i x)"
proof -
  from assms(2) obtain n where n_intro: "w  eval f (v n)" by auto
  have "v n x  (i. v i x)" for x by auto
  with n_intro show "?thesis"
    using rlexp_mono[where v="v n" and v'="λx. i. v i x"] by auto
qed

lemma langpow_Union_eval:
  assumes "i. v i  v (Suc i)"
      and "w  (i. eval f (v i)) ^^ n"
    shows "w  (i. eval f (v i) ^^ n)"
using assms(2) proof (induction n arbitrary: w)
  case 0
  then show ?case by simp
next
  case (Suc n)
  then obtain u u' where w_decomp: "w = u@u'" and
    "u  (i. eval f (v i))  u'  (i. eval f (v i)) ^^ n" by fastforce
  with Suc have "u  (i. eval f (v i))  u'  (i. eval f (v i) ^^ n)" by auto
  then obtain i j where i_intro: "u  eval f (v i)" and j_intro: "u'  eval f (v j) ^^ n" by blast
  let ?m = "max i j"
  from i_intro Suc.prems(1) assms(1) rlexp_mono have 1: "u  eval f (v ?m)"
    by (metis le_fun_def lift_Suc_mono_le max.cobounded1 subset_eq)
  from Suc.prems(1) assms (1) rlexp_mono have "eval f (v j)  eval f (v ?m)"
    by (metis le_fun_def lift_Suc_mono_le max.cobounded2)
  with j_intro lang_pow_mono have 2: "u'  eval f (v ?m) ^^ n" by auto
  from 1 2 show ?case using w_decomp by auto
qed

lemma rlexp_cont_aux2:
  assumes "i. v i  v (Suc i)"
      and "w  eval f (λx. i. v i x)"
    shows "w  (i. eval f (v i))"
using assms(2) proof (induction f arbitrary: w rule: rlexp.induct)
  case (Concat f g)
  then obtain u u' where w_decomp: "w = u@u'"
    and "u  eval f (λx. i. v i x)  u'  eval g (λx. i. v i x)" by auto
  with Concat have "u  (i. eval f (v i))  u'  (i. eval g (v i))" by auto
  then obtain i j where i_intro: "u  eval f (v i)" and j_intro: "u'  eval g (v j)" by blast
  let ?m = "max i j"
  from i_intro Concat.prems(1) assms(1) rlexp_mono have "u  eval f (v ?m)"
    by (metis le_fun_def lift_Suc_mono_le max.cobounded1 subset_eq)
  moreover from j_intro Concat.prems(1) assms(1) rlexp_mono have "u'  eval g (v ?m)"
    by (metis le_fun_def lift_Suc_mono_le max.cobounded2 subset_eq)
  ultimately show ?case using w_decomp by auto
next
  case (Star f)
  then obtain n where n_intro: "w  (eval f (λx. i. v i x)) ^^ n"
    using eval.simps(5) star_pow by blast
  with Star have "w  (i. eval f (v i)) ^^ n" using lang_pow_mono by blast
  with Star.prems assms have "w  (i. eval f (v i) ^^ n)" using langpow_Union_eval by auto
  then show ?case by (auto simp add: star_def)
qed fastforce+

text ‹Now we prove that termeval f is continuous. This result is not needed in the further
proof, but it is interesting anyway:›
(* currently unused *)
lemma rlexp_cont:
  assumes "i. v i  v (Suc i)"
  shows "eval f (λx. i. v i x) = (i. eval f (v i))"
proof
  from assms show "eval f (λx. i. v i x)  (i. eval f (v i))" using rlexp_cont_aux2 by auto
  from assms show "(i. eval f (v i))  eval f (λx. i. v i x)" using rlexp_cont_aux1 by blast
qed



subsection ‹Regular language expressions which evaluate to regular languages›

text ‹Evaluating regular language expressions can yield non-regular languages even if
the valuation maps each variable to a regular language. This is because constConst may introduce
non-regular languages.
We therefore define the following predicate which guarantees that a regular language expression
f› yields a regular language if the valuation maps all variables occurring in f› to some regular
language. This is achieved by only allowing regular languages as constants.
However, note that this predicate is just an under-approximation, i.e.\ there exist regular language
expressions which do not satisfy this predicate but evaluate to regular languages anyway.›

fun reg_eval :: "'a rlexp  bool" where
  "reg_eval (Var _)  True" |
  "reg_eval (Const l)  regular_lang l" |
  "reg_eval (Union f g)  reg_eval f  reg_eval g" |
  "reg_eval (Concat f g)  reg_eval f  reg_eval g" |
  "reg_eval (Star f)  reg_eval f"


lemma emptyset_regular: "reg_eval (Const {})"
  using lang.simps(1) reg_eval.simps(2) by blast

lemma epsilon_regular: "reg_eval (Const {[]})"
  using lang.simps(2) reg_eval.simps(2) by blast


text ‹If the valuation v› maps all variables occurring in the regular language expression f› to
a regular language, then evaluating f› again yields a regular language:›
lemma reg_eval_regular:
  assumes "reg_eval f"
      and "n. n  vars f  regular_lang (v n)"
    shows "regular_lang (eval f v)"
using assms proof (induction f rule: reg_eval.induct)
  case (3 f g)
  then obtain r1 r2 where "Regular_Exp.lang r1 = eval f v  Regular_Exp.lang r2 = eval g v" by auto
  then have "Regular_Exp.lang (Plus r1 r2) = eval (Union f g) v" by simp
  then show ?case by blast
next
  case (4 f g)
  then obtain r1 r2 where "Regular_Exp.lang r1 = eval f v  Regular_Exp.lang r2 = eval g v" by auto
  then have "Regular_Exp.lang (Times r1 r2) = eval (Concat f g) v" by simp
  then show ?case by blast
next
  case (5 f)
  then obtain r  where "Regular_Exp.lang r = eval f v" by auto
  then have "Regular_Exp.lang (Regular_Exp.Star r) = eval (Star f) v" by simp
  then show ?case by blast
qed simp_all


text ‹A constreg_eval regular language expression stays constreg_eval if all variables are substituted
by constreg_eval regular language expressions:›
lemma subst_reg_eval:
  assumes "reg_eval f"
      and "x  vars f. reg_eval (upd x)"
    shows "reg_eval (subst upd f)"
  using assms by (induction f rule: reg_eval.induct) simp_all

lemma subst_reg_eval_update:
  assumes "reg_eval f"
      and "reg_eval g"
    shows "reg_eval (subst (Var(x := g)) f)"
  using assms subst_reg_eval fun_upd_def by (metis reg_eval.simps(1))


text ‹For any finite union of constreg_eval regular language expressions exists a constreg_eval regular
language expression:›
lemma finite_Union_regular_aux:
  "f  set fs. reg_eval f  g. reg_eval g  (vars ` set fs) = vars g
                                       (v. (f  set fs. eval f v) = eval g v)"
proof (induction fs)
  case Nil
  then show ?case using emptyset_regular by fastforce
next
  case (Cons f1 fs)
  then obtain g where *: "reg_eval g  (vars ` set fs) = vars g
                           (v. (fset fs. eval f v) = eval g v)" by auto
  let ?g' = "Union f1 g"
  from Cons.prems * have "reg_eval ?g'   (vars ` set (f1 # fs)) = vars ?g'
       (v. (fset (f1 # fs). eval f v) = eval ?g' v)" by simp
  then show ?case by blast
qed

lemma finite_Union_regular:
  assumes "finite F"
      and "f  F. reg_eval f"
    shows "g. reg_eval g  (vars ` F) = vars g  (v. (fF. eval f v) = eval g v)"
  using assms finite_Union_regular_aux finite_list by metis



subsection ‹Constant regular language expressions›

text ‹We call a regular language expression constant if it contains no variables. A constant
regular language expression always evaluates to the same language, independent on the valuation.
Thus, if the constant regular language expression is constreg_eval, then it evaluates to some
regular language, independent on the valuation.›

abbreviation const_rlexp :: "'a rlexp  bool" where
  "const_rlexp f  vars f = {}"

lemma const_rlexp_lang: "const_rlexp f  l. v. eval f v = l"
  by (induction f) auto

lemma const_rlexp_regular_lang:
  assumes "const_rlexp f"
      and "reg_eval f"
    shows "l. regular_lang l  (v. eval f v = l)"
  using assms const_rlexp_lang reg_eval_regular by fastforce

end