# Theory Regular-Sets.Regular_Exp

```
(*  Author: Tobias Nipkow *)

section "Regular expressions"

theory Regular_Exp
imports Regular_Set
begin

(* Regular expressions over an alphabet 'a.  The "atoms:" annotation names the
   generated function collecting the atoms that occur in an expression;
   is_Zero and is_One name the discriminators of the two constant constructors. *)
datatype (atoms: 'a) rexp =
  is_Zero: Zero |
  is_One: One |
  Atom 'a |
  Plus "'a rexp" "'a rexp" |
  Times "'a rexp" "'a rexp" |
  Star "'a rexp"

(* The language denoted by a regular expression, by structural recursion:
   Plus is union, Times is concatenation (conc) and Star is iteration (star). *)
primrec lang :: "'a rexp ⇒ 'a lang" where
  "lang Zero = {}" |
  "lang One = {[]}" |
  "lang (Atom a) = {[a]}" |
  "lang (Plus r s) = lang r ∪ lang s" |
  "lang (Times r s) = conc (lang r) (lang s)" |
  "lang (Star r) = star (lang r)"

(* Input-only shorthand: A is regular if some regular expression denotes it. *)
abbreviation (input) regular_lang where
  "regular_lang A ≡ ∃r. lang r = A"

(* Syntactic test computed by structural recursion; nullable_iff relates it to
   membership of the empty word in lang r. *)
primrec nullable :: "'a rexp ⇒ bool" where
  "nullable Zero ⟷ False" |
  "nullable One ⟷ True" |
  "nullable (Atom c) ⟷ False" |
  "nullable (Plus r1 r2) ⟷ nullable r1 ∨ nullable r2" |
  "nullable (Times r1 r2) ⟷ nullable r1 ∧ nullable r2" |
  "nullable (Star r) ⟷ True"

(* Semantic characterisation of nullable: it holds exactly when the empty word
   belongs to lang r.  Proved by structural induction on r and registered with
   the [code_abbrev] attribute. *)
lemma nullable_iff [code_abbrev]: "nullable r ⟷ [] ∈ lang r"
by (induct r) (auto simp add: conc_def split: if_splits)

(* Syntactic emptiness test computed by structural recursion; rexp_empty_iff
   relates it to lang r = {}.  A Plus is empty only if both summands are, a
   Times as soon as one factor is; One, Atom and Star are never empty. *)
primrec rexp_empty :: "'a rexp ⇒ bool" where
  "rexp_empty Zero ⟷ True" |
  "rexp_empty One ⟷ False" |
  "rexp_empty (Atom a) ⟷ False" |
  "rexp_empty (Plus r s) ⟷ rexp_empty r ∧ rexp_empty s" |
  "rexp_empty (Times r s) ⟷ rexp_empty r ∨ rexp_empty s" |
  "rexp_empty (Star r) ⟷ False"

(* Semantic characterisation of rexp_empty: it holds exactly when the denoted
   language is empty.  Proved by structural induction on r. *)
(* TODO Fixme: This code_abbrev rule does not work. Why? *)
lemma rexp_empty_iff [code_abbrev]: "rexp_empty r ⟷ lang r = {}"
by (induction r) auto

text‹Composition on rhs usually complicates matters:›
(* Fuses two successive map_rexp applications into a single one whose function
   is written as an explicit lambda instead of with the composition operator;
   obtained from rexp.map_comp by unfolding o_def. *)
lemma map_map_rexp:
"map_rexp f (map_rexp g r) = map_rexp (λr. f (g r)) r"
unfolding rexp.map_comp o_def ..

(* Mapping the identity function over a regular expression is the identity.
   Stated with explicit lambdas on both sides and declared as a [simp] rule;
   the proof folds the lambdas back into id and uses rexp.map_id. *)
lemma map_rexp_ident[simp]: "map_rexp (λx. x) = (λr. r)"
unfolding id_def[symmetric] fun_eq_iff rexp.map_id id_apply by (intro allI refl)

(* Every word in lang r uses only symbols from atoms r.  Induction on r with w
   generalised: the Times and Star cases are solved by fastforce (Star
   additionally needs star_conv_concat), all remaining cases by auto. *)
lemma atoms_lang: "w : lang r ⟹ set w ⊆ atoms r"
proof(induction r arbitrary: w)
case Times thus ?case by fastforce
next
case Star thus ?case by (fastforce simp add: star_conv_concat)
qed auto

(* Extensionality restricted to the relevant alphabet: two expressions denote
   the same language iff they agree on every word over the union of their
   atoms.  Follows from atoms_lang, used here with subset_iff unfolded. *)
lemma lang_eq_ext: "(lang r = lang s) =
(∀w ∈ lists(atoms r ∪ atoms s). w ∈ lang r ⟷ w ∈ lang s)"
by (auto simp: atoms_lang[unfolded subset_iff])

(* Variant of lang_eq_ext phrased with fold Deriv: 𝔅 collects, for every word w
   over the combined atoms of r and s, the pair obtained by applying
   fold Deriv w to lang r and to lang s.  The languages are equal iff in every
   such pair the empty word lies in both components or in neither.
   Deriv and in_fold_Deriv are not defined in this file. *)
lemma lang_eq_ext_Nil_fold_Deriv:
fixes r s
defines "𝔅 ≡ {(fold Deriv w (lang r), fold Deriv w (lang s))| w. w∈lists (atoms r ∪ atoms s)}"
shows "lang r = lang s ⟷ (∀(K, L) ∈ 𝔅. [] ∈ K ⟷ [] ∈ L)"
unfolding lang_eq_ext 𝔅_def by (subst (1 2) in_fold_Deriv[of "[]", simplified, symmetric]) auto

subsection ‹Term ordering›

instantiation rexp :: (order) "{order}"
begin

(* Term ordering on regular expressions over an ordered alphabet.  The
   equations overlap and are read top to bottom, so their order matters.
   Expressions with different head constructors are ranked
     Zero, One, Atom, Star, Plus, Times   (smallest first).
   With equal head constructors: atoms are compared with the ordering on 'a,
   Star compares its arguments, and Plus / Times compare their argument pairs
   lexicographically (first components unless they are equal, then second). *)
fun le_rexp :: "('a::order) rexp ⇒ ('a::order) rexp ⇒ bool"
where
"le_rexp Zero _ = True"
| "le_rexp _ Zero = False"
| "le_rexp One _ = True"
| "le_rexp _ One = False"
| "le_rexp (Atom a) (Atom b) = (a <= b)"
| "le_rexp (Atom _) _ = True"
| "le_rexp _ (Atom _) = False"
| "le_rexp (Star r) (Star s) = le_rexp r s"
| "le_rexp (Star _) _ = True"
| "le_rexp _ (Star _) = False"
| "le_rexp (Plus r r') (Plus s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"
| "le_rexp (Plus _ _) _ = True"
| "le_rexp _ (Plus _ _) = False"
| "le_rexp (Times r r') (Times s s') =
(if r = s then le_rexp r' s' else le_rexp r s)"

(* The class instance stuff is by Dmitriy Traytel *)

(* The order class operations on rexp: ≤ is le_rexp itself, and < is le_rexp
   restricted to distinct arguments. *)
definition less_eq_rexp where "r ≤ s ≡ le_rexp r s"
definition less_rexp where "r < s ≡ le_rexp r s ∧ r ≠ s"

(* Zero is the least element: only Zero itself is below Zero. *)
lemma le_rexp_Zero: "le_rexp r Zero ⟹ r = Zero"
by (induction r) auto

(* Reflexivity of le_rexp, by structural induction on r. *)
lemma le_rexp_refl: "le_rexp r r"
by (induction r) auto

(* Antisymmetry of le_rexp, by the induction rule generated for le_rexp;
   le_rexp_Zero is supplied as a destruction rule. *)
lemma le_rexp_antisym: "⟦le_rexp r s; le_rexp s r⟧ ⟹ r = s"
by (induction r s rule: le_rexp.induct) (auto dest: le_rexp_Zero)

(* Transitivity of le_rexp.  Induction over the first two arguments using the
   rule generated for le_rexp, with t generalised.  The cases spelled out below
   are each closed by a case distinction on t; every case not listed is
   discharged by the final "qed auto". *)
lemma le_rexp_trans: "⟦le_rexp r s; le_rexp s t⟧ ⟹ le_rexp r t"
proof (induction r s arbitrary: t rule: le_rexp.induct)
(* Goals of the form le_rexp One t *)
fix v t assume "le_rexp (Atom v) t" thus "le_rexp One t" by (cases t) auto
next
fix s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp One t" by (cases t) auto
next
fix s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp One t" by (cases t) auto
next
fix s t assume "le_rexp (Star s) t" thus "le_rexp One t" by (cases t) auto
next
(* Goals of the form le_rexp (Atom v) t *)
fix v u t assume "le_rexp (Atom v) (Atom u)" "le_rexp (Atom u) t"
thus "le_rexp (Atom v) t" by (cases t) auto
next
fix v s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
fix v s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
fix v s t assume "le_rexp (Star s) t" thus "le_rexp (Atom v) t" by (cases t) auto
next
(* Goals of the form le_rexp (Star r) t; the Star/Star case uses the induction hypothesis *)
fix r s t
assume IH: "⋀t. le_rexp r s ⟹ le_rexp s t ⟹ le_rexp r t"
and "le_rexp (Star r) (Star s)" "le_rexp (Star s) t"
thus "le_rexp (Star r) t" by (cases t) auto
next
fix r s1 s2 t assume "le_rexp (Plus s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto
next
fix r s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Star r) t" by (cases t) auto
next
(* Plus/Plus: one induction hypothesis per branch of the "if" in the
   definition; le_rexp_antisym is supplied as an introduction rule *)
fix r1 r2 s1 s2 t
assume "⋀t. r1 = s1 ⟹ le_rexp r2 s2 ⟹ le_rexp s2 t ⟹ le_rexp r2 t"
"⋀t. r1 ≠ s1 ⟹ le_rexp r1 s1 ⟹ le_rexp s1 t ⟹ le_rexp r1 t"
"le_rexp (Plus r1 r2) (Plus s1 s2)" "le_rexp (Plus s1 s2) t"
thus "le_rexp (Plus r1 r2) t" by (cases t) (auto split: if_split_asm intro: le_rexp_antisym)
next
fix r1 r2 s1 s2 t assume "le_rexp (Times s1 s2) t" thus "le_rexp (Plus r1 r2) t" by (cases t) auto
next
(* Times/Times: same argument as Plus/Plus *)
fix r1 r2 s1 s2 t
assume "⋀t. r1 = s1 ⟹ le_rexp r2 s2 ⟹ le_rexp s2 t ⟹ le_rexp r2 t"
"⋀t. r1 ≠ s1 ⟹ le_rexp r1 s1 ⟹ le_rexp s1 t ⟹ le_rexp r1 t"
"le_rexp (Times r1 r2) (Times s1 s2)" "le_rexp (Times s1 s2) t"
thus "le_rexp (Times r1 r2) t" by (cases t) (auto split: if_split_asm intro: le_rexp_antisym)
qed auto

(* Discharges the class obligations for rexp :: order: after unfolding the
   definitions of ≤ and <, they follow from reflexivity, antisymmetry and
   transitivity of le_rexp. *)
instance proof
qed (auto simp add: less_eq_rexp_def less_rexp_def
intro: le_rexp_refl le_rexp_antisym le_rexp_trans)

end

(* Over a linearly ordered alphabet the term ordering on regular expressions
   is itself linear. *)
instantiation rexp :: (linorder) "{linorder}"
begin

(* Totality of le_rexp, by the induction rule generated for le_rexp. *)
lemma le_rexp_total: "le_rexp (r :: 'a :: linorder rexp) s ∨ le_rexp s r"
by (induction r s rule: le_rexp.induct) auto

(* The instance obligation is closed by unfolding the definitions of ≤ and <
   and applying le_rexp_total. *)
instance proof
qed (unfold less_eq_rexp_def less_rexp_def, rule le_rexp_total)

end

end
```