Theory Regex

(*<*)
theory Regex
  imports
    "MFOTL_Monitor.Trace"
    "HOL-Library.Extended_Nat"
begin

unbundle lattice_syntax
(*>*)

section ‹Regular expressions›

context begin

qualified datatype (atms: 'a) regex = Skip nat | Test 'a
  | Plus "'a regex" "'a regex" | Times "'a regex" "'a regex"  | Star "'a regex"

lemma finite_atms[simp]: "finite (atms r)"
  by (induct r) auto

definition "Wild = Skip 1"

lemma size_regex_estimation[termination_simp]: "x  atms r  y < f x  y < size_regex f r"
  by (induct r) auto

lemma size_regex_estimation'[termination_simp]: "x  atms r  y  f x  y  size_regex f r"
  by (induct r) auto

qualified definition "TimesL r S = Times r ` S"
qualified definition "TimesR R s = (λr. Times r s) ` R"

qualified primrec fv_regex where
  "fv_regex fv (Skip n) = {}"
| "fv_regex fv (Test φ) = fv φ"
| "fv_regex fv (Plus r s) = fv_regex fv r  fv_regex fv s"
| "fv_regex fv (Times r s) = fv_regex fv r  fv_regex fv s"
| "fv_regex fv (Star r) = fv_regex fv r"

lemma fv_regex_cong[fundef_cong]:
  "r = r'  (z. z  atms r  fv z = fv' z)  fv_regex fv r = fv_regex fv' r'"
  by (induct r arbitrary: r') auto

lemma finite_fv_regex[simp]: "(z. z  atms r  finite (fv z))  finite (fv_regex fv r)"
  by (induct r) auto

lemma fv_regex_commute:
  "(z. z  atms r  x  fv z  g x  fv' z)  x  fv_regex fv r  g x  fv_regex fv' r"
  by (induct r) auto

lemma fv_regex_alt: "fv_regex fv r = (z  atms r. fv z)"
  by (induct r) auto

qualified definition nfv_regex where
  "nfv_regex fv r = Max (insert 0 (Suc ` fv_regex fv r))"

lemma insert_Un: "insert x (A  B) = insert x A  insert x B"
  by auto

lemma nfv_regex_simps[simp]:
  assumes [simp]: "(z. z  atms r  finite (fv z))" "(z. z  atms s  finite (fv z))"
  shows
  "nfv_regex fv (Skip n) = 0"
  "nfv_regex fv (Test φ) = Max (insert 0 (Suc ` fv φ))"
  "nfv_regex fv (Plus r s) = max (nfv_regex fv r) (nfv_regex fv s)"
  "nfv_regex fv (Times r s) = max (nfv_regex fv r) (nfv_regex fv s)"
  "nfv_regex fv (Star r) = nfv_regex fv r"
  unfolding nfv_regex_def
  by (auto simp add: image_Un Max_Un insert_Un simp del: Un_insert_right Un_insert_left)

abbreviation "min_regex_default f r j  (if atms r = {} then j else Min ((λz. f z j) ` atms r))"

qualified primrec match :: "(nat  'a  bool)  'a regex  nat  nat  bool" where
  "match test (Skip n) = (λi j. j = i + n)"
| "match test (Test φ) = (λi j. i = j  test i φ)"
| "match test (Plus r s) = match test r  match test s"
| "match test (Times r s) = match test r OO match test s"
| "match test (Star r) = (match test r)**"

lemma match_cong[fundef_cong]:
  "r = r'  (i z. z  atms r  t i z = t' i z)  match t r = match t' r'"
  by (induct r arbitrary: r') auto

qualified primrec eps where
  "eps test i (Skip n) = (n = 0)"
| "eps test i (Test φ) = test i φ"
| "eps test i (Plus r s) = (eps test i r  eps test i s)"
| "eps test i (Times r s) = (eps test i r  eps test i s)"
| "eps test i (Star r) = True"

qualified primrec lpd where
  "lpd test i (Skip n) = (case n of 0  {} | Suc m  {Skip m})"
| "lpd test i (Test φ) = {}"
| "lpd test i (Plus r s) = (lpd test i r  lpd test i s)"
| "lpd test i (Times r s) = TimesR (lpd test i r) s  (if eps test i r then lpd test i s else {})"
| "lpd test i (Star r) = TimesR (lpd test i r) (Star r)"

qualified primrec lpdκ where
  "lpdκ κ test i (Skip n) = (case n of 0  {} | Suc m  {κ (Skip m)})"
| "lpdκ κ test i (Test φ) = {}"
| "lpdκ κ test i (Plus r s) = lpdκ κ test i r  lpdκ κ test i s"
| "lpdκ κ test i (Times r s) = lpdκ (λt. κ (Times t s)) test i r  (if eps test i r then lpdκ κ test i s else {})"
| "lpdκ κ test i (Star r) = lpdκ (λt. κ (Times t (Star r))) test i r"

qualified primrec rpd where
  "rpd test i (Skip n) = (case n of 0  {} | Suc m  {Skip m})"
| "rpd test i (Test φ) = {}"
| "rpd test i (Plus r s) = (rpd test i r  rpd test i s)"
| "rpd test i (Times r s) = TimesL r (rpd test i s)  (if eps test i s then rpd test i r else {})"
| "rpd test i (Star r) = TimesL (Star r) (rpd test i r)"

qualified primrec rpdκ where
  "rpdκ κ test i (Skip n) = (case n of 0  {} | Suc m  {κ (Skip m)})"
| "rpdκ κ test i (Test φ) = {}"
| "rpdκ κ test i (Plus r s) = rpdκ κ test i r  rpdκ κ test i s"
| "rpdκ κ test i (Times r s) = rpdκ (λt. κ (Times r t)) test i s  (if eps test i s then rpdκ κ test i r else {})"
| "rpdκ κ test i (Star r) = rpdκ (λt. κ (Times (Star r) t)) test i r"

lemma lpdκ_lpd: "lpdκ κ test i r = κ ` lpd test i r"
  by (induct r arbitrary: κ) (auto simp: TimesR_def split: nat.splits)

lemma rpdκ_rpd: "rpdκ κ test i r = κ ` rpd test i r"
  by (induct r arbitrary: κ) (auto simp: TimesL_def split: nat.splits)

lemma match_le: "match test r i j  i  j"
proof (induction r arbitrary: i j)
  case (Times r s)
  then show ?case using order.trans by fastforce
next
  case (Star r)
  from Star.prems show ?case
    unfolding match.simps by (induct i j rule: rtranclp.induct) (force dest: Star.IH)+
qed auto

lemma match_rtranclp_le: "(match test r)** i j  i  j"
  by (metis match.simps(5) match_le)

lemma eps_match: "eps test i r  match test r i i"
  by (induction r) (auto dest: antisym[OF match_le match_le])

lemma lpd_match: "i < j  match test r i j  (s  lpd test i r. match test s) (i + 1) j"
proof (induction r arbitrary: i j)
  case (Times r s)
  have "match test (Times r s) i j  (k. match test r i k  match test s k j)"
    by auto
  also have "  match test r i i  match test s i j 
    (k>i. match test r i k  match test s k j)"
    using match_le[of test r i] nat_less_le by auto
  also have "  match test r i i  (t  lpd test i s. match test t) (i + 1) j 
    (k>i. (t  lpd test i r. match test t) (i + 1) k  match test s k j)"
    using Times.IH(1) Times.IH(2)[OF Times.prems] by metis
  also have "  match test r i i  (t  lpd test i s. match test t) (i + 1) j 
    (k. (t  lpd test i r. match test t) (i + 1) k  match test s k j)"
    using Times.prems by (intro disj_cong[OF refl] iff_exI) (auto dest: match_le)
  also have "  ( (match test ` lpd test i (Times r s))) (i + 1) j"
    by (force simp: TimesL_def TimesR_def eps_match)
  finally show ?case .
next
  case (Star r)
  have "slpd test i r. (match test s OO (match test r)**) (i + 1) j" if "(match test r)** i j"
    using that Star.prems match_le[of test _ "i + 1"]
  proof (induct rule: converse_rtranclp_induct)
    case (step i k)
    then show ?case
      by (cases "i < k") (auto simp: not_less Star.IH dest: match_le)
  qed simp
  with Star.prems show ?case using match_le[of test _  "i + 1"]
    by (auto simp: TimesL_def TimesR_def Suc_le_eq Star.IH[of i]
      elim!: converse_rtranclp_into_rtranclp[rotated])
qed (auto split: nat.splits)

lemma rpd_match: "i < j  match test r i j  (s  rpd test j r. match test s) i (j - 1)"
proof (induction r arbitrary: i j)
  case (Times r s)
  have "match test (Times r s) i j  (k. match test r i k  match test s k j)"
    by auto
  also have "  match test r i j  match test s j j 
    (k<j. match test r i k  match test s k j)"
    using match_le[of test s _ j] nat_less_le by auto
  also have "  (t  rpd test j r. match test t) i (j - 1)  match test s j j  
    (k<j. match test r i k  (t  rpd test j s. match test t) k (j - 1))"
    using Times.IH(1)[OF Times.prems] Times.IH(2) by metis
  also have "  (t  rpd test j r. match test t) i (j - 1)  match test s j j  
    (k. match test r i k  (t  rpd test j s. match test t) k (j - 1))"
    using Times.prems by (intro disj_cong[OF refl] iff_exI) (auto dest: match_le)
  also have "  ( (match test ` rpd test j (Times r s))) i (j - 1)"
    by (force simp: TimesL_def TimesR_def eps_match)
  finally show ?case .
next
  case (Star r)
  have "srpd test j r. ((match test r)** OO match test s) i (j - 1)" if "(match test r)** i j"
    using that Star.prems match_le[of test _ _ "j - 1"]
  proof (induct rule: rtranclp_induct)
    case (step k j)
    then show ?case
      by (cases "k < j") (auto simp: not_less Star.IH dest: match_le)
  qed simp
  with Star.prems show ?case
    by (auto 0 3 simp: TimesL_def TimesR_def intro: Star.IH[of _ j, THEN iffD2]
      elim!: rtranclp.rtrancl_into_rtrancl dest: match_le[of test _ _ "j - 1", unfolded One_nat_def])
qed (auto split: nat.splits)

lemma lpd_fv_regex: "s  lpd test i r  fv_regex fv s  fv_regex fv r"
  by (induct r arbitrary: s) (auto simp: TimesR_def TimesL_def split: if_splits nat.splits)+

lemma rpd_fv_regex: "s  rpd test i r  fv_regex fv s  fv_regex fv r"
  by (induct r arbitrary: s) (auto simp: TimesR_def TimesL_def split: if_splits nat.splits)+

lemma match_fv_cong:
  "(i x. x  atms r  test i x = test' i x)  match test r = match test' r"
  by (induct r) auto

lemma eps_fv_cong:
  "(i x. x  atms r  test i x = test' i x)  eps test i r = eps test' i r"
  by (induct r) auto

datatype modality = Past | Futu
datatype safety = Strict | Lax

context
  fixes fv :: "'a  'b set"
  and safe :: "safety  'a  bool"
begin

qualified fun safe_regex :: "modality  safety  'a regex  bool" where
  "safe_regex m _ (Skip n) = True"
| "safe_regex m g (Test φ) = safe g φ"
| "safe_regex m g (Plus r s) = ((g = Lax  fv_regex fv r = fv_regex fv s)  safe_regex m g r  safe_regex m g s)"
| "safe_regex Futu g (Times r s) =
    ((g = Lax  fv_regex fv r  fv_regex fv s)  safe_regex Futu g s  safe_regex Futu Lax r)"
| "safe_regex Past g (Times r s) =
    ((g = Lax  fv_regex fv s  fv_regex fv r)  safe_regex Past g r  safe_regex Past Lax s)"
| "safe_regex m g (Star r) = ((g = Lax  fv_regex fv r = {})  safe_regex m g r)"

lemmas safe_regex_induct = safe_regex.induct[case_names Skip Test Plus TimesF TimesP Star]

lemma safe_cosafe:
  "(x. x  atms r  safe Strict x  safe Lax x)  safe_regex m Strict r  safe_regex m Lax r"
  by (induct r; cases m) auto

lemma safe_lpd_fv_regex_le: "safe_regex Futu Strict r  s  lpd test i r  fv_regex fv r  fv_regex fv s"
  by (induct r) (auto simp: TimesR_def split: if_splits)

lemma safe_lpd_fv_regex: "safe_regex Futu Strict r  s  lpd test i r  fv_regex fv s = fv_regex fv r"
  by (simp add: eq_iff lpd_fv_regex safe_lpd_fv_regex_le)

lemma cosafe_lpd: "safe_regex Futu Lax r  s  lpd test i r  safe_regex Futu Lax s"
proof (induct r arbitrary: s)
  case (Plus r1 r2)
  from Plus(3,4) show ?case
    by (auto elim: Plus(1,2))
next
  case (Times r1 r2)
  from Times(3,4) show ?case
    by (auto simp: TimesR_def elim: Times(1,2) split: if_splits)
qed (auto simp: TimesR_def split: nat.splits)

lemma safe_lpd: "(x  atms r. safe Strict x  safe Lax x) 
  safe_regex Futu Strict r  s  lpd test i r  safe_regex Futu Strict s"
proof (induct r arbitrary: s)
  case (Plus r1 r2)
  from Plus(3,4,5) show ?case
    by (auto elim: Plus(1,2) simp: ball_Un)
next
  case (Times r1 r2)
  from Times(3,4,5) show ?case
    by (force simp: TimesR_def ball_Un elim: Times(1,2) cosafe_lpd dest: lpd_fv_regex split: if_splits)
next
  case (Star r)
  from Star(2,3,4) show ?case
    by (force simp: TimesR_def elim: Star(1) cosafe_lpd
      dest: safe_cosafe[rotated] lpd_fv_regex[where fv=fv] split: if_splits)
qed (auto split: nat.splits)

lemma safe_rpd_fv_regex_le: "safe_regex Past Strict r  s  rpd test i r  fv_regex fv r  fv_regex fv s"
  by (induct r) (auto simp: TimesL_def split: if_splits)

lemma safe_rpd_fv_regex: "safe_regex Past Strict r  s  rpd test i r  fv_regex fv s = fv_regex fv r"
  by (simp add: eq_iff rpd_fv_regex safe_rpd_fv_regex_le)

lemma cosafe_rpd: "safe_regex Past Lax r  s  rpd test i r  safe_regex Past Lax s"
proof (induct r arbitrary: s)
  case (Plus r1 r2)
  from Plus(3,4) show ?case
    by (auto elim: Plus(1,2))
next
  case (Times r1 r2)
  from Times(3,4) show ?case
    by (auto simp: TimesL_def elim: Times(1,2) split: if_splits)
qed (auto simp: TimesL_def split: nat.splits)

lemma safe_rpd: "(x  atms r. safe Strict x  safe Lax x) 
  safe_regex Past Strict r  s  rpd test i r  safe_regex Past Strict s"
proof (induct r arbitrary: s)
  case (Plus r1 r2)
  from Plus(3,4,5) show ?case
    by (auto elim: Plus(1,2) simp: ball_Un)
next
  case (Times r1 r2)
  from Times(3,4,5) show ?case
    by (force simp: TimesL_def ball_Un elim: Times(1,2) cosafe_rpd dest: rpd_fv_regex split: if_splits)
next
  case (Star r)
  from Star(2,3,4) show ?case
    by (force simp: TimesL_def elim: Star(1) cosafe_rpd
      dest: safe_cosafe[rotated] rpd_fv_regex[where fv=fv] split: if_splits)
qed (auto split: nat.splits)

lemma safe_regex_safe: "(g r. safe g r  safe Lax r) 
  safe_regex m g r  x  atms r  safe Lax x"
  by (induct m g r rule: safe_regex.induct) auto

lemma safe_regex_map_regex:
  "(g x. x  atms r  safe g x   safe g (f x))  (x. x  atms r  fv (f x) = fv x) 
   safe_regex m g r  safe_regex m g (map_regex f r)"
  by (induct m g r rule: safe_regex.induct) (auto simp: fv_regex_alt regex.set_map)

end

lemma safe_regex_cong[fundef_cong]:
  "(g x. x  atms r  safe g x = safe' g x) 
  Regex.safe_regex fv safe m g r = Regex.safe_regex fv safe' m g r"
  by (induct m g r rule: safe_regex.induct) auto

lemma safe_regex_mono:
  "(g x. x  atms r  safe g x  safe' g x) 
  Regex.safe_regex fv safe m g r  Regex.safe_regex fv safe' m g r"
  by (induct m g r rule: safe_regex.induct) auto

lemma match_map_regex: "match t (map_regex f r) = match (λk z. t k (f z)) r"
  by (induct r) auto

lemma match_cong_strong:
  "(k z. k  {i ..< j + 1}  z  atms r  t k z = t' k z)  match t r i j = match t' r i j"
proof (induction r arbitrary: i j)
  case (Times r s)
  from Times.prems show ?case
    by (auto 0 4 simp: relcompp_apply intro: le_less_trans match_le less_Suc_eq_le
      dest: Times.IH[THEN iffD1, rotated -1] Times.IH[THEN iffD2, rotated -1] match_le)
next
  case (Star r)
  show ?case unfolding match.simps
  proof (rule iffI)
    assume *: "(match t r)** i j"
    then have "i  j" unfolding match.simps(5)[symmetric]
      by (rule match_le)
    with * show "(match t' r)** i j" using Star.prems
    proof (induction i j rule: rtranclp.induct)
      case (rtrancl_into_rtrancl a b c)
      from rtrancl_into_rtrancl(1,2,4,5) show ?case
        by (intro rtranclp.rtrancl_into_rtrancl[OF rtrancl_into_rtrancl.IH])
          (auto dest!: Star.IH[THEN iffD1, rotated -1]
            dest: match_le match_rtranclp_le simp: less_Suc_eq_le)
    qed simp
  next
    assume *: "(match t' r)** i j"
    then have "i  j" unfolding match.simps(5)[symmetric]
      by (rule match_le)
    with * show "(match t r)** i j" using Star.prems
    proof (induction i j rule: rtranclp.induct)
      case (rtrancl_into_rtrancl a b c)
      from rtrancl_into_rtrancl(1,2,4,5) show ?case
        by (intro rtranclp.rtrancl_into_rtrancl[OF rtrancl_into_rtrancl.IH])
          (auto dest!: Star.IH[THEN iffD2, rotated -1]
            dest: match_le match_rtranclp_le simp: less_Suc_eq_le)
    qed simp
  qed
qed auto

end

(*<*)
end
(*>*)