Theory Parikh_Img

(* Authors: Fabian Lehr *)

section ‹Parikh images›

theory Parikh_Img
  imports 
    "Reg_Lang_Exp"
    "HOL-Library.Multiset"
begin


subsection ‹Definition and basic lemmas›

text ‹The Parikh vector of a finite word describes how often each symbol of the alphabet occurs in the word.
We represent parikh vectors by multisets. The Parikh image of a language L›, denoted by Ψ L›,
is then the set of Parikh vectors of all words in the language.›

definition parikh_img :: "'a lang  'a multiset set" where
  "parikh_img L  mset ` L"

notation parikh_img ("Ψ")

lemma parikh_img_Un [simp]: "Ψ (L1  L2) = Ψ L1  Ψ L2"
  by (auto simp add: parikh_img_def)

lemma parikh_img_UNION: "Ψ ((L ` I)) =  ((λi. Ψ (L i)) ` I)"
  by (auto simp add: parikh_img_def)

lemma parikh_img_conc: "Ψ (L1 @@ L2) = { m1 + m2 | m1 m2. m1  Ψ L1  m2  Ψ L2 }"
  unfolding parikh_img_def by force

lemma parikh_img_commut: "Ψ (L1 @@ L2) = Ψ (L2 @@ L1)"
proof -
  have "{ m1 + m2 | m1 m2. m1  Ψ L1  m2  Ψ L2 } = 
        { m2 + m1 | m1 m2. m1  Ψ L1  m2  Ψ L2 }"
    using add.commute by blast
  then show ?thesis
    using parikh_img_conc[of L1] parikh_img_conc[of L2] by auto
qed


subsection ‹Monotonicity properties›

lemma parikh_img_mono: "A  B  Ψ A  Ψ B"
  unfolding parikh_img_def by fast

lemma parikh_conc_right_subset: "Ψ A  Ψ B  Ψ (A @@ C)  Ψ (B @@ C)"
  by (auto simp add: parikh_img_conc)

lemma parikh_conc_left_subset: "Ψ A  Ψ B  Ψ (C @@ A)  Ψ (C @@ B)"
  by (auto simp add: parikh_img_conc)

lemma parikh_conc_subset:
  assumes "Ψ A  Ψ C"
      and "Ψ B  Ψ D"
    shows "Ψ (A @@ B)  Ψ (C @@ D)"
  using assms parikh_conc_right_subset parikh_conc_left_subset by blast

lemma parikh_conc_right: "Ψ A = Ψ B  Ψ (A @@ C) = Ψ (B @@ C)"
  by (auto simp add: parikh_img_conc)

lemma parikh_conc_left: "Ψ A = Ψ B  Ψ (C @@ A) = Ψ (C @@ B)"
  by (auto simp add: parikh_img_conc)

lemma parikh_pow_mono: "Ψ A  Ψ B  Ψ (A ^^ n)  Ψ (B ^^ n)"
  by (induction n) (auto simp add: parikh_img_conc)


lemma parikh_star_mono:
  assumes "Ψ A  Ψ B"
  shows "Ψ (star A)  Ψ (star B)"
proof
  fix v
  assume "v  Ψ (star A)"
  then obtain w where w_intro: "mset w = v  w  star A" unfolding parikh_img_def by blast
  then obtain n where "w  A ^^ n" unfolding star_def by blast
  then have "v  Ψ (A ^^ n)" using w_intro unfolding parikh_img_def by blast
  with assms have "v  Ψ (B ^^ n)" using parikh_pow_mono by blast
  then show "v  Ψ (star B)" unfolding star_def using parikh_img_UNION by fastforce
qed

lemma parikh_star_mono_eq:
  assumes "Ψ A = Ψ B"
  shows "Ψ (star A) = Ψ (star B)"
  using parikh_star_mono by (metis Orderings.order_eq_iff assms)


lemma parikh_img_subst_mono:
  assumes "i. Ψ (eval (A i) v)  Ψ (eval (B i) v)"
  shows "Ψ (eval (subst A f) v)  Ψ (eval (subst B f) v)"
proof (induction f)
  case (Concat f1 f2)
  then have "Ψ (eval (subst A f1) v @@ eval (subst A f2) v)
               Ψ (eval (subst B f1) v @@ eval (subst B f2) v)"
    using parikh_conc_subset by blast
  then show ?case by simp
next
  case (Star f)
  then have "Ψ (star (eval (subst A f) v))  Ψ (star (eval (subst B f) v))"
    using parikh_star_mono by blast
  then show ?case by simp
qed (use assms(1) in auto)

lemma parikh_img_subst_mono_upd:
  assumes "Ψ (eval A v)  Ψ (eval B v)"
  shows "Ψ (eval (subst (Var(x := A)) f) v)  Ψ (eval (subst (Var(x := B)) f) v)"
  using parikh_img_subst_mono[of "Var(x := A)" v "Var(x := B)"] assms by auto

lemma rlexp_mono_parikh:
  assumes "i  vars f. Ψ (v i)  Ψ (v' i)"
  shows "Ψ (eval f v)  Ψ (eval f v')"
using assms proof (induction f rule: rlexp.induct)
case (Concat f1 f2)
  then have "Ψ (eval f1 v @@ eval f2 v)  Ψ (eval f1 v' @@ eval f2 v')"
    using parikh_conc_subset by (metis UnCI vars.simps(4))
  then show ?case by simp
qed (auto simp add: SUP_mono' parikh_img_UNION parikh_star_mono)

lemma rlexp_mono_parikh_eq:
  assumes "i  vars f. Ψ (v i) = Ψ (v' i)"
  shows "Ψ (eval f v) = Ψ (eval f v')"
  using assms rlexp_mono_parikh by blast



subsection ‹$\Psi \; (A \cup B)^* = \Psi \; A^* B^*$›

text ‹\label{sec:parikh_img_star}›
text ‹This property is claimed by Pilling in citePilling and will be needed later.›

lemma parikh_img_union_pow_aux1:
  assumes "v  Ψ ((A  B) ^^ n)"
    shows "v  Ψ (i  n. A ^^ i @@ B ^^ (n-i))"
using assms proof (induction n arbitrary: v)
  case 0
  then show ?case by simp
next
  case (Suc n)
  then obtain w where w_intro: "w  (A  B) ^^ (Suc n)  mset w = v"
    unfolding parikh_img_def by auto
  then obtain w1 w2 where w1_w2_intro: "w = w1@w2  w1  A  B  w2  (A  B) ^^ n" by fastforce
  let ?v1 = "mset w1" and ?v2 = "mset w2"
  from w1_w2_intro have "?v2  Ψ ((A  B) ^^ n)" unfolding parikh_img_def by blast
  with Suc.IH have "?v2  Ψ (i  n. A ^^ i @@ B ^^ (n-i))" by auto
  then obtain w2' where w2'_intro: "mset w2' = mset w2 
      w2'  (i  n. A ^^ i @@ B ^^ (n-i))" unfolding parikh_img_def by fastforce
  then obtain i where i_intro: "i  n  w2'  A ^^ i @@ B ^^ (n-i)" by blast
  from w1_w2_intro w2'_intro have "mset w = mset (w1@w2')"
    by simp
  moreover have "mset (w1@w2')  Ψ (i  Suc n. A ^^ i @@ B ^^ (Suc n-i))"
  proof (cases "w1  A")
    case True
    with i_intro have Suc_i_valid: "Suc i  Suc n" and "w1@w2'  A ^^ (Suc i) @@ B ^^ (Suc n - Suc i)"
      by (auto simp add: conc_assoc)
    then have "mset (w1@w2')  Ψ (A ^^ (Suc i) @@ B ^^ (Suc n - Suc i))"
      unfolding parikh_img_def by blast
    with Suc_i_valid parikh_img_UNION show ?thesis by fast
  next
    case False
    with w1_w2_intro have "w1  B" by blast
    with i_intro have "mset (w1@w2')  Ψ (B @@ A ^^ i @@ B ^^ (n-i))"
      unfolding parikh_img_def by blast
    then have "mset (w1@w2')  Ψ (A ^^ i @@ B ^^ (Suc n-i))"
      using parikh_img_commut conc_assoc
      by (metis Suc_diff_le conc_pow_comm i_intro lang_pow.simps(2))
    with i_intro parikh_img_UNION show ?thesis by fastforce
  qed
  ultimately show ?case using w_intro by auto
qed

lemma parikh_img_star_aux1:
  assumes "v  Ψ (star (A  B))"
  shows "v  Ψ (star A @@ star B)"
proof -
  from assms have "v  (n. Ψ ((A  B) ^^ n))"
    unfolding star_def using parikh_img_UNION by metis
  then obtain n where "v  Ψ ((A  B) ^^ n)" by blast
  then have "v  Ψ (i  n. A ^^ i @@ B ^^ (n-i))"
    using parikh_img_union_pow_aux1 by auto
  then have "v  (in. Ψ (A ^^ i @@ B ^^ (n-i)))" using parikh_img_UNION by metis
  then obtain i where "in  v  Ψ (A ^^ i @@ B ^^ (n-i))" by blast
  then obtain w where w_intro: "mset w = v  w  A ^^ i @@ B ^^ (n-i)"
    unfolding parikh_img_def by blast
  then obtain w1 w2 where w_decomp: "w=w1@w2  w1  A ^^ i  w2  B ^^ (n-i)" by blast
  then have "w1  star A" and "w2  star B" by auto
  with w_decomp have "w  star A @@ star B" by auto
  with w_intro show ?thesis unfolding parikh_img_def by blast
qed

lemma parikh_img_star_aux2:
  assumes "v  Ψ (star A @@ star B)"
  shows "v  Ψ (star (A  B))"
proof -
  from assms obtain w where w_intro: "mset w = v  w  star A @@ star B"
    unfolding parikh_img_def by blast
  then obtain w1 w2 where w_decomp: "w=w1@w2  w1  star A  w2  star B" by blast
  then obtain i j where "w1  A ^^ i" and w2_intro: "w2  B ^^ j" unfolding star_def by blast
  then have w1_in_union: "w1  (A  B) ^^ i" using lang_pow_mono by blast
  from w2_intro have "w2  (A  B) ^^ j" using lang_pow_mono by blast
  with w1_in_union w_decomp have "w  (A  B) ^^ (i+j)" using lang_pow_add by fast
  with w_intro show ?thesis unfolding parikh_img_def by auto
qed

lemma parikh_img_star: "Ψ (star (A  B)) = Ψ (star A @@ star B)"
proof
  show "Ψ (star (A  B))  Ψ (star A @@ star B)" using parikh_img_star_aux1 by auto
  show "Ψ (star A @@ star B)  Ψ (star (A  B))" using parikh_img_star_aux2 by auto
qed



subsection ‹$\Psi \; (E^* F)^* = \Psi \; \left(\{\varepsilon\} \cup E^* F^* F\right)$›

text ‹\label{sec:parikh_img_star2}›
text ‹This property (where $\varepsilon$ denotes the empty word) is claimed by
Pilling as well citePilling; we will use it later.›

(* It even holds = but ⊆ suffices for the proof *)
lemma parikh_img_conc_pow: "Ψ ((A @@ B) ^^ n)  Ψ (A ^^ n @@ B ^^ n)"
proof (induction n)
  case (Suc n)
  then have "Ψ ((A @@ B) ^^ n @@ A @@ B)  Ψ (A ^^ n @@ B ^^ n @@ A @@ B)"
    using parikh_conc_right_subset conc_assoc by metis
  also have " = Ψ (A ^^ n @@ A @@ B ^^ n @@ B)"
    by (metis parikh_img_commut conc_assoc parikh_conc_left)
  finally show ?case by (simp add: conc_assoc conc_pow_comm)
qed simp

lemma parikh_img_conc_star: "Ψ (star (A @@ B))  Ψ (star A @@ star B)"
proof
  fix v
  assume "v  Ψ (star (A @@ B))"
  then have "n. v  Ψ ((A @@ B) ^^ n)" unfolding star_def by (simp add: parikh_img_UNION)
  then obtain n where "v  Ψ ((A @@ B) ^^ n)" by blast
  with parikh_img_conc_pow have "v  Ψ (A ^^ n @@ B ^^ n)" by fast
  then have "v  Ψ (A ^^ n @@ star B)"
    unfolding star_def using parikh_conc_left_subset
    by (metis (no_types, lifting) Sup_upper parikh_img_mono rangeI subset_eq)
  then show "v  Ψ (star A @@ star B)"
    unfolding star_def using parikh_conc_right_subset
    by (metis (no_types, lifting) Sup_upper parikh_img_mono rangeI subset_eq)
qed

lemma parikh_img_conc_pow2: "Ψ ((A @@ B) ^^ Suc n)  Ψ (star A @@ star B @@ B)"
proof
  fix v
  assume "v  Ψ ((A @@ B) ^^ Suc n)"
  with parikh_img_conc_pow have "v  Ψ (A ^^ Suc n @@ B ^^ n @@ B)"
    by (metis conc_pow_comm lang_pow.simps(2) subsetD)
  then have "v  Ψ (star A @@ B ^^ n @@ B)"
    unfolding star_def using parikh_conc_right_subset
    by (metis (no_types, lifting) Sup_upper parikh_img_mono rangeI subset_eq)
  then show "v  Ψ (star A @@ star B @@ B)"
    unfolding star_def using parikh_conc_right_subset parikh_conc_left_subset
    by (metis (no_types, lifting) Sup_upper parikh_img_mono rangeI subset_eq)
qed

lemma parikh_img_star2_aux1:
  "Ψ (star (star E @@ F))  Ψ ({[]}  star E @@ star F @@ F)"
proof
  fix v
  assume "v  Ψ (star (star E @@ F))"
  then have "n. v  Ψ ((star E @@ F) ^^ n)"
    unfolding star_def by (simp add: parikh_img_UNION)
  then obtain n where v_in_pow_n: "v  Ψ ((star E @@ F) ^^ n)" by blast
  show "v  Ψ ({[]}  star E @@ star F @@ F)"
  proof (cases n)
    case 0
    with v_in_pow_n have "v = mset []" unfolding parikh_img_def by simp
    then show ?thesis unfolding parikh_img_def by blast
  next
    case (Suc m)
    with parikh_img_conc_pow2 v_in_pow_n have "v  Ψ (star (star E) @@ star F @@ F)" by blast
    then show ?thesis by (metis UnCI parikh_img_Un star_idemp)
  qed
qed

lemma parikh_img_star2_aux2: "Ψ (star E @@ star F @@ F)  Ψ (star (star E @@ F))"
proof -
  have "F  star E @@ F" unfolding star_def using Nil_in_star
    by (metis concI_if_Nil1 star_def subsetI)
  then have "Ψ (star E @@ F @@ star F)  Ψ (star E @@ F @@ star (star E @@ F))"
    using parikh_conc_left_subset parikh_img_mono parikh_star_mono by meson
  also have "  Ψ (star (star E @@ F))"
    by (metis conc_assoc inf_sup_ord(3) parikh_img_mono star_unfold_left)
  finally show ?thesis using conc_star_comm by metis
qed

lemma parikh_img_star2: "Ψ (star (star E @@ F)) = Ψ ({[]}  star E @@ star F @@ F)"
proof
  from parikh_img_star2_aux1
    show "Ψ (star (star E @@ F))  Ψ ({[]}  star E @@ star F @@ F)" .
  from parikh_img_star2_aux2
    show "Ψ ({[]}  star E @@ star F @@ F)  Ψ (star (star E @@ F))"
    by (metis le_sup_iff parikh_img_Un star_unfold_left sup.cobounded2)
qed



subsection ‹A homogeneous-like property for regular language expressions›

text ‹\label{sec:rlexp_homogeneous}›

lemma rlexp_homogeneous_aux:
  assumes "v x = star Y @@ Z"
    shows "Ψ (eval f v)  Ψ (star Y @@ eval f (v(x := Z)))"
proof (induction f)
  case (Var y)
  show ?case
  proof (cases "x = y")
    case True
    with Var assms show ?thesis by simp
  next
    case False
    have "eval (Var y) v  star Y @@ eval (Var y) v" by (metis Nil_in_star concI_if_Nil1 subsetI)
    with False parikh_img_mono show ?thesis by auto
  qed
next
  case (Const l)
  have "eval (Const l) v  star Y @@ eval (Const l) v" using concI_if_Nil1 by blast
  then show ?case by (simp add: parikh_img_mono)
next
  case (Union f g)
  then have "Ψ (eval (Union f g) v)  Ψ (star Y @@ eval f (v(x := Z)) 
                                                            star Y @@ eval g (v(x := Z)))"
    by (metis eval.simps(3) parikh_img_Un sup.mono)
  then show ?case by (metis conc_Un_distrib(1) eval.simps(3))
next
  case (Concat f g)
  then have "Ψ (eval (Concat f g) v)  Ψ ((star Y @@ eval f (v(x := Z)))
                                                          @@ star Y @@ eval g (v(x := Z)))"
    by (metis eval.simps(4) parikh_conc_subset)
  also have " = Ψ (star Y @@ star Y @@ eval f (v(x := Z)) @@ eval g (v(x := Z)))"
    by (metis conc_assoc parikh_conc_right parikh_img_commut)
  also have " = Ψ (star Y @@ eval f (v(x := Z)) @@ eval g (v(x := Z)))"
    by (metis conc_assoc conc_star_star)
  finally show ?case by (metis eval.simps(4))
next
  case (Star f)
  then have "Ψ (star (eval f v))  Ψ (star (star Y @@ eval f (v(x := Z))))"
    using parikh_star_mono by metis
  also from parikh_img_conc_star have "  Ψ (star Y @@ star (eval f (v(x := Z))))"
    by fastforce
  finally show ?case by (metis eval.simps(5))
qed

text ‹Now we can prove the desired homogeneous-like property which will become useful later.
Notably this property slightly differs from the property claimed in citePilling. However, our
property is easier to prove formally and it suffices for the rest of the proof.›
lemma rlexp_homogeneous:  "Ψ (eval (subst (Var(x := Concat (Star y) z)) f) v)
                           Ψ (eval (Concat (Star y) (subst (Var(x := z)) f)) v)"
                          (is "Ψ ?L  Ψ ?R")
proof -
  let ?v' = "v(x := star (eval y v) @@ eval z v)"
  have "Ψ ?L = Ψ (eval f ?v')" using substitution_lemma_upd[where f=f] by simp
  also have "  Ψ (star (eval y v) @@ eval f (?v'(x := eval z v)))"
    using rlexp_homogeneous_aux[of ?v'] unfolding fun_upd_def by auto
  also have " = Ψ ?R" using substitution_lemma[of "v(x := eval z v)"] by simp
  finally show ?thesis .
qed


subsection ‹Extension of Arden's lemma to Parikh images›

text ‹\label{sec:parikh_arden}›

lemma parikh_img_arden_aux:
  assumes "Ψ (A @@ X  B)  Ψ X"
  shows "Ψ (A ^^ n @@ B)  Ψ X"
proof (induction n)
  case 0
  with assms show ?case by auto
next
  case (Suc n)
  then have "Ψ (A ^^ (Suc n) @@ B)  Ψ (A @@ A ^^ n @@B)"
    by (simp add: conc_assoc)
  moreover from Suc parikh_conc_left have "  Ψ (A @@ X)"
    by (metis conc_Un_distrib(1) parikh_img_Un sup.orderE sup.orderI)
  moreover from Suc.prems assms have "  Ψ X" by auto
  ultimately show ?case by fast
qed

lemma parikh_img_arden:
  assumes "Ψ (A @@ X  B)  Ψ X"
  shows "Ψ (star A @@ B)  Ψ X"
proof
  fix x
  assume "x  Ψ (star A @@ B)"
  then have "n. x  Ψ (A ^^ n @@ B)"
    unfolding star_def by (simp add: conc_UNION_distrib(2) parikh_img_UNION)
  then obtain n where "x  Ψ (A ^^ n @@ B)" by blast
  then show "x  Ψ X" using parikh_img_arden_aux[OF assms] by fast
qed


subsection ‹Equivalence class of languages with identical Parikh image›

text ‹\label{sec:parikh_eq_class}›
text ‹For a given language L›, we define the equivalence class of all languages with identical Parikh
image:›

definition parikh_img_eq_class :: "'a lang  'a lang set" where
  "parikh_img_eq_class L  {L'. Ψ L' = Ψ L}"

lemma parikh_img_Union_class: "Ψ A = Ψ ((parikh_img_eq_class A))"
proof
  let ?A' = "(parikh_img_eq_class A)"
  show "Ψ A  Ψ ?A'"
    unfolding parikh_img_eq_class_def by (simp add: Union_upper parikh_img_mono)
  show "Ψ ?A'  Ψ A"
  proof
    fix v
    assume "v  Ψ ?A'"
    then obtain a where a_intro: "mset a = v  a  ?A'"
      unfolding parikh_img_def by blast
    then obtain L where L_intro: "a  L  L  parikh_img_eq_class A"
      unfolding parikh_img_eq_class_def by blast
    then have "Ψ L = Ψ A" unfolding parikh_img_eq_class_def by fastforce
    with a_intro L_intro show "v  Ψ A" unfolding parikh_img_def by blast
  qed
qed

lemma subseteq_comm_subseteq:
  assumes "Ψ A  Ψ B"
  shows "A  (parikh_img_eq_class B)" (is "A  ?B'")
proof
  fix a
  assume a_in_A: "a  A"
  from assms have "Ψ A  Ψ ?B'"
    using parikh_img_Union_class by blast
  with a_in_A have vec_a_in_B': "mset a  Ψ ?B'" unfolding parikh_img_def by fast
  then have "b. mset b = mset a  b  ?B'"
    unfolding parikh_img_def by fastforce
  then obtain b where b_intro: "mset b = mset a  b  ?B'" by blast
  with vec_a_in_B' have "Ψ (?B'  {a}) = Ψ ?B'"unfolding parikh_img_def by blast
  with parikh_img_Union_class have "Ψ (?B'  {a}) = Ψ B" by blast
  then show "a  ?B'" unfolding parikh_img_eq_class_def by blast
qed

end