Theory Distributed_Distinct_Elements_Accuracy

section ‹Accuracy with cutoff\label{sec:accuracy}›

text ‹This section verifies that each of the $l$ estimate have the required accuracy with high
probability assuming as long as the cutoff is below @{term "q_max"}, generalizing the result from
Section~\ref{sec:accuracy_wo_cutoff}.›

theory Distributed_Distinct_Elements_Accuracy
  imports
    Distributed_Distinct_Elements_Accuracy_Without_Cutoff
    Distributed_Distinct_Elements_Cutoff_Level
begin

unbundle intro_cong_syntax

lemma (in semilattice_set) Union:
  assumes "finite I" "I  {}"
  assumes "i. i  I  finite (Z i)"
  assumes "i. i  I  Z i  {}"
  shows "F ( (Z ` I)) = F ((λi. (F (Z i))) ` I)"
  using assms(1,2,3,4)
proof (induction I rule:finite_ne_induct)
  case (singleton x)
  then show ?case by simp
next
  case (insert x I)
  have "F ( (Z ` insert x I)) = F ((Z x)  ( (Z ` I)))"
    by simp
  also have "... = f (F (Z x)) (F ( (Z ` I)))"
    using insert by (intro union finite_UN_I) auto
  also have "... = f (F {F (Z x)}) (F ((λi. F (Z i)) ` I))"
    using insert(5,6) by (subst insert(4)) auto
  also have "... = F ({F (Z x)}  (λi. F (Z i)) ` I)"
    using insert(1,2) by (intro union[symmetric] finite_imageI) auto
  also have "... = F ((λi. F (Z i)) ` insert x I)"
    by simp
  finally show ?case by simp
qed

text ‹This is similar to the existing @{thm [source] hom_Max_commute} with the crucial difference
that it works even if the function is a homomorphism between distinct lattices.
An example application is @{term "Max (int ` A) = int (Max A)"}.›

lemma hom_Max_commute':
  assumes "finite A" "A  {}"
  assumes "x y. x  A  y  A  max (f x) (f y) = f (max x y)"
  shows "Max (f ` A) = f (Max A)"
  using assms by (induction A rule:finite_ne_induct) auto

context inner_algorithm_fix_A
begin

definition tc
  where "tc ψ σ = (Max ((λj. τ1 ψ A σ j + σ) ` {..<b})) - b_exp + 9"

definition sc (* tilde t *)
  where "sc ψ σ = nat (tc ψ σ)"

definition pc (* tilde p *)
  where "pc ψ σ = card {j {..<b}. τ1 ψ A σ j + σ  sc ψ σ}"

definition Yc (* tilde A* *)
  where "Yc ψ σ = 2 ^ sc ψ σ * ρ_inv (pc ψ σ)"

lemma sc_eq_s:
  assumes "(f,g,h)  sample_set Ψ"
  assumes "σ  s f"
  shows "sc (f,g,h) σ = s f"
proof -
  have "int (Max (f ` A)) - int b_exp + 9  int (Max (f ` A)) - 26 + 9"
    using b_exp_ge_26 by (intro add_mono diff_left_mono) auto
  also have "...  int (Max (f ` A))" by simp
  finally have 1:"int (Max (f ` A)) - int b_exp + 9  int (Max (f ` A))"
    by simp
  have "σ  int (s f)" using assms(2) by simp
  also have "... = max 0 (t f)"
    unfolding s_def by simp
  also have "...  max 0 (int (Max (f ` A)))"
    unfolding t_def using 1 by simp
  also have "... = int (Max (f ` A))"
    by simp
  finally have "σ  int (Max (f ` A))"
    by simp
  hence 0: "int σ - 1  int (Max (f ` A))"
    by simp

  have c:"h  sample_set ( k (C7 * b2) [b]S)"
    using assms(1) sample_set_Ψ by auto
  hence h_range: "h x < b" for x
    using h_range_1 by simp

  have "(MAX j{..<b}. τ1 (f, g, h) A σ j + int σ) =
    (MAX x{..<b}. Max ({int (f a) |a. a  A  h (g a) = x}  {-1}  {int σ -1}))"
    using fin_f[OF assms(1)] by (simp add:max_add_distrib_left max.commute τ1_def)
  also have "... = Max (x<b. {int (f a) |a. a  A  h (g a) = x}  {- 1}  {int σ - 1})"
    using fin_f[OF assms(1)] b_ne by (intro Max.Union[symmetric]) auto
  also have "... = Max ({int (f a) |a. a  A}  {- 1, int σ - 1})"
    using h_range by (intro arg_cong[where f="Max"]) auto
  also have "... = max (Max (int ` f ` A)) (int σ - 1)"
    using A_nonempty fin_A unfolding Setcompr_eq_image image_image
    by (subst Max.union) auto
  also have "... = max (int (Max (f ` A))) (int σ - 1)"
    using fin_A A_nonempty by (subst hom_Max_commute') auto
  also have "... = int (Max (f ` A))"
    by (intro max_absorb1 0)
  finally have "(MAX j{..<b}. τ1 (f, g, h) A σ j + int σ) = Max (f ` A)" by simp

  thus ?thesis
    unfolding sc_def tc_def s_def t_def by simp
qed

lemma pc_eq_p:
  assumes "(f,g,h)  sample_set Ψ"
  assumes "σ  s f"
  shows "pc (f,g,h) σ = p (f,g,h)"
proof -
  have "{j  {..<b}. int (s f)  max (τ0 (f, g, h) A j) (int σ - 1)} =
    {j  {..<b}. int (s f)  max (τ0 (f, g, h) A j) (- 1)}"
    using assms(2) unfolding le_max_iff_disj by simp
  thus ?thesis
    unfolding pc_def p_def sc_eq_s[OF assms]
    by (simp add:max_add_distrib_left τ1_def del:τ0.simps)
qed

lemma Yc_eq_Y:
  assumes "(f,g,h)  sample_set Ψ"
  assumes "σ  s f"
  shows "Yc (f,g,h) σ = Y (f,g,h)"
  unfolding Yc_def Y_def sc_eq_s[OF assms] pc_eq_p[OF assms] by simp

lemma accuracy_single: "measure Ψ {ψ. σ  q_max. ¦Yc ψ σ - real X¦ > ε * X}  1/2^4"
  (is "?L  ?R")
proof -
  have "measure Ψ {ψ. σ  q_max. ¦Yc ψ σ - real X¦ > ε * real X} 
    measure Ψ {(f,g,h). ¦Y (f,g,h) - real X¦ >  ε * real X  s f < q_max}"
  proof (rule pmf_mono)
    fix ψ
    assume a:"ψ  {ψ. σq_max. ε * real X < ¦Yc ψ σ - real X¦}"
    assume d:"ψ  set_pmf (sample_pmf Ψ)"
    obtain σ where b:"σ  q_max" and c:" ε * real X < ¦Yc ψ σ - real X¦"
      using a by auto
    obtain f g h where ψ_def: "ψ = (f,g,h)" by (metis prod_cases3)
    hence e:"(f,g,h)  sample_set Ψ"
      using d unfolding sample_space_alt[OF sample_space_Ψ] by simp

    show "ψ  {(f, g, h). ε * real X < ¦Y (f, g, h) - real X¦  s f < q_max}"
    proof (cases "s f  q_max")
      case True
      hence f:"σ  s f" using b by simp
      have "ε * real X < ¦Y ψ - real X¦"
        using Yc_eq_Y[OF e f] c unfolding ψ_def by simp
      then show ?thesis unfolding ψ_def by simp
    next
      case False
      then show ?thesis unfolding ψ_def by simp
    qed
  qed
  also have "...  1/2^4"
    using accuracy_without_cutoff by simp
  finally show ?thesis by simp
qed

lemma estimate1_eq:
  assumes "j < l"
  shows "estimate1 (τ2 ω A σ, σ) j = Yc (ω j) σ" (is "?L = ?R")
proof -
  define t where "t = max 0 (Max ((τ2 ω A σ j) ` {..<b}) + σ - log 2 b + 9)"
  define p where "p = card { k. k  {..<b}  (τ2 ω A σ j k) + σ  t }"

  have 0: "int (nat x) = max 0 x" for x
    by simp
  have 1: "log 2 b = b_exp"
    unfolding b_def by simp

  have "b > 0"
    using b_min by simp
  hence 2: " {..<b}  {}" by auto

  have "t = int (nat (Max ((τ2 ω A σ j) ` {..<b}) + σ - b_exp + 9))"
    unfolding t_def 0 1 by (rule refl)
  also have "... = int (nat (Max ((λx. x + σ) ` (τ2 ω A σ j) ` {..<b}) - b_exp + 9))"
    by (intro_cong "[σ1 int,σ1 nat,σ2(+),σ2(-)]" more:hom_Max_commute) (simp_all add:2)
  also have "... = int (sc (ω j) σ)"
    using assms
    unfolding sc_def tc_def τ2_def image_image by simp
  finally have 3:"t = int (sc (ω j) σ)"
    by simp

  have 4: "p = pc (ω j) σ"
    using assms unfolding p_def pc_def 3 τ2_def by simp

  have "?L = 2 powr t * ln (1-p/b) / ln(1-1/b)"
    unfolding estimate1.simps τ_def τ3_def
    by (simp only:t_def p_def Let_def)
  also have "... = 2 powr (sc (ω j) σ) * ρ_inv p"
    unfolding 3 ρ_inv_def by (simp)
  also have "... = ?R"
    unfolding Yc_def 3 4 by (simp add:powr_realpow)
  finally show ?thesis
    by blast
qed

lemma estimate_result_1:
  "measure Ω {ω. (σq_max. ε*X < ¦estimate (τ2 ω A σ,σ)-X¦) }  δ/2" (is "?L  ?R")
proof -
  define I :: "real set" where "I = {x. ¦x - real X¦  ε*X}"

  define μ where "μ = measure Ψ {ψ. σq_max. Yc ψ σI}"

  have int_I: "interval I"
    unfolding interval_def I_def by auto

  have "μ = measure Ψ {ψ. σ  q_max. ¦Yc ψ σ - real X¦ > ε * X}"
    unfolding μ_def I_def by (simp add:not_le)
  also have "...   1 / 2 ^ 4"
    by (intro accuracy_single)
  also have "... = 1/ 16"
    by simp
  finally have 1:"μ  1 / 16" by simp

  have "(μ + Λ)  1/16 + 1/16"
    unfolding Λ_def by (intro add_mono 1) auto
  also have "...  1/8"
    by simp
  finally have 2:"(μ + Λ)  1/8"
    by simp

  hence 0: "(μ + Λ)  1/2"
    by simp

  have "μ  0"
    unfolding μ_def by simp
  hence 3: "μ + Λ > 0"
    by (intro add_nonneg_pos Λ_gt_0)

  have "?L = measure Ω {ω. (σq_max. ε*X < ¦median l (estimate1 (τ2 ω A σ,σ))-X¦) }"
    by simp
  also have "... = measure Ω {ω. (σq_max. median l (estimate1 (τ2 ω A σ,σ))  I)}"
    unfolding I_def by (intro measure_pmf_cong) auto
  also have "...  measure Ω {ω. real(card{i{..<l}.(σq_max. Yc (ω i) σI)}) real l/2}"
  proof (rule pmf_mono)
    fix ω
    assume "ω  set_pmf Ω" "ω  {ω. σq_max. median l (estimate1 (τ2 ω A σ, σ))  I}"
    then obtain σ where σ_def: "median l (estimate1 (τ2 ω A σ, σ))  I" "σq_max"
      by auto

    have "real l = 2 * real l - real l"
      by simp
    also have "...  2 * real l - 2 * card {i. i < l  estimate1 (τ2 ω A σ, σ) i  I}"
      using σ_def median_est[OF int_I, where n="l"] not_less
      by (intro diff_left_mono Nat.of_nat_mono) (auto simp del:estimate1.simps)
    also have "... = 2 * (real (card {..<l}) -card {i. i < l  estimate1 (τ2 ω A σ, σ) i  I})"
      by (simp del:estimate1.simps)
    also have "... = 2 * real (card {..<l} -card {i. i < l  estimate1 (τ2 ω A σ, σ) i  I})"
      by (intro_cong "[σ2 (*)]" more:of_nat_diff[symmetric] card_mono)
        (auto simp del:estimate1.simps)
    also have "... = 2 * real (card ({..<l} - {i. i < l  estimate1 (τ2 ω A σ, σ) i  I}))"
      by (intro_cong "[σ2 (*), σ1 of_nat]" more:card_Diff_subset[symmetric])
        (auto simp del:estimate1.simps)
    also have "... = 2 * real (card {i{..<l}. estimate1 (τ2 ω A σ, σ) i  I})"
      by (intro_cong "[σ2 (*), σ1 of_nat, σ1 card]") (auto simp del:estimate1.simps)
    also have "... = 2 * real (card {i  {..<l}. Yc (ω i) σ  I})"
      using estimate1_eq by (intro_cong "[σ2 (*), σ1 of_nat, σ1 card]" more:restr_Collect_cong) auto
    also have "...  2 * real (card {i  {..<l}. (σq_max. Yc (ω i) σ  I)})"
      using σ_def(2) by (intro mult_left_mono Nat.of_nat_mono card_mono) auto
    finally have "real l  2 * real (card {i  {..<l}. (σq_max. Yc (ω i) σ  I)})"
      by simp
    thus "ω  {ω. real l/2  real (card {i  {..<l}. σq_max. Yc (ω i) σ  I})}"
      by simp
  qed
  also have "... = measure Ω {ω. real (card{i{..<l}. (σq_max. Yc (ω i) σI)})  (1/2)*real l}"
    unfolding sample_pmf_alt[OF Ω.sample_space] p_def by simp
  also have "...  exp (- real l * ((1/2) * ln (1 / (μ + Λ)) - 2 * exp (- 1)))"
    using 0 unfolding μ_def by (intro Ω.tail_bound l_gt_0 Λ_gt_0) auto
  also have "... = exp (- (real l * ((1/2) * ln (1 / (μ + Λ)) - 2 * exp (- 1))))"
    by simp
  also have "...  exp (- (real l * ((1/2) * ln 8 - 2 * exp (- 1))))"
    using 2 3 l_gt_0 by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_left_mono diff_mono)
      (auto simp add:divide_simps)
  also have "...  exp (- (real l * (1/4)))"
    by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_left_mono of_nat_0_le_iff)
     (approximation 5)
  also have "...  exp (- (C6 * ln (2/ δ)*(1/4)))"
    by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_right_mono l_lbound) auto
  also have "... = exp ( - ln (2/ δ))"
    unfolding C6_def by simp
  also have "... = ?R"
    using δ_gt_0 by (subst ln_inverse[symmetric]) auto
  finally show ?thesis
    by simp
qed

theorem estimate_result:
  "measure Ω {ω. ¦estimate (τ ω A)- X¦ >  ε * X}   δ"
  (is "?L  ?R")
proof -
  let ?P = "measure Ω"
  have "?L  ?P {ω. (σq_max.  ε*real X<¦estimate (τ2 ω A σ, σ)-real X¦)q ω A> q_max}"
    unfolding τ_def τ3_def not_le[symmetric]
    by (intro pmf_mono) auto
  also have "... ?P {ω. (σq_max. ε*real X<¦estimate (τ2 ω A σ,σ)-X¦)} + ?P {ω. q ω A> q_max}"
    by (intro pmf_add) auto
  also have "...  δ/2 +  δ/2"
    by (intro add_mono cutoff_level estimate_result_1)
  also have "... =  δ"
    by simp
  finally show ?thesis
    by simp
qed

end

lemma (in inner_algorithm) estimate_result:
  assumes "A  {..<n}" "A  {}"
  shows "measure Ω {ω. ¦estimate (τ ω A)- real (card A)¦ > ε * real (card A)}  δ" (is "?L  ?R")
proof -
  interpret inner_algorithm_fix_A
    using assms by unfold_locales auto
  have "?L = measure Ω {ω. ¦estimate (τ ω A)- X¦ > ε * X}"
    unfolding X_def by simp
  also have "...  ?R"
    by (intro estimate_result)
  finally show ?thesis
    by simp
qed

unbundle no_intro_cong_syntax

end