Theory Distributed_Distinct_Elements_Accuracy

section ‹Accuracy with cutoff\label{sec:accuracy}›

text ‹This section verifies that each of the $l$ estimate have the required accuracy with high
probability assuming as long as the cutoff is below @{term "q_max"}, generalizing the result from
Section~\ref{sec:accuracy_wo_cutoff}.›

theory Distributed_Distinct_Elements_Accuracy
  imports
    Distributed_Distinct_Elements_Accuracy_Without_Cutoff
    Distributed_Distinct_Elements_Cutoff_Level
begin

unbundle intro_cong_syntax

lemma (in semilattice_set) Union:
  assumes "finite I" "I ≠ {}"
  assumes "⋀i. i ∈ I ⟹ finite (Z i)"
  assumes "⋀i. i ∈ I ⟹ Z i ≠ {}"
  shows "F (⋃ (Z ` I)) = F ((λi. (F (Z i))) ` I)"
  using assms(1,2,3,4)
proof (induction I rule:finite_ne_induct)
  case (singleton x)
  then show ?case by simp
next
  case (insert x I)
  have "F (⋃ (Z ` insert x I)) = F ((Z x) ∪ (⋃ (Z ` I)))"
    by simp
  also have "... = f (F (Z x)) (F (⋃ (Z ` I)))"
    using insert by (intro union finite_UN_I) auto
  also have "... = f (F {F (Z x)}) (F ((λi. F (Z i)) ` I))"
    using insert(5,6) by (subst insert(4)) auto
  also have "... = F ({F (Z x)} ∪ (λi. F (Z i)) ` I)"
    using insert(1,2) by (intro union[symmetric] finite_imageI) auto
  also have "... = F ((λi. F (Z i)) ` insert x I)"
    by simp
  finally show ?case by simp
qed

text ‹This is similar to the existing @{thm [source] hom_Max_commute} with the crucial difference
that it works even if the function is a homomorphism between distinct lattices.
An example application is @{term "Max (int ` A) = int (Max A)"}.›

lemma hom_Max_commute':
  assumes "finite A" "A ≠ {}"
  assumes "⋀x y. x ∈ A ⟹ y ∈ A ⟹ max (f x) (f y) = f (max x y)"
  shows "Max (f ` A) = f (Max A)"
  using assms by (induction A rule:finite_ne_induct) auto

context inner_algorithm_fix_A
begin

definition t⇩_c
  where "t⇩_c ψ σ = (Max ((λj. τ⇩₁ ψ A σ j + σ) ` {..<b})) - b_exp + 9"

definition s⇩_c (* tilde t *)
  where "s⇩_c ψ σ = nat (t⇩_c ψ σ)"

definition p⇩_c (* tilde p *)
  where "p⇩_c ψ σ = card {j∈ {..<b}. τ⇩₁ ψ A σ j + σ ≥ s⇩_c ψ σ}"

definition Y⇩_c (* tilde A* *)
  where "Y⇩_c ψ σ = 2 ^ s⇩_c ψ σ * ρ_inv (p⇩_c ψ σ)"

lemma s⇩_c_eq_s:
  assumes "(f,g,h) ∈ sample_pro Ψ"
  assumes "σ ≤ s f"
  shows "s⇩_c (f,g,h) σ = s f"
proof -
  have "int (Max (f ` A)) - int b_exp + 9 ≤ int (Max (f ` A)) - 26 + 9"
    using b_exp_ge_26 by (intro add_mono diff_left_mono) auto
  also have "... ≤ int (Max (f ` A))" by simp
  finally have 1:"int (Max (f ` A)) - int b_exp + 9 ≤ int (Max (f ` A))"
    by simp
  have "σ ≤ int (s f)" using assms(2) by simp
  also have "... = max 0 (t f)"
    unfolding s_def by simp
  also have "... ≤ max 0 (int (Max (f ` A)))"
    unfolding t_def using 1 by simp
  also have "... = int (Max (f ` A))"
    by simp
  finally have "σ ≤ int (Max (f ` A))"
    by simp
  hence 0: "int σ - 1 ≤ int (Max (f ` A))"
    by simp

  have c:"h ∈ sample_pro (ℋ k (C⇩₇ * b⇧²) (𝒩 b))"
    using assms(1) sample_set_Ψ by auto
  hence h_range: "h x < b" for x
    using h_range_1 by simp

  have "(MAX j∈{..<b}. τ⇩₁ (f, g, h) A σ j + int σ) =
    (MAX x∈{..<b}. Max ({int (f a) |a. a ∈ A ∧ h (g a) = x} ∪ {-1} ∪ {int σ -1}))"
    using fin_f[OF assms(1)] by (simp add:max_add_distrib_left max.commute τ⇩₁_def)
  also have "... = Max (⋃x<b. {int (f a) |a. a ∈ A ∧ h (g a) = x} ∪ {- 1} ∪ {int σ - 1})"
    using fin_f[OF assms(1)] b_ne by (intro Max.Union[symmetric]) auto
  also have "... = Max ({int (f a) |a. a ∈ A} ∪ {- 1, int σ - 1})"
    using h_range by (intro arg_cong[where f="Max"]) auto
  also have "... = max (Max (int ` f ` A)) (int σ - 1)"
    using A_nonempty fin_A unfolding Setcompr_eq_image image_image
    by (subst Max.union) auto
  also have "... = max (int (Max (f ` A))) (int σ - 1)"
    using fin_A A_nonempty by (subst hom_Max_commute') auto
  also have "... = int (Max (f ` A))"
    by (intro max_absorb1 0)
  finally have "(MAX j∈{..<b}. τ⇩₁ (f, g, h) A σ j + int σ) = Max (f ` A)" by simp

  thus ?thesis
    unfolding s⇩_c_def t⇩_c_def s_def t_def by simp
qed

lemma p⇩_c_eq_p:
  assumes "(f,g,h) ∈ sample_pro Ψ"
  assumes "σ ≤ s f"
  shows "p⇩_c (f,g,h) σ = p (f,g,h)"
proof -
  have "{j ∈ {..<b}. int (s f) ≤ max (τ⇩₀ (f, g, h) A j) (int σ - 1)} =
    {j ∈ {..<b}. int (s f) ≤ max (τ⇩₀ (f, g, h) A j) (- 1)}"
    using assms(2) unfolding le_max_iff_disj by simp
  thus ?thesis
    unfolding p⇩_c_def p_def s⇩_c_eq_s[OF assms]
    by (simp add:max_add_distrib_left τ⇩₁_def del:τ⇩₀.simps)
qed

lemma Y⇩_c_eq_Y:
  assumes "(f,g,h) ∈ sample_pro Ψ"
  assumes "σ ≤ s f"
  shows "Y⇩_c (f,g,h) σ = Y (f,g,h)"
  unfolding Y⇩_c_def Y_def s⇩_c_eq_s[OF assms] p⇩_c_eq_p[OF assms] by simp

lemma accuracy_single: "measure Ψ {ψ. ∃σ ≤ q_max. ¦Y⇩_c ψ σ - real X¦ > ε * X} ≤ 1/2^4"
  (is "?L ≤ ?R")
proof -
  have "measure Ψ {ψ. ∃σ ≤ q_max. ¦Y⇩_c ψ σ - real X¦ > ε * real X} ≤
    measure Ψ {(f,g,h). ¦Y (f,g,h) - real X¦ >  ε * real X ∨ s f < q_max}"
  proof (rule pmf_mono)
    fix ψ
    assume a:"ψ ∈ {ψ. ∃σ≤q_max. ε * real X < ¦Y⇩_c ψ σ - real X¦}"
    assume d:"ψ ∈ set_pmf (sample_pro Ψ)"
    obtain σ where b:"σ ≤ q_max" and c:" ε * real X < ¦Y⇩_c ψ σ - real X¦"
      using a by auto
    obtain f g h where ψ_def: "ψ = (f,g,h)" by (metis prod_cases3)
    hence e:"(f,g,h) ∈ sample_pro Ψ" using d by simp

    show "ψ ∈ {(f, g, h). ε * real X < ¦Y (f, g, h) - real X¦ ∨ s f < q_max}"
    proof (cases "s f ≥ q_max")
      case True
      hence f:"σ ≤ s f" using b by simp
      have "ε * real X < ¦Y ψ - real X¦"
        using Y⇩_c_eq_Y[OF e f] c unfolding ψ_def by simp
      then show ?thesis unfolding ψ_def by simp
    next
      case False
      then show ?thesis unfolding ψ_def by simp
    qed
  qed
  also have "... ≤ 1/2^4"
    using accuracy_without_cutoff by simp
  finally show ?thesis by simp
qed

lemma estimate1_eq:
  assumes "j < l"
  shows "estimate1 (τ⇩₂ ω A σ, σ) j = Y⇩_c (ω j) σ" (is "?L = ?R")
proof -
  define t where "t = max 0 (Max ((τ⇩₂ ω A σ j) ` {..<b}) + σ - ⌊log 2 b⌋ + 9)"
  define p where "p = card { k. k ∈ {..<b} ∧ (τ⇩₂ ω A σ j k) + σ ≥ t }"

  have 0: "int (nat x) = max 0 x" for x
    by simp
  have 1: "⌊log 2 b⌋ = b_exp"
    unfolding b_def by simp

  have "b > 0"
    using b_min by simp
  hence 2: " {..<b} ≠ {}" by auto

  have "t = int (nat (Max ((τ⇩₂ ω A σ j) ` {..<b}) + σ - b_exp + 9))"
    unfolding t_def 0 1 by (rule refl)
  also have "... = int (nat (Max ((λx. x + σ) ` (τ⇩₂ ω A σ j) ` {..<b}) - b_exp + 9))"
    by (intro_cong "[σ⇩₁ int,σ⇩₁ nat,σ⇩₂(+),σ⇩₂(-)]" more:hom_Max_commute) (simp_all add:2)
  also have "... = int (s⇩_c (ω j) σ)"
    using assms
    unfolding s⇩_c_def t⇩_c_def τ⇩₂_def image_image by simp
  finally have 3:"t = int (s⇩_c (ω j) σ)"
    by simp

  have 4: "p = p⇩_c (ω j) σ"
    using assms unfolding p_def p⇩_c_def 3 τ⇩₂_def by simp

  have "?L = 2 powr t * ln (1-p/b) / ln(1-1/b)"
    unfolding estimate1.simps τ_def τ⇩₃_def
    by (simp only:t_def p_def Let_def)
  also have "... = 2 powr (s⇩_c (ω j) σ) * ρ_inv p"
    unfolding 3 ρ_inv_def by (simp)
  also have "... = ?R"
    unfolding Y⇩_c_def 3 4 by (simp add:powr_realpow)
  finally show ?thesis
    by blast
qed

lemma estimate_result_1:
  "measure Ω {ω. (∃σ≤q_max. ε*X < ¦estimate (τ⇩₂ ω A σ,σ)-X¦) } ≤ δ/2" (is "?L ≤ ?R")
proof -
  define I :: "real set" where "I = {x. ¦x - real X¦ ≤ ε*X}"

  define μ where "μ = measure Ψ {ψ. ∃σ≤q_max. Y⇩_c ψ σ∉I}"

  have int_I: "interval I"
    unfolding interval_def I_def by auto

  have "μ = measure Ψ {ψ. ∃σ ≤ q_max. ¦Y⇩_c ψ σ - real X¦ > ε * X}"
    unfolding μ_def I_def by (simp add:not_le)
  also have "... ≤  1 / 2 ^ 4"
    by (intro accuracy_single)
  also have "... = 1/ 16"
    by simp
  finally have 1:"μ ≤ 1 / 16" by simp

  have "(μ + Λ) ≤ 1/16 + 1/16"
    unfolding Λ_def by (intro add_mono 1) auto
  also have "... ≤ 1/8"
    by simp
  finally have 2:"(μ + Λ) ≤ 1/8"
    by simp

  hence 0: "(μ + Λ) ≤ 1/2"
    by simp

  have "μ ≥ 0"
    unfolding μ_def by simp
  hence 3: "μ + Λ > 0"
    by (intro add_nonneg_pos Λ_gt_0)

  have "?L = measure Ω {ω. (∃σ≤q_max. ε*X < ¦median l (estimate1 (τ⇩₂ ω A σ,σ))-X¦) }"
    by simp
  also have "... = measure Ω {ω. (∃σ≤q_max. median l (estimate1 (τ⇩₂ ω A σ,σ)) ∉ I)}"
    unfolding I_def by (intro measure_pmf_cong) auto
  also have "... ≤ measure Ω {ω. real(card{i∈{..<l}.(∃σ≤q_max. Y⇩_c (ω i) σ∉I)})≥ real l/2}"
  proof (rule pmf_mono)
    fix ω
    assume "ω ∈ set_pmf Ω" "ω ∈ {ω. ∃σ≤q_max. median l (estimate1 (τ⇩₂ ω A σ, σ)) ∉ I}"
    then obtain σ where σ_def: "median l (estimate1 (τ⇩₂ ω A σ, σ)) ∉ I" "σ≤q_max"
      by auto
    hence "real l ≤ real (2 * card {i. i <l ∧ estimate1 (τ⇩₂ ω A σ, σ) i ∉ I})"
      by (intro of_nat_mono median_est_rev[OF int_I])
    also have "... = 2 * real (card {i∈{..<l}. estimate1 (τ⇩₂ ω A σ, σ) i ∉ I})"
      by simp
    also have "... = 2 * real (card {i ∈ {..<l}. Y⇩_c (ω i) σ ∉ I})"
      using estimate1_eq by (intro_cong "[σ⇩₂ (*), σ⇩₁ of_nat, σ⇩₁ card]" more:restr_Collect_cong) auto
    also have "... ≤ 2 * real (card {i ∈ {..<l}. (∃σ≤q_max. Y⇩_c (ω i) σ ∉ I)})"
      using σ_def(2) by (intro mult_left_mono Nat.of_nat_mono card_mono) auto
    finally have "real l ≤ 2 * real (card {i ∈ {..<l}. (∃σ≤q_max. Y⇩_c (ω i) σ ∉ I)})"
      by simp
    thus "ω ∈ {ω. real l/2 ≤ real (card {i ∈ {..<l}. ∃σ≤q_max. Y⇩_c (ω i) σ ∉ I})}"
      by simp
  qed
  also have "... = measure Ω {ω. real (card{i∈{..<l}. (∃σ≤q_max. Y⇩_c (ω i) σ∉I)}) ≥ (1/2)*real l}"
    unfolding p_def by simp
  also have "... ≤ exp (- real l * ((1/2) * ln (1 / (μ + Λ)) - 2 * exp (- 1)))"
    using 0 unfolding μ_def by (intro walk_tail_bound l_gt_0 Λ_gt_0) auto
  also have "... = exp (- (real l * ((1/2) * ln (1 / (μ + Λ)) - 2 * exp (- 1))))"
    by simp
  also have "... ≤ exp (- (real l * ((1/2) * ln 8 - 2 * exp (- 1))))"
    using 2 3 l_gt_0 by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_left_mono diff_mono)
      (auto simp add:divide_simps)
  also have "... ≤ exp (- (real l * (1/4)))"
    by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_left_mono of_nat_0_le_iff)
     (approximation 5)
  also have "... ≤ exp (- (C⇩₆ * ln (2/ δ)*(1/4)))"
    by (intro iffD2[OF exp_le_cancel_iff] le_imp_neg_le mult_right_mono l_lbound) auto
  also have "... = exp ( - ln (2/ δ))"
    unfolding C⇩₆_def by simp
  also have "... = ?R"
    using δ_gt_0 by (subst ln_inverse[symmetric]) auto
  finally show ?thesis
    by simp
qed

theorem estimate_result:
  "measure Ω {ω. ¦estimate (τ ω A)- X¦ >  ε * X} ≤  δ"
  (is "?L ≤ ?R")
proof -
  let ?P = "measure Ω"
  have "?L ≤ ?P {ω. (∃σ≤q_max.  ε*real X<¦estimate (τ⇩₂ ω A σ, σ)-real X¦)∨q ω A> q_max}"
    unfolding τ_def τ⇩₃_def not_le[symmetric]
    by (intro pmf_mono) auto
  also have "...≤ ?P {ω. (∃σ≤q_max. ε*real X<¦estimate (τ⇩₂ ω A σ,σ)-X¦)} + ?P {ω. q ω A> q_max}"
    by (intro pmf_add) auto
  also have "...≤  δ/2 +  δ/2"
    by (intro add_mono cutoff_level estimate_result_1)
  also have "... =  δ"
    by simp
  finally show ?thesis
    by simp
qed

end

lemma (in inner_algorithm) estimate_result:
  assumes "A ⊆ {..<n}" "A ≠ {}"
  shows "measure Ω {ω. ¦estimate (τ ω A)- real (card A)¦ > ε * real (card A)} ≤ δ" (is "?L ≤ ?R")
proof -
  interpret inner_algorithm_fix_A
    using assms by unfold_locales auto
  have "?L = measure Ω {ω. ¦estimate (τ ω A)- X¦ > ε * X}"
    unfolding X_def by simp
  also have "... ≤ ?R"
    by (intro estimate_result)
  finally show ?thesis
    by simp
qed

unbundle no intro_cong_syntax

end