Theory Discrete_UCB_Step2
theory Discrete_UCB_Step2
imports "Discrete_UCB_Step1"
begin
locale bandit_policy = bandit_problem + prob_space +
fixes Ω :: "'b set"
and ℱ :: "'b set set"
and ω :: 'b
fixes π :: "nat ⇒ 'b ⇒ 'a"
and N_n :: "nat ⇒ 'a ⇒ 'b ⇒ nat"
and Z :: "nat ⇒ 'a ⇒ 'b ⇒ real"
and δ :: real
and q :: real
assumes finite_A: "finite A"
and a_in_A: "a ∈ A"
and measurable_policy: "∀t. π t ∈ measurable M (count_space A)"
and N_n_def: "∀n a b. N_n n a b = card {t ∈ {0..<n}. π (t+1) b = a}"
and δ_pos: "0 < δ"
and δ_less1: "δ < 1"
and q_pos: "q > 0"
begin
definition sample_mean_Z :: "nat ⇒ 'a ⇒ 'b ⇒ real" where
"sample_mean_Z n a b ≡ (1 / real n) * (∑ i<n. Z i a b)"
definition M_val :: "nat ⇒ 'a ⇒ 'b ⇒ real" where
"M_val t a b ≡ (if N_n (t+1) a b = 0 then 0
else (∑ s < t. if π s b = a then Z s a b else 0) / real (N_n t a b))"
definition U :: "nat ⇒ 'a ⇒ 'b ⇒ real" where
"U t a b ≡ M_val t a b + q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a b))))"
definition A_t_plus_1 :: "nat ⇒ 'b ⇒ 'a" where
"A_t_plus_1 t b ≡ (SOME a. a ∈ A ∧ (∀a'. a' ∈ A ⟶ U t a b ≥ U t a' b))"
lemma (in finite_measure) finite_measure_mono:
assumes "A ⊆ B" "B ∈sets M" shows "measure M A ≤ measure M B"
using emeasure_mono[OF assms] emeasure_real[of A] emeasure_real[of B] by (auto simp: measure_def)
theorem union_bound:
fixes E F G :: "'b set"
assumes "E ⊆ F ∪ G"
and "E ∈ events" "F ∈ events" "G ∈ events"
shows "prob E ≤ prob F + prob G"
proof -
have "F ∪ G ∈ events"
using assms(3,4) sets.Un by blast
have "prob E ≤ prob (F ∪ G)"
using assms local.finite_measure_mono by auto
also have "prob (F ∪ G) ≤ prob F + prob G"
using assms local.finite_measure_subadditive by blast
finally show ?thesis .
qed
theorem hoeffding_iid_bound_ge_general:
fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l u :: real
assumes a_in: "a ∈ A"
and eps_pos: "ε ≥ 0"
and bounds: "∀i < n. ∀ω ∈ Ω. l ≤ Z i a ω ∧ Z i a ω ≤ u"
and mu_def: "μ_hat = (∑ i < n. expectation (λω. Z i a ω))"
and "u - l ≠ 0"
and n_pos: "n > 0"
and space_M: "space M = Ω"
and sets_M: "sets M = ℱ"
and indep: "indep_vars (λ_. borel) (λi. (λω. Z i a ω)) {i. i < n}"
and rv: "∀i<n. random_variable borel (λω. Z i a ω)"
shows "prob {ω ∈ Ω. (∑ i < n. Z i a ω) ≥ μ_hat + ε}
≤ exp (- 2 * ε^2 / (real n * (u - l)^2))"
proof -
let ?I = "{i. i < n}"
let ?X = "λi. (λω. Z i a ω)"
let ?a = "λi. l"
let ?b = "λi. u"
have finite_I: "finite ?I" by simp
have AE_bounds: "∀i∈?I. AE ω in M. ?a i ≤ ?X i ω ∧ ?X i ω ≤ ?b i"
proof
fix i assume iI: "i ∈ ?I"
have "∀ω∈Ω. l ≤ Z i a ω ∧ Z i a ω ≤ u" using bounds iI by simp
thus "AE ω in M. ?a i ≤ ?X i ω ∧ ?X i ω ≤ ?b i"
using assms by auto
qed
have indep_loc: "indep_interval_bounded_random_variables M ?I ?X ?a ?b"
by (standard; use finite_I indep AE_bounds in auto)
from indep_loc
have H: "Hoeffding_ineq M ?I ?X ?a ?b"
by (rule Hoeffding_ineq.intro)
have widths: "(∑ i ∈ ?I. (?b i - ?a i)^2) = real n * (u - l)^2"
by simp
have widths_pos: "0 < (∑ i ∈ ?I. (?b i - ?a i)^2)"
using ‹u - l ≠ 0› n_pos by simp
have sum_expectations_eq_integrals:
"(∑ i < n. expectation (λω. Z i a ω)) = (∑ i∈?I. integral⇧L M (?X i))"
proof -
have eq1: "∀i<n. expectation (λω. Z i a ω) = integral⇧L M (?X i)"
using rv space_M sets_M by simp
moreover have sum_eq: "(∑ i<n. integral⇧L M (?X i)) = (∑ i∈?I. integral⇧L M (?X i))"
by (simp add: lessThan_def)
ultimately show ?thesis
by simp
qed
have sum_integrals_eq: "(∑ i∈?I. integral⇧L M (?X i)) = μ_hat"
using mu_def by (simp add: sum_expectations_eq_integrals)
have tail:
"prob {ω ∈ Ω. (∑ i∈?I. ?X i ω) - (∑ i∈?I. integral⇧L M (?X i)) ≥ ε}
≤ exp (- 2 * ε^2 / (∑ i∈?I. (?b i - ?a i)^2))"
using Hoeffding_ineq.Hoeffding_ineq_ge[OF H eps_pos widths_pos]
by (smt (verit, best) Collect_cong assms(7))
have lhs_rewrite: "{ω ∈ Ω. (∑ i < n. Z i a ω) ≥ μ_hat + ε}
= {ω ∈ Ω. (∑ i∈?I. ?X i ω) - (∑ i∈?I. integral⇧L M (?X i)) ≥ ε}"
by (simp add: add.commute le_diff_eq lessThan_def sum_integrals_eq)
have rhs_rewrite: "exp (- 2 * ε^2 / (∑ i∈?I. (?b i - ?a i)^2))
= exp (- 2 * ε^2 / (real n * (u - l)^2))"
using widths by simp
show ?thesis
using tail lhs_rewrite rhs_rewrite by simp
qed
theorem hoeffding_iid_bound_le_general:
fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l u :: real
assumes a_in: "a ∈ A"
and eps_pos: "ε ≥ 0"
and bounds: "∀i < n. ∀ω ∈ Ω. l ≤ Z i a ω ∧ Z i a ω ≤ u"
and mu_def: "μ_hat = (∑ i < n. expectation (λω. Z i a ω))"
and "u - l ≠ 0"
and n_pos: "n > 0"
and space_M: "space M = Ω"
and sets_M: "sets M = ℱ"
and indep: "indep_vars (λ_. borel) (λi. (λω. Z i a ω)) {i. i < n}"
and rv: "∀i<n. random_variable borel (λω. Z i a ω)"
shows "prob {ω ∈ Ω. (∑ i < n. Z i a ω) ≤ μ_hat - ε}
≤ exp (- 2 * ε^2 / (real n * (u - l)^2))"
proof -
let ?I = "{i. i < n}"
let ?X = "λi. (λω. Z i a ω)"
let ?a = "λi. l"
let ?b = "λi. u"
have finite_I: "finite ?I" by simp
have AE_bounds: "∀i∈?I. AE ω in M. ?a i ≤ ?X i ω ∧ ?X i ω ≤ ?b i"
proof
fix i assume iI: "i ∈ ?I"
have "∀ω∈Ω. l ≤ Z i a ω ∧ Z i a ω ≤ u" using bounds iI by simp
thus "AE ω in M. ?a i ≤ ?X i ω ∧ ?X i ω ≤ ?b i"
using assms by auto
qed
have indep_loc: "indep_interval_bounded_random_variables M ?I ?X ?a ?b"
by (standard; use finite_I indep AE_bounds in auto)
from indep_loc
have H: "Hoeffding_ineq M ?I ?X ?a ?b"
by (rule Hoeffding_ineq.intro)
have widths: "(∑ i ∈ ?I. (?b i - ?a i)^2) = real n * (u - l)^2" by simp
have widths_pos: "0 < (∑ i ∈ ?I. (?b i - ?a i)^2)" using ‹u - l ≠ 0› n_pos by simp
have sum_expectations_eq_integrals:
"(∑ i < n. expectation (λω. Z i a ω)) = (∑ i∈?I. integral⇧L M (?X i))"
proof -
have eq1: "∀i<n. expectation (λω. Z i a ω) = integral⇧L M (?X i)" using rv space_M sets_M by simp
moreover have sum_eq: "(∑ i<n. integral⇧L M (?X i)) = (∑ i∈?I. integral⇧L M (?X i))" by (simp add: lessThan_def)
ultimately show ?thesis by simp
qed
have sum_integrals_eq: "(∑ i∈?I. integral⇧L M (?X i)) = μ_hat"
using mu_def by (simp add: sum_expectations_eq_integrals)
have tail:
"prob {ω ∈ Ω. (∑ i∈?I. ?X i ω) - (∑ i∈?I. integral⇧L M (?X i)) ≤ - ε}
≤ exp (- 2 * ε^2 / (∑ i∈?I. (?b i - ?a i)^2))"
using Hoeffding_ineq.Hoeffding_ineq_le[OF H eps_pos widths_pos]
by (smt (verit, best) Collect_cong assms(7))
have lhs_rewrite: "{ω ∈ Ω. (∑ i < n. Z i a ω) ≤ μ_hat - ε}
= {ω ∈ Ω. (∑ i∈?I. ?X i ω) - (∑ i∈?I. integral⇧L M (?X i)) ≤ -ε}"
using add.inverse_inverse[of ε] add.inverse_inverse[of μ_hat] assms(4)
cancel_ab_semigroup_add_class.diff_right_commute[of μ_hat μ_hat "∑uuc<n. Z uuc a _"]
cancel_ab_semigroup_add_class.diff_right_commute[of "- μ_hat" "- μ_hat" "ε - μ_hat"]
cancel_ab_semigroup_add_class.diff_right_commute[of "ε - μ_hat" "ε - μ_hat" ε]
cancel_ab_semigroup_add_class.diff_right_commute[of ε ε μ_hat]
cancel_ab_semigroup_add_class.diff_right_commute[of "0" "- μ_hat" ε]
cancel_ab_semigroup_add_class.diff_right_commute[of μ_hat μ_hat ε]
cancel_ab_semigroup_add_class.diff_right_commute[of "- μ_hat" "- μ_hat" "(∑uuc<n. Z uuc a _) - μ_hat"]
cancel_ab_semigroup_add_class.diff_right_commute[of "(∑uuc<n. Z uuc a _) - μ_hat" "(∑uuc<n. Z uuc a _) - μ_hat"
"∑uuc<n. Z uuc a _"]
cancel_ab_semigroup_add_class.diff_right_commute[of "∑uuc<n. Z uuc a _" "∑uuc<n. Z uuc a _" μ_hat]
cancel_ab_semigroup_add_class.diff_right_commute[of "0" "- μ_hat" "∑uuc<n. Z uuc a _"]
cancel_comm_monoid_add_class.diff_cancel[of "ε - μ_hat"] cancel_comm_monoid_add_class.diff_cancel[of ε]
cancel_comm_monoid_add_class.diff_cancel[of μ_hat] cancel_comm_monoid_add_class.diff_cancel[of "- μ_hat"]
cancel_comm_monoid_add_class.diff_cancel[of "(∑uuc<n. Z uuc a _) - μ_hat"]
cancel_comm_monoid_add_class.diff_cancel[of "∑uuc<n. Z uuc a _"] diff_0
diff_right_mono[of ε "μ_hat - (∑uuc<n. Z uuc a _)" μ_hat] diff_right_mono[of "∑uuc<n. Z uuc a _" "μ_hat - ε" μ_hat]
lessThan_def[of n] more_arith_simps(1)[of "- ε" "(∑uuc<n. Z uuc a _) - μ_hat"] by force
have rhs_rewrite: "exp (- 2 * ε^2 / (∑ i∈?I. (?b i - ?a i)^2)) = exp (- 2 * ε^2 / (real n * (u - l)^2))"
using widths by simp
show ?thesis using tail lhs_rewrite rhs_rewrite by simp
qed
theorem hoeffding_iid_ge_delta_bound:
fixes a :: 'a and n :: nat and δ_hat :: real and μ_hat :: real and l u :: real
assumes a_in: "a ∈ A"
and delta_bound: "0 < δ_hat" "δ_hat ≤ 1"
and bounds: "∀i<n. ∀ω∈Ω. l ≤ Z i a ω ∧ Z i a ω ≤ u"
and mu_def: "μ_hat = (∑i<n. expectation (λω. Z i a ω))"
and n_pos: "n > 0"
and eps_pos: "ε ≥ 0"
and u_minus_l_nonzero: "u - l ≠ 0"
and space_M: "space M = Ω"
and sets_M: "sets M = ℱ"
and indep: "indep_vars (λ_. borel) (λi. (λω. Z i a ω)) {i. i < n}"
and rv: "∀i<n. random_variable borel (λω. Z i a ω)"
and eps_expression: "ε = sqrt ((real n * (u - l)^2 * ln (1 / δ_hat)) / 2)"
shows "prob {ω ∈ Ω. (∑i<n. Z i a ω) ≥ μ_hat + ε} ≤ δ_hat ∧
prob {ω ∈ Ω. (∑i<n. Z i a ω) ≤ μ_hat - ε} ≤ δ_hat"
proof -
have eps_pos: "ε ≥ 0"
using assms(7) by auto
have eps_squared: "ε^2 = (real n * (u - l)^2 * ln (1 / δ_hat)) / 2"
using eps_expression by (simp add: assms(3) delta_bound(1))
have exp_eq: "exp (- 2 * ε^2 / (real n * (u - l)^2)) = δ_hat"
proof -
have "- 2 * ε^2 / (real n * (u - l)^2) = - ln (1 / δ_hat)"
proof -
have "ε^2 /( real n * (u - l)^2 ) = ( ln (1 / δ_hat)) / 2"
using assms(6,8) eps_squared by fastforce
then show ?thesis by linarith
qed
also have "... = ln δ_hat"
using delta_bound(1) by (simp add: ln_div)
finally show ?thesis
using delta_bound(1) by simp
qed
have ge_bound:
"prob {ω ∈ Ω. (∑i<n. Z i a ω) ≥ μ_hat + ε} ≤ δ_hat"
using hoeffding_iid_bound_ge_general[OF a_in eps_pos bounds mu_def u_minus_l_nonzero
n_pos space_M sets_M indep rv] exp_eq by simp
have le_bound:
"prob {ω ∈ Ω. (∑i<n. Z i a ω) ≤ μ_hat - ε} ≤ δ_hat"
using hoeffding_iid_bound_le_general[OF a_in eps_pos bounds mu_def u_minus_l_nonzero
n_pos space_M sets_M indep rv] exp_eq by simp
show ?thesis
using ge_bound le_bound by simp
qed
lemma add_le_iff:
fixes x y z :: real
shows "x ≤ y - z ⟷ x - y ≤ -z"
by auto
lemma max_Suc_0_eq_1: "max (Suc 0) x = max 1 x"
by simp
theorem ucb_suboptimal_bound_set:
fixes t :: nat
and a :: "'a"
and Δ :: "'a ⇒ real"
assumes finite_A: "finite A"
and a_in_A: "a ∈ A"
and a_star_in_A: "a_star ∈ A"
and argmax_exists: "A ≠ {}"
and subopt_gap: "Δ a > 0"
and a_not_opt: "∃a'. a' ∈ A ∧ Δ a > 0"
and delta_a: "Δ a = μ a_star - μ a"
and ω_in_Ω: "ω ∈ Ω"
and asm: " ω ∈ {ω ∈ Ω. A_t_plus_1 t ω = a}"
and setopt: "∀ω ∈ Ω. ∃a_max ∈ A. ∀b ∈ A. U t b ω ≤ U t a_max ω "
and A_t_plus_1_maximizes:
"⋀t ω a. A_t_plus_1 t ω = a ⟹ a ∈ A ∧ (∀b ∈ A. U t a ω ≥ U t b ω)"
shows "{ω ∈ Ω. A_t_plus_1 t ω = a} ⊆
{ω ∈ Ω. U t a_star ω ≤ μ a_star} ∪ {ω ∈ Ω. μ a_star ≤ U t a ω}"
proof -
have set_result_1:
"{ω ∈ Ω. A_t_plus_1 t ω = a} ⊆ {ω ∈ Ω. ∀b ∈ A. U t a ω ≥ U t b ω}"
proof
fix ω
assume "ω ∈ {ω ∈ Ω. A_t_plus_1 t ω = a}"
hence "A_t_plus_1 t ω = a" by simp
from setopt obtain a_max where
"a_max ∈ A ∧ (∀b ∈ A. U t b ω ≤ U t a_max ω)"
using ‹ω ∈ {ω ∈ Ω. A_t_plus_1 t ω = a}› by auto
hence "∀b ∈ A. U t a ω ≥ U t b ω"
using `A_t_plus_1 t ω = a` A_t_plus_1_maximizes by auto
thus "ω ∈ {ω ∈ Ω. ∀b ∈ A. U t a ω ≥ U t b ω}"
using ‹ω ∈ {ω ∈ Ω. A_t_plus_1 t ω = a}› by blast
qed
have set_result_2: "{ω ∈ Ω. ∀b ∈ A. U t a ω ≥ U t b ω} ⊆ {ω ∈ Ω. U t a_star ω ≤ U t a ω}"
proof
fix ω assume asm: "ω ∈ {ω ∈ Ω. ∀b ∈ A. U t a ω ≥ U t b ω}"
hence "ω ∈ Ω" and ub: "∀b ∈ A. U t a ω ≥ U t b ω" by simp_all
from a_star_in_A have "U t a ω ≥ U t a_star ω" using ub by simp
thus "ω ∈ {ω ∈ Ω. U t a_star ω ≤ U t a ω}" using `ω ∈ Ω` by simp
qed
have set_result_3: "{ω ∈ Ω. U t a_star ω ≤ U t a ω} ⊆
{ω ∈ Ω. U t a_star ω ≤ μ a_star} ∪ {ω ∈ Ω. μ a_star ≤ U t a ω}"
proof
fix ω assume asm: "ω ∈ {ω ∈ Ω. U t a_star ω ≤ U t a ω}"
hence "ω ∈ Ω" and le: "U t a_star ω ≤ U t a ω" by simp_all
show "ω ∈ {ω ∈ Ω. U t a_star ω ≤ μ a_star} ∪ {ω ∈ Ω. μ a_star ≤ U t a ω}"
proof (cases "U t a_star ω ≤ μ a_star")
case True
then show ?thesis using `ω ∈ Ω` by simp
next
case False
hence "μ a_star < U t a_star ω" by simp
with le have "μ a_star < U t a ω" by (simp add: less_le_trans)
then show ?thesis using `ω ∈ Ω` by simp
qed
qed
from set_result_1 set_result_2 set_result_3 show ?thesis by auto
qed
theorem ucb_suboptimal_bound_prob_statement:
fixes t :: nat and a :: 'a and Δ :: "'a ⇒ real"
assumes finite_A: "finite A"
and a_star_in_A: "a_star ∈ A"
and argmax_exists: "A ≠ {}"
and subopt_gap: "Δ a > 0"
and a_not_opt: "∃a'. a' ∈ A ∧ Δ a > 0"
and ω_in_Ω: "ω ∈ Ω"
and asm: " ω ∈ {ω ∈ Ω. A_t_plus_1 t ω = a}"
and setopt: "∀ω ∈ Ω. ∃a_max ∈ A. ∀b ∈ A. U t b ω ≤ U t a_max ω "
and A_t_plus_1_maximizes:
"⋀t ω a. A_t_plus_1 t ω = a ⟹ a ∈ A ∧ (∀b ∈ A. U t a ω ≥ U t b ω)"
and a_in_A: "a ∈ A"
and omega_in: "ω ∈ Ω"
and subopt_gap: "Δ a > 0"
and delta_a: "Δ a = μ a_star - μ a"
and H_def: "H = (2 * ln (1 / δ)) / (Δ a)^2"
and E_def: "E = {ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}"
and F_def: "F = {ω ∈ Ω. U t a_star ω ≤ μ a_star} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}"
and G_def: "G = {ω ∈ Ω. μ a_star ≤ U t a ω} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}"
and meas_sets: "E ∈ sets M" "F ∈ sets M" "G ∈ sets M"
and prob_inter: "prob (F ∩ G) ≡ enn2real (emeasure M (F ∩ G))"
shows "prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}) ≤
prob ({ω ∈ Ω. U t a_star ω ≤ μ a_star} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}) +
prob ({ω ∈ Ω. U t a ω ≥ μ a_star} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)})"
proof -
have subset_result:"E ⊆ F ∪ G"
proof -
have step1: "{ω ∈ Ω. A_t_plus_1 t ω = a} ⊆
{ω ∈ Ω. U t a_star ω ≤ μ a_star} ∪ {ω ∈ Ω. μ a_star ≤ U t a ω}"
using ucb_suboptimal_bound_set assms by blast
then have step2:
"{ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)} ⊆
({ω ∈ Ω. U t a_star ω ≤ μ a_star} ∪ {ω ∈ Ω. μ a_star ≤ U t a ω}) ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}"
by auto
then have step3:
"{ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)} ⊆
({ω ∈ Ω. U t a_star ω ≤ μ a_star} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}) ∪
({ω ∈ Ω. μ a_star ≤ U t a ω} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)})"
by (auto simp add: set_eq_iff)
then have step4:
"E ⊆ F ∪ G" using assms by simp
then show ?thesis
by (simp add: E_def F_def G_def)
qed
have bound: "prob E ≤ prob F + prob G"
proof (rule union_bound)
show "E ⊆ F ∪ G" by (simp add: subset_result)
show "E ∈ sets M" using meas_sets by simp
show "F ∈ sets M" using meas_sets by simp
show "G ∈ sets M " using meas_sets by simp
have "prob (F ∩ G) ≡ enn2real (emeasure M (F ∩ G))" using prob_inter by simp
have "prob E ≤ prob (F ∪ G)"
using assms(18,19,20) increasingD measure_increasing subset_result by blast
have "prob (F ∪ G) = prob F + prob G - prob (F ∩ G)"
by (simp add: Int_commute assms(19,20) finite_measure_Diff' finite_measure_Union')
qed
hence union_bound:
"prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}) ≤
prob ({ω ∈ Ω. U t a_star ω ≤ μ a_star} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)}) +
prob ({ω ∈ Ω. U t a ω ≥ μ a_star} ∩ {ω ∈ Ω. H ≤ real (N_n t a ω)})"
using bound assms by simp
show ?thesis
using union_bound by simp
qed
lemma U_le_μ_pointwise:
"U t a_star ω ≤ μ a_star ⟷
M_val t a_star ω - μ a_star ≤
- q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a_star ω))))"
unfolding U_def
using add_le_iff max_Suc_0_eq_1
by auto
lemma U_ge_μ_pointwise:
assumes delta_a: "Δ a = μ a_star - μ a"
shows
"U t a ω ≥ μ a_star ⟷
M_val t a ω - μ a ≥ Δ a - q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a ω))))"
proof -
have delta_plus_mu_eq:
"Δ a + μ a = μ a_star"
proof -
have "Δ a = μ a_star - μ a"
using delta_a by simp
hence "Δ a + μ a = (μ a_star - μ a) + μ a"
by simp
also have "... = μ a_star"
by simp
finally show ?thesis .
qed
have U_ge_μ_rewrite:
"U t a ω ≥ μ a_star ⟷ M_val t a ω - μ a ≥ Δ a - q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a ω))))"
unfolding U_def using add_le_iff max_Suc_0_eq_1 delta_plus_mu_eq by auto
then show ?thesis by simp
qed
theorem hoeffding_iid__bound_le:
fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l_hat u_hat :: real
and I :: "nat set"
and X_new :: "nat ⇒ 'b ⇒ real"
and a_bound b_bound :: "nat ⇒ real"
assumes a_in: "a ∈ A"
and "b ∈ Ω"
and eps_pos: "ε ≥ 0"
and eps: "ε = abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))"
and "δ ≥ 0 ∧ δ ≤ 1"
and t_eq_n: "t = n"
and "c > 0 "
and bounds: "∀j < t. ∀ω ∈ Ω. ∀ a ∈A. l_hat ≤ Z_hat j a ω ∧ Z_hat j a ω ≤ u_hat"
and mu_def: "μ_hat = (∑ j < t. expectation (λω. Z_hat j a ω))"
and "u_hat - l_hat ≠ 0"
and t_pos: "t > 0"
and "∀t<n. N_n t a_star b > 0"
and n_pos: "n > 0"
and "M_val t a_star b ≡ (∑ s < t. if π s b = a_star then Z s a_star b else 0) /
real (N_n t a_star b)"
and widths: "(∑ i ∈ I. (b_bound i - a_bound i)^2) = (real n) * (u_hat - l_hat)^2"
and space_M: "space M = Ω"
and sets_M: "sets M = ℱ"
and indep: "indep_vars (λ_. borel) (λj. (λω. Z j a ω)) {j. j < t}"
and rv: "∀j<t. random_variable borel (λω. Z j a ω)"
and "∀j<t. Z_hat j a_star ω = c * (if π j b = a_star then Z j a_star b else 0) "
and "indep_interval_bounded_random_variables M I X_new a_bound b_bound"
and indep_loc: "indep_interval_bounded_random_variables M {j. j < t}
(λj. (λω. Z_hat j a_star ω))
(λj. l_hat) (λj. u_hat)"
and H: "Hoeffding_ineq M {j. j < t}
(λj. (λω. Z_hat j a_star ω))
(λj. l_hat) (λj. u_hat)"
and sum_integrals_eq: "(∑ j ∈ {j. j < t}. integral⇧L M (λω. Z_hat j a_star ω)) = μ_hat"
and rewriting: "prob {ω ∈ Ω. (∑ j < t. Z_hat j a_star ω) - (∑ j < t. expectation (Z_hat j a_star)) ≤ - ε} =
prob {x ∈ space M. (∑ j < t. Z_hat j a_star x) ≤ (∑ j < t. expectation (Z_hat j a_star)) - ε}"
shows "prob {ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) ≤ μ_hat - ε}
≤ δ"
proof -
let ?I = "{j. j < t}"
let ?X_new = "λj. (λω. Z_hat j a_star ω)"
let ?a_bound = "λj. l_hat"
let ?b_bound = "λj. u_hat"
have
"M_val t a_star b ≡ (∑ s < t. if π s b = a_star then Z s a_star b else 0) / real (N_n t a_star b)"
using assms by simp
have finite_I: "finite ?I" by simp
have AE_bounds: "∀j ∈ ?I. AE ω in M. ?a_bound j ≤ ?X_new j ω ∧ ?X_new j ω ≤ ?b_bound j"
proof
fix j assume "j ∈ ?I"
then have "j < t" by simp
then have bound_j: "∀ω ∈ Ω. l_hat ≤ Z_hat j a_star ω ∧ Z_hat j a_star ω ≤ u_hat"
using bounds by (simp add: a_in_A)
then show "AE ω in M. ?a_bound j ≤ ?X_new j ω ∧ ?X_new j ω ≤ ?b_bound j"
using space_M sets_M by force
qed
have indep_loc: "indep_interval_bounded_random_variables M ?I ?X_new ?a_bound ?b_bound"
using assms by simp
have H: "Hoeffding_ineq M ?I ?X_new ?a_bound ?b_bound"
using assms by simp
have sum_integrals_eq: "(∑ j∈?I. integral⇧L M (?X_new j)) = μ_hat"
using assms by simp
have widths:
"(∑ j ∈ ?I. (?b_bound j - ?a_bound j)^2) = (real n) * (u_hat - l_hat)^2"
proof -
have "(∑ j ∈ ?I. (?b_bound j - ?a_bound j)^2) =
(∑ j ∈ ?I. (u_hat - l_hat)^2)"
by simp
also have "... = card ?I * (u_hat - l_hat)^2"
by simp
also have "card ?I = t"
by simp
also have "... = n"
using ‹t = n› assms by blast
finally show ?thesis
using assms by fastforce
qed
have widths_pos: "0 < (∑ j ∈ ?I. (?b_bound j - ?a_bound j )^2)"
using ‹u_hat - l_hat ≠ 0› widths n_pos by auto
have res: "prob {ω ∈ Ω. (∑ j < t. Z_hat j a_star ω) ≤ (∑ j < t. expectation (Z_hat j a_star)) - ε} =
prob {x ∈ space M. (∑ j < t. Z_hat j a_star x) ≤ (∑ j < t. expectation (Z_hat j a_star)) - ε}"
using assms by simp
then have tail: "prob {ω ∈ Ω. (∑ j∈?I. ?X_new j ω) ≤ (∑ j∈?I. integral⇧L M (?X_new j)) - ε}
≤ exp (- 2 * ε^2 / (∑ j∈?I. (?b_bound j - ?a_bound j)^2))"
using Hoeffding_ineq.Hoeffding_ineq_le[OF H eps_pos widths_pos]
by (smt (verit, best) Collect_cong assms)
have lhs_rewrite: "{ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) ≤ μ_hat - ε}
= {ω ∈ Ω. (∑ j∈?I. ?X_new j ω) - (∑ j∈?I. integral⇧L M (?X_new j)) ≤ -ε}"
by (metis (mono_tags, lifting) add_le_iff assms(24,6) lessThan_def)
have rhs_rewrite: "exp (- 2 * ε^2 / (∑ j∈?I. (?b_bound j - ?a_bound j)^2)) = exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
using widths by simp
have lhs_prob:"prob {ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) ≤ μ_hat - ε}
= prob {ω ∈ Ω. (∑ j∈?I. ?X_new j ω) - (∑ j∈?I. integral⇧L M (?X_new j)) ≤ -ε}"
using lhs_rewrite by simp
then have "prob {ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) ≤ μ_hat - ε} ≤
exp (- 2 * ε^2 / (∑ j∈?I. (?b_bound j - ?a_bound j)^2))"
using lhs_rewrite tail lhs_prob by (smt (verit, ccfv_threshold) Collect_cong)
then have "prob {ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) ≤ μ_hat - ε} ≤ exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
using rhs_rewrite by linarith
then have "prob {ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) - μ_hat≤ - ε} =
prob {ω ∈ Ω. (∑ j < n. Z_hat j a_star ω) - μ_hat≤ - abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))} "
using eps by simp
have res1: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2)) =
exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
(real n * (u_hat - l_hat)^2))"
using eps by simp
have "exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
(real n * (u_hat - l_hat)^2)) =
exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 / (real n * (u_hat - l_hat)^2)) "
using eps eps_pos by blast
then have "... =
exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * (u_hat - l_hat)^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
by (metis (no_types, opaque_lifting) more_arith_simps(11) power_mult_distrib times_divide_eq_left
times_divide_eq_right)
have "... =
exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * abs ((u_hat - l_hat))^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
by simp
have "... = exp (- 2 * (1/ ((real n) )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
using assms by simp
then have "... = exp (-2 * (1/ ((real n) )) * (((real n / 2) * ln (1 / δ))) )"
using power2_eq_square by (smt (verit, best)
arithmetic_simps(51) assms(10) eps eps_pos real_sqrt_ge_0_iff real_sqrt_pow2
zero_le_mult_iff)
then have "... = exp (-1* ln (1 / δ)) "
using assms by (simp add: field_simps)
then have fin: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2)) = δ "
using δ_pos assms
by (metis (lifting)
‹exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))⇧2) = exp (- 2 * (1 / real n) * (real n / 2 * ln (1 / δ)))›
‹exp (- 2 * (¦u_hat - l_hat¦ * sqrt (real n / 2 * ln (1 / δ)))⇧2 / (real n * (u_hat - l_hat)⇧2)) = exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * (u_hat - l_hat)⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2)›
‹exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * (u_hat - l_hat)⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2) = exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * ¦u_hat - l_hat¦⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2)›
‹exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * ¦u_hat - l_hat¦⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2) = exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))⇧2)›
arith_simps(56) exp_ln ln_divide_pos ln_one more_arith_simps(10) mult_minus1 of_nat_zero_less_power_iff power_0)
show ?thesis using assms fin
‹prob {ω ∈ Ω. (∑j<n. Z_hat j a_star ω) ≤ μ_hat - ε} ≤ exp (- 2 * ε⇧2 / (real n * (u_hat - l_hat)⇧2))›
by presburger
qed
theorem hoeffding_iid__bound_ge:
fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l_hat u_hat :: real
and I :: "nat set"
and X_new :: "nat ⇒ 'b ⇒ real"
and a_bound b_bound :: "nat ⇒ real"
assumes a_in: "a ∈ A"
and "b ∈ Ω"
and eps_pos: "ε ≥ 0"
and eps: "ε = abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))"
and "δ ≥ 0 ∧ δ ≤ 1"
and t_eq_n: "t = n"
and "c > 0 "
and bounds: "∀j < t. ∀ω ∈ Ω. ∀ a ∈A. l_hat ≤ Z_hat j a ω ∧ Z_hat j a ω ≤ u_hat"
and mu_def: "μ_hat = (∑ j < t. expectation (λω. Z_hat j a ω))"
and "u_hat - l_hat ≠ 0"
and t_pos: "t > 0"
and "∀t<n. N_n t a_star b > 0"
and n_pos: "n > 0"
and "M_val t a b ≡ (∑ s < t. if π s b = a then Z s a b else 0) /
real (N_n t a b)"
and widths: "(∑ i ∈ I. (b_bound i - a_bound i)^2) = (real n) * (u_hat - l_hat)^2"
and space_M: "space M = Ω"
and sets_M: "sets M = ℱ"
and indep: "indep_vars (λ_. borel) (λj. (λω. Z j a ω)) {j. j < t}"
and rv: "∀j<t. random_variable borel (λω. Z j a ω)"
and "∀j<t. Z_hat j a ω = c * (if π j b = a then Z j a_star b else 0) "
and "indep_interval_bounded_random_variables M I X_new a_bound b_bound"
and indep_loc: "indep_interval_bounded_random_variables M {j. j < t}
(λj. (λω. Z_hat j a ω))
(λj. l_hat) (λj. u_hat)"
and H: "Hoeffding_ineq M {j. j < t}
(λj. (λω. Z_hat j a ω))
(λj. l_hat) (λj. u_hat)"
and sum_integrals_eq: "(∑ j ∈ {j. j < t}. integral⇧L M (λω. Z_hat j a ω)) = μ_hat"
and rewriting: "prob {ω ∈ Ω. (∑ j < t. Z_hat j a ω) - (∑ j < t. expectation (Z_hat j a)) ≥ ε} =
prob {x ∈ space M. (∑ j < t. Z_hat j a x) ≥ (∑ j < t. expectation (Z_hat j a)) + ε}"
shows "prob {ω ∈ Ω. (∑ j < n. Z_hat j a ω) ≥ μ_hat + ε} ≤ δ"
proof -
let ?I = "{j. j < t}"
let ?X_new = "λj. (λω. Z_hat j a ω)"
let ?a_bound = "λj. l_hat"
let ?b_bound = "λj. u_hat"
have
"M_val t a b ≡ (∑ s < t. if π s b = a then Z s a b else 0) / real (N_n t a b)"
using assms by simp
have finite_I: "finite ?I" by simp
have AE_bounds: "∀j ∈ ?I. AE ω in M. ?a_bound j ≤ ?X_new j ω ∧ ?X_new j ω ≤ ?b_bound j"
proof
fix j assume "j ∈ ?I"
then have "j < t" by simp
then have bound_j: "∀ω ∈ Ω. l_hat ≤ Z_hat j a ω ∧ Z_hat j a ω ≤ u_hat"
using bounds by (simp add: a_in_A)
then show "AE ω in M. ?a_bound j ≤ ?X_new j ω ∧ ?X_new j ω ≤ ?b_bound j"
using space_M sets_M by force
qed
have indep_loc: "indep_interval_bounded_random_variables M ?I ?X_new ?a_bound ?b_bound"
using assms by simp
have H: "Hoeffding_ineq M ?I ?X_new ?a_bound ?b_bound"
using assms by simp
have sum_integrals_eq: "(∑ j∈?I. integral⇧L M (?X_new j)) = μ_hat"
using assms by simp
have widths:
"(∑ j ∈ ?I. (?b_bound j - ?a_bound j)^2) = (real n) * (u_hat - l_hat)^2"
proof -
have "(∑ j ∈ ?I. (?b_bound j - ?a_bound j)^2) =
(∑ j ∈ ?I. (u_hat - l_hat)^2)"
by simp
also have "... = card ?I * (u_hat - l_hat)^2"
by simp
also have "card ?I = t"
by simp
also have "... = n"
using ‹t = n› assms by blast
finally show ?thesis
using assms by fastforce
qed
have widths_pos: "0 < (∑ j ∈ ?I. (?b_bound j - ?a_bound j )^2)"
using ‹u_hat - l_hat ≠ 0› widths n_pos by auto
have res: "prob {ω ∈ Ω. (∑ j < t. Z_hat j a ω) - (∑ j < t. expectation (Z_hat j a)) ≥ ε} =
prob {x ∈ space M. (∑ j < t. Z_hat j a x) ≥ (∑ j < t. expectation (Z_hat j a)) + ε}"
using assms by simp
then have tail: "prob {ω ∈ Ω. (∑ j∈?I. ?X_new j ω) ≥ (∑ j∈?I. integral⇧L M (?X_new j)) + ε}
≤ exp (- 2 * ε^2 / (∑ j∈?I. (?b_bound j - ?a_bound j)^2))"
using Hoeffding_ineq.Hoeffding_ineq_ge[OF H eps_pos widths_pos]
by (smt (verit, best) Collect_cong assms)
have lhs_rewrite: "{ω ∈ Ω. (∑ j < n. Z_hat j a ω) ≥ μ_hat + ε}
= {ω ∈ Ω. (∑ j∈?I. ?X_new j ω) - (∑ j∈?I. integral⇧L M (?X_new j)) ≥ ε}"
using assms by (metis (lifting) ext add.commute le_diff_eq lessThan_def)
have rhs_rewrite: "exp (- 2 * ε^2 / (∑ j∈?I. (?b_bound j - ?a_bound j)^2)) = exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
using widths by simp
have lhs_prob:"prob {ω ∈ Ω. (∑ j < n. Z_hat j a ω) ≥ μ_hat + ε}
= prob{ω ∈ Ω. (∑ j∈?I. ?X_new j ω) - (∑ j∈?I. integral⇧L M (?X_new j)) ≥ ε}"
using assms lhs_rewrite by presburger
then have "prob {ω ∈ Ω. (∑ j < n. Z_hat j a ω) ≥ μ_hat + ε} ≤
exp (- 2 * ε^2 / (∑ j∈?I. (?b_bound j - ?a_bound j)^2))"
using lhs_rewrite tail lhs_prob by (smt (verit, ccfv_threshold) Collect_cong)
then have "prob {ω ∈ Ω. (∑ j < n. Z_hat j a ω) ≥ μ_hat + ε} ≤ exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
using rhs_rewrite by linarith
then have "prob {ω ∈ Ω. (∑ j < n. Z_hat j a ω) - μ_hat ≥ ε} =
prob {ω ∈ Ω. (∑ j < n. Z_hat j a ω) - μ_hat ≥ abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))} "
using eps by simp
have res1: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2)) =
exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
(real n * (u_hat - l_hat)^2))"
using eps by simp
have "exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
(real n * (u_hat - l_hat)^2)) =
exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 / (real n * (u_hat - l_hat)^2)) "
using eps eps_pos by blast
then have "... = exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * (u_hat - l_hat)^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
by (metis (no_types, opaque_lifting) more_arith_simps(11) power_mult_distrib times_divide_eq_left
times_divide_eq_right)
also have "... = exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * abs ((u_hat - l_hat))^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
by simp
have "... = exp (- 2 * (1/ ((real n) )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
using assms by simp
then have "... = exp (-2 * (1/ ((real n) )) * (((real n / 2) * ln (1 / δ))) )"
using power2_eq_square by (smt (verit, best)
arithmetic_simps(51) assms(10) eps eps_pos real_sqrt_ge_0_iff real_sqrt_pow2
zero_le_mult_iff)
then have "... = exp (-1* ln (1 / δ)) "
using assms by (simp add: field_simps)
then have fin: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2)) = δ "
using δ_pos assms
by (metis (lifting)
‹exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))⇧2) = exp (- 2 * (1 / real n) * (real n / 2 * ln (1 / δ)))›
‹exp (- 2 * (¦u_hat - l_hat¦ * sqrt (real n / 2 * ln (1 / δ)))⇧2 / (real n * (u_hat - l_hat)⇧2)) = exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * (u_hat - l_hat)⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2)›
‹exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * (u_hat - l_hat)⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2) = exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * ¦u_hat - l_hat¦⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2)›
‹exp (- 2 * (¦u_hat - l_hat¦⇧2 / (real n * ¦u_hat - l_hat¦⇧2)) * (sqrt (real n / 2 * ln (1 / δ)))⇧2) = exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))⇧2)›
diff_0 exp_ln ln_divide_pos ln_one more_arith_simps(10) mult_minus1 of_nat_zero_less_power_iff power_0)
show ?thesis using assms fin
using ‹prob {ω ∈ Ω. μ_hat + ε ≤ (∑j<n. Z_hat j a ω)} ≤ exp (- 2 * ε⇧2 / (real n * (u_hat - l_hat)⇧2))›
by presburger
qed
end
end