Theory Discrete_UCB_Step3
theory Discrete_UCB_Step3
imports Discrete_UCB_Step2
begin
locale bandit_policy = bandit_problem + prob_space +
fixes
Ω :: "'b set"
and ℱ :: "'b set set"
and π :: "nat ⇒ 'b ⇒ 'a"
and N_n :: "nat ⇒ 'a ⇒ 'b ⇒ nat"
and Z :: "nat ⇒ 'a ⇒ 'b ⇒ real"
and δ :: real
and q :: real
assumes fin_A: "finite A"
and "ω ∈ Ω"
and a_in_A: "a ∈ A"
and measurable_policy: "∀t. π t ∈ measurable M (count_space A)"
and N_n_def: "∀n a ω. N_n n a ω = card {t ∈ {0..<n}. π (t+1) ω = a}"
and count_assm_pointwise: "∀n ω. (∑a ∈ A. real (N_n n a ω)) = real n"
and delta_pos: "0 < δ"
and delta_less1: "δ < 1"
and q_pos: "q > 0"
begin
definition sample_mean_Z :: "nat ⇒ 'a ⇒ 'b ⇒ real" where
"sample_mean_Z n a ω = (1 / real n) * (∑i<n. Z i a ω)"
definition M_fun :: "nat ⇒ 'a ⇒ 'b ⇒ real" where
"M_fun t a ω = (if N_n (t+1) a ω = 0 then 0
else (∑ s < t. (if π s ω = a then Z s a ω else 0)) / real (N_n t a ω))"
definition U :: "nat ⇒ 'a ⇒ 'b ⇒ real" where
"U t a ω = M_fun t a ω + q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a ω))))"
definition A_t_plus_1 :: "nat ⇒ 'b ⇒ 'a" where
"A_t_plus_1 t ω = (SOME a. a ∈ A ∧ (∀a'. a' ∈ A ⟶ U t a ω ≥ U t a' ω))"
definition prob_eq_Ex :: "'b set ⇒ bool" where
"prob_eq_Ex E ≡ prob E = expectation (λω. indicator E ω)"
theorem proposition_15_7:
assumes
a_in_A: "a ∈ A"
and "ω ∈ Ω"
and subopt_gap: "Δ a > 0"
and a_not_opt: "∃ a' ∈ A. Δ a' > 0"
and delta_a: "∀ a ∈ A. Δ a = μ a_star - μ a"
and "k ≤ n"
and from_UCB_step1: "∀ a ∈ A. expectation (λω. real (N_n n a ω)) ≤
s n + expectation (λω. (∑ t = k..<n. if π (t+1) ω = a ∧ s t ≤ real (N_n t a ω) then 1 else 0))"
and from_UCB_step2: "∀ a ∈ A. ∀ t ∈ {k..<n}. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ln (1 / δ)) / (Δ a)^2}) ≤ 2 * δ"
and eps_pos: "ε > 0"
and t_gt0: "∀ t ∈ {k..<n}. t > 0"
and choice_delta: "∀ t ∈ {k..<n}. δ = 1 / (real t powr ε)"
and s_form: "∀ a ∈ A. ∀ u. s u = (2 * ε * ln (real u)) / ((Δ a)^2)"
and subset_meas:"∀ a ∈ A. ∀t ∈ {k..<n}. ∀ω ∈ Ω. {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t)/(Δ a)^2 ≤ N_n t a ω} ⊆ Ω"
and prob_eq_E_assm: "∀ a ∈ A. ∀t ∈ {k..<n}. prob {ω. π (Suc t) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} =
prob ({ω. π (Suc t) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ∩ space M)"
and finiteness: "∀t∈{k..<n}. ∀a∈A. emeasure M {ω. π (t+1) ω = a ∧ 2*ε*ln(real t)/(Δ a)^2 ≤ real (N_n t a ω)} < ∞"
and measurable_set: "∀t∈{k..<n}. ∀a∈A. {ω. π (t+1) ω = a ∧ 2*ε*ln(real t)/(Δ a)^2 ≤ real (N_n t a ω)} ∈ sets M"
and eq_sets_optimum:
"∀ a ∈ A. ∀ t ∈ {k..<n}. {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} =
{ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}"
shows
"∀ a ∈ A. expectation (λω. real (N_n n a ω)) ≤ s n + (∑ t = k..<n. 2 / (real t powr ε))"
proof -
have def_sn: "∀ a ∈ A. s n = (2 * ε * ln (real n)) / ((Δ a)^2)"
using s_form by simp
have def_st: "∀ a ∈ A. s t = (2 * ε * ln (real t)) / ((Δ a)^2)"
using s_form by simp
then have expression: "∀ a ∈ A. expectation (λω. real (N_n n a ω)) ≤
(2 * ε * ln (real n)) / ((Δ a)^2) + expectation (λω. (∑t = k..<n. if π (t+1) ω = a ∧
(2 * ε * ln (real t)) / ((Δ a)^2) ≤ real (N_n t a ω) then 1 else 0))"
using assms def_sn def_st by simp
have eq_if_of_bool:
"∀ a ∈ A. expectation (λω. ∑ t = k..<n.
(if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0))
= expectation (λω. ∑ t = k..<n.
of_bool (π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)))"
by (simp add: of_bool_def)
have eq_indic_bool:
"∀ a ∈ A. expectation (λω. ∑ t = k..<n.
of_bool (π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)))
= expectation (λω. ∑ t = k..<n.
indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
by (simp add: indicator_def of_bool_def)
have expression_1:
"∀ a ∈ A. expectation (λω. ∑ t = k..<n.
(if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n.
indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
proof
fix a assume "a ∈ A"
from eq_if_of_bool[rule_format, OF `a ∈ A`]
have eq1: "expectation (λω. ∑ t = k..<n.
(if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n.
of_bool (π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)))"
by simp
from eq_indic_bool[rule_format, OF `a ∈ A`]
have eq2: "expectation (λω. ∑ t = k..<n.
of_bool (π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω))) =
expectation (λω. ∑ t = k..<n.
indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
by simp
from eq1 eq2 show
"expectation (λω. ∑ t = k..<n.
(if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n.
indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
by (rule trans)
qed
have res:"∀ a ∈ A. ∀t ∈ {k..<n}. prob_eq_Ex {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)}"
using assms prob_eq_Ex_def by auto
have lin_of_expect_indicators:
"∀ a ∈ A. ∀t ∈ {k..<n}. prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} =
expectation (λω. (indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω))"
using prob_eq_Ex_def res by simp
then have key_result_1: "∀ a ∈ A. (∑t = k..<n.
expectation (λω. (indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)))
= (∑t = k..<n. prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)})"
using lin_of_expect_indicators by simp
have expression_follow_up: "∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
(∑ t = k..<n. prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)})"
proof -
have res1: "∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n.
of_bool (π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)))"
using eq_if_of_bool by simp
have res2:"∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
using expression_1 by simp
have res3: "∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
( ∑ t = k..<n. expectation (λω. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω))"
proof -
have "∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
using res2 by blast
then have "∀ a ∈ A. expectation (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω) =
integral⇧L M (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
by simp
then have result_intermed:"∀ a ∈ A. expectation (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω) =
( ∑ t = k..<n. integral⇧L M (λω. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω))"
using assms integral_sum by simp
have result: "∀ a ∈ A. expectation (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε *
ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω) =
( ∑ t = k..<n. expectation (λω. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω))"
using result_intermed by auto
have "∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
expectation (λω. ∑ t = k..<n. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω)"
using res2 by simp
then have linearity_expectation: "∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
( ∑ t = k..<n. expectation (λω. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω))"
using result by simp
then show ?thesis using linearity_expectation by simp
qed
have final_linearity:"∀ a ∈ A.
expectation (λω. ∑ t = k..<n. (if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
(∑t = k..<n. prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)})"
using res1 res2 res3 key_result_1 by simp
then show ?thesis using final_linearity by simp
qed
have intermed_result: "∀ a ∈ A.
expectation (λω. real (N_n n a ω)) ≤
(2 * ε * ln (real n)) / (Δ a)^2 +
(∑ t = k..<n. prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)})"
using expression_follow_up expression by simp
then have follow_up_result: "∀ a ∈ A. ∀ t ∈ {k..<n}.
prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} =
prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2})"
using expression_follow_up expression eq_sets_optimum intermed_result by simp
have next_result_sum_prob: "∀ a ∈ A.
(∑ t = k..<n. prob {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)}) =
(∑ t = k..<n. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}))"
using assms follow_up_result by simp
then have next_result_fin: "∀ a ∈ A.
expectation (λω. real (N_n n a ω)) ≤
(2 * ε * ln (real n)) / (Δ a)^2 +
(∑ t = k..<n. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}))"
using expression_follow_up next_result_sum_prob expression by fastforce
have generalized_bound:
"∀ a ∈ A. ∀ t ∈ {k..<n}. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}) ≤ 2 / (real t powr ε)"
proof
fix a
assume a_in: "a ∈ A"
show "∀ t ∈ {k..<n}. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}) ≤ 2 / (real t powr ε)"
proof
fix t
assume t_in: "t ∈ {k..<n}"
have Hprob:
"prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ln (1 / δ)) / (Δ a)^2}) ≤ 2 * δ"
using from_UCB_step2[rule_format, OF a_in t_in] by blast
have ln_eq: "ln (1 / δ) = ε * ln (real t)"
using choice_delta[rule_format, OF t_in] eps_pos by simp
hence threshold_eq:
"(2 * ln (1 / δ)) / (Δ a)^2 = (2 * ε * ln (real t)) / (Δ a)^2"
by simp
hence set_eq:
"{ω ∈ Ω. N_n t a ω ≥ (2 * ln (1 / δ)) / (Δ a)^2} =
{ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}"
by auto
from Hprob set_eq have
"prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}) ≤ 2 * δ"
by simp
moreover from choice_delta[rule_format, OF t_in] have "2 * δ = 2 / (real t powr ε)"
by simp
ultimately show
"prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}) ≤ 2 / (real t powr ε)"
by simp
qed
qed
have sum_mono_expression:
"∀ a ∈ A. (∑ t = k..<n. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}))
≤ (∑ t = k..<n. 2 / (real t powr ε))"
proof
fix a
assume a_in: "a ∈ A"
show "(∑ t = k..<n. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}))
≤ (∑ t = k..<n. 2 / (real t powr ε))"
using generalized_bound a_in by (intro sum_mono) auto
qed
have final_result:
"∀ a ∈ A. expectation (λω. real (N_n n a ω))
≤ (2 * ε * ln (real n)) / ((Δ a)^2) + (∑ t = k..<n. 2 / (real t powr ε))"
proof
fix a
assume a_in: "a ∈ A"
from next_result_fin[rule_format, OF a_in]
have bound1:
"expectation (λω. real (N_n n a ω))
≤ (2 * ε * ln (real n)) / ((Δ a)^2)
+ (∑ t = k..<n. prob ({ω ∈ Ω. A_t_plus_1 t ω = a}
∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / ((Δ a)^2)}))"
by simp
moreover from sum_mono_expression[rule_format, OF a_in]
have bound2:
"(∑ t = k..<n. prob ({ω ∈ Ω. A_t_plus_1 t ω = a}
∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / ((Δ a)^2)}))
≤ (∑ t = k..<n. 2 / (real t powr ε))"
by simp
ultimately show
"expectation (λω. real (N_n n a ω))
≤ (2 * ε * ln (real n)) / ((Δ a)^2) + (∑ t = k..<n. 2 / (real t powr ε))"
by auto
qed
show ?thesis using assms final_result by simp
qed
theorem theorem_15_4:
assumes
a_in_A: "a ∈ A"
and "finite A" and "∀a ∈ A. integrable M (λω. real (N_n n a ω))"
and ω_in_Ω: "ω ∈ Ω"
and subopt_gap: "∀ a ∈ A. Δ a > 0"
and a_not_opt: "∃ a' ∈ A. Δ a' > 0"
and delta_a: "∀ a ∈ A. Δ a = μ a_star - μ a"
and "k ≤ n"
and n_count_assm_pointwise: "(∑a∈A. real (N_n n a ω)) = real n"
and expected_regret_prop_15_1: "expectation (λω. R_n n ω) = (∑a∈A. Δ a * expectation (λω. real (N_n n a ω)))"
and from_UCB_step1: "∀ a ∈ A. expectation (λω. real (N_n n a ω)) ≤
s n + expectation (λω. (∑ t = k..<n. if π (t+1) ω = a ∧ s t ≤ real (N_n t a ω) then 1 else 0))"
and from_UCB_step2: "∀ a ∈ A. ∀ t ∈ {k..<n}. prob ({ω ∈ Ω. A_t_plus_1 t ω = a} ∩
{ω ∈ Ω. N_n t a ω ≥ (2 * ln (1 / δ)) / (Δ a)^2}) ≤ 2 * δ"
and eps_pos: "ε > 0"
and t_gt0: "∀ t ∈ {k..<n}. t > 0"
and choice_delta: "∀ t ∈ {k..<n}. δ = 1 / (real t powr ε)"
and s_form: "∀ a ∈ A. ∀ u. s u = (2 * ε * ln (real u)) / ((Δ a)^2)"
and subset_meas:"∀ a ∈ A. ∀t ∈ {k..<n}. ∀ω ∈ Ω. {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t)/(Δ a)^2 ≤ N_n t a ω} ⊆ Ω"
and prob_eq_E_assm: "∀ a ∈ A. ∀t ∈ {k..<n}. prob {ω. π (Suc t) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} =
prob ({ω. π (Suc t) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ∩ space M)"
and finiteness: "∀t∈{k..<n}. ∀a∈A. emeasure M {ω. π (t+1) ω = a ∧ 2*ε*ln(real t)/(Δ a)^2 ≤ real (N_n t a ω)} < ∞"
and measurable_set: "∀t∈{k..<n}. ∀a∈A. {ω. π (t+1) ω = a ∧ 2*ε*ln(real t)/(Δ a)^2 ≤ real (N_n t a ω)} ∈ sets M"
and eq_sets_optimum:
"∀ a ∈ A. ∀ t ∈ {k..<n}. {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} =
{ω ∈ Ω. A_t_plus_1 t ω = a} ∩ {ω ∈ Ω. N_n t a ω ≥ (2 * ε * ln (real t)) / (Δ a)^2}"
and assms_lin_expect: "∀ a ∈ A. expectation (λω. ∑ t = k..<n.
(if π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω) then 1 else 0)) =
(∑ t = k..<n. expectation (λω. indicat_real {ω. π (t+1) ω = a ∧ 2 * ε * ln (real t) / (Δ a)^2 ≤ real (N_n t a ω)} ω))"
and mono_sum_sets:
" (∀a ∈ A. Δ a * expectation (λω. real (N_n n a ω)) ≤ Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))
⟹ (∑a ∈ A. Δ a * expectation (λω. real (N_n n a ω))) ≤
(∑a ∈ A. Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))"
shows "expectation (λω. R_n n ω) ≤ (∑a∈A. Δ a * ((2 * ε * ln (real n)) /
((Δ a)^2)+ (∑ t = k..<n. 2 / (real t powr ε))))"
proof -
have exp_regret_eq:
"expectation (λω. R_n n ω) = (∑a∈A. Δ a * expectation (λω. real (N_n n a ω)))"
using assms by fastforce
have result_2: "∀ a ∈ A. expectation (λω. real (N_n n a ω))
≤ s n + (∑ t = k..<n. 2 / (real t powr ε))"
using assms proposition_15_7 by (smt (verit, ccfv_threshold) a_star_in_A)
have intermed_step: "∀a ∈ A. Δ a * expectation (λω. real (N_n n a ω)) ≤ Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε)))"
proof
fix a assume "a ∈ A"
then have "expectation (λω. real (N_n n a ω)) ≤ s n + (∑ t = k..<n. 2 / (real t powr ε))"
using result_2 by auto
moreover have "0 < Δ a"
using assms subopt_gap ‹a ∈ A› by blast
ultimately show "Δ a * expectation (λω. real (N_n n a ω)) ≤ Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε)))"
using mult_left_mono by simp
qed
have intermed_step_2:"(∑a∈A. Δ a * expectation (λω. real (N_n n a ω))) ≤ (∑a∈A. Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))"
proof -
have res:"∀a∈A. Δ a * expectation (λω. real (N_n n a ω)) ≤ Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε)))"
using intermed_step by auto
have res2:" (∀a ∈ A. Δ a * expectation (λω. real (N_n n a ω)) ≤ Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))
⟹ (∑a ∈ A. Δ a * expectation (λω. real (N_n n a ω))) ≤ (∑a ∈ A. Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))"
using res assms by fastforce
then have intermed_step: "(∑a∈A. Δ a * expectation (λω. real (N_n n a ω))) ≤ (∑a∈A. Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))"
using res res2 by auto
then show ?thesis using intermed_step by simp
qed
then have "expectation (λω. R_n n ω) ≤ (∑a∈A. Δ a * (s n + (∑ t = k..<n. 2 / (real t powr ε))))"
using assms intermed_step_2 by linarith
have "∀a ∈ A. s n = (2 * ε * ln (real n)) / ((Δ a)^2)"
using s_form by auto
then have "expectation (λω. R_n n ω) ≤ (∑a∈A. Δ a * ((2 * ε * ln (real n)) /
((Δ a)^2)+ (∑ t = k..<n. 2 / (real t powr ε))))"
using assms intermed_step_2 by (metis a_star_in_A less_irrefl verit_minus_simplify(1))
then show ?thesis by simp
qed
end
end