Theory MSc_Project_Discrete_Prop15_1
theory MSc_Project_Discrete_Prop15_1
imports
"HOL-Probability.Probability"
begin
locale bandit_problem =
fixes A :: "'a set"
and μ :: "'a ⇒ real"
and a_star :: "'a"
assumes finite_arms: "finite A"
and a_star_in_A: "a_star ∈ A"
and optimal_arm: "∀a ∈ A. μ a_star ≥ μ a"
begin
definition Δ :: "'a ⇒ real" where
"Δ a = μ a_star - μ a"
end
locale bandit_policy = bandit_problem + prob_space +
fixes Ω :: "'b set"
and ℱ :: "'b set set"
and π :: "nat ⇒ 'b ⇒ 'a"
and N_n :: "nat ⇒ 'a ⇒ 'b ⇒ nat"
assumes measurable_policy: "∀t. π t ∈ measurable M (count_space A)"
and N_n_def: "∀n a ω. N_n n a ω = card {t ∈ {0..<n}. π (t+1) ω = a}"
and count_assm_pointwise: "∀n ω. (∑a ∈ A. real (N_n n a ω)) = real n"
begin
definition R_n :: "nat ⇒ 'b ⇒ real" where
"R_n n ω = n * μ a_star - (∑a ∈ A. μ a * real (N_n n a ω))"
lemma regret_decomposition_pointwise:
fixes n :: nat and ω :: 'b
assumes n_count_assm_pointwise: "(∑a∈A. real (N_n n a ω)) = real n"
shows "R_n n ω = (∑a ∈ A. Δ a * real (N_n n a ω))"
proof -
have sum_Nn: "(∑a ∈ A. real (N_n n a ω)) = real n"
using n_count_assm_pointwise by simp
have sum_const: "(∑a ∈ A. μ a_star * real (N_n n a ω)) = μ a_star * (∑a ∈ A. real (N_n n a ω))"
by (simp add: Cartesian_Space.vector_space_over_itself.scale_sum_right)
have eq1: "R_n n ω = real n * μ a_star - (∑a ∈ A. μ a * real (N_n n a ω))"
by (simp add: R_n_def)
also have eq2: "... = (∑a ∈ A. μ a_star * real (N_n n a ω)) - (∑a ∈ A. μ a * real (N_n n a ω))"
using sum_const sum_Nn
by (subst sum_Nn[symmetric], subst sum_const) simp
also have eq3: "... = (∑a ∈ A. (μ a_star * real (N_n n a ω) - μ a * real (N_n n a ω)))"
by (rule sum_subtractf[symmetric])
also have eq4: "... = (∑a ∈ A. (μ a_star - μ a) * real (N_n n a ω))"
by (simp add: algebra_simps)
also have "... = (∑a ∈ A. Δ a * real (N_n n a ω))"
by (simp add: Δ_def)
finally show ?thesis .
qed
lemma integrable_const_fun:
assumes "finite_measure M"
shows "integrable M (λx. c)"
using assms by (simp add: Bochner_Integration.finite_measure.integrable_const)
lemma expected_regret:
assumes "finite A"
and "∀a ∈ A. integrable M (λω. real (N_n n a ω))"
shows "expectation (λω. R_n n ω) = (∑a∈A. Δ a * expectation (λω. real (N_n n a ω)))"
proof -
have integrable_sum: "integrable M (λω. ∑a ∈ A. μ a * real (N_n n a ω))"
proof -
have "∀a ∈ A. integrable M (λω. μ a * real (N_n n a ω))"
using assms integrable_mult_right by blast
then show ?thesis
by (simp add: integrable_sum)
qed
have pointwise: "∀ω. R_n n ω = (∑a∈A. Δ a * real (N_n n a ω))"
using regret_decomposition_pointwise count_assm_pointwise by blast
hence rewrite: "expectation (λω. R_n n ω) = expectation (λω. ∑a∈A. Δ a * real (N_n n a ω))"
by simp
also have "expectation (λω. R_n n ω) = (∑a∈A. Δ a * expectation (λω. real (N_n n a ω)))"
proof -
have "∀a ∈ A. integrable M (λω. Δ a * real (N_n n a ω))"
using assms integrable_mult_right by blast
hence "integrable M (λω. ∑a ∈ A. Δ a * real (N_n n a ω))"
using integrable_sum by simp
have res_1: "expectation (λω. ∑a∈A. Δ a * real (N_n n a ω)) = integral⇧L M (λω. ∑a∈A. Δ a * real (N_n n a ω))"
by simp
also have res_2: "... = (∑a∈A. integral⇧L M (λω. Δ a * real (N_n n a ω)))"
using ‹finite A› assms
by (simp add: integral_sum integrable_sum)
have res_3: "expectation (λω. R_n n ω) = (∑a∈A. Δ a * expectation (λω. real (N_n n a ω)))"
using assms rewrite
by simp
then show ?thesis using assms res_3 by auto
qed
then show ?thesis .
qed
end
end