Theory MSc_Project_Discrete_Prop15_1

theory MSc_Project_Discrete_Prop15_1
  imports
    "HOL-Probability.Probability"

begin


locale bandit_problem = 
  fixes A :: "'a set"  (* Set of arms *)
    and μ :: "'a  real"  (* Expected reward for each arm *)
    and a_star :: "'a"  (* Optimal arm *)
  assumes finite_arms: "finite A"
    and a_star_in_A: "a_star  A"
    and optimal_arm: "a  A. μ a_star  μ a"
begin

definition Δ :: "'a  real" where
  "Δ a = μ a_star - μ a"
end

locale bandit_policy = bandit_problem +  prob_space  +
  fixes Ω :: "'b set"
    and  :: "'b set set"
    and π :: "nat  'b  'a"
    and N_n :: "nat  'a  'b  nat"  
  assumes  measurable_policy: "t. π t  measurable M (count_space A)"
    and N_n_def: "n a ω. N_n n a ω = card {t  {0..<n}. π (t+1) ω = a}"
    and count_assm_pointwise: "n ω. (a  A. real (N_n n a ω)) = real n" 
begin

definition R_n :: "nat  'b  real" where
  "R_n n ω = n * μ a_star - (a  A. μ a * real (N_n n a ω))"

lemma regret_decomposition_pointwise:
  fixes n :: nat and ω :: 'b
  assumes n_count_assm_pointwise: "(aA. real (N_n n a ω)) = real n"
  shows "R_n n ω = (a  A. Δ a * real (N_n n a ω))"
proof -
  have sum_Nn: "(a  A. real (N_n n a ω)) = real n"
    using n_count_assm_pointwise by simp

  have sum_const: "(a  A. μ a_star * real (N_n n a ω)) = μ a_star * (a  A. real (N_n n a ω))"
    by (simp add: Cartesian_Space.vector_space_over_itself.scale_sum_right)

  have eq1: "R_n n ω = real n * μ a_star - (a  A. μ a * real (N_n n a ω))"
    by (simp add: R_n_def)

  also have eq2: "... = (a  A. μ a_star * real (N_n n a ω)) - (a  A. μ a * real (N_n n a ω))"
    using sum_const sum_Nn
    by (subst sum_Nn[symmetric], subst sum_const) simp

  also have eq3: "... = (a  A. (μ a_star * real (N_n n a ω) - μ a * real (N_n n a ω)))"
    by (rule sum_subtractf[symmetric])

  also have eq4: "... = (a  A. (μ a_star - μ a) * real (N_n n a ω))"
    by (simp add: algebra_simps)

  also have "... = (a  A. Δ a * real (N_n n a ω))"
    by (simp add: Δ_def)

  finally show ?thesis .
qed


lemma integrable_const_fun:
  assumes "finite_measure M"
  shows "integrable M (λx. c)"
  using assms by (simp add: Bochner_Integration.finite_measure.integrable_const)

lemma expected_regret:
  assumes "finite A"
  and  "a  A. integrable M (λω. real (N_n n a ω))"
  shows "expectation (λω. R_n n ω) = (aA. Δ a * expectation (λω. real (N_n n a ω)))"
proof -

  have integrable_sum: "integrable M (λω. a  A. μ a * real (N_n n a ω))"
  proof -
    have "a  A. integrable M (λω. μ a * real (N_n n a ω))"
      using assms integrable_mult_right by blast
    then show ?thesis
      by (simp add: integrable_sum)
  qed

  have pointwise: "ω. R_n n ω = (aA. Δ a * real (N_n n a ω))"
    using regret_decomposition_pointwise count_assm_pointwise by blast

  hence rewrite: "expectation (λω. R_n n ω) = expectation (λω. aA. Δ a * real (N_n n a ω))"
    by simp
  
  also have "expectation (λω. R_n n ω) = (aA. Δ a * expectation (λω. real (N_n n a ω)))"
  proof -
    (* show integrability of each summand *)
    have "a  A. integrable M (λω. Δ a * real (N_n n a ω))"
    using assms integrable_mult_right by blast
    hence "integrable M (λω. a  A. Δ a * real (N_n n a ω))"
     using integrable_sum by simp

    (* unfold expectation to integralL *)
     have res_1: "expectation (λω. aA. Δ a * real (N_n n a ω)) = integralL M (λω. aA. Δ a * real (N_n n a ω))"
       by simp

     also have res_2: "... = (aA. integralL M (λω. Δ a * real (N_n n a ω)))"
       using finite A assms
       by (simp add: integral_sum integrable_sum)

     have res_3: "expectation (λω. R_n n ω) = (aA. Δ a * expectation (λω. real (N_n n a ω)))"
      using assms rewrite
      by simp

    then show ?thesis using assms res_3 by auto
  qed

  then show ?thesis .
qed



end

end