Theory Discrete_UCB_Step2

theory Discrete_UCB_Step2
  imports "Discrete_UCB_Step1"

begin

locale bandit_policy = bandit_problem + prob_space  +
  fixes Ω :: "'b set"
    and  :: "'b set set"
    and ω :: 'b 
  fixes  π :: "nat  'b  'a"
    and N_n :: "nat  'a  'b  nat"
    and Z :: "nat  'a  'b  real"
    and δ :: real
    and q :: real
  assumes finite_A: "finite A"
    and a_in_A: "a  A"
    and measurable_policy: "t. π t  measurable M (count_space A)"
    and N_n_def: "n a b. N_n n a b = card {t  {0..<n}. π (t+1) b = a}"
    and δ_pos: "0 < δ"
    and δ_less1: "δ < 1"
    and q_pos: "q > 0"

begin



definition sample_mean_Z :: "nat  'a  'b  real" where
  "sample_mean_Z n a b  (1 / real n) * ( i<n. Z i a b)"

definition M_val :: "nat  'a  'b  real" where
  "M_val t a b  (if N_n (t+1) a b = 0 then 0
             else ( s < t. if π s b = a then Z s a b else 0) / real (N_n t a b))"

definition U :: "nat  'a  'b  real" where
  "U t a b  M_val t a b + q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a b))))"


definition A_t_plus_1 :: "nat  'b  'a" where
  "A_t_plus_1 t b  (SOME a. a  A  (a'. a'  A  U t a b  U t a' b))"

lemma (in finite_measure) finite_measure_mono:
  assumes "A  B" "B sets M" shows "measure M A  measure M B"
  using emeasure_mono[OF assms] emeasure_real[of A] emeasure_real[of B] by (auto simp: measure_def)

theorem union_bound:
  fixes E F G :: "'b set"
  assumes "E  F  G"
    and "E  events" "F  events" "G  events"
  shows "prob E  prob F + prob G"
proof -
  have "F  G  events"
    using assms(3,4) sets.Un by blast
  have "prob E  prob (F  G)"
    using assms local.finite_measure_mono by auto
  also have "prob (F  G)  prob F + prob G"
    using assms local.finite_measure_subadditive by blast
  finally show ?thesis .
qed

theorem hoeffding_iid_bound_ge_general:
  fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l u :: real
  assumes a_in: "a  A"
    and eps_pos: "ε  0"
    and bounds: "i < n. ω  Ω. l  Z i a ω  Z i a ω  u"
    and mu_def: "μ_hat = ( i < n. expectation (λω. Z i a ω))"
    and "u - l  0"
    and n_pos:  "n > 0" 
    and space_M: "space M = Ω"
    and sets_M:  "sets M  = "
    and indep: "indep_vars (λ_. borel) (λi. (λω. Z i a ω)) {i. i < n}"
    and rv: "i<n. random_variable borel (λω. Z i a ω)"

shows "prob {ω  Ω. ( i < n. Z i a ω)  μ_hat + ε}
          exp (- 2 * ε^2 / (real n * (u - l)^2))"
proof -
  let ?I = "{i. i < n}"
  let ?X = "λi. (λω. Z i a ω)"
  let ?a = "λi. l"
  let ?b = "λi. u"

  have finite_I: "finite ?I" by simp

(* AE bounds needed by Hoeffding *)
  have AE_bounds: "i?I. AE ω in M. ?a i  ?X i ω  ?X i ω  ?b i"
  proof
    fix i assume iI: "i  ?I"
    have "ωΩ. l  Z i a ω  Z i a ω  u" using bounds iI by simp
    thus "AE ω in M. ?a i  ?X i ω  ?X i ω  ?b i" 
      using assms by auto
  qed

(* Independence & boundedness setup for Hoeffding *)
  have indep_loc: "indep_interval_bounded_random_variables M ?I ?X ?a ?b" 
    by (standard; use finite_I indep AE_bounds in auto)

  from indep_loc
  have H: "Hoeffding_ineq M ?I ?X ?a ?b"
    by (rule Hoeffding_ineq.intro)

(* Widths squared sum *)
  have widths: "( i  ?I. (?b i - ?a i)^2) = real n * (u - l)^2"
    by simp
  have widths_pos: "0 < ( i  ?I. (?b i - ?a i)^2)"
    using u - l  0 n_pos by simp
  have sum_expectations_eq_integrals:
    "( i < n. expectation (λω. Z i a ω)) = ( i?I. integralL M (?X i))"
  proof -
    have eq1: "i<n. expectation (λω. Z i a ω) = integralL M (?X i)"
      using rv space_M sets_M by simp
    moreover have  sum_eq: "( i<n. integralL M (?X i)) = ( i?I. integralL M (?X i))"
      by (simp add: lessThan_def)
    ultimately show ?thesis
      by simp
  qed

(* Rewrite sum of expectations as integrals *)
  have sum_integrals_eq: "( i?I. integralL M (?X i)) = μ_hat"
    using mu_def by (simp add: sum_expectations_eq_integrals) 

(* Apply Hoeffding inequality *)
  have tail:
    "prob {ω  Ω. ( i?I. ?X i ω) - ( i?I. integralL M (?X i))  ε} 
      exp (- 2 * ε^2 / ( i?I. (?b i - ?a i)^2))"
    using Hoeffding_ineq.Hoeffding_ineq_ge[OF H eps_pos widths_pos]
    by (smt (verit, best) Collect_cong assms(7))

(* Rewrite LHS and RHS to original statement *)
  have lhs_rewrite: "{ω  Ω. ( i < n. Z i a ω)  μ_hat + ε} 
                     = {ω  Ω. ( i?I. ?X i ω) - ( i?I. integralL M (?X i))  ε}"
    by (simp add: add.commute le_diff_eq lessThan_def sum_integrals_eq) 

  have rhs_rewrite: "exp (- 2 * ε^2 / ( i?I. (?b i - ?a i)^2)) 
                     = exp (- 2 * ε^2 / (real n * (u - l)^2))"
    using widths by simp

  show ?thesis
    using tail lhs_rewrite rhs_rewrite by simp
qed

theorem hoeffding_iid_bound_le_general:
  fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l u :: real
  assumes a_in: "a  A"
    and eps_pos: "ε  0"
    and bounds: "i < n. ω  Ω. l  Z i a ω  Z i a ω  u"
    and mu_def: "μ_hat = ( i < n. expectation (λω. Z i a ω))"
    and "u - l  0"
    and n_pos:  "n > 0" 
    and space_M: "space M = Ω"
    and sets_M:  "sets M  = "
    and indep: "indep_vars (λ_. borel) (λi. (λω. Z i a ω)) {i. i < n}"
    and rv: "i<n. random_variable borel (λω. Z i a ω)"
  shows "prob {ω  Ω. ( i < n. Z i a ω)  μ_hat - ε} 
          exp (- 2 * ε^2 / (real n * (u - l)^2))"
proof -
  let ?I = "{i. i < n}"
  let ?X = "λi. (λω. Z i a ω)"
  let ?a = "λi. l"
  let ?b = "λi. u"

  have finite_I: "finite ?I" by simp

(* AE bounds *)
  have AE_bounds: "i?I. AE ω in M. ?a i  ?X i ω  ?X i ω  ?b i"
  proof
    fix i assume iI: "i  ?I"
    have "ωΩ. l  Z i a ω  Z i a ω  u" using bounds iI by simp
    thus "AE ω in M. ?a i  ?X i ω  ?X i ω  ?b i" 
      using assms by auto
  qed

(* Independence & boundedness *)
  have indep_loc: "indep_interval_bounded_random_variables M ?I ?X ?a ?b"
    by (standard; use finite_I indep AE_bounds in auto)

  from indep_loc
  have H: "Hoeffding_ineq M ?I ?X ?a ?b"
    by (rule Hoeffding_ineq.intro)

(* Widths sum *)
  have widths: "( i  ?I. (?b i - ?a i)^2) = real n * (u - l)^2" by simp
  have widths_pos: "0 < ( i  ?I. (?b i - ?a i)^2)" using u - l  0 n_pos by simp

(* Sum of expectations = sum of integrals *)
  have sum_expectations_eq_integrals:
    "( i < n. expectation (λω. Z i a ω)) = ( i?I. integralL M (?X i))"
  proof -
    have eq1: "i<n. expectation (λω. Z i a ω) = integralL M (?X i)" using rv space_M sets_M by simp
    moreover have sum_eq: "( i<n. integralL M (?X i)) = ( i?I. integralL M (?X i))" by (simp add: lessThan_def)
    ultimately show ?thesis by simp
  qed

  have sum_integrals_eq: "( i?I. integralL M (?X i)) = μ_hat"
    using mu_def by (simp add: sum_expectations_eq_integrals)

(* Apply Hoeffding inequality for ≤ *)
  have tail:
    "prob {ω  Ω. ( i?I. ?X i ω) - ( i?I. integralL M (?X i))  - ε} 
      exp (- 2 * ε^2 / ( i?I. (?b i - ?a i)^2))"
    using Hoeffding_ineq.Hoeffding_ineq_le[OF H eps_pos widths_pos]
    by (smt (verit, best) Collect_cong assms(7)) 

(* Rewrite LHS and RHS *)
  have lhs_rewrite: "{ω  Ω. ( i < n. Z i a ω)  μ_hat - ε} 
                     = {ω  Ω. ( i?I. ?X i ω) - ( i?I. integralL M (?X i))  -ε}"
    using add.inverse_inverse[of ε] add.inverse_inverse[of μ_hat] assms(4)
      cancel_ab_semigroup_add_class.diff_right_commute[of μ_hat μ_hat "uuc<n. Z uuc a _"]
      cancel_ab_semigroup_add_class.diff_right_commute[of "- μ_hat" "- μ_hat" "ε - μ_hat"]
      cancel_ab_semigroup_add_class.diff_right_commute[of "ε - μ_hat" "ε - μ_hat" ε]
      cancel_ab_semigroup_add_class.diff_right_commute[of ε ε μ_hat]
      cancel_ab_semigroup_add_class.diff_right_commute[of "0" "- μ_hat" ε]
      cancel_ab_semigroup_add_class.diff_right_commute[of μ_hat μ_hat ε]
      cancel_ab_semigroup_add_class.diff_right_commute[of "- μ_hat" "- μ_hat" "(uuc<n. Z uuc a _) - μ_hat"]
      cancel_ab_semigroup_add_class.diff_right_commute[of "(uuc<n. Z uuc a _) - μ_hat" "(uuc<n. Z uuc a _) - μ_hat"
        "uuc<n. Z uuc a _"]
      cancel_ab_semigroup_add_class.diff_right_commute[of "uuc<n. Z uuc a _" "uuc<n. Z uuc a _" μ_hat]
      cancel_ab_semigroup_add_class.diff_right_commute[of "0" "- μ_hat" "uuc<n. Z uuc a _"]
      cancel_comm_monoid_add_class.diff_cancel[of "ε - μ_hat"] cancel_comm_monoid_add_class.diff_cancel[of ε]
      cancel_comm_monoid_add_class.diff_cancel[of μ_hat] cancel_comm_monoid_add_class.diff_cancel[of "- μ_hat"]
      cancel_comm_monoid_add_class.diff_cancel[of "(uuc<n. Z uuc a _) - μ_hat"]
      cancel_comm_monoid_add_class.diff_cancel[of "uuc<n. Z uuc a _"] diff_0
      diff_right_mono[of ε "μ_hat - (uuc<n. Z uuc a _)" μ_hat] diff_right_mono[of "uuc<n. Z uuc a _" "μ_hat - ε" μ_hat]
      lessThan_def[of n] more_arith_simps(1)[of "- ε" "(uuc<n. Z uuc a _) - μ_hat"] by force 

  have rhs_rewrite: "exp (- 2 * ε^2 / ( i?I. (?b i - ?a i)^2)) = exp (- 2 * ε^2 / (real n * (u - l)^2))"
    using widths by simp

  show ?thesis using tail lhs_rewrite rhs_rewrite by simp
qed

theorem hoeffding_iid_ge_delta_bound:
  fixes a :: 'a and n :: nat and δ_hat :: real and μ_hat :: real and l u :: real
  assumes a_in: "a  A"
    and delta_bound: "0 < δ_hat" "δ_hat  1"
    and bounds: "i<n. ωΩ. l  Z i a ω  Z i a ω  u"
    and mu_def: "μ_hat = (i<n. expectation (λω. Z i a ω))"
    and n_pos: "n > 0"
    and eps_pos: "ε  0"
    and u_minus_l_nonzero: "u - l  0"
    and space_M: "space M = Ω"
    and sets_M:  "sets M  = "
    and indep: "indep_vars (λ_. borel) (λi. (λω. Z i a ω)) {i. i < n}"
    and rv: "i<n. random_variable borel (λω. Z i a ω)"
    and eps_expression: "ε = sqrt ((real n * (u - l)^2 * ln (1 / δ_hat)) / 2)"
  shows "prob {ω  Ω. (i<n. Z i a ω)  μ_hat + ε}  δ_hat 
         prob {ω  Ω. (i<n. Z i a ω)  μ_hat - ε}  δ_hat"
proof -
  have eps_pos: "ε  0" 
    using assms(7) by auto 
  have eps_squared: "ε^2 = (real n * (u - l)^2 * ln (1 / δ_hat)) / 2"
    using eps_expression by (simp add: assms(3) delta_bound(1))

  have exp_eq: "exp (- 2 * ε^2 / (real n * (u - l)^2)) = δ_hat"
  proof -
    have "- 2 * ε^2 / (real n * (u - l)^2) = - ln (1 / δ_hat)"
    proof -
      have "ε^2 /( real n * (u - l)^2 ) = ( ln (1 / δ_hat)) / 2"
        using assms(6,8) eps_squared by fastforce
      then show ?thesis  by linarith 
    qed
    also have "... = ln δ_hat"
      using delta_bound(1) by (simp add: ln_div)
    finally show ?thesis
      using delta_bound(1) by simp
  qed

  have ge_bound:
    "prob {ω  Ω. (i<n. Z i a ω)  μ_hat + ε}  δ_hat"
    using hoeffding_iid_bound_ge_general[OF a_in eps_pos bounds mu_def u_minus_l_nonzero 
        n_pos space_M sets_M indep rv] exp_eq by simp

  have le_bound:
    "prob {ω  Ω. (i<n. Z i a ω)  μ_hat - ε}  δ_hat"
    using hoeffding_iid_bound_le_general[OF a_in eps_pos bounds mu_def u_minus_l_nonzero 
        n_pos space_M sets_M indep rv] exp_eq by simp

  show ?thesis
    using ge_bound le_bound by simp
qed

lemma add_le_iff:
  fixes x y z :: real
  shows "x  y - z  x - y  -z"
  by auto
lemma max_Suc_0_eq_1: "max (Suc 0) x = max 1 x"
  by simp


theorem ucb_suboptimal_bound_set:
  fixes t :: nat           (* current time step *)
    and a :: "'a"           (* a chosen action *)
    and Δ :: "'a  real"    (* suboptimality gap function *)
  assumes finite_A: "finite A"                        (* action set is finite *)
    and  a_in_A: "a  A"                             (* action a is in the action set *)
    and a_star_in_A: "a_star  A"                  (* optimal action is in A *)
    and  argmax_exists: "A  {}"    
    and subopt_gap: "Δ a > 0"                       (* a is suboptimal *)
    and a_not_opt: "a'. a'  A  Δ a > 0"         (* there exists a suboptimal action *)
    and  delta_a: "Δ a = μ a_star - μ a"             (* Δ defined as expected reward difference *)
    and   ω_in_Ω: "ω  Ω"                             (* ω is in sample space *)
    and  asm: " ω  {ω  Ω. A_t_plus_1 t ω = a}"     (* ω is such that A_t_plus_1 selects a *)
    and  setopt: "ω  Ω. a_max  A. b  A. U t b ω  U t a_max ω "(* argmax exists *)
    and A_t_plus_1_maximizes: 
    "t ω a. A_t_plus_1 t ω = a  a  A  (b  A. U t a ω  U t b ω)" 
  shows "{ω  Ω. A_t_plus_1 t ω = a}  
         {ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. μ a_star  U t a ω}"
proof -
  have set_result_1:
    "{ω  Ω. A_t_plus_1 t ω = a}  {ω  Ω. b  A. U t a ω  U t b ω}"
  proof
    fix ω
    assume "ω  {ω  Ω. A_t_plus_1 t ω = a}"  (* fix an arbitrary ω *)
    hence "A_t_plus_1 t ω = a" by simp

(* From setopt, there exists a maximizer a_max for this ω *)
    from setopt obtain a_max where
      "a_max  A  (b  A. U t b ω  U t a_max ω)" 
      using ω  {ω  Ω. A_t_plus_1 t ω = a} by auto

(* Since A_t_plus_1 t ω selects a maximizer, a = a_max *)
    hence "b  A. U t a ω  U t b ω"
      using `A_t_plus_1 t ω = a` A_t_plus_1_maximizes by auto

(* Conclude ω ∈ right-hand set *)
    thus "ω  {ω  Ω. b  A. U t a ω  U t b ω}" 
      using ω  {ω  Ω. A_t_plus_1 t ω = a} by blast
  qed


  have set_result_2: "{ω  Ω. b  A. U t a ω  U t b ω}  {ω  Ω. U t a_star ω  U t a ω}"
  proof 
    fix ω assume asm: "ω  {ω  Ω. b  A. U t a ω  U t b ω}"
    hence "ω  Ω" and ub: "b  A. U t a ω  U t b ω" by simp_all
    from a_star_in_A have "U t a ω  U t a_star ω" using ub by simp
    thus "ω  {ω  Ω. U t a_star ω  U t a ω}" using `ω  Ω` by simp
  qed

  have set_result_3: "{ω  Ω. U t a_star ω  U t a ω}  
                     {ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. μ a_star  U t a ω}"
  proof 
    fix ω assume asm: "ω  {ω  Ω. U t a_star ω  U t a ω}"
    hence "ω  Ω" and le: "U t a_star ω  U t a ω" by simp_all
    show "ω  {ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. μ a_star  U t a ω}"
    proof (cases "U t a_star ω  μ a_star")
      case True
      then show ?thesis using `ω  Ω` by simp
    next
      case False
      hence "μ a_star < U t a_star ω" by simp
      with le have "μ a_star < U t a ω" by (simp add: less_le_trans)
      then show ?thesis using `ω  Ω` by simp
    qed
  qed

  from set_result_1 set_result_2 set_result_3 show ?thesis by auto
qed



theorem ucb_suboptimal_bound_prob_statement:
  fixes t :: nat and a :: 'a  and Δ :: "'a  real"
  assumes finite_A: "finite A"                        (* action set is finite *)
    and a_star_in_A: "a_star  A"                  (* optimal action is in A *)
    and  argmax_exists: "A  {}"    
    and subopt_gap: "Δ a > 0"                       (* a is suboptimal *)
    and a_not_opt: "a'. a'  A  Δ a > 0"         (* there exists a suboptimal action *)
    and   ω_in_Ω: "ω  Ω"                             (* ω is in sample space *)
    and  asm: " ω  {ω  Ω. A_t_plus_1 t ω = a}"     (* ω is such that A_t_plus_1 selects a *)
    and  setopt: "ω  Ω. a_max  A. b  A. U t b ω  U t a_max ω "(* argmax exists *)
    and A_t_plus_1_maximizes: 
    "t ω a. A_t_plus_1 t ω = a  a  A  (b  A. U t a ω  U t b ω)" 
    and a_in_A: "a  A"
    and omega_in: "ω  Ω"
    and subopt_gap: "Δ a > 0"
    and delta_a: "Δ a = μ a_star - μ a" (* Δ defined as expected reward difference *)
    and H_def: "H = (2 * ln (1 / δ)) / (Δ a)^2"
    and E_def: "E = {ω  Ω. A_t_plus_1 t ω = a}  {ω  Ω. H  real (N_n t a ω)}"
    and F_def: "F = {ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. H  real (N_n t a ω)}"
    and G_def: "G = {ω  Ω. μ a_star  U t a ω}  {ω  Ω. H  real (N_n t a ω)}"
    and meas_sets: "E  sets M" "F  sets M" "G  sets M"
    and prob_inter: "prob  (F  G)  enn2real (emeasure M (F  G))"

shows "prob  ({ω  Ω. A_t_plus_1 t ω = a}  {ω  Ω. H  real (N_n t a ω)}) 
         prob  ({ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. H  real (N_n t a ω)}) +
         prob  ({ω  Ω. U t a ω  μ a_star}  {ω  Ω. H  real (N_n t a ω)})"
proof -

  have subset_result:"E  F  G"
  proof -
    have step1: "{ω  Ω. A_t_plus_1 t ω = a} 
               {ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. μ a_star  U t a ω}"
      using ucb_suboptimal_bound_set assms by blast

    then have step2:
      "{ω  Ω. A_t_plus_1 t ω = a}  {ω  Ω. H  real (N_n t a ω)} 
     ({ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. μ a_star  U t a ω})  {ω  Ω. H  real (N_n t a ω)}"
      by auto

    then have step3:
      "{ω  Ω. A_t_plus_1 t ω = a}  {ω  Ω. H  real (N_n t a ω)} 
     ({ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. H  real (N_n t a ω)}) 
     ({ω  Ω. μ a_star  U t a ω}  {ω  Ω. H  real (N_n t a ω)})"
      by (auto simp add: set_eq_iff)

    then have step4: 
      "E  F  G" using assms by simp
    then show ?thesis
      by (simp add: E_def F_def G_def)
  qed

  have bound: "prob  E  prob  F + prob G"
  proof (rule union_bound)
    show "E  F  G"  by (simp add: subset_result) 
    show "E  sets M" using meas_sets by simp   
    show "F  sets M" using meas_sets by simp
    show "G  sets M " using meas_sets by simp
    have "prob  (F  G)  enn2real (emeasure M (F  G))" using prob_inter by simp  
    have "prob  E  prob  (F  G)" 
      using assms(18,19,20) increasingD measure_increasing subset_result by blast
    have "prob  (F  G) = prob  F + prob G - prob  (F  G)" 
      by (simp add: Int_commute assms(19,20) finite_measure_Diff' finite_measure_Union')
  qed

  hence union_bound:
    "prob  ({ω  Ω. A_t_plus_1 t ω = a}  {ω  Ω. H  real (N_n t a ω)}) 
     prob  ({ω  Ω. U t a_star ω  μ a_star}  {ω  Ω. H  real (N_n t a ω)}) +
     prob  ({ω  Ω. U t a ω  μ a_star}  {ω  Ω. H  real (N_n t a ω)})"
    using bound assms by simp

  show ?thesis
    using union_bound by simp
qed

lemma U_le_μ_pointwise:
  "U t a_star ω  μ a_star  
   M_val t a_star ω - μ a_star  
   - q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a_star ω))))"
  unfolding U_def
  using add_le_iff max_Suc_0_eq_1
  by auto


lemma U_ge_μ_pointwise: 
  assumes  delta_a: "Δ a = μ a_star - μ a"
  shows
    "U t a ω  μ a_star 
   M_val t a ω - μ a  Δ a - q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a ω))))"
proof -
  have delta_plus_mu_eq:
    "Δ a + μ a = μ a_star"
  proof -
    have "Δ a = μ a_star - μ a"
      using  delta_a by simp (* or just assume you have it *)

    hence "Δ a + μ a = (μ a_star - μ a) + μ a"
      by simp

    also have "... = μ a_star"
      by simp

    finally show ?thesis .
  qed
  have U_ge_μ_rewrite:
    "U t a ω   μ a_star  M_val t a ω - μ a  Δ a  - q * sqrt (ln (1 / δ) / (2 * real (max 1 (N_n t a ω))))"
    unfolding U_def using add_le_iff max_Suc_0_eq_1 delta_plus_mu_eq by auto
  then show ?thesis by simp
qed

theorem hoeffding_iid__bound_le:
  fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l_hat u_hat :: real
    and I :: "nat set"
    and X_new :: "nat  'b  real"
    and a_bound b_bound :: "nat  real"
  assumes a_in: "a  A"
    and "b  Ω"
    and eps_pos: "ε  0"
    and  eps: "ε =  abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))"
    and "δ  0  δ  1"
    and t_eq_n: "t = n"
    and  "c > 0 " 
    and bounds: "j < t. ω  Ω.  a A. l_hat  Z_hat j a ω  Z_hat j a ω  u_hat"
    and mu_def: "μ_hat = ( j < t. expectation (λω. Z_hat j a ω))"
    and "u_hat - l_hat  0"
    and t_pos:  "t > 0" 
    and "t<n.  N_n t a_star b > 0"
    and   n_pos:  "n > 0" 
    and "M_val t a_star b  ( s < t. if π s b = a_star then Z s a_star b else 0) /
 real (N_n t a_star b)"
    and widths: "( i  I. (b_bound i - a_bound i)^2) = (real n) * (u_hat - l_hat)^2"
    and space_M: "space M = Ω"
    and sets_M:  "sets M  = "
    and indep: "indep_vars (λ_. borel) (λj. (λω. Z j a ω)) {j. j < t}"
    and rv: "j<t. random_variable borel (λω. Z j a ω)"
    and "j<t.  Z_hat j a_star ω =  c * (if π j b = a_star then Z j a_star b else 0) "
    and "indep_interval_bounded_random_variables M I X_new a_bound b_bound"
    and indep_loc: "indep_interval_bounded_random_variables M {j. j < t}
                   (λj. (λω. Z_hat j a_star ω))
                   (λj. l_hat) (λj. u_hat)"
    and H: "Hoeffding_ineq M {j. j < t} 
                     (λj. (λω. Z_hat j a_star ω))
                     (λj. l_hat) (λj. u_hat)"
    and sum_integrals_eq: "( j  {j. j < t}. integralL M (λω. Z_hat j a_star ω)) = μ_hat"
    and rewriting: "prob {ω  Ω. ( j < t. Z_hat j a_star ω) - ( j < t. expectation (Z_hat j a_star))  - ε} =
      prob {x  space M. ( j < t. Z_hat j a_star x)  ( j < t. expectation (Z_hat j a_star)) - ε}"
  shows "prob {ω  Ω. ( j < n. Z_hat j a_star ω)  μ_hat - ε}   
          δ"
proof -

  let ?I = "{j. j < t}"
  let ?X_new = "λj. (λω. Z_hat j a_star ω)"
  let ?a_bound = "λj. l_hat"
  let ?b_bound = "λj. u_hat"

  have
    "M_val t a_star b  ( s < t. if π s b = a_star then Z s a_star b else 0) / real (N_n t a_star b)"
    using assms by simp


  have finite_I: "finite ?I" by simp

(* AE bounds *)
  have AE_bounds: "j  ?I. AE ω in M. ?a_bound j  ?X_new j ω  ?X_new j ω  ?b_bound j"
  proof
    fix j assume "j  ?I"
    then have "j < t" by simp

    then have bound_j: "ω  Ω. l_hat  Z_hat j a_star ω  Z_hat j a_star ω  u_hat"
      using bounds by (simp add: a_in_A)
    then show "AE ω in M. ?a_bound j  ?X_new j ω  ?X_new j ω  ?b_bound j"
      using space_M sets_M by force
  qed


(* Independence & boundedness *)
  have indep_loc: "indep_interval_bounded_random_variables M ?I ?X_new ?a_bound ?b_bound"
    using assms by simp
  have H: "Hoeffding_ineq M ?I ?X_new ?a_bound  ?b_bound" 
    using assms by simp


  have sum_integrals_eq: "( j?I. integralL M (?X_new j)) = μ_hat"
    using assms by simp
      (* Widths sum *)
  have widths:
    "( j  ?I. (?b_bound j - ?a_bound j)^2) = (real n) * (u_hat - l_hat)^2"
  proof -
    have "( j  ?I. (?b_bound j - ?a_bound j)^2) =
        ( j  ?I. (u_hat - l_hat)^2)"
      by simp
    also have "... = card ?I * (u_hat - l_hat)^2"
      by simp
    also have "card ?I = t"
      by simp
    also have "... = n" 
      using t = n assms by blast  (* or adjust if you have a lemma/assumption linking t and n *)
    finally show ?thesis 
      using assms by fastforce
  qed

  have widths_pos: "0 < ( j  ?I. (?b_bound j - ?a_bound j )^2)"
    using u_hat - l_hat  0 widths n_pos by auto

  have res: "prob {ω  Ω. ( j < t. Z_hat j a_star ω)   ( j < t. expectation (Z_hat j a_star)) - ε} =
      prob {x  space M. ( j < t. Z_hat j a_star x)  ( j < t. expectation (Z_hat j a_star)) - ε}"
    using assms by simp


  then have tail: "prob {ω  Ω. ( j?I. ?X_new j ω)   ( j?I. integralL M (?X_new j)) - ε} 
   exp (- 2 * ε^2 / ( j?I. (?b_bound j - ?a_bound j)^2))"
    using Hoeffding_ineq.Hoeffding_ineq_le[OF H eps_pos widths_pos]
    by (smt (verit, best) Collect_cong assms) 

(* Rewrite LHS and RHS *)
  have lhs_rewrite: "{ω  Ω. ( j < n. Z_hat j a_star ω)  μ_hat - ε} 
                     = {ω  Ω. ( j?I. ?X_new j ω) - ( j?I. integralL M (?X_new j))  -ε}"
    by (metis (mono_tags, lifting) add_le_iff assms(24,6) lessThan_def)
  have rhs_rewrite: "exp (- 2 * ε^2 / ( j?I. (?b_bound j - ?a_bound j)^2)) = exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
    using widths by simp

  have lhs_prob:"prob {ω  Ω. ( j < n. Z_hat j a_star ω)  μ_hat - ε}  
         = prob {ω  Ω. ( j?I. ?X_new j ω) - ( j?I. integralL M (?X_new j))  -ε}"
    using lhs_rewrite by simp

  then have "prob {ω  Ω. ( j < n. Z_hat j a_star ω)  μ_hat - ε}  
exp (- 2 * ε^2 / ( j?I. (?b_bound j - ?a_bound j)^2))" 
    using lhs_rewrite tail lhs_prob by (smt (verit, ccfv_threshold) Collect_cong)  
  then have "prob {ω  Ω. ( j < n. Z_hat j a_star ω)  μ_hat - ε}  exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
    using rhs_rewrite by linarith 


  then have "prob {ω  Ω. ( j < n. Z_hat j a_star ω) - μ_hat  - ε} =
prob {ω  Ω. ( j < n. Z_hat j a_star ω) - μ_hat  - abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))} "
    using eps by simp

  have res1: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2)) =
             exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
                     (real n * (u_hat - l_hat)^2))"
    using eps by simp

  have "exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
                  (real n * (u_hat - l_hat)^2)) = 
exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 / (real n * (u_hat - l_hat)^2)) "      
    using eps eps_pos  by blast
  then have "... = 
exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * (u_hat - l_hat)^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
    by (metis (no_types, opaque_lifting) more_arith_simps(11) power_mult_distrib times_divide_eq_left
        times_divide_eq_right) 

  have "... =
exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * abs ((u_hat - l_hat))^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
    by simp
  have "... = exp (- 2 * (1/ ((real n) )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
    using assms by simp

  then have "... = exp (-2 * (1/ ((real n) )) *  (((real n / 2) * ln (1 / δ))) )"
    using power2_eq_square by (smt (verit, best)
        arithmetic_simps(51) assms(10) eps eps_pos real_sqrt_ge_0_iff real_sqrt_pow2
        zero_le_mult_iff) 

  then have "... = exp (-1* ln (1 / δ)) "
    using assms by (simp add: field_simps)
  then have fin: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))  =  δ "
    using δ_pos assms
    by (metis (lifting)
        exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))2) = exp (- 2 * (1 / real n) * (real n / 2 * ln (1 / δ)))
        exp (- 2 * (¦u_hat - l_hat¦ * sqrt (real n / 2 * ln (1 / δ)))2 / (real n * (u_hat - l_hat)2)) = exp (- 2 * (¦u_hat - l_hat¦2 / (real n * (u_hat - l_hat)2)) * (sqrt (real n / 2 * ln (1 / δ)))2)
        exp (- 2 * (¦u_hat - l_hat¦2 / (real n * (u_hat - l_hat)2)) * (sqrt (real n / 2 * ln (1 / δ)))2) = exp (- 2 * (¦u_hat - l_hat¦2 / (real n * ¦u_hat - l_hat¦2)) * (sqrt (real n / 2 * ln (1 / δ)))2)
        exp (- 2 * (¦u_hat - l_hat¦2 / (real n * ¦u_hat - l_hat¦2)) * (sqrt (real n / 2 * ln (1 / δ)))2) = exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))2)
        arith_simps(56) exp_ln ln_divide_pos ln_one more_arith_simps(10) mult_minus1 of_nat_zero_less_power_iff power_0)

  show ?thesis using assms fin
      prob {ω  Ω. (j<n. Z_hat j a_star ω)  μ_hat - ε}  exp (- 2 * ε2 / (real n * (u_hat - l_hat)2))
    by presburger 


qed

theorem hoeffding_iid__bound_ge:
  fixes a :: 'a and n :: nat and ε :: real and μ_hat :: real and l_hat u_hat :: real
    and I :: "nat set"
    and X_new :: "nat  'b  real"
    and a_bound b_bound :: "nat  real"
  assumes a_in: "a  A"
    and "b  Ω"
    and eps_pos: "ε  0"
    and  eps: "ε =  abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))"
    and "δ  0  δ  1"
    and t_eq_n: "t = n"
    and  "c > 0 " 
    and bounds: "j < t. ω  Ω.  a A. l_hat  Z_hat j a ω  Z_hat j a ω  u_hat"
    and mu_def: "μ_hat = ( j < t. expectation (λω. Z_hat j a ω))"
    and "u_hat - l_hat  0"
    and t_pos:  "t > 0" 
    and "t<n.  N_n t a_star b > 0"
    and   n_pos:  "n > 0" 
    and "M_val t a b  ( s < t. if π s b = a then Z s a b else 0) /
 real (N_n t a b)"
    and widths: "( i  I. (b_bound i - a_bound i)^2) = (real n) * (u_hat - l_hat)^2"
    and space_M: "space M = Ω"
    and sets_M:  "sets M  = "
    and indep: "indep_vars (λ_. borel) (λj. (λω. Z j a ω)) {j. j < t}"
    and rv: "j<t. random_variable borel (λω. Z j a ω)"
    and "j<t.  Z_hat j a ω =  c * (if π j b = a then Z j a_star b else 0) "
    and "indep_interval_bounded_random_variables M I X_new a_bound b_bound"
    and indep_loc: "indep_interval_bounded_random_variables M {j. j < t}
                   (λj. (λω. Z_hat j a ω))
                   (λj. l_hat) (λj. u_hat)"
    and H: "Hoeffding_ineq M {j. j < t} 
                     (λj. (λω. Z_hat j a  ω))
                     (λj. l_hat) (λj. u_hat)"
    and sum_integrals_eq: "( j  {j. j < t}. integralL M (λω. Z_hat j a ω)) = μ_hat"
    and rewriting: "prob {ω  Ω. ( j < t. Z_hat j a ω) - ( j < t. expectation (Z_hat j a))   ε} =
      prob {x  space M. ( j < t. Z_hat j a x)  ( j < t. expectation (Z_hat j a)) + ε}"
  shows "prob {ω  Ω. ( j < n. Z_hat j a ω)  μ_hat + ε}   δ"
proof -

  let ?I = "{j. j < t}"
  let ?X_new = "λj. (λω. Z_hat j a ω)"
  let ?a_bound = "λj. l_hat"
  let ?b_bound = "λj. u_hat"

  have
    "M_val t a b  ( s < t. if π s b = a then Z s a b else 0) / real (N_n t a b)"
    using assms by simp


  have finite_I: "finite ?I" by simp

(* AE bounds *)
  have AE_bounds: "j  ?I. AE ω in M. ?a_bound j  ?X_new j ω  ?X_new j ω  ?b_bound j"
  proof
    fix j assume "j  ?I"
    then have "j < t" by simp

    then have bound_j: "ω  Ω. l_hat  Z_hat j a ω  Z_hat j a ω  u_hat"
      using bounds by (simp add: a_in_A)
    then show "AE ω in M. ?a_bound j  ?X_new j ω  ?X_new j ω  ?b_bound j"
      using space_M sets_M by force
  qed


(* Independence & boundedness *)
  have indep_loc: "indep_interval_bounded_random_variables M ?I ?X_new ?a_bound ?b_bound"
    using assms by simp
  have H: "Hoeffding_ineq M ?I ?X_new ?a_bound  ?b_bound" 
    using assms by simp


  have sum_integrals_eq: "( j?I. integralL M (?X_new j)) = μ_hat"
    using assms by simp
      (* Widths sum *)
  have widths:
    "( j  ?I. (?b_bound j - ?a_bound j)^2) = (real n) * (u_hat - l_hat)^2"
  proof -
    have "( j  ?I. (?b_bound j - ?a_bound j)^2) =
        ( j  ?I. (u_hat - l_hat)^2)"
      by simp
    also have "... = card ?I * (u_hat - l_hat)^2"
      by simp
    also have "card ?I = t"
      by simp
    also have "... = n" 
      using t = n assms by blast  (* or adjust if you have a lemma/assumption linking t and n *)
    finally show ?thesis 
      using assms by fastforce
  qed

  have widths_pos: "0 < ( j  ?I. (?b_bound j - ?a_bound j )^2)"
    using u_hat - l_hat  0 widths n_pos by auto

  have res: "prob {ω  Ω. ( j < t. Z_hat j a ω) - ( j < t. expectation (Z_hat j a))  ε} =
      prob {x  space M. ( j < t. Z_hat j a x)  ( j < t. expectation (Z_hat j a)) + ε}"
    using assms by simp


  then have tail: "prob {ω  Ω. ( j?I. ?X_new j ω)   ( j?I. integralL M (?X_new j)) + ε} 
   exp (- 2 * ε^2 / ( j?I. (?b_bound j - ?a_bound j)^2))"
    using Hoeffding_ineq.Hoeffding_ineq_ge[OF H eps_pos widths_pos]
    by (smt (verit, best) Collect_cong assms) 

(* Rewrite LHS and RHS *)
  have lhs_rewrite: "{ω  Ω. ( j < n. Z_hat j a ω)  μ_hat + ε} 
                     = {ω  Ω. ( j?I. ?X_new j ω) - ( j?I. integralL M (?X_new j))  ε}"
    using assms  by (metis (lifting) ext add.commute le_diff_eq lessThan_def) 
  have rhs_rewrite: "exp (- 2 * ε^2 / ( j?I. (?b_bound j - ?a_bound j)^2)) = exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
    using widths by simp

  have lhs_prob:"prob {ω  Ω. ( j < n. Z_hat j a ω)  μ_hat + ε} 
                     = prob{ω  Ω. ( j?I. ?X_new j ω) - ( j?I. integralL M (?X_new j))  ε}"
    using assms lhs_rewrite by presburger

  then have "prob {ω  Ω. ( j < n. Z_hat j a ω)  μ_hat + ε}  
exp (- 2 * ε^2 / ( j?I. (?b_bound j - ?a_bound j)^2))" 
    using lhs_rewrite tail lhs_prob by (smt (verit, ccfv_threshold) Collect_cong)  
  then have "prob {ω  Ω. ( j < n. Z_hat j a ω)  μ_hat + ε}  exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))"
    using rhs_rewrite by linarith 


  then have "prob {ω  Ω. ( j < n. Z_hat j a ω) - μ_hat    ε} =
prob {ω  Ω. ( j < n. Z_hat j a ω) - μ_hat    abs (u_hat - l_hat) * sqrt (((real n) / 2) * ln (1 / δ))} "
    using eps by simp

  have res1: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2)) =
             exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
                     (real n * (u_hat - l_hat)^2))"
    using eps by simp

  have "exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 /
                  (real n * (u_hat - l_hat)^2)) = 
exp (- 2 * (abs (u_hat - l_hat) * sqrt ((real n / 2) * ln (1 / δ)))^2 / (real n * (u_hat - l_hat)^2)) "      
    using eps eps_pos  by blast
  then have "... = exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * (u_hat - l_hat)^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
    by (metis (no_types, opaque_lifting) more_arith_simps(11) power_mult_distrib times_divide_eq_left
        times_divide_eq_right) 

  also have "... = exp (- 2 * ((abs (u_hat - l_hat))^2 / (real n * abs ((u_hat - l_hat))^2 )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
    by simp
  have "... = exp (- 2 * (1/ ((real n) )) * sqrt (((real n / 2) * ln (1 / δ)))^2 )"
    using assms by simp

  then have "... = exp (-2 * (1/ ((real n) )) *  (((real n / 2) * ln (1 / δ))) )"
    using power2_eq_square by (smt (verit, best)
        arithmetic_simps(51) assms(10) eps eps_pos real_sqrt_ge_0_iff real_sqrt_pow2
        zero_le_mult_iff) 

  then have "... = exp (-1* ln (1 / δ)) "
    using assms by (simp add: field_simps)
  then have fin: "exp (- 2 * ε^2 / (real n * (u_hat - l_hat)^2))  =  δ "
    using δ_pos assms
    by (metis (lifting)
        exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))2) = exp (- 2 * (1 / real n) * (real n / 2 * ln (1 / δ)))
        exp (- 2 * (¦u_hat - l_hat¦ * sqrt (real n / 2 * ln (1 / δ)))2 / (real n * (u_hat - l_hat)2)) = exp (- 2 * (¦u_hat - l_hat¦2 / (real n * (u_hat - l_hat)2)) * (sqrt (real n / 2 * ln (1 / δ)))2)
        exp (- 2 * (¦u_hat - l_hat¦2 / (real n * (u_hat - l_hat)2)) * (sqrt (real n / 2 * ln (1 / δ)))2) = exp (- 2 * (¦u_hat - l_hat¦2 / (real n * ¦u_hat - l_hat¦2)) * (sqrt (real n / 2 * ln (1 / δ)))2)
        exp (- 2 * (¦u_hat - l_hat¦2 / (real n * ¦u_hat - l_hat¦2)) * (sqrt (real n / 2 * ln (1 / δ)))2) = exp (- 2 * (1 / real n) * (sqrt (real n / 2 * ln (1 / δ)))2)
        diff_0 exp_ln ln_divide_pos ln_one more_arith_simps(10) mult_minus1 of_nat_zero_less_power_iff power_0)

  show ?thesis using assms fin
    using prob {ω  Ω. μ_hat + ε  (j<n. Z_hat j a ω)}  exp (- 2 * ε2 / (real n * (u_hat - l_hat)2))
    by presburger

qed



end 
end