Theory Laws_of_Large_Numbers

(*
   File:     Laws_of_Large_Numbers.thy
   Author:   Manuel Eberl, TU München
*)
section ‹The Laws of Large Numbers›
theory Laws_of_Large_Numbers
  imports Ergodic_Theory.Shift_Operator
begin

text ‹
  We prove the strong law of large numbers in the following form: Let $(X_i)_{i\in\mathbb{N}}$
  be a sequence of i.i.d. random variables over a probability space ‹M›. Further assume that
  the expected value $E[X_0]$ of $X_0$ exists. Then the sequence of random variables
  \[\overline{X}_n = \frac{1}{n} \sum_{i=0}^n X_i\]
  of running averages almost surely converges to $E[X_0]$.
  This means that
  \[\mathcal{P}[\overline{X}_n \longrightarrow E[X_0]] = 1\ .\]

  We start with the strong law.
›


subsection ‹The strong law›

text ‹
  The proof uses Birkhoff's Theorem from Gouëzel's formalisation of ergodic theory~\<^cite>‹"gouezel"›
  and the fact that the shift operator $T(x_1, x_2, x_3, \ldots) = (x_2, x_3, \ldots)$ is ergodic.
  This proof can be found in various textbooks on probability theory/ergodic
  theory, e.g. the ones by Krengel~\<^cite>‹‹p.~24› in "krengel"› and
  Simmonet~\<^cite>‹‹Chapter 15, pp.~311--325› in "Simonnet1996"›.
›
theorem (in prob_space) strong_law_of_large_numbers_iid:
  fixes X :: "nat ⇒ 'a ⇒ real"
  assumes indep: "indep_vars (λ_. borel) X UNIV"
  assumes distr: "⋀i. distr M borel (X i) = distr M borel (X 0)"
  assumes L1:    "integrable M (X 0)"
  shows "AE x in M. (λn. (∑i<n. X i x) / n) ⇢ expectation (X 0)"
proof -
  text ‹
    We adopt a more explicit view of \<^term>‹M› as a countably infinite product of i.i.d.
    random variables, indexed by the natural numbers:
  ›
  define M' :: "(nat ⇒ real) measure" where "M' = Pi⇩_M UNIV (λi. distr M borel (X i))"
  have [measurable]: "random_variable borel (X i)" for i
    using indep by (auto simp: indep_vars_def)
  have M'_eq: "M' = distr M (Pi⇩_M UNIV (λi. borel)) (λx. λi∈UNIV. X i x)"
    using indep unfolding M'_def by (subst (asm) indep_vars_iff_distr_eq_PiM) auto
  have space_M': "space M' = UNIV"
    by (simp add: M'_def space_PiM)
  have sets_M' [measurable_cong]: "sets M' = sets (Pi⇩_M UNIV (λi. borel))"
    by (simp add: M'_eq)
  interpret M': prob_space M'
    unfolding M'_eq by (intro prob_space_distr) auto

  text ‹We introduce a shift operator that forgets the first variable in the sequence.›
  define T :: "(nat ⇒ real) ⇒ (nat ⇒ real)" where
    "T = (λf. f ∘ Suc)"
  have funpow_T: "(T ^^ i) = (λf. f ∘ (λn. n + i))" for i
    by (induction i) (auto simp: T_def)

  interpret T: shift_operator_ergodic "distr M borel (X 0)" T M'
  proof -
    interpret X0: prob_space "distr M borel (X 0)"
      by (rule prob_space_distr) auto
    show "shift_operator_ergodic (distr M borel (X 0))"
      by unfold_locales
    show "M' ≡ Pi⇩_M UNIV (λ_. distr M borel (X 0)) "
      unfolding M'_def by (subst distr)
  qed (simp_all add: T_def)

  have [intro]: "integrable M' (λf. f 0)"
    unfolding M'_eq by (subst integrable_distr_eq) (use L1 in auto)
  have "AE f in M'. (λn. T.birkhoff_sum (λf. f 0) n f / real n)
            ⇢ real_cond_exp M' T.Invariants (λf. f 0) f"
    by (rule T.birkhoff_theorem_AE_nonergodic) auto
  moreover have "AE x in M'. real_cond_exp M' T.Invariants (λf. f 0) x =
                    M'.expectation (λf. f 0) / M'.prob (space M')"
    by (intro T.Invariants_cond_exp_is_integral_fmpt) auto
  ultimately have "AE f in M'. (λn. T.birkhoff_sum (λf. f 0) n f / real n)
                     ⇢ M'.expectation (λf. f 0)"
    by eventually_elim (simp_all add: M'.prob_space)
  also have "M'.expectation (λf. f 0) = expectation (X 0)"
    unfolding M'_eq by (subst integral_distr) simp_all
  also have "T.birkhoff_sum (λf. f 0) = (λn f. sum f {..<n})"
    by (intro ext) (simp_all add:T.birkhoff_sum_def funpow_T)
  finally show ?thesis
    unfolding M'_eq by (subst (asm) AE_distr_iff) simp_all
qed


subsection ‹The weak law›

text ‹
  To go from the strong law to the weak one, we need the fact that almost sure convergence
  implies convergence in probability. We prove this for sequences of random variables here.
›
lemma (in prob_space) AE_convergence_imp_convergence_in_prob:
  assumes [measurable]: "⋀i. random_variable borel (X i)" "random_variable borel Y"
  assumes AE: "AE x in M. (λi. X i x) ⇢ Y x"
  assumes "ε > (0 :: real)"
  shows   "(λi. prob {x∈space M. ¦X i x - Y x¦ > ε}) ⇢ 0"
proof -
  define A where "A = (λi. {x∈space M. ¦X i x - Y x¦ > ε})"
  define B where "B = (λn. (⋃i∈{n..}. A i))"
  have [measurable]: "A i ∈ sets M"  "B i ∈ sets M" for i
    unfolding A_def B_def by measurable

  have "AE x in M. x ∉ (⋂i. B i)"
    using AE unfolding B_def A_def
    by eventually_elim
       (use ‹ε > 0› in ‹fastforce simp: tendsto_iff dist_norm eventually_at_top_linorder›)
  hence "(⋂i. B i) ∈ null_sets M"
    by (subst AE_iff_null_sets) auto

  show "(λi. prob (A i)) ⇢ 0"
  proof (rule Lim_null_comparison)
    have "(λi. prob (B i)) ⇢ prob (⋂i. B i)"
    proof (rule finite_Lim_measure_decseq)
      show "decseq B"
        by (rule decseq_SucI) (force simp: B_def)
    qed auto
    also have "prob (⋂i. B i) = 0"
      using ‹(⋂i. B i) ∈ null_sets M› by (simp add: measure_eq_0_null_sets)
    finally show "(λi. prob (B i)) ⇢ 0" .
  next
    have "prob (A n) ≤ prob (B n)" for n
      unfolding B_def by (intro finite_measure_mono) auto
    thus "∀⇩_F n in at_top. norm (prob (A n)) ≤ prob (B n)"
      by (intro always_eventually) auto
  qed
qed

text ‹
  The weak law is now a simple corollary: we again have the same setting as before. The weak
  law now states that $\overline{X}_n$ converges to $E[X_0]$ in probability. This means that
  for any ‹ε > 0›, the probability that $|\overline{X}_n - X_0| > \varepsilon$ vanishes as
  ‹n → ∞›.
›
corollary (in prob_space) weak_law_of_large_numbers_iid:
  fixes X :: "nat ⇒ 'a ⇒ real" and ε :: real
  assumes indep: "indep_vars (λ_. borel) X UNIV"
  assumes distr: "⋀i. distr M borel (X i) = distr M borel (X 0)"
  assumes L1:    "integrable M (X 0)"
  assumes "ε > 0"
  shows   "(λn. prob {x∈space M. ¦(∑i<n. X i x) / n - expectation (X 0)¦ > ε}) ⇢ 0"
proof (rule AE_convergence_imp_convergence_in_prob)
  show "AE x in M. (λn. (∑i<n. X i x) / n) ⇢ expectation (X 0)"
    by (rule strong_law_of_large_numbers_iid) fact+
next
  have [measurable]: "random_variable borel (X i)" for i
    using indep by (auto simp: indep_vars_def)
  show "random_variable borel (λx. (∑i<n. X i x) / real n)" for n
    by measurable
qed (use ‹ε > 0› in simp_all)

end