Theory Laws_of_Large_Numbers

   File:     Laws_of_Large_Numbers.thy
   Author:   Manuel Eberl, TU München
section ‹The Laws of Large Numbers›
theory Laws_of_Large_Numbers
  imports Ergodic_Theory.Shift_Operator

text ‹
  We prove the strong law of large numbers in the following form: Let $(X_i)_{i\in\mathbb{N}}$
  be a sequence of i.i.d. random variables over a probability space M›. Further assume that
  the expected value $E[X_0]$ of $X_0$ exists. Then the sequence of random variables
  \[\overline{X}_n = \frac{1}{n} \sum_{i=0}^n X_i\]
  of running averages almost surely converges to $E[X_0]$.
  This means that
  \[\mathcal{P}[\overline{X}_n \longrightarrow E[X_0]] = 1\ .\]

  We start with the strong law.

subsection ‹The strong law›

text ‹
  The proof uses Birkhoff's Theorem from Gouëzel's formalisation of ergodic theory~cite"gouezel"
  and the fact that the shift operator $T(x_1, x_2, x_3, \ldots) = (x_2, x_3, \ldots)$ is ergodic.
  This proof can be found in various textbooks on probability theory/ergodic
  theory, e.g. the ones by Krengel~cite‹p.~24› in "krengel" and
  Simmonet~cite‹Chapter 15, pp.~311--325› in "Simonnet1996".
theorem (in prob_space) strong_law_of_large_numbers_iid:
  fixes X :: "nat  'a  real"
  assumes indep: "indep_vars (λ_. borel) X UNIV"
  assumes distr: "i. distr M borel (X i) = distr M borel (X 0)"
  assumes L1:    "integrable M (X 0)"
  shows "AE x in M. (λn. (i<n. X i x) / n)  expectation (X 0)"
proof -
  text ‹
    We adopt a more explicit view of termM as a countably infinite product of i.i.d.
    random variables, indexed by the natural numbers:
  define M' :: "(nat  real) measure" where "M' = PiM UNIV (λi. distr M borel (X i))"
  have [measurable]: "random_variable borel (X i)" for i
    using indep by (auto simp: indep_vars_def)
  have M'_eq: "M' = distr M (PiM UNIV (λi. borel)) (λx. λiUNIV. X i x)"
    using indep unfolding M'_def by (subst (asm) indep_vars_iff_distr_eq_PiM) auto
  have space_M': "space M' = UNIV"
    by (simp add: M'_def space_PiM)
  have sets_M' [measurable_cong]: "sets M' = sets (PiM UNIV (λi. borel))"
    by (simp add: M'_eq)
  interpret M': prob_space M'
    unfolding M'_eq by (intro prob_space_distr) auto

  text ‹We introduce a shift operator that forgets the first variable in the sequence.›
  define T :: "(nat  real)  (nat  real)" where
    "T = (λf. f  Suc)"
  have funpow_T: "(T ^^ i) = (λf. f  (λn. n + i))" for i
    by (induction i) (auto simp: T_def)

  interpret T: shift_operator_ergodic "distr M borel (X 0)" T M'
  proof -
    interpret X0: prob_space "distr M borel (X 0)"
      by (rule prob_space_distr) auto
    show "shift_operator_ergodic (distr M borel (X 0))"
      by unfold_locales
    show "M'  PiM UNIV (λ_. distr M borel (X 0)) "
      unfolding M'_def by (subst distr)
  qed (simp_all add: T_def)

  have [intro]: "integrable M' (λf. f 0)"
    unfolding M'_eq by (subst integrable_distr_eq) (use L1 in auto)
  have "AE f in M'. (λn. T.birkhoff_sum (λf. f 0) n f / real n)
             real_cond_exp M' T.Invariants (λf. f 0) f"
    by (rule T.birkhoff_theorem_AE_nonergodic) auto
  moreover have "AE x in M'. real_cond_exp M' T.Invariants (λf. f 0) x =
                    M'.expectation (λf. f 0) / M'.prob (space M')"
    by (intro T.Invariants_cond_exp_is_integral_fmpt) auto
  ultimately have "AE f in M'. (λn. T.birkhoff_sum (λf. f 0) n f / real n)
                      M'.expectation (λf. f 0)"
    by eventually_elim (simp_all add: M'.prob_space)
  also have "M'.expectation (λf. f 0) = expectation (X 0)"
    unfolding M'_eq by (subst integral_distr) simp_all
  also have "T.birkhoff_sum (λf. f 0) = (λn f. sum f {..<n})"
    by (intro ext) (simp_all add:T.birkhoff_sum_def funpow_T)
  finally show ?thesis
    unfolding M'_eq by (subst (asm) AE_distr_iff) simp_all

subsection ‹The weak law›

text ‹
  To go from the strong law to the weak one, we need the fact that almost sure convergence
  implies convergence in probability. We prove this for sequences of random variables here.
lemma (in prob_space) AE_convergence_imp_convergence_in_prob:
  assumes [measurable]: "i. random_variable borel (X i)" "random_variable borel Y"
  assumes AE: "AE x in M. (λi. X i x)  Y x"
  assumes "ε > (0 :: real)"
  shows   "(λi. prob {xspace M. ¦X i x - Y x¦ > ε})  0"
proof -
  define A where "A = (λi. {xspace M. ¦X i x - Y x¦ > ε})"
  define B where "B = (λn. (i{n..}. A i))"
  have [measurable]: "A i  sets M"  "B i  sets M" for i
    unfolding A_def B_def by measurable

  have "AE x in M. x  (i. B i)"
    using AE unfolding B_def A_def
    by eventually_elim
       (use ε > 0 in fastforce simp: tendsto_iff dist_norm eventually_at_top_linorder)
  hence "(i. B i)  null_sets M"
    by (subst AE_iff_null_sets) auto

  show "(λi. prob (A i))  0"
  proof (rule Lim_null_comparison)
    have "(λi. prob (B i))  prob (i. B i)"
    proof (rule finite_Lim_measure_decseq)
      show "decseq B"
        by (rule decseq_SucI) (force simp: B_def)
    qed auto
    also have "prob (i. B i) = 0"
      using (i. B i)  null_sets M by (simp add: measure_eq_0_null_sets)
    finally show "(λi. prob (B i))  0" .
    have "prob (A n)  prob (B n)" for n
      unfolding B_def by (intro finite_measure_mono) auto
    thus "F n in at_top. norm (prob (A n))  prob (B n)"
      by (intro always_eventually) auto

text ‹
  The weak law is now a simple corollary: we again have the same setting as before. The weak
  law now states that $\overline{X}_n$ converges to $E[X_0]$ in probability. This means that
  for any ε > 0›, the probability that $|\overline{X}_n - X_0| > \varepsilon$ vanishes as
  n → ∞›.
corollary (in prob_space) weak_law_of_large_numbers_iid:
  fixes X :: "nat  'a  real" and ε :: real
  assumes indep: "indep_vars (λ_. borel) X UNIV"
  assumes distr: "i. distr M borel (X i) = distr M borel (X 0)"
  assumes L1:    "integrable M (X 0)"
  assumes "ε > 0"
  shows   "(λn. prob {xspace M. ¦(i<n. X i x) / n - expectation (X 0)¦ > ε})  0"
proof (rule AE_convergence_imp_convergence_in_prob)
  show "AE x in M. (λn. (i<n. X i x) / n)  expectation (X 0)"
    by (rule strong_law_of_large_numbers_iid) fact+
  have [measurable]: "random_variable borel (X i)" for i
    using indep by (auto simp: indep_vars_def)
  show "random_variable borel (λx. (i<n. X i x) / real n)" for n
    by measurable
qed (use ε > 0 in simp_all)