# Theory Laws_of_Large_Numbers

(*
File:     Laws_of_Large_Numbers.thy
Author:   Manuel Eberl, TU München
*)
section ‹The Laws of Large Numbers›
theory Laws_of_Large_Numbers
imports Ergodic_Theory.Shift_Operator
begin

text ‹
We prove the strong law of large numbers in the following form: Let $(X_i)_{i\in\mathbb{N}}$
be a sequence of i.i.d. random variables over a probability space ‹M›. Further assume that
the expected value $E[X_0]$ of $X_0$ exists. Then the sequence of random variables
$\overline{X}_n = \frac{1}{n} \sum_{i=0}^n X_i$
of running averages almost surely converges to $E[X_0]$.
This means that
$\mathcal{P}[\overline{X}_n \longrightarrow E[X_0]] = 1\ .$

›

subsection ‹The strong law›

text ‹
The proof uses Birkhoff's Theorem from Gouëzel's formalisation of ergodic theory~\<^cite>‹"gouezel"›
and the fact that the shift operator $T(x_1, x_2, x_3, \ldots) = (x_2, x_3, \ldots)$ is ergodic.
This proof can be found in various textbooks on probability theory/ergodic
theory, e.g. the ones by Krengel~\<^cite>‹‹p.~24› in "krengel"› and
Simmonet~\<^cite>‹‹Chapter 15, pp.~311--325› in "Simonnet1996"›.
›
theorem (in prob_space) strong_law_of_large_numbers_iid:
fixes X :: "nat ⇒ 'a ⇒ real"
assumes indep: "indep_vars (λ_. borel) X UNIV"
assumes distr: "⋀i. distr M borel (X i) = distr M borel (X 0)"
assumes L1:    "integrable M (X 0)"
shows "AE x in M. (λn. (∑i<n. X i x) / n) ⇢ expectation (X 0)"
proof -
text ‹
We adopt a more explicit view of \<^term>‹M› as a countably infinite product of i.i.d.
random variables, indexed by the natural numbers:
›
define M' :: "(nat ⇒ real) measure" where "M' = Pi⇩M UNIV (λi. distr M borel (X i))"
have [measurable]: "random_variable borel (X i)" for i
using indep by (auto simp: indep_vars_def)
have M'_eq: "M' = distr M (Pi⇩M UNIV (λi. borel)) (λx. λi∈UNIV. X i x)"
using indep unfolding M'_def by (subst (asm) indep_vars_iff_distr_eq_PiM) auto
have space_M': "space M' = UNIV"
have sets_M' [measurable_cong]: "sets M' = sets (Pi⇩M UNIV (λi. borel))"
interpret M': prob_space M'
unfolding M'_eq by (intro prob_space_distr) auto

text ‹We introduce a shift operator that forgets the first variable in the sequence.›
define T :: "(nat ⇒ real) ⇒ (nat ⇒ real)" where
"T = (λf. f ∘ Suc)"
have funpow_T: "(T ^^ i) = (λf. f ∘ (λn. n + i))" for i
by (induction i) (auto simp: T_def)

interpret T: shift_operator_ergodic "distr M borel (X 0)" T M'
proof -
interpret X0: prob_space "distr M borel (X 0)"
by (rule prob_space_distr) auto
show "shift_operator_ergodic (distr M borel (X 0))"
by unfold_locales
show "M' ≡ Pi⇩M UNIV (λ_. distr M borel (X 0)) "
unfolding M'_def by (subst distr)

have [intro]: "integrable M' (λf. f 0)"
unfolding M'_eq by (subst integrable_distr_eq) (use L1 in auto)
have "AE f in M'. (λn. T.birkhoff_sum (λf. f 0) n f / real n)
⇢ real_cond_exp M' T.Invariants (λf. f 0) f"
by (rule T.birkhoff_theorem_AE_nonergodic) auto
moreover have "AE x in M'. real_cond_exp M' T.Invariants (λf. f 0) x =
M'.expectation (λf. f 0) / M'.prob (space M')"
by (intro T.Invariants_cond_exp_is_integral_fmpt) auto
ultimately have "AE f in M'. (λn. T.birkhoff_sum (λf. f 0) n f / real n)
⇢ M'.expectation (λf. f 0)"
also have "M'.expectation (λf. f 0) = expectation (X 0)"
unfolding M'_eq by (subst integral_distr) simp_all
also have "T.birkhoff_sum (λf. f 0) = (λn f. sum f {..<n})"
by (intro ext) (simp_all add:T.birkhoff_sum_def funpow_T)
finally show ?thesis
unfolding M'_eq by (subst (asm) AE_distr_iff) simp_all
qed

subsection ‹The weak law›

text ‹
To go from the strong law to the weak one, we need the fact that almost sure convergence
implies convergence in probability. We prove this for sequences of random variables here.
›
lemma (in prob_space) AE_convergence_imp_convergence_in_prob:
assumes [measurable]: "⋀i. random_variable borel (X i)" "random_variable borel Y"
assumes AE: "AE x in M. (λi. X i x) ⇢ Y x"
assumes "ε > (0 :: real)"
shows   "(λi. prob {x∈space M. ¦X i x - Y x¦ > ε}) ⇢ 0"
proof -
define A where "A = (λi. {x∈space M. ¦X i x - Y x¦ > ε})"
define B where "B = (λn. (⋃i∈{n..}. A i))"
have [measurable]: "A i ∈ sets M"  "B i ∈ sets M" for i
unfolding A_def B_def by measurable

have "AE x in M. x ∉ (⋂i. B i)"
using AE unfolding B_def A_def
by eventually_elim
(use ‹ε > 0› in ‹fastforce simp: tendsto_iff dist_norm eventually_at_top_linorder›)
hence "(⋂i. B i) ∈ null_sets M"
by (subst AE_iff_null_sets) auto

show "(λi. prob (A i)) ⇢ 0"
proof (rule Lim_null_comparison)
have "(λi. prob (B i)) ⇢ prob (⋂i. B i)"
proof (rule finite_Lim_measure_decseq)
show "decseq B"
by (rule decseq_SucI) (force simp: B_def)
qed auto
also have "prob (⋂i. B i) = 0"
using ‹(⋂i. B i) ∈ null_sets M› by (simp add: measure_eq_0_null_sets)
finally show "(λi. prob (B i)) ⇢ 0" .
next
have "prob (A n) ≤ prob (B n)" for n
unfolding B_def by (intro finite_measure_mono) auto
thus "∀⇩F n in at_top. norm (prob (A n)) ≤ prob (B n)"
by (intro always_eventually) auto
qed
qed

text ‹
The weak law is now a simple corollary: we again have the same setting as before. The weak
law now states that $\overline{X}_n$ converges to $E[X_0]$ in probability. This means that
for any ‹ε > 0›, the probability that $|\overline{X}_n - X_0| > \varepsilon$ vanishes as
‹n → ∞›.
›
corollary (in prob_space) weak_law_of_large_numbers_iid:
fixes X :: "nat ⇒ 'a ⇒ real" and ε :: real
assumes indep: "indep_vars (λ_. borel) X UNIV"
assumes distr: "⋀i. distr M borel (X i) = distr M borel (X 0)"
assumes L1:    "integrable M (X 0)"
assumes "ε > 0"
shows   "(λn. prob {x∈space M. ¦(∑i<n. X i x) / n - expectation (X 0)¦ > ε}) ⇢ 0"
proof (rule AE_convergence_imp_convergence_in_prob)
show "AE x in M. (λn. (∑i<n. X i x) / n) ⇢ expectation (X 0)"
by (rule strong_law_of_large_numbers_iid) fact+
next
have [measurable]: "random_variable borel (X i)" for i
using indep by (auto simp: indep_vars_def)
show "random_variable borel (λx. (∑i<n. X i x) / real n)" for n
by measurable
qed (use ‹ε > 0› in simp_all)

end