(* Theory Skip_List *)

(*
File:    Skip_List.thy
Authors: Max W. Haslbeck, Manuel Eberl
*)

section ‹Randomized Skip Lists›
theory Skip_List
  imports Geometric_PMF
          Misc
          "Monad_Normalisation.Monad_Normalisation"
begin

text ‹Conflicting notation from \<^theory>‹HOL-Analysis.Infinite_Sum››
no_notation Infinite_Sum.abs_summable_on (infixr "abs'_summable'_on" 46)

subsection ‹Preliminaries›

(* Binding the result of a conditional into D is the same as binding D inside
   each branch of the conditional. *)
lemma bind_pmf_if': "(do {c ← C;
ab ← (if c then A else B);
D ab}::'a pmf) =
do {c ← C;
(if c then (A ⤜ D) else (B ⤜ D))}"
by (metis (mono_tags, lifting))

(* Maximum of A after adjoining 0, so the argument of Max is never empty. *)
abbreviation (input) Max⇩0 where "Max⇩0 ≡ (λA. Max (A ∪ {0}))"

subsection ‹Definition of a Randomised Skip List›

text ‹
Given a set A we assign a geometric random variable (counting the number of failed Bernoulli
trials before the first success) to every element in A. That means an arbitrary element of A is
on level n with probability $(1-p)^{n}p$. We define the height of the skip list as the maximum
assigned level. So a skip list with only one level has height 0 but the calculation of the
expected height is cleaner this way.
›

locale random_skip_list =
fixes p::real
begin

(* q is the complement of the locale parameter p. *)
definition q where "q = 1 - p"

(* SL A assigns to every element of A an independent level drawn from geometric_pmf p
   (default value 0 outside A); SL⇩N n is the instance for the set {..<n}. *)
definition SL :: "('a::linorder) set ⇒ ('a ⇒ nat) pmf" where "SL A = Pi_pmf A 0 (λ_. geometric_pmf p)"
definition SL⇩N :: "nat ⇒ (nat ⇒ nat) pmf" where "SL⇩N n = SL {..<n}"

subsection ‹Height of Skip List›

(* H A is the distribution of the maximum level assigned to an element of A
   (0 if A is empty); H⇩N n is the instance for the set {..<n}. *)
definition H where "H A = map_pmf (λf. Max⇩0 (f ` A)) (SL A)"
definition H⇩N :: "nat ⇒ nat pmf" where "H⇩N n = H {..<n}"

context includes monad_normalisation
begin

text ‹
The height of a skip list is independent of the values in a set A. For simplicity we can
therefore work on the skip list over the set @{term "{..< card A}"}
›

(* The height distribution over a finite set A coincides with the one over {..<card A}.
   A is relabelled through f', the inverse of indexing into sorted_list_of_set A. *)
lemma
  assumes "finite A"
  shows "H A = H⇩N (card A)"
proof -
  define f' where "f' = (λx. if x ∈ A
                              then the_inv_into {..<card A} ((!) (sorted_list_of_set A)) x
                              else card A)"
  have bij_f': "bij_betw f' A {..<card A}"
  proof -
    (* I know the proof looks weird, but for some reason all tools have problems with this proof *)
    have "bij_betw (the_inv_into {..<card A} ((!) (sorted_list_of_set A))) A {..<card A}"
      unfolding f'_def using sorted_list_of_set_bij_betw assms bij_betw_the_inv_into by blast
    moreover have "bij_betw (the_inv_into {..<card A} ((!) (sorted_list_of_set A))) A {..<card A}
                 = bij_betw f' A {..<card A}"
      unfolding f'_def by (rule bij_betw_cong) simp
    ultimately show ?thesis
      by blast
  qed
  have *: "Max⇩0 ((f ∘ f') ` A) = Max⇩0 (f ` {..<card A})" for f :: "nat ⇒ nat"
    using  bij_betw_imp_surj_on bij_f' image_comp by metis
  have "H A = map_pmf (λf. Max⇩0 (f ` A)) (map_pmf (λg. g ∘ f') (SL⇩N (card A)))"
    using assms bij_f' unfolding H_def SL_def SL⇩N_def
    by (subst Pi_pmf_bij_betw[of _ f' "{..<card A}"]) (auto simp add: f'_def)
  also have "… =  H⇩N (card A)"
    unfolding H⇩N_def H_def SL⇩N_def using * by (auto intro!: bind_pmf_cong simp add: map_pmf_def)
  finally show ?thesis
    by simp
qed

text ‹
The cumulative distribution function (CDF) of the height is the CDF of the geometric PMF to the
power of n
›

(* CDF of the height: the height is at most i exactly when each of the n levels is at
   most i, and the levels are independent geometric variables. *)
lemma prob_Max_IID_geometric_atMost:
  assumes "p ∈ {0..1}"
  shows "measure_pmf.prob (H⇩N n) {..i}
       = (measure_pmf.prob (geometric_pmf p) {..i}) ^ n" (is "?lhs = ?rhs")
proof -
  note SL_def[simp] SL⇩N_def[simp] H_def[simp] H⇩N_def[simp]
  have "{f. Max⇩0 (f ` {..<n}) ≤ i}  = {..<n} → {..i}"
    by auto
  then have "?lhs = measure_pmf.prob (SL⇩N n) ({..<n} → {..i})"
    by (simp add: vimage_def)
  also have "… = measure_pmf.prob (SL⇩N n) (PiE_dflt {..<n} 0 (λ_. {..i}))"
    by (intro measure_prob_cong_0) (auto simp add: PiE_dflt_def pmf_Pi split: if_splits)
  also have "… = measure_pmf.prob (geometric_pmf p) {..i} ^ n"
    using assms by (auto simp add: measure_Pi_pmf_PiE_dflt)
  finally show ?thesis
    by simp
qed

(* Tail probability of the height, obtained as the complement of the CDF from
   prob_Max_IID_geometric_atMost and rewritten in terms of q. *)
lemma prob_Max_IID_geometric_greaterThan:
assumes "p ∈ {0<..1}"
shows "measure_pmf.prob (H⇩N n) {i<..} =
1 - (1 - q ^ (i + 1)) ^ n"
proof -
have "UNIV - {..i} = {i<..}"
by auto
then have "measure_pmf.prob (H⇩N n) {i<..} = measure_pmf.prob (H⇩N n) (space (measure_pmf (H⇩N n)) - {..i})"
by (auto)
(* complement rule for probabilities, then the CDF lemma *)
also have "… = 1 - (measure_pmf.prob (geometric_pmf p) {..i}) ^ n"
using assms by (subst measure_pmf.prob_compl) (auto simp add: prob_Max_IID_geometric_atMost)
also have "… =   1 - (1 - q ^ (i + 1)) ^ n"
using assms unfolding q_def by (subst geometric_pmf_prob_atMost) auto
finally show ?thesis
by simp
qed

end (* context includes monad_normalisation *)
end (* locale random_skip_list *)

text ‹
An alternative definition of the expected value of a non-negative random variable
\footnote{\url{https://en.wikipedia.org/w/index.php?title=Expected\_value&oldid=881384346\#Formula\_for\_non-negative\_random\_variables}}
›

(* Tail-sum formula: if the tail probabilities P(N ≥ i), i ≥ 1, are absolutely summable,
   then N is integrable and its expectation is the sum of these tail probabilities. *)
lemma expectation_prob_atLeast:
  assumes "(λi. measure_pmf.prob N {i..}) abs_summable_on {1..}"
  shows "measure_pmf.expectation N real = infsetsum (λi. measure_pmf.prob N {i..}) {1..}"
        "integrable N real"
proof -
  have "(λ(x, y). pmf N y) abs_summable_on Sigma {Suc 0..} atLeast"
    using assms by (auto simp add: measure_pmf_conv_infsetsum abs_summable_on_Sigma_iff)
  (* swap the two summation indices *)
  then have summable: "(λ(x, y). pmf N x) abs_summable_on Sigma {Suc 0..} (atLeastAtMost (Suc 0))"
    by (subst abs_summable_on_reindex_bij_betw[of "λ(x,y). (y,x)", symmetric])
       (auto intro!: bij_betw_imageI simp add: inj_on_def case_prod_beta)
  have "measure_pmf.expectation N real = (∑⇩ax. pmf N x *⇩R real x)"
    by (auto simp add: infsetsum_def integral_density measure_pmf_eq_density)
  also have "… = (∑⇩ax ∈ ({0} ∪ {Suc 0..}). pmf N x *⇩R real x)"
    by (auto intro!: infsetsum_cong)
  also have "… = (∑⇩ax∈{Suc 0..}. pmf N x * real x)"
  proof -
    have "(λx. pmf N x *⇩R real x) abs_summable_on {0} ∪ {Suc 0..}"
      using summable by (subst (asm) abs_summable_on_Sigma_iff) (auto simp add: mult.commute)
    then show ?thesis
      by (subst infsetsum_Un_Int) auto
  qed
  also have "… = (∑⇩a(x, y)∈Sigma {Suc 0..} (atLeastAtMost (Suc 0)). pmf N x)"
    using summable by (subst infsetsum_Sigma) (auto simp add: mult.commute)
  also have "… = (∑⇩ax∈Sigma {Suc 0..} atLeast. pmf N (snd x))"
    by (subst infsetsum_reindex_bij_betw[of "λ(x,y). (y,x)", symmetric])
       (auto intro!: bij_betw_imageI simp add: inj_on_def case_prod_beta)
  also have "… = infsetsum (λi. measure_pmf.prob N {i..}) {1..}"
    using assms
    by (subst infsetsum_Sigma)
       (auto simp add: measure_pmf_conv_infsetsum abs_summable_on_Sigma_iff infsetsum_Sigma')
  finally show "measure_pmf.expectation N real = infsetsum (λi. measure_pmf.prob N {i..}) {1..}"
    by simp
  have "(λx. pmf N x *⇩R real x) abs_summable_on {0} ∪ {Suc 0..}"
    using summable by (subst (asm) abs_summable_on_Sigma_iff) (auto simp add: mult.commute)
  (* {0} ∪ {Suc 0..} is the whole of nat *)
  then have "(λx. pmf N x *⇩R real x) abs_summable_on UNIV"
    by (simp add: atLeast_Suc)
  then have "integrable (count_space UNIV) (λx. pmf N x *⇩R real x)"
    by (subst abs_summable_on_def[symmetric]) blast
  then show "integrable N real"
    by (subst measure_pmf_eq_density, subst integrable_density) auto
qed

text ‹
The expected height of a skip list has no closed-form expression but we can approximate it. We
start by showing how we can calculate an infinite sum over the natural numbers with an integral
over the positive reals and the floor function.
›

(* A non-negative, absolutely summable series over nat equals the non-negative Lebesgue
   integral over {0..} of the step function x ↦ f (nat ⌊x⌋). The half line is split
   into the unit intervals {n..<n+1}. *)
lemma infsetsum_set_nn_integral_reals:
  assumes "f abs_summable_on UNIV" "⋀n. f n ≥ 0"
  shows "infsetsum f UNIV = set_nn_integral lborel {0::real..} (λx. f (nat (floor x)))"
proof -
  have "x < 1 + (floor x)"for x::real
    by linarith
  then have "∃n. real n ≤ x ∧ x < 1 + real n" if "x ≥ 0" for x
    using that of_nat_floor by (intro exI[of _ "nat (floor x)"]) auto
  then have "{0..} = (⋃n. {real n..<real (Suc n)})"
    by auto
  then have "∫⇧+x∈{0::real..}. ennreal (f (nat ⌊x⌋))∂lborel =
             (∑n. ∫⇧+x∈{real n..<1 + real n}. ennreal (f (nat ⌊x⌋))∂lborel)"
    by (auto simp add: disjoint_family_on_def nn_integral_disjoint_family)
  also have "… = (∑n. ∫⇧+x∈{real n..<1 + real n}. ennreal (f n)∂lborel)"
    by(subst suminf_cong, rule nn_integral_cong_AE)
      (auto intro!: eventuallyI  simp add: indicator_def floor_eq4)
  also have "… = (∑n. ennreal (f n))"
    by (auto intro!: suminf_cong simp add: nn_integral_cmult)
  also have "… = infsetsum f {0..}"
    using assms suminf_ennreal2 abs_summable_on_nat_iff' summable_norm_cancel
    by (auto simp add: infsetsum_nat)
  finally show ?thesis
    by simp
qed

(* The non-negative integral of f over the counting measure on nat equals the
   non-negative Lebesgue integral over {0..} of the step function x ↦ f (nat ⌊x⌋). *)
lemma nn_integral_nats_reals:
  shows "(∫⇧+ i. ennreal (f i) ∂count_space UNIV) = ∫⇧+x∈{0::real..}. ennreal (f (nat ⌊x⌋))∂lborel"
proof -
  have "x < 1 + (floor x)"for x::real
    by linarith
  then have "∃n. real n ≤ x ∧ x < 1 + real n" if "x ≥ 0" for x
    using that of_nat_floor by (intro exI[of _ "nat (floor x)"]) auto
  then have "{0..} = (⋃n. {real n..<real (Suc n)})"
    by auto
  then have "∫⇧+x∈{0::real..}. f (nat ⌊x⌋)∂lborel =
             (∑n. ∫⇧+x∈{real n..<1 + real n}. ennreal (f (nat ⌊x⌋))∂lborel)"
    by (auto simp add: disjoint_family_on_def nn_integral_disjoint_family)
  also have "… = (∑n. ∫⇧+x∈{real n..<1 + real n}. ennreal (f n)∂lborel)"
    by(subst suminf_cong,rule nn_integral_cong_AE)
      (auto intro!: eventuallyI  simp add: indicator_def floor_eq4)
  also have "… = (∑n. ennreal (f n))"
    by (auto intro!: suminf_cong simp add: nn_integral_cmult)
  also have "… = (∫⇧+ i. ennreal (f i) ∂count_space UNIV)"
    by (simp add: nn_integral_count_space_nat)
  finally show ?thesis
    by simp
qed

(* For an antitone f, the integral of f over {0..} is bounded by the integral of the
   step function x ↦ f (nat ⌊x⌋), because ⌊x⌋ ≤ x. *)
lemma nn_integral_floor_less_eq:
assumes "⋀x y. x ≤ y ⟹ f y ≤ f x"
shows "∫⇧+x∈{0::real..}. ennreal (f x)∂lborel ≤ ∫⇧+x∈{0::real..}. ennreal (f (nat ⌊x⌋))∂lborel"
using assms by (auto simp add: indicator_def intro!: nn_integral_mono ennreal_leI)

(* A finite non-negative integral of the norm over the counting measure on A
   gives absolute summability on A. *)
lemma nn_integral_finite_imp_abs_sumable_on:
fixes f :: "'a ⇒ 'b::{banach, second_countable_topology}"
assumes "nn_integral (count_space A) (λx. norm (f x)) < ∞"
shows   "f abs_summable_on A"
using assms unfolding abs_summable_on_def integrable_iff_bounded by auto

(* Variant of nn_integral_finite_imp_abs_sumable_on for a pointwise non-negative f,
   stated with ennreal (f x) in place of the norm. *)
lemma nn_integral_finite_imp_abs_sumable_on':
assumes "nn_integral (count_space A) (λx. ennreal (f x)) < ∞" "⋀x. f x ≥ 0"
shows   "f abs_summable_on A"
using assms unfolding abs_summable_on_def integrable_iff_bounded by auto

text ‹
We now show that $\int_0^\infty 1 - (1 - q^x) ^ n\;dx = \frac{- H_n}{\ln q}$ if $0 < q < 1$.
›

(* The polynomial ∑ i<n. x^i is integrable on {0..1} and its integral over 0..1 is the
   n-th harmonic number. The value is obtained by induction on n, using that the
   integral of x^n over 0..1 is 1 / (1 + n). *)
lemma harm_integral_x_raised_n:
  "set_integrable lborel {0::real..1} (λx. (∑i∈{..<n}. x ^ i))" (is ?thesis1)
  "LBINT x = 0..1. (∑i∈{..<n}. x ^ i) = harm n" (is ?thesis2)
proof -
  have h: "set_integrable lborel {0::real..1} (λx. (∑i∈{..<n}. x ^ i))" for n
    by (intro borel_integrable_atLeastAtMost') (auto intro!: continuous_intros)
  then show ?thesis1
    by (intro borel_integrable_atLeastAtMost') (auto intro!: continuous_intros)
  show ?thesis2
  proof (induction n)
    case (Suc n)
    have "(LBINT x=0..1.(∑i∈{..<n}. x ^ i) + x ^ n) =
          (LBINT x=0..1. (∑i∈{..<n}. x ^ i)) + (LBINT x=0..1. x ^ n)"
    proof -
      have "set_integrable lborel (einterval 0 1) (λx. (∑i∈{..<n}. x ^ i))"
        by (rule set_integrable_subset) (use h in ‹auto simp add: einterval_def›)
      moreover have "set_integrable lborel (einterval 0 1) (λx. (x ^ n))"
      proof -
        have "set_integrable lborel {0::real..1} (λx. (x ^ n))"
          by (rule borel_integrable_atLeastAtMost')
             (auto intro!: borel_integrable_atLeastAtMost' continuous_intros)
        then show ?thesis
          by (rule set_integrable_subset) (auto simp add: einterval_def)
      qed
      ultimately show ?thesis
        by (auto intro!: borel_integrable_atLeastAtMost' simp add:  interval_lebesgue_integrable_def)
    qed
    also have "(LBINT x=0..1. x ^ n) = 1 / (1 + real n)"
    proof -
      have "(LBINT x=0..1. x ^ n) = LBINT x. x ^ n * indicator {0..1} x "
      proof -
        (* the open and the closed unit interval differ only in the two end points *)
        have "AE x in lborel. x ^ n * indicator {0..1} x = indicator (einterval 0 1) x * x ^ n"
          by(rule eventually_mono[OF eventually_conj[OF  AE_lborel_singleton[of 1]
                  AE_lborel_singleton[of 0]]])
            (auto simp add: indicator_def einterval_def)
        then show ?thesis
          using integral_cong_AE unfolding interval_lebesgue_integral_def set_lebesgue_integral_def
          by (auto intro!: integral_cong_AE)
      qed
      then show ?thesis
        by (auto simp add: integral_power)
    qed
    finally show ?case
      using Suc by (auto simp add: harm_def inverse_eq_divide)
  qed (auto simp add: harm_def)
qed

(* (1 - x^n) / (1 - x) agrees with the geometric sum ∑ i<n. x^i away from x = 1, so
   integrability and the value harm n carry over from harm_integral_x_raised_n. *)
lemma harm_integral_0_1_fraction:
  "set_integrable lborel {0::real..1} (λx. (1 - x ^ n) / (1 - x))"
  "(LBINT x = 0..1. ((1 - x ^ n) / (1 - x))) = harm n"
proof -
  show "set_integrable lborel {0::real..1} (λx. (1 - x ^ n) / (1 - x))"
  proof -
    have "AE x∈{0::real..1} in lborel. (1 - x ^ n) / (1 - x) = sum ((^) x) {..<n}"
      by (auto intro!: eventually_mono[OF AE_lborel_singleton[of 1]] simp add: sum_gp_strict)
    with harm_integral_x_raised_n show ?thesis
      by (subst set_integrable_cong_AE) auto
  qed
  moreover have "AE x∈{0::real<..<1} in lborel. (1 - x ^ n) / (1 - x) = sum ((^) x) {..<n}"
    by (auto simp add: sum_gp_strict)
  moreover have "einterval (min 0 1) (max 0 1) = {0::real<..<1}"
    by (auto simp add: min_def max_def einterval_iff)
  ultimately show "(LBINT x = 0..1. ((1 - x ^ n) / (1 - x))) = harm n"
    using harm_integral_x_raised_n by (subst interval_integral_cong_AE) auto
qed

(* Integrability and value of 1 - (1 - q powr x)^n over 0..∞ for 0 < q < 1, obtained by
   the substitution x ↦ log q (1 - x), which reduces the integral to the one over 0..1
   treated in harm_integral_0_1_fraction. *)
lemma one_minus_one_minus_q_x_n_integral:
assumes "q ∈ {0<..<1}"
shows "set_integrable lborel (einterval 0 ∞) (λx. (1 - (1 - q powr x) ^ n))"
"(LBINT x=0..∞. 1 - (1 - q powr x) ^ n) = - harm n / ln q"
proof -
have [simp]: "q powr (log q (1-x)) = 1 - x" if "x ∈ {0<..<1}" for x
using that assms by (subst powr_log_cancel) auto
(* 1 and 2: limits of the substitution function at the two end points of 0..1 *)
have 1: "((ereal ∘ (λx. log q (1 - x)) ∘ real_of_ereal) ⤏ 0) (at_right 0)"
using assms unfolding zero_ereal_def ereal_tendsto_simps by (auto intro!: tendsto_eq_intros)
have 2: "((ereal ∘ (λx. log q (1-x)) ∘ real_of_ereal) ⤏ ∞) (at_left 1)"
proof -
have "filterlim ((-) 1) (at_right 0) (at_left (1::real))"
by (intro filterlim_at_withinI eventually_at_leftI[of 0]) (auto intro!: tendsto_eq_intros)
then have "LIM x at_left 1. - inverse (ln q) * - ln (1 - x) :> at_top"
using assms
by (intro filterlim_tendsto_pos_mult_at_top [OF tendsto_const])
(auto simp: filterlim_uminus_at_top intro!: filterlim_compose[OF ln_at_0])
then show ?thesis
unfolding one_ereal_def ereal_tendsto_simps log_def by (simp add: field_simps)
qed
(* 3: integrability of the substituted integrand on 0..1 *)
have 3: "set_integrable lborel (einterval 0 1)
(λx. (1 - (1 - q powr (log q (1 - x))) ^ n) * (- 1 / (ln q * (1 - x))))"
proof -
have "set_integrable lborel (einterval 0 1) (λx. - (1 / ln q) * ((1 - x ^ n) / (1 - x)))"
by(intro set_integrable_mult_right)
(auto intro!: harm_integral_0_1_fraction intro: set_integrable_subset simp add: einterval_def)
then show ?thesis
by(subst set_integrable_cong_AE[where g="λx. - (1 / ln q) * ((1 - x ^ n) / (1 - x))"])
(auto intro!: eventuallyI simp add: einterval_def)
qed
(* 4: value of the substituted integral on 0..1 *)
have 4: "LBINT x=0..1. - ((1 - (1 - q powr log q (1 - x)) ^ n) / (ln q * (1 - x))) = - (harm n / ln q)"
(is "?lhs = ?rhs")
proof -
have "?lhs = LBINT x=0..1. ((1 - x ^ n) / (1 - x)) * (- 1 / ln q)"
using assms
by (intro interval_integral_cong_AE)
(auto intro!: eventuallyI simp add: max_def einterval_def field_simps)
also have "… = harm n * (-1 / ln q)"
using harm_integral_0_1_fraction by (subst interval_lebesgue_integral_mult_left) auto
finally show ?thesis
by auto
qed
(* the substitution rule instantiated with g = log q (1 - x) and its derivative g' *)
note sub = interval_integral_substitution_nonneg
[where f = "(λx. (1 - (1 - q powr x) ^ n))" and g="(λx. log q (1-x))"
and g'="(λx. - 1 / (ln q * (1 - x)))" and a = 0 and b = 1]
show "set_integrable lborel (einterval 0 ∞) (λx. (1 - (1 - q powr x) ^ n))"
using assms 1 2 3 4
by (intro sub) (auto intro!: derivative_eq_intros mult_nonneg_nonpos2 tendsto_intros power_le_one)
show "(LBINT x=0..∞. 1 - (1 - q powr x) ^ n) = - harm n / ln q"
using assms 1 2 3 4
by (subst sub) (auto intro!: derivative_eq_intros mult_nonneg_nonpos2 tendsto_intros power_le_one)
qed

(* The non-negative integral of 1 - (1 - q powr x)^n over {0..} equals the interval
   Lebesgue integral over 0..∞; the closed half line and the open interval differ
   only in the point 0. *)
lemma one_minus_one_minus_q_x_n_nn_integral:
  fixes q::real
  assumes "q ∈ {0<..<1}"
  shows "set_nn_integral lborel {0..} (λx. (1 - (1 - q powr x) ^ n)) =
         LBINT x=0..∞. 1 - (1 - q powr x) ^ n"
proof -
  have "set_nn_integral  lborel {0..} (λx. (1 - (1 - q powr x) ^ n)) =
        nn_integral lborel (λx. indicator (einterval 0 ∞) x *  (1 - (1 - q powr x) ^ n))"
    using assms by (intro nn_integral_cong_AE eventually_mono[OF AE_lborel_singleton[of 0]])
                   (auto simp add: indicator_def einterval_def)
  also have "… = ennreal (LBINT x. indicator (einterval 0 ∞) x * (1 - (1 - q powr x) ^ n))"
    using one_minus_one_minus_q_x_n_integral assms
    by(intro nn_integral_eq_integral)
      (auto simp add: indicator_def einterval_def set_integrable_def
            intro!: eventuallyI power_le_one powr_le1)
  finally show ?thesis
    by (simp add: interval_lebesgue_integral_def set_lebesgue_integral_def)
qed

text ‹
We can now derive bounds for the expected height.
›

context random_skip_list
begin

(* Expected height: the expectation of the height distribution H⇩N n, taken as a real. *)
definition EH⇩N where "EH⇩N n = measure_pmf.expectation (H⇩N n) real"

(* Bounds on the expected height for n > 0. The expectation is written as a sum of tail
   probabilities f i = 1 - (1 - q^i)^n, and this sum is compared from above and below
   with the integral of f' x = 1 - (1 - q powr x)^n, whose value is - harm n / ln q. *)
lemma EH⇩N_bounds':
  fixes n::nat
  assumes "p ∈ {0<..<1}" "0 < n"
  shows "- harm n / ln q - 1 ≤ EH⇩N n"
        "EH⇩N n ≤ - harm n / ln q"
        "integrable (H⇩N n) real"
proof -
  define f where "f = (λx. 1 - (1 - q ^ x) ^ n)"
  define f' where "f' = (λx. 1 - (1 - q powr x) ^ n)"
  have q: "q ∈ {0<..<1}"
    unfolding q_def using assms by auto
  have f_descending: "f y ≤ f x" if "x ≤ y" for x y
    unfolding f_def using that q
    by (auto intro!: power_mono simp add: power_decreasing power_le_one_iff)
  have f'_descending: "f' y ≤ f' x" if "x ≤ y" "0 ≤ x" for x y
    unfolding f'_def using that q
    by (auto intro!: power_mono simp add: ln_powr powr_def mult_nonneg_nonpos)
  have [simp]: "harm n / ln q <= 0"
    using harm_nonneg ln_ge_zero_imp_ge_one q by (intro divide_nonneg_neg) auto
  (* the sum of f lies above, the shifted sum of f below the integral of f' *)
  have f_nn_integral_harm:
    "- harm n / ln q ≤ ∫⇧+ x. (f x) ∂count_space UNIV"
    "(∫⇧+ i. f (i + 1) ∂count_space UNIV) ≤ - harm n / ln q"
  proof -
    have "(∫⇧+ i. f (i + 1) ∂count_space UNIV) = (∫⇧+x∈{0::real..}. (f (nat ⌊x⌋ + 1))∂lborel)"
      using nn_integral_nats_reals by auto
    also have "… = ∫⇧+x∈{0::real..}. ennreal (f' (nat ⌊x⌋ + 1))∂lborel"
    proof -
      have "0 ≤ x ⟹ (1 - q * q ^ nat ⌊x⌋) ^ n = (1 - q powr (1 + real_of_int ⌊x⌋)) ^ n" for x::real
        using q by (subst powr_realpow [symmetric]) (auto simp: powr_add)
      then show ?thesis
        unfolding f_def f'_def using q
        by (auto intro!: nn_integral_cong ennreal_cong  simp add: powr_real_of_int indicator_def)
    qed
    also have "… ≤ set_nn_integral lborel {0..} f'"
    proof -
      have "x ≤ 1 + real_of_int ⌊x⌋" for x
        by linarith
      then show ?thesis
        by (auto simp add: indicator_def intro!: f'_descending nn_integral_mono ennreal_leI)
    qed
    also have harm_integral_f': "… = - harm n / ln q"
      unfolding f'_def using q
      by (auto intro!: ennreal_cong
               simp add: one_minus_one_minus_q_x_n_nn_integral one_minus_one_minus_q_x_n_integral)
    finally show "(∫⇧+ i. f (i + 1) ∂count_space UNIV) ≤ - harm n / ln q"
      by simp
    note harm_integral_f'[symmetric]
    also have "set_nn_integral lborel {0..} f' ≤ ∫⇧+x∈{0::real..}. f' (nat ⌊x⌋)∂lborel"
      using assms f'_descending
      by (auto simp add: indicator_def intro!: nn_integral_mono ennreal_leI)
    also have "… = ∫⇧+x∈{0::real..}. f (nat ⌊x⌋)∂lborel"
      unfolding f_def f'_def
      using q by (auto intro!: nn_integral_cong ennreal_cong simp add: powr_real_of_int indicator_def)
    also have "… = (∫⇧+ x. f x ∂count_space UNIV)"
      using nn_integral_nats_reals by auto
    finally show "- harm n / ln q ≤ ∫⇧+ x. f x ∂count_space UNIV"
      by simp
  qed
  then have f1_abs_summable_on: "(λi. f (i + 1)) abs_summable_on UNIV"
    unfolding f_def using q
    by (intro nn_integral_finite_imp_abs_sumable_on')
       (auto simp add: f_def le_less_trans intro!: power_le_one mult_le_one)
  then have f_abs_summable_on: "f abs_summable_on {1..}"
    using Suc_le_lessD greaterThan_0
    by (subst abs_summable_on_reindex_bij_betw[symmetric, where g="λx. x + 1" and A="UNIV"]) auto
  also have "(f abs_summable_on {1..}) = ((λx. measure_pmf.prob (H⇩N n) {x..}) abs_summable_on {1..})"
  proof -
    have "((λx. measure_pmf.prob (H⇩N n) {x..}) abs_summable_on {1..}) =
          ((λx. measure_pmf.prob (H⇩N n) {x - 1<..}) abs_summable_on {1..})"
      by (auto intro!: measure_prob_cong_0 abs_summable_on_cong)
    also have "… = (f abs_summable_on {1..})"
      using assms
      by (intro abs_summable_on_cong) (auto simp add: f_def prob_Max_IID_geometric_greaterThan)
    finally show ?thesis
      by simp
  qed
  finally have EH⇩N_sum:
    "EH⇩N n = (∑⇩ai∈{1..}. measure_pmf.prob (H⇩N n) {i..})"
    "integrable (measure_pmf (H⇩N n)) real"
    unfolding EH⇩N_def using expectation_prob_atLeast by auto
  then show "integrable (measure_pmf (H⇩N n)) real"
    by simp
  have EH⇩N_sum': "EH⇩N n = infsetsum f {1..}"
  proof -
    have "EH⇩N n = (∑⇩ak∈{1..}. measure_pmf.prob (H⇩N n) {k - 1<..})"
      unfolding EH⇩N_sum by (auto intro!: measure_prob_cong_0 infsetsum_cong)
    also have "… = infsetsum f {1..}"
      using assms
      by (intro infsetsum_cong) (auto simp add: f_def prob_Max_IID_geometric_greaterThan)
    finally show ?thesis
      by simp
  qed
  (* upper bound *)
  also have "… = (∑⇩ak. f (k + 1))"
    using Suc_le_lessD greaterThan_0
    by (subst infsetsum_reindex_bij_betw[symmetric, where g="λx. x + 1" and A="UNIV"]) auto
  also have "ennreal … = (∫⇧+x∈{0::real..}. f (nat ⌊x⌋ + 1)∂lborel)"
    using f1_abs_summable_on q
    by (intro infsetsum_set_nn_integral_reals) (auto simp add: f_def mult_le_one power_le_one)
  also have "… = (∫⇧+ i. f (i + 1) ∂count_space UNIV)"
    using nn_integral_nats_reals by auto
  also have "… ≤ - harm n / ln q"
    using f_nn_integral_harm by auto
  finally show "EH⇩N n ≤ - harm n / ln q"
    by (subst (asm) ennreal_le_iff) (auto)
  (* lower bound: adding the summand f 0 = 1 completes the sum over all of nat *)
  have "EH⇩N n + 1 = (∑⇩ax∈{Suc 0..}. f x) + (∑⇩ax∈{0}. f x)"
    using assms by (subst EH⇩N_sum') (auto simp add: f_def)
  also have "… = infsetsum f UNIV"
    using f_abs_summable_on by (subst infsetsum_Un_disjoint[symmetric]) (auto intro!: infsetsum_cong)
  also have "… = (∫⇧+x∈{0::real..}. f (nat ⌊x⌋)∂lborel)"
  proof -
    have "f abs_summable_on ({0} ∪ {1..})"
      using f_abs_summable_on by (intro abs_summable_on_union) (auto)
    also have "{0::nat} ∪ {1..} = UNIV"
      by auto
    finally show ?thesis
      using q
      by (intro infsetsum_set_nn_integral_reals) (auto simp add: f_def mult_le_one power_le_one)
  qed
  also have "… = (∫⇧+ x. f x ∂count_space UNIV)"
    using nn_integral_nats_reals by auto
  also have "... ≥ - harm n / ln q"
    using f_nn_integral_harm by auto
  finally have "- harm n / ln q ≤ EH⇩N n + 1"
    by (subst (asm) ennreal_le_iff) (auto simp add: EH⇩N_def)
  then show "- harm n / ln q - 1 ≤ EH⇩N n"
    by simp
qed

(* Bounds on the expected height for every n. Each claim is proved by a case distinction
   on n = 0; the case n > 0 is covered by EH⇩N_bounds'. *)
theorem EH⇩N_bounds:
fixes n::nat
assumes "p ∈ {0<..<1}"
shows
"- harm n / ln q - 1 ≤ EH⇩N n"
"EH⇩N n ≤ - harm n / ln q"
"integrable (H⇩N n) real"
proof -
show "- harm n / ln q - 1 ≤ EH⇩N n"
using assms EH⇩N_bounds'
by (cases "n = 0") (auto simp add: EH⇩N_def H⇩N_def H_def SL_def harm_expand)
show "EH⇩N n ≤ - harm n / ln q"
using assms EH⇩N_bounds'
by (cases "n = 0") (auto simp add: EH⇩N_def H⇩N_def H_def SL_def harm_expand)
show "integrable (H⇩N n) real"
using assms EH⇩N_bounds'
by (cases "n = 0") (auto simp add: H⇩N_def H_def SL_def intro!: integrable_measure_pmf_finite)
qed

end (* context random_skip_list *)

subsection ‹Expected Length of Search Path›

text ‹
Let @{term "A::'a::linorder set"} and @{term "f::'a ⇒ nat"} where f is an abstract description
of a skip list (assign each value its maximum level). steps A f s u l starts on the rightmost element
on level s in the skip lists. If possible it moves up, if not it moves to the left. For every step
up it adds cost u and for every step to the left it adds cost l. steps A f 0 1 1 therefore walks
from the bottom right corner of a skip list to the top left corner of a skip list and counts
all steps.
›

― ‹NOTE: You could also define steps with lsteps and then prove that the following recursive
definition holds›

(* steps A f l up left walks over the finite set A starting at its maximum m on level l:
   if f m < l the element is skipped, if f m > l the level is raised at cost up, and if
   f m = l the element is removed at cost left. The result is 0 for an empty or infinite A.
   Termination: either card A decreases, or card A stays the same and Max (f ` A) - l decreases. *)
function steps :: "'a :: linorder set ⇒ ('a ⇒ nat) ⇒ nat ⇒ nat ⇒ nat ⇒ nat" where
  "steps A f l up left = (if A = {} ∨ infinite A
     then 0
     else (let m = Max A in (if f m < l then       steps (A - {m}) f l up left
                        else (if f m > l then up + steps A f (l + 1) up left
                        else                  left + steps (A - {m}) f l up left))))"
  by pat_completeness auto
termination
proof (relation "(λ(A,f,l,a,b). card A) <*mlex*> (λ(A,f,l,a,b). Max (f ` A) - l) <*mlex*> {}", goal_cases)
  case 1
  then show ?case
    by(intro wf_mlex wf_empty)
next
  case 2
  then show ?case
    by (intro mlex_less) (auto simp: card_gt_0_iff)
next
  case (3 A f l a b x)
  then have "Max (f ` A) - Suc l < Max (f ` A) - l"
    by (meson Max_gr_iff Max_in diff_less_mono2 finite_imageI imageI image_is_empty lessI)
  with 3 have "((A, f, l + 1, a, b), A, f, l, a, b) ∈ (λ(A, f, l, a, b). Max (f ` A) - l) <*mlex*> {}"
    by (intro mlex_less) (auto)
  with 3 show ?case apply - apply(rule mlex_leq) by auto
next
  case 4
  then show ?case by (intro mlex_less) (auto simp: card_gt_0_iff)
qed

declare steps.simps[simp del]

text ‹
lsteps is similar to steps but is using lists instead of sets. This makes the proofs where we use
induction easier.
›

(* List version of steps: the head x of the list takes the role of the current element.
   If f x < l the head is skipped, if f x > l the level is raised at cost up, and if
   f x = l the head is dropped at cost left.
   Termination: either the list gets shorter, or its length stays the same and
   Max (f ` set xs) - l decreases. *)
function lsteps :: "'a list ⇒ ('a ⇒ nat) ⇒ nat ⇒ nat ⇒ nat ⇒ nat" where
  "lsteps [] f l up left = 0" |
  "lsteps (x#xs) f l up left = (if       f x < l then lsteps xs f l up left
                                else (if f x > l then up + lsteps (x#xs) f (l + 1) up left
                                else                        left + lsteps xs f l up left))"
  by pat_completeness auto
termination
proof (relation "(λ(xs,f,l,a,b). length xs) <*mlex*> (λ(xs,f,l,a,b).
                   Max (f ` set xs) - l) <*mlex*> {}",
       goal_cases)
  case 1
  then show ?case
    by(intro wf_mlex wf_empty)
next
  case 2
  then show ?case
    by (auto intro: mlex_less simp: card_gt_0_iff)
next
  case (3 n f l a b)
  show ?case
    by (rule mlex_leq) (use 3 in ‹auto intro: mlex_less mlex_leq intro!:  diff_less_mono2 simp add: Max_gr_iff›)
next
  case 4
  then show ?case by (intro mlex_less) (auto simp: card_gt_0_iff)
qed

declare lsteps.simps(2)[simp del]

(* On the empty set steps returns 0, directly from the defining equation. *)
lemma steps_empty [simp]: "steps {} f l up left = 0"
  by (simp add: steps.simps)

(* steps on a set agrees with lsteps on the descending sorted list of that set.
   For a finite non-empty A the proof is by lsteps induction over the reversed sorted list;
   otherwise both sides are 0. *)
lemma steps_lsteps: "steps A f l u v = lsteps (rev (sorted_list_of_set A)) f l u v"
proof (cases "finite A ∧ A ≠ {}")
  case True
  then show ?thesis
  proof(induction "(rev (sorted_list_of_set A))" f l u v arbitrary: A rule: lsteps.induct)
    case (2 y ys f l u v A)
    (* NOTE(review): sorted_list_of_set_Max_snoc is presumably provided by theory Misc
       (splitting off Max A as the last element of the sorted list) - confirm. *)
    then have y_ys: "y = Max A" "ys  = rev (sorted_list_of_set (A - {y}))"
      by (auto simp add: sorted_list_of_set_Max_snoc)
    consider (a) "l < f y" | (b) "f y < l" | (c) "f y = l"
      by fastforce
    then have "steps A f l u v = lsteps (y#ys) f l u v"
    proof cases
      case a
      then show ?thesis
        by (subst steps.simps, subst lsteps.simps) (use y_ys 2 in auto)
    next
      case b
      then show ?thesis
        using y_ys 2(1) by (cases "ys = []") (auto simp add: steps.simps lsteps.simps)
    next
      case c
      then have "steps (A - {Max A}) f l u v =
                 lsteps (rev (sorted_list_of_set (A - {Max A}))) f l u v"
        by (cases "A = {Max A}") (use y_ys 2 in ‹auto intro!: 2(3) simp add: steps.simps›)
      then show ?thesis
        by (subst steps.simps, subst lsteps.simps) (use y_ys 2 in auto)
    qed
    then show ?case
      using 2 by simp
  qed (auto)
qed (auto simp add: steps.simps)

(* Composing the level function with g is the same as mapping g over the list. *)
lemma lsteps_comp_map: "lsteps zs (f ∘ g) l u v = lsteps (map g zs) f l u v"
by (induction zs "f ∘ g" l u v rule: lsteps.induct) (auto simp add: lsteps.simps)

(* For a monotone injective g on a finite set A, composing the level function with g is
   the same as running steps on the image g ` A. Both sides are reduced to lsteps. *)
lemma steps_image:
  assumes "finite A" "mono_on A g" "inj_on g A"
  shows "steps A (f ∘ g) l u v = steps (g ` A) f l u v"
proof -
  have "(sorted_list_of_set (g ` A)) = map g (sorted_list_of_set A)"
    using sorted_list_of_set_image assms by auto
  also have "rev … = map g (rev (sorted_list_of_set A))"
    using rev_map by auto
  finally show ?thesis
    by (simp add: steps_lsteps lsteps_comp_map)
qed

(* Congruence rule: lsteps only depends on the values of the level function on the
   elements of the list. *)
lemma lsteps_cong:
assumes "ys = xs" "⋀x. x ∈ set xs ⟹ f x = g x" "l = l'"
shows "lsteps xs f l u v = lsteps ys g l' u v"
using assms proof (induction xs f l u v arbitrary: ys l' rule: lsteps.induct)
case (2 x xs f l up left)
then show ?case
by (subst ‹ys = x # xs›, subst lsteps.simps, subst (2) lsteps.simps) auto
qed (auto)

(* Congruence rule: steps only depends on the values of the level function on A.
   Reduced to lsteps_cong via steps_lsteps. *)
lemma steps_cong:
assumes "A = B" "⋀x. x ∈ A ⟹ f x = g x" "l = l'"
shows   "steps A f l u v = steps B g l' u v"
using assms
by (cases "A = {} ∨ infinite A") (auto simp add: steps_lsteps steps.simps intro!: lsteps_cong)

(* Shifting all levels and the start level by the same amount m does not change lsteps. *)
lemma lsteps_f_add':
  shows "lsteps xs f l u v = lsteps xs (λx. f x + m) (l + m) u v"
  by  (induction xs f l u v rule: lsteps.induct) (auto simp add: lsteps.simps)

(* The same shift invariance for steps, reduced to lsteps_f_add' via steps_lsteps. *)
lemma steps_f_add':
  shows "steps A f l u v = steps A (λx. f x + m) (l + m) u v"
  by (cases "A = {} ∨ infinite A") (auto simp add: steps_lsteps steps.simps intro!: lsteps_f_add')

(* Elements whose level is below m ≤ l can be filtered out of the list without
   changing lsteps. *)
lemma lsteps_smaller_set:
assumes "m ≤ l"
shows "lsteps xs f l u v = lsteps [x ← xs. m ≤ f x] f l u v"
using assms by (induction xs f l u v rule: lsteps.induct) (auto simp add: lsteps.simps)

(* Set version of lsteps_smaller_set: elements of A with level below m ≤ l can be
   removed without changing steps. *)
lemma steps_smaller_set:
assumes "finite A" "m ≤ l"
shows "steps A f l u v = steps {x∈A. f x ≥ m} f l u v"
using assms
by(cases "A = {} ∨ infinite A")
(auto simp add: steps_lsteps steps.simps rev_filter sorted_list_of_set_filter
intro!: lsteps_smaller_set)

(* If every element of the list has a level below l, lsteps returns 0. *)
lemma lsteps_level_greater_fun_image:
assumes "⋀x. x ∈ set xs ⟹ f x < l"
shows   "lsteps xs f l u v = 0"
using assms by (induction xs f l u v rule: lsteps.induct) (auto simp add: lsteps.simps)

(* Upper bound on lsteps when some element reaches level l: the cost is bounded by one
   step to the left per list element plus one step up per level up to the maximum level.
   Proof by lsteps induction with a case distinction on how f x compares to l. *)
lemma lsteps_smaller_card_Max_fun':
  assumes "∃x ∈ set xs. l ≤ f x"
  shows   "lsteps xs f l u v + l * u ≤ v * length xs + u * Max ((f ` (set xs)) ∪ {0})"
using assms proof (induction xs f l u v rule: lsteps.induct)
  case (1 f l up left)
  then show ?case by (simp)
next
  case (2 x xs f l up left)
  consider "l = f x" "∃y∈set xs. l ≤ f y" | "f x = l" "¬ (∃y∈set xs. l ≤ f y)" |
           "f x < l" | "l < f x"
    by fastforce
  then show ?case
  proof cases
    assume a: "l = f x" "∃y∈set xs. l ≤ f y"
    have "lsteps (x # xs) f l up left + l * up = lsteps xs f l up left + f x * up + left"
      using a by (auto simp add: lsteps.simps)
    also have "lsteps xs f l up left + f x * up ≤ left * length xs + up * Max (f ` set xs ∪ {0})"
      using a 2 by blast
    also have "up * Max (f ` set xs ∪ {0}) ≤ up * Max (insert (f x) (f ` set xs))"
      by simp
    finally  show ?case
      by auto
  next
    assume a: "f x = l" "¬ (∃y∈set xs. l ≤ f y)"
    have "lsteps (x # xs) f l up left + l * up = lsteps xs f l up left + f x * up + left"
      using a by (auto simp add: lsteps.simps)
    also have "lsteps xs f l up left = 0"
      using a by (subst lsteps_level_greater_fun_image) auto
    also have "f x * up ≤ up * Max (insert (f x) (f ` set xs))"
      by simp
    finally show ?case
      by simp
  next
    assume a: "f x < l"
    (* the head is below the current level and is skipped *)
    then have "lsteps (x # xs) f l up left = lsteps xs f l up left"
      by (auto simp add: lsteps.simps)
    also have "… + l * up ≤ left * length (x # xs) + up * Max (insert 0 (f ` set xs))"
      using a 2 by auto
    also have "Max (insert 0 (f ` set xs)) ≤ Max (f ` set (x # xs) ∪ {0})"
      by simp
    finally show ?case
      by simp
  next
    assume "f x > l"
    then show ?case
      using 2 by (subst lsteps.simps) auto
  qed
qed

(* Set version of lsteps_smaller_card_Max_fun', transferred through steps_lsteps:
   the list length becomes card A and the list elements become A. *)
lemma steps_smaller_card_Max_fun':
  assumes "finite A" "∃x∈A. l ≤ f x"
  shows   "steps A f l up left + l * up ≤ left * card A + up * Max⇩0 (f ` A)"
proof -
  let ?xs = "rev (sorted_list_of_set A)"
  have "steps A f l up left  = lsteps (rev (sorted_list_of_set A)) f l up left"
    using steps_lsteps by blast
  also have "… + l * up ≤ left * length ?xs + up * Max (f ` set ?xs ∪ {0})"
    using assms by (intro lsteps_smaller_card_Max_fun') auto
  also have "left * length ?xs = left * card A"
    using assms sorted_list_of_set_length by (auto)
  also have "set ?xs = A"
    using assms by (auto)
  finally show ?thesis
    by simp
qed

(* With cost 0 for steps to the left, lsteps counts only the steps up: starting on level l
   it climbs to the maximum level, provided some element reaches level l.
   Proof by lsteps induction with a case distinction on how f x compares to l. *)
lemma lsteps_height:
  assumes  "∃x ∈ set xs. l ≤ f x"
  shows "lsteps xs f l up 0 + up * l = up * Max⇩0 (f ` (set xs))"
using assms proof (induction xs f l up "0::nat" rule: lsteps.induct)
  case (2 x xs f l up)
  consider "l = f x" "∃y∈set xs. l ≤ f y" | "f x = l" "¬ (∃y∈set xs. l ≤ f y)" |
           "f x < l" | "l < f x"
    by fastforce
  then show ?case
  proof cases
    assume 0: "l = f x" "∃y∈set xs. l ≤ f y"
    then have 1: "set xs ≠ {}"
      using 2 by auto
    then have "∃xa∈set xs. f x ≤ f xa"
      using 0 2 by force
    then have "f x ≤ Max (f ` set xs)"
      using 0 2 by (subst Max_ge_iff) auto
    then have "max (f x) (Max (f ` set xs)) = (Max (f ` set xs))"
      using 0 2 by (auto intro!: simp add: max_def)
    then show ?case
      using 0 1 2 by (subst lsteps.simps) (auto)
  next
    assume 0: "f x = l" "¬ (∃y∈set xs. l ≤ f y)"
    then have "Max (insert l (f ` set xs)) = l"
      by (intro Max_eqI) (auto)
    moreover have "lsteps xs f l up 0 = 0"
      using 0 by (subst lsteps_level_greater_fun_image) auto
    ultimately show ?case
      using 0 by (subst lsteps.simps) auto
  next
    assume 0: "f x < l"
    then have 1: "set xs ≠ {}"
      using 2 by auto
    then have "∃xa∈set xs. f x ≤ f xa"
      using 0 2 by force
    then have " f x ≤ Max (f ` set xs)"
      using 0 2 by (subst Max_ge_iff) auto
    then have "max (f x) (Max (f ` set xs)) = Max (f ` set xs)"
      using 0 2 by (auto intro!: simp add: max_def)
    then show ?case
      using 0 1 2 by (subst lsteps.simps) (auto)
  next
    assume "f x > l"
    then show ?case
      using 2 by (subst lsteps.simps) auto
  qed
qed (simp)

(* Starting on level 0 with cost 0 for steps to the left, steps returns up times the
   maximum level in A (0 for the empty set). Reduced to lsteps_height via steps_lsteps. *)
lemma steps_height:
  assumes "finite A"
  shows   "steps A f 0 up 0 = up * Max⇩0 (f ` A)"
proof -
  have "steps A f 0 up 0 = lsteps (rev (sorted_list_of_set A)) f 0 up 0 + up * 0"
    by (subst steps_lsteps) simp
  also have "… = up * Max (f ` A ∪ {0})" if "A ≠ {}"
    using assms that by (subst lsteps_height) auto
  finally show ?thesis
    using assms by (cases "A = {}") (auto)
qed

context random_skip_list
begin

text ‹
We can now define the pmf describing the length of the search path in a skip list.
Like the height it only depends on the number of elements in the skip list's underlying set.
›

(* R A u l is the distribution of steps A f 0 u l for a level function f drawn from SL A;
   R⇩N n u l is the instance for the set {..<n}. *)
definition R where "R A u l = map_pmf (λf. steps A f 0 u l) (SL A)"
definition R⇩N :: "nat ⇒ nat ⇒ nat ⇒ nat pmf" where "R⇩N n u l = R {..<n} u l"

(* R⇩N written directly in terms of SL⇩N, by unfolding the definitions. *)
lemma R⇩N_alt_def: "R⇩N n u l = map_pmf (λf. steps {..<n} f 0 u l) (SL⇩N n)"
unfolding SL⇩N_def R⇩N_def R_def by simp

context includes monad_normalisation
begin

(* The search path distribution over a finite set A coincides with the one over
   {..<card A}. A is relabelled with bij_mono_map_set_to_nat A, and steps_image
   transfers steps along this map. *)
lemma R_R⇩N:
  assumes "finite A" "p ∈ {0..1}"
  shows "R A u l = R⇩N (card A) u l"
proof -
  let ?steps = "λA f. steps A f 0 u l"
  let ?f' = "bij_mono_map_set_to_nat A"
  have "R A u l = SL A ⤜ (λf. return_pmf (?steps A f))"
    unfolding R_def map_pmf_def by simp
  also have "… = SL⇩N (card A) ⤜ (λf. return_pmf (?steps A (f ∘ ?f')))"
  proof -
    have "?f' x ∉ {..<card A}" if "x ∉ A" for x
      using that unfolding bij_mono_map_set_to_nat_def by (auto)
    then show ?thesis
      using assms bij_mono_map_set_to_nat unfolding SL_def SL⇩N_def
      by (subst Pi_pmf_bij_betw[of _ ?f' "{..<card A}"])
         (auto simp add: map_pmf_def)
  qed
  also have "… = SL⇩N (card A) ⤜ (λf. return_pmf (?steps {..<card A} f))"
    using assms bij_mono_map_set_to_nat bij_betw_def by (subst steps_image) (fastforce)+
  finally show ?thesis
    unfolding R⇩N_def R_def SL⇩N_def SL_def by (simp add: map_pmf_def)
qed

text ‹
@{const R⇩N} fulfills a recurrence relation. If we move up or to the left, the ``remaining'' length of the
search path again follows a slightly different probability distribution over the length.
›

(* Recurrence for the search-path distribution R⇩N on n > 0 elements with 0 < p ≤ 1.
   A Bernoulli(p) trial decides the first move:
   - leftwards: l is added and the remaining path is distributed as R⇩N (n - 1) u l;
   - upwards:   u is added and the remaining path is distributed as R⇩N (m + 1) u l,
                where m is drawn from binomial_pmf (n - 1) (1 - p).
   The proof peels off the coordinate n - 1 of the product pmf, exposes the first
   Bernoulli trial of its geometric level, and treats the two outcomes separately. *)
lemma R⇩N_recurrence:
  assumes "0 < n" "p ∈ {0<..1}"
  shows   "R⇩N n u l =
             do {
               b ← bernoulli_pmf p;
               if b then               ― ‹leftwards›
                 map_pmf (λn. n + l) (R⇩N (n - 1) u l)
               else do {               ― ‹upwards›
                 m ← binomial_pmf (n - 1) (1 - p);
                 map_pmf (λn. n + u) (R⇩N (m + 1) u l)
               }
             }"
proof -
  (* B b: the index n - 1 together with the indices below n - 1 at which b is False. *)
  define B where "B = (λb. insert (n-1) {x ∈ {..<n - 1}. ¬ b x})"
  have "R⇩N n u l = map_pmf (λf. steps {..<n} f 0 u l) (SL⇩N n)"
    by (auto simp add: R⇩N_def R_def SL⇩N_def)
  (* Split the coordinate n - 1 off the product pmf. *)
  also have "… = map_pmf (λf. steps {..<n} f 0 u l)
                     (map_pmf (λ(y, f). f(n-1 := y)) (pair_pmf (geometric_pmf p) (SL⇩N (n - 1))))"
  proof -
    have "{..<n} = insert (n - Suc 0) {..<n - 1}"
      using assms by force
    then have "(Pi_pmf {..<n} 0 (λ_. geometric_pmf p)) =
                 map_pmf (λ(y, f). f(n - 1 := y)) (pair_pmf (geometric_pmf p)
                   (Pi_pmf {..<n-1} 0 (λ_. geometric_pmf p)))"
      using assms
      by (subst Pi_pmf_insert[of "{..<n-1}" "n-1" 0 "λ_. geometric_pmf p", symmetric])  (auto)
    (* Unfold SL⇩N / SL so that the equation above applies as a rewrite. *)
    then show ?thesis
      by (simp add: SL⇩N_def SL_def)
  qed
  also have "… =
               do { g ← geometric_pmf p;
                    f ← SL⇩N (n - 1);
                    return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l)}"
    by (simp add: case_prod_beta map_pmf_def pair_pmf_def)
  (* Expose the first Bernoulli trial of the geometric level of element n - 1. *)
  also have "… =
               do { b ←  bernoulli_pmf p;
                    g ← if b then return_pmf 0 else map_pmf Suc (geometric_pmf p);
                    f ← SL⇩N (n - 1);
                    return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l)}"
    using assms by (subst geometric_bind_pmf_unfold) (auto)
  (* Push the continuation into both branches of the conditional. *)
  also have "… =
               do { b ← bernoulli_pmf p;
                    if b
                    then do { g ← return_pmf 0;
                              f ← SL⇩N (n - 1);
                              return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l) }
                    else do { g ← map_pmf Suc (geometric_pmf p);
                              f ← SL⇩N (n - 1);
                              return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l) }}"
    by (subst bind_pmf_if') (auto)
  (* Branch b = True: element n - 1 has level 0. *)
  also have "do { g ← return_pmf 0;
                  f ← SL⇩N (n - 1);
                  return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l) }  =
             do { f ← SL⇩N (n - 1);
                  return_pmf (steps {..<n} (f(n - 1 := 0)) 0 u l) }"
    by (subst bind_return_pmf) auto
  also have "… = map_pmf (λn. n + l) (map_pmf (λf. steps {..<n - 1} f 0 u l) (SL⇩N (n - 1)))"
  proof -
    have I: "{..<n} - {n - Suc 0} = {..<n - Suc 0}"
      by fastforce
    have "Max {..<n} = n - Suc 0"
      using assms by (intro Max_eqI) (auto)
    (* One unfolding of steps removes the maximal element at cost l. *)
    then have "steps {..<n} (f(n - 1 := 0)) 0 u l = l + steps {..<n - 1} f 0 u l" for f
      using assms by (subst steps.simps) (auto intro!: steps_cong simp add: I simp add: Let_def)
    (* Rewrite under the bind; field_simps reorders l + _ into _ + l. *)
    then show ?thesis
      by (auto simp add: field_simps map_pmf_def)
  qed
  also have "… = map_pmf (λn. n + l) (R⇩N (n - 1) u l)"
    unfolding R⇩N_def R_def SL⇩N_def by simp
  (* Branch b = False: element n - 1 has level at least 1. Each remaining geometric
     level is split into a Bernoulli flag and a shifted geometric level. *)
  also have "map_pmf Suc (geometric_pmf p) ⤜
               (λg. SL⇩N (n - 1) ⤜
               (λf. return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l)))
             =
             Pi_pmf {..<n - 1} True (λ_. bernoulli_pmf p) ⤜
               (λb. map_pmf Suc (geometric_pmf p) ⤜
               (λg. Pi_pmf {x ∈ {..<n - 1}. ¬ b x} 0 (λ_. map_pmf Suc (geometric_pmf p)) ⤜
               (λf. return_pmf (steps {..<n} (f(n - 1 := g)) 0 u l))))"
    using assms unfolding SL⇩N_def SL_def by (subst Pi_pmf_geometric_filter) (auto)
  (* Merge the coordinate n - 1 back into the product pmf. *)
  also have "… =
               do {
                 b ← Pi_pmf {..<n - 1} True (λ_. bernoulli_pmf p);
                 f ← Pi_pmf (insert (n-1) {x ∈ {..<n - 1}. ¬ b x}) 0 (λ_. map_pmf Suc (geometric_pmf p));
                 return_pmf (steps {..<n} f 0 u l)}" (is "_ = ?rhs")
    using assms by (subst Pi_pmf_insert') (auto)
  (* Change the default value of the product pmf from 0 to 1 and make the default explicit. *)
  also have "… =
               do {
                 b ← Pi_pmf {..<n - 1} True (λ_. bernoulli_pmf p);
                 f ← Pi_pmf (B b) 1 (λ_. map_pmf Suc (geometric_pmf p));
                 return_pmf (steps {..<n} (λx. if x ∈ (B b) then f x else 0) 0 u l)}"
    by (subst Pi_pmf_default_swap[symmetric, of _ _ _ 1]) (auto simp add: map_pmf_def B_def)
  (* Pull the Suc out of the product pmf, leaving SL (B b). *)
  also have "… =
               do {
                 b ← Pi_pmf {..<n - 1} True (λ_. bernoulli_pmf p);
                 f ← SL (B b);
                 return_pmf (steps {..<n} (λx. if x ∈ (B b) then Suc (f x) else 0) 0 u l)}"
  proof -
    have *: "(Suc ∘ f) x = Suc (f x)" for x and f::"nat ⇒ nat"
      by simp
    have "(λf. return_pmf (steps {..<n} (λx. if x ∈ B b then (Suc ∘ f) x else 0) 0 u l)) =
          (λf. return_pmf (steps {..<n} (λx. if x ∈ B b then Suc (f x) else 0) 0 u l))" for b
      by (subst *) (simp)
    then show ?thesis
      by (subst Pi_pmf_map[of _ _ 0]) (auto simp add: map_pmf_def B_def SL_def)
  qed
  (* One upward move costs u; what remains is the search path over B b. *)
  also have "… =
               do {
                 b ← Pi_pmf {..<n - 1} True (λ_. bernoulli_pmf p);
                 r ← R (B b) u l;
                 return_pmf (u + r)}"
  proof -
    have "steps {..<n} (λx. if x ∈ B b then Suc (f x) else 0) 0 u l = u + steps (B b) f 0 u l"
      for f b
    proof -
      have "Max {..<n} = n - 1"
        using assms by (intro Max_eqI) auto
      then have "steps {..<n} (λx. if x ∈ B b then Suc (f x) else 0) 0 u l =
                 u + (steps {..<n} (λx. if x ∈ (B b) then Suc (f x) else 0) 1 u l)"
        unfolding B_def using assms by (subst steps.simps) (auto simp add: Let_def)
      (* At level 1 only the elements of B b are still present. *)
      also have "steps {..<n} (λx. if x ∈ (B b) then Suc (f x) else 0) 1 u l =
                 steps (B b) (λx. if x ∈ (B b) then Suc (f x) else 0) 1 u l"
      proof -
        have "{x ∈ {..<n}. 1 ≤ (if x ∈ B b then Suc (f x) else 0)} = B b"
          using assms unfolding B_def by force
        then show ?thesis
          by (subst steps_smaller_set[of _ 1]) auto
      qed
      also have "… = steps (B b) (λx. f x + 1) 1 u l"
        by (rule steps_cong) (auto)
      (* Shifting all levels and the current level by one does not change the count. *)
      also have "… = steps (B b) f 0 u l"
        by (subst (2) steps_f_add'[of _ _ _ _ _ 1]) simp
      finally show ?thesis
        by auto
    qed
    (* Unfold R into a bind and rewrite with the identity just shown. *)
    then show ?thesis
      by (simp add: R_def map_pmf_def)
  qed
  (* Negate the Bernoulli flags and replace B b by an initial segment of equal size. *)
  also have "… = do {
                   b ← Pi_pmf {..<n - 1} False (λ_. bernoulli_pmf (1 - p));
                   let m = 1 + card {x. x < n - 1 ∧ b x};
                   r ← R {..<m} u l;
                   return_pmf (u + r)}"
  proof -
    have *: "card (insert (n - Suc 0) {x. x < n - 1 ∧ b x}) =
             (Suc (card {x. x < n - 1 ∧ b x}))" for b
      using assms by (auto simp add: card_insert_if)
    have "Pi_pmf {..<n - 1} True (λ_. bernoulli_pmf p) =
          Pi_pmf {..<n - 1} True (λ_. map_pmf Not (bernoulli_pmf (1 - p)))"
      using assms by (subst bernoulli_pmf_Not) auto
    also have "… = map_pmf ((∘) Not) (Pi_pmf {..<n - 1}  False (λ_. bernoulli_pmf (1 - p)))"
      using assms by (subst Pi_pmf_map[of _ _ False]) auto
    finally show ?thesis
      unfolding B_def using assms *
      by (subst R_R⇩N) (auto simp add: R_R⇩N map_pmf_def)
  qed
  (* The number of True flags among n - 1 independent Bernoulli(1 - p) trials is binomial. *)
  also have "… = binomial_pmf (n - 1) (1 - p) ⤜ (λm. map_pmf (λn. n + u) (R⇩N (m + 1) u l))"
    using assms
    by (subst binomial_pmf_altdef'[where A = "{..<n - 1}" and dflt = "False"])
       (auto simp add: R⇩N_def R_def SL_def map_pmf_def ac_simps)
  finally show ?thesis
    by simp
qed

end (* context includes monad_normalisation *)

text ‹
The expected height and the expected length of the search path, defined as non-negative integrals.
It is easier to prove the recurrence relation for the expected length of the search path using
non-negative integrals.
›

(* NH⇩N n: the non-negative integral (nn_integral) of "real" with respect to H⇩N n. *)
definition NH⇩N where "NH⇩N n = nn_integral (H⇩N n) real"
(* NR⇩N n u l: the non-negative integral of "real" with respect to R⇩N n u l. *)
definition NR⇩N where "NR⇩N n u l = nn_integral (R⇩N n u l) real"

(* For p strictly between 0 and 1, the nn_integral-based NH⇩N n agrees with EH⇩N n.
   The proof unfolds both definitions and rewrites with nn_integral_eq_integral;
   EH⇩N_bounds is supplied as a fact (presumably to discharge the integrability side
   condition - confirm against its statement). *)
lemma NH⇩N_EH⇩N:
assumes "p ∈ {0<..<1}"
shows "NH⇩N n = EH⇩N n"
using assms EH⇩N_bounds unfolding EH⇩N_def NH⇩N_def by (subst nn_integral_eq_integral) (auto)

(* Base case, registered as a simp rule: for n = 0 the distribution R⇩N is the point mass at 0. *)
lemma R⇩N_0 [simp]: "R⇩N 0 u l = return_pmf 0"
unfolding R⇩N_def R_def SL_def by (auto simp add: steps.simps)

(* Upper bound on NR⇩N: it is at most l * n + u * NH⇩N n.
   The bound is obtained pointwise from steps_smaller_card_Max_fun' and then
   integrated over SL⇩N n. *)
lemma NR⇩N_bounds:
  fixes u l::nat
  shows "NR⇩N n u l ≤ l * n + u * NH⇩N n"
proof -
  (* Restate the definition with the integrand coerced from nat directly. *)
  have "NR⇩N n u l = ∫⇧+ x. x ∂measure_pmf (R⇩N n u l)"
    unfolding NR⇩N_def R⇩N_alt_def
    by (simp add: ennreal_of_nat_eq_real_of_nat)
  (* Pointwise bound on steps, lifted to the integral by monotonicity. *)
  also have "… ≤ ∫⇧+ x. x ∂(measure_pmf (map_pmf (λf. l * n + u * Max⇩0 (f ` {..<n})) (SL⇩N n)))"
    using of_nat_mono[OF steps_smaller_card_Max_fun'[of "{..<n}" 0 _ u l]] unfolding R⇩N_alt_def
    by (cases "n = 0") (auto intro!: nn_integral_mono)
  (* Split the integral into the constant part l * n and u times the integral of the height. *)
  also have "… = l * n + u * NH⇩N n"
    unfolding NH⇩N_def H⇩N_def H_def SL⇩N_def
    by (auto simp add: nn_integral_add nn_integral_cmult ennreal_of_nat_eq_real_of_nat ennreal_mult)
  finally show "NR⇩N n u l ≤ l * n + u * NH⇩N n"
    by simp
qed

lemma NR⇩N_recurrence:
assumes "0 < n" "p ∈ {0<..<1}"
shows "NR⇩N n u l = (p * (l + NR⇩N (n - 1) u l) +
q * (u + (∑k<n - 1. NR⇩N (k + 1) u l * (pmf (binomial_pmf (n - 1) q) k))))
/ (1 - (q ^ n))"
proof -
define B where "B = (λn k. pmf (binomial_pmf n q) k)"
have q: "q ∈ {0<..<1}"
using assms unfolding q_def by auto
then have "q ^ n < 1"
using assms power_Suc_less_one by (induction n) (auto)
then have qn: "q ^ n ∈ {0<..<1}"
using assms q by (auto)
have "NR⇩N n u l = p * (l + NR⇩N (n - 1) u l) +
q * (u + ∫⇧+ k. NR⇩N (k + 1) u l  ∂measure_pmf (binomial_pmf (n - 1) q))"
using assms unfolding NR⇩N_def
by(subst R⇩N_recurrence)
also have "(∫⇧+ m. NR⇩N (m + 1) u l  ∂measure_pmf (binomial_pmf (n - 1) q)) =
(∑k≤n - 1. NR⇩N (k + 1) u l * B (n - 1) k)"
using assms unfolding B_def q_def
also have "… = (∑k∈{..<n - 1} ∪ {n - 1}. NR⇩N (k + 1) u l * B (n - 1) k)"
by (rule sum.cong) (auto)
also have "… = (∑k<n - 1. NR⇩N (k + 1) u l * B (n - 1) k) + NR⇩N n u l * q ^ (n - 1)"
unfolding B_def q_def using assms by (subst sum.union_disjoint) (auto)
finally have "NR⇩N n u l = p * (l + NR⇩N (n - 1) u l) +
q * ((∑k<n - 1. NR⇩N (k + 1) u l * B (n - 1) k) + u) +
NR⇩N n u l * (q ^ (n - 1)) * q"
using assms by (auto simp add: field_simps numerals)
also have "NR⇩N n u l * (q ^ (n - 1)) * q = (q ^ n) * NR⇩N n u l"
using q power_minus_mult[of _ q] assms
by (subst mult_ac, subst ennreal_mult[symmetric], auto simp add: mult_ac)
finally have 1: "NR⇩N n u l = p * (l + NR⇩N (n - 1) u l) +
q * (u + (∑k<n - 1. NR⇩N (k + 1) u l * (B (n - 1) k))) +
(q ^ n) * NR⇩N n u l "
have "x - z = y" if "x = y + z" "z ≠ ⊤" for x y z::ennreal
using that by (subst that) (auto)
have "NR⇩N n u l ≤ l * n + u * NH⇩N n"
using NR⇩N_bounds by (auto simp add: ennreal_of_nat_eq_real_of_nat)
also have "NH⇩N n = EH⇩N n"
using assms NH⇩N_EH⇩N by auto
also have "(l * n) + u * ennreal (EH⇩N n) < ⊤"
`