Theory Frequency_Moments.Frequency_Moments_Preliminary_Results

section ‹Preliminary Results›

theory Frequency_Moments_Preliminary_Results
  imports
    "HOL.Transcendental"
    "HOL-Computational_Algebra.Primes"
    "HOL-Library.Extended_Real"
    "HOL-Library.Multiset"
    "HOL-Library.Sublist"
    Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators
    Bertrands_Postulate.Bertrand
    Expander_Graphs.Expander_Graphs_Multiset_Extras
begin

text ‹This section contains various preliminary results.›

lemma card_ordered_pairs:
  fixes M :: "('a ::linorder) set"
  assumes "finite M"
  shows "2 * card {(x,y) ∈ M × M. x < y} = card M * (card M - 1)"
proof -
  have a: "finite (M × M)" using assms by simp

  have inj_swap: "inj (λx. (snd x, fst x))"
    by (rule inj_onI, simp add: prod_eq_iff)

  have "2 * card {(x,y) ∈ M × M. x < y} =
    card {(x,y) ∈ M × M. x < y} + card ((λx. (snd x, fst x))`{(x,y) ∈ M × M. x < y})"
    by (simp add: card_image[OF inj_on_subset[OF inj_swap]])
  also have "... = card {(x,y) ∈ M × M. x < y} + card {(x,y) ∈ M × M. y < x}"
    by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff image_iff)
  also have "... = card ({(x,y) ∈ M × M. x < y} ∪ {(x,y) ∈ M × M. y < x})"
    by (intro card_Un_disjoint[symmetric] a finite_subset[where B="M × M"] subsetI) auto
  also have "... = card ((M × M) - {(x,y) ∈ M × M. x = y})"
    by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff)
  also have "... = card (M × M) - card {(x,y) ∈ M × M. x = y}"
    by (intro card_Diff_subset a finite_subset[where B="M × M"] subsetI) auto
  also have "... = card M ^ 2 - card ((λx. (x,x)) ` M)"
    using assms
    by (intro arg_cong2[where f="(-)"] arg_cong[where f="card"])
      (auto simp:power2_eq_square set_eq_iff image_iff)
  also have "... = card M ^ 2 - card M"
    by (intro arg_cong2[where f="(-)"] card_image inj_onI, auto)
  also have "... = card M * (card M - 1)"
    by (cases "card M ≥ 0", auto simp:power2_eq_square algebra_simps)
  finally show ?thesis by simp
qed

lemma ereal_mono: "x ≤ y ⟹ ereal x ≤ ereal y"
  by simp

lemma abs_ge_iff: "((x::real) ≤ abs y) = (x ≤ y ∨ x ≤ -y)"
  by linarith

lemma count_list_gr_1:
  "(x ∈ set xs) = (count_list xs x ≥ 1)"
  by (induction xs, simp, simp)

lemma count_list_append: "count_list (xs@ys) v = count_list xs v + count_list ys v"
  by (induction xs, simp, simp)

lemma count_list_lt_suffix:
  assumes "suffix a b"
  assumes "x ∈ {b ! i| i. i <  length b - length a}"
  shows  "count_list a x < count_list b x"
proof -
  have "length a ≤ length b" using assms(1)
    by (simp add: suffix_length_le)
  hence "x ∈ set (nths b {i. i < length b - length a})"
    using assms diff_commute by (auto simp add:set_nths)
  hence a:"x ∈ set (take (length b - length a) b)"
    by (subst (asm) lessThan_def[symmetric], simp)
  have "b = (take (length b - length a) b)@drop (length b - length a) b"
    by simp
  also have "... = (take (length b - length a) b)@a"
    using assms(1) suffix_take by auto
  finally have b:"b = (take (length b - length a) b)@a" by simp

  have "count_list a x < 1 + count_list a x" by simp
  also have "... ≤ count_list (take (length b - length a) b) x + count_list a x"
    using a count_list_gr_1
    by (intro add_mono, fast, simp)
  also have "... = count_list b x"
    using b count_list_append by metis
  finally show ?thesis by simp
qed

lemma suffix_drop_drop:
  assumes "x ≥ y"
  shows "suffix (drop x a) (drop y a)"
proof -
  have "drop y a = take (x - y) (drop y a)@drop (x- y) (drop y a)"
    by (subst append_take_drop_id, simp)
  also have " ... = take (x-y) (drop y a)@drop x a"
    using assms by simp
  finally have "drop y a = take (x-y) (drop y a)@drop x a" by simp
  thus ?thesis
    by (auto simp add:suffix_def)
qed

lemma count_list_card: "count_list xs x = card {k. k < length xs ∧ xs ! k = x}"
proof -
  have "count_list xs x = length (filter ((=) x) xs)"
    by (induction xs, simp, simp)
  also have "... = card {k. k < length xs ∧ xs ! k = x}"
    by (subst length_filter_conv_card, metis)
  finally show ?thesis by simp
qed

lemma card_gr_1_iff:
  assumes "finite S"  "x ∈ S"  "y ∈ S"  "x ≠ y"
  shows "card S > 1"
  using assms card_le_Suc0_iff_eq leI by auto

lemma count_list_ge_2_iff:
  assumes "y < z"
  assumes "z < length xs"
  assumes "xs ! y = xs ! z"
  shows "count_list xs (xs ! y) > 1"
proof -
  have " 1 < card {k. k < length xs ∧ xs ! k = xs ! y}"
    using assms by (intro card_gr_1_iff[where x="y" and y="z"], auto)

  thus ?thesis
    by (simp add: count_list_card)
qed

text ‹Results about multisets and sorting›

lemmas disj_induct_mset = disj_induct_mset

lemma prod_mset_conv:
  fixes f :: "'a ⇒ 'b::{comm_monoid_mult}"
  shows "prod_mset (image_mset f A) = prod (λx. f x^(count A x)) (set_mset A)"
proof (induction A rule: disj_induct_mset)
  case 1
  then show ?case by simp
next
  case (2 n M x)
  moreover have "count M x = 0" using 2 by (simp add: count_eq_zero_iff)
  moreover have "⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2 by blast
  ultimately show ?case by (simp add:algebra_simps)
qed

text ‹There is a version @{thm [source] sum_list_map_eq_sum_count} but it doesn't work
if the function maps into the reals.›

lemma sum_list_eval:
  fixes f :: "'a ⇒ 'b::{ring,semiring_1}"
  shows "sum_list (map f xs) = (∑x ∈ set xs. of_nat (count_list xs x) * f x)"
proof -
  define M where "M = mset xs"
  have "sum_mset (image_mset f M) = (∑x ∈ set_mset M. of_nat (count M x) * f x)"
  proof (induction "M" rule:disj_induct_mset)
    case 1
    then show ?case by simp
  next
    case (2 n M x)
    have a:"⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2(2) by blast
    show ?case using 2 by (simp add:a  count_eq_zero_iff[symmetric])
  qed
  moreover have "⋀x. count_list xs x = count (mset xs) x"
    by (induction xs, simp, simp)
  ultimately show ?thesis
    by (simp add:M_def sum_mset_sum_list[symmetric])
qed

lemma prod_list_eval:
  fixes f :: "'a ⇒ 'b::{ring,semiring_1,comm_monoid_mult}"
  shows "prod_list (map f xs) = (∏x ∈ set xs. (f x)^(count_list xs x))"
proof -
  define M where "M = mset xs"
  have "prod_mset (image_mset f M) = (∏x ∈ set_mset M. f x ^ (count M x))"
  proof (induction "M" rule:disj_induct_mset)
    case 1
    then show ?case by simp
  next
    case (2 n M x)
    have a:"⋀y. y ∈ set_mset M ⟹ y ≠ x" using 2(2) by blast
    have b:"count M x = 0" using 2 by (subst  count_eq_zero_iff) blast
    show ?case using 2  by (simp add:a b mult.commute)
  qed
  moreover have "⋀x. count_list xs x = count (mset xs) x"
    by (induction xs, simp, simp)
  ultimately show ?thesis
    by (simp add:M_def prod_mset_prod_list[symmetric])
qed

lemma sorted_sorted_list_of_multiset: "sorted (sorted_list_of_multiset M)"
  by (induction M, auto simp:sorted_insort)

lemma count_mset: "count (mset xs) a = count_list xs a"
  by (induction xs, auto)

lemma swap_filter_image: "filter_mset g (image_mset f A) = image_mset f (filter_mset (g ∘ f) A)"
  by (induction A, auto)

lemma list_eq_iff:
  assumes "mset xs = mset ys"
  assumes "sorted xs"
  assumes "sorted ys"
  shows "xs = ys"
  using assms properties_for_sort by blast

lemma sorted_list_of_multiset_image_commute:
  assumes "mono f"
  shows "sorted_list_of_multiset (image_mset f M) = map f (sorted_list_of_multiset M)"
proof -
  have "sorted (sorted_list_of_multiset (image_mset f M))"
    by (simp add:sorted_sorted_list_of_multiset)
  moreover have " sorted_wrt (λx y. f x ≤ f y) (sorted_list_of_multiset M)"
    by (rule sorted_wrt_mono_rel[where P="λx y. x ≤ y"])
      (auto intro: monoD[OF assms] sorted_sorted_list_of_multiset)
  hence "sorted (map f (sorted_list_of_multiset M))"
    by (subst sorted_wrt_map)
  ultimately show ?thesis
    by (intro list_eq_iff, auto)
qed

text ‹Results about rounding and floating point numbers›

lemma round_down_ge:
  "x ≤ round_down prec x + 2 powr (-prec)"
  using round_down_correct by (simp, meson diff_diff_eq diff_eq_diff_less_eq)

lemma truncate_down_ge:
  "x ≤ truncate_down prec x + abs x * 2 powr (-prec)"
proof (cases "abs x > 0")
  case True
  have "x ≤ round_down (int prec - ⌊log 2 ¦x¦⌋) x + 2 powr (-real_of_int(int prec - ⌊log 2 ¦x¦⌋))"
    by (rule round_down_ge)
  also have "... ≤ truncate_down prec x + 2 powr ( ⌊log 2 ¦x¦⌋) * 2 powr (-real prec)"
    by (rule add_mono, simp_all add:powr_add[symmetric] truncate_down_def)
  also have "... ≤ truncate_down prec x + ¦x¦ * 2 powr (-real prec)"
    using True
    by (intro add_mono mult_right_mono, simp_all add:le_log_iff[symmetric])
  finally show ?thesis by simp
next
  case False
  then show ?thesis by simp
qed

lemma truncate_down_pos:
  assumes "x ≥ 0"
  shows "x * (1 - 2 powr (-prec)) ≤ truncate_down prec x"
  by (simp add:right_diff_distrib diff_le_eq)
   (metis truncate_down_ge assms  abs_of_nonneg)

lemma truncate_down_eq:
  assumes "truncate_down r x = truncate_down r y"
  shows "abs (x-y) ≤ max (abs x) (abs y) * 2 powr (-real r)"
proof -
  have "x - y ≤ truncate_down r x + abs x * 2 powr (-real r) - y"
    by (rule diff_right_mono, rule truncate_down_ge)
  also have "... ≤ y + abs x * 2 powr (-real r) - y"
    using truncate_down_le
    by (intro diff_right_mono add_mono, subst assms(1), simp_all)
  also have "... ≤ abs x * 2 powr (-real r)" by simp
  also have "... ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
  finally have a:"x - y ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp

  have "y - x ≤ truncate_down r y + abs y * 2 powr (-real r) - x"
    by (rule diff_right_mono, rule truncate_down_ge)
  also have "... ≤ x + abs y * 2 powr (-real r) - x"
    using truncate_down_le
    by (intro diff_right_mono add_mono, subst assms(1)[symmetric], auto)
  also have "... ≤ abs y * 2 powr (-real r)" by simp
  also have "... ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp
  finally have b:"y - x ≤ max (abs x) (abs y) * 2 powr (-real r)" by simp

  show ?thesis
    using abs_le_iff a b by linarith
qed

definition rat_of_float :: "float ⇒ rat" where
  "rat_of_float f = of_int (mantissa f) *
    (if exponent f ≥ 0 then 2 ^ (nat (exponent f)) else 1 / 2 ^ (nat (-exponent f)))"

lemma real_of_rat_of_float: "real_of_rat (rat_of_float x) = real_of_float x"
proof -
  have "real_of_rat (rat_of_float x) = mantissa x * (2 powr (exponent x))"
    by (simp add:rat_of_float_def of_rat_mult of_rat_divide of_rat_power powr_realpow[symmetric] powr_minus_divide)
  also have "... = real_of_float x"
    using mantissa_exponent by simp
  finally show ?thesis by simp
qed

lemma log_est: "log 2 (real n + 1) ≤ n"
proof -
  have "1 + real n = real (n + 1)"
    by simp
  also have "... ≤ real (2 ^ n)"
    by (intro of_nat_mono suc_n_le_2_pow_n)
  also have "... = 2 powr (real n)"
    by (simp add:powr_realpow)
  finally have "1 + real n ≤ 2 powr (real n)"
    by simp
  thus ?thesis
    by (simp add: Transcendental.log_le_iff)
qed

lemma truncate_mantissa_bound:
  "abs (⌊x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)⌋) ≤ 2 ^ (r+1)" (is "?lhs ≤ _")
proof -
  define q where "q = ⌊x * 2 powr (real r - real_of_int (⌊log 2 ¦x¦⌋))⌋"

  have "¦q¦ ≤ 2 ^ (r + 1)" if a:"x > 0"
  proof -
    have "¦q¦ = q"
      using a by (intro abs_of_nonneg, simp add:q_def)
    also have "... ≤ x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)"
      unfolding q_def using of_int_floor_le by blast
    also have "... = x * 2 powr real_of_int (int r - ⌊log 2 ¦x¦⌋)"
      by auto
    also have "... = 2 powr (log 2 x + real_of_int (int r - ⌊log 2 ¦x¦⌋))"
      using a by (simp add:powr_add)
    also have "... ≤ 2 powr (real r + 1)"
      using a by (intro powr_mono, linarith+)
    also have "... = 2 ^ (r+1)"
      by (subst powr_realpow[symmetric], simp_all add:add.commute)
    finally show "¦q¦ ≤ 2 ^ (r+1)"
      by (metis of_int_le_iff of_int_numeral of_int_power)
  qed

  moreover have "¦q¦ ≤ (2 ^ (r + 1))" if a: "x < 0"
  proof -
    have "-(2 ^ (r+1) + 1) = -(2 powr (real r + 1)+1)"
      by (simp add: powr_add powr_realpow)
    also have  "... < -(2 powr (log 2 (- x) + (r - ⌊log 2 ¦x¦⌋)) + 1)"
      using a by (simp, linarith)
    also have "... = x * 2 powr (r - ⌊log 2 ¦x¦⌋) - 1"
      using a by (simp add:powr_add)
    also have "... ≤ q"
      by (simp add:q_def)
    also have "... = - ¦q¦"
      using a by (simp add: q_def mult_less_0_iff abs_if)
    finally have "-(2 ^ (r+1)+1) < - ¦q¦" using of_int_less_iff by fastforce
    hence "-(2 ^ (r+1)) ≤ - ¦q¦" by linarith
    thus "¦q¦ ≤ 2^(r+1)" by linarith
  qed

  moreover have "x = 0 ⟹ ¦q¦ ≤ 2^(r+1)"
    by (simp add:q_def)
  ultimately have "¦q¦ ≤ 2^(r+1)"
    by fastforce
  thus ?thesis using q_def by blast
qed

lemma truncate_float_bit_count:
  "bit_count (F⇩_e (float_of (truncate_down r x))) ≤ 10 + 4 * real r + 2*log 2 (2 + ¦log 2 ¦x¦¦)"
  (is "?lhs ≤ ?rhs")
proof -
  define m where "m = ⌊x * 2 powr (real r - real_of_int ⌊log 2 ¦x¦⌋)⌋"
  define e where "e = ⌊log 2 ¦x¦⌋ - int r"

  have a: "(real_of_int ⌊log 2 ¦x¦⌋ - real r) = e"
    by (simp add:e_def)
  have "abs m + 2 ≤ 2 ^ (r + 1) + 2^1"
    using truncate_mantissa_bound
    by (intro add_mono, simp_all add:m_def)
  also have "... ≤ 2 ^ (r+2)"
    by simp
  finally have b:"abs m + 2 ≤ 2 ^ (r+2)" by simp
  hence "real_of_int (¦m¦ + 2) ≤ real_of_int (4 * 2 ^ r)"
    by (subst of_int_le_iff, simp)
  hence "¦real_of_int m¦ + 2 ≤ 4 * 2 ^ r"
    by simp
  hence c:"log 2 (real_of_int (¦m¦ + 2)) ≤ r+2"
    by (simp add: Transcendental.log_le_iff powr_add powr_realpow)

  have "real_of_int (abs e + 1) ≤ real_of_int ¦⌊log 2 ¦x¦⌋¦ +  real_of_int r + 1"
    by (simp add:e_def)
  also have "... ≤ 1 + abs (log 2 (abs x)) + real_of_int r + 1"
    by (simp add:abs_le_iff, linarith)
  also have "... ≤ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))"
    by (simp add:distrib_left distrib_right)
  finally have d:"real_of_int (abs e + 1) ≤ (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by simp

  have "log 2 (real_of_int (abs e + 1)) ≤ log 2 (real_of_int r + 1) + log 2 (2 + abs (log 2 (abs x)))"
    using d by (simp flip: log_mult_pos)
  also have "... ≤ r + log 2 (2 + abs (log 2 (abs x)))"
    using log_est by (intro add_mono, simp_all add:add.commute)
  finally have e: "log 2 (real_of_int (abs e + 1)) ≤ r + log 2 (2 + abs (log 2 (abs x)))" by simp

  have "?lhs =  bit_count (F⇩_e (float_of (real_of_int m * 2 powr real_of_int e)))"
    by (simp add:truncate_down_def round_down_def m_def[symmetric] a)
  also have "... ≤ ereal (6 + (2 * log 2 (real_of_int (¦m¦ + 2)) + 2 * log 2 (real_of_int (¦e¦ + 1))))"
    using float_bit_count_2 by simp
  also have "... ≤ ereal (6 + (2 * real (r+2) + 2 * (r + log 2 (2 + abs (log 2 (abs x))))))"
    using c e
    by (subst ereal_less_eq, intro add_mono mult_left_mono, linarith+)
  also have "... = ?rhs" by simp
  finally show ?thesis by simp
qed

definition prime_above :: "nat ⇒ nat"
  where "prime_above n = (SOME x. x ∈ {n..(2*n+2)} ∧ prime x)"

text ‹The term @{term"prime_above n"} returns a prime between @{term "n::nat"} and @{term "2*(n::nat)+2"}.
Because of Bertrand's postulate there always is such a value. In a refinement of the algorithms, it may make sense to
replace this with an algorithm, that finds such a prime exactly or approximately.

The definition is intentionally inexact, to allow refinement with various algorithms, without modifying the
high-level mathematical correctness proof.›

lemma ex_subset:
  assumes "∃x ∈ A. P x"
  assumes "A ⊆ B"
  shows "∃x ∈ B. P x"
  using assms by auto

lemma
  shows prime_above_prime: "prime (prime_above n)"
  and prime_above_range: "prime_above n ∈ {n..(2*n+2)}"
proof -
  define r where "r = (λx. x ∈ {n..(2*n+2)} ∧ prime x)"
  have "∃x. r x"
  proof (cases "n>2")
    case True
    hence "n-1 > 1" by simp
    hence "∃x ∈ {(n-1)<..<(2*(n-1))}. prime x"
      using bertrand by simp
    moreover have "{n - 1<..<2 * (n - 1)} ⊆ {n..2 * n + 2}"
      by (intro subsetI, auto)
    ultimately have "∃x ∈ {n..(2*n+2)}. prime x"
      by (rule ex_subset)
    then show ?thesis by (simp add:r_def Bex_def)
  next
    case False
    hence "2 ∈ {n..(2*n+2)}"
      by simp
    moreover have "prime (2::nat)"
      using two_is_prime_nat by blast
    ultimately have "r 2"
      using r_def by simp
    then show ?thesis by (rule exI)
  qed
  moreover have "prime_above n = (SOME x. r x)"
    by (simp add:prime_above_def r_def)
  ultimately have a:"r (prime_above n)"
    using someI_ex by metis
  show "prime (prime_above n)"
    using a unfolding r_def by blast
  show "prime_above n ∈ {n..(2*n+2)}"
    using a unfolding r_def by blast
qed

lemma prime_above_min:  "prime_above n ≥ 2"
  using prime_above_prime
  by (simp add: prime_ge_2_nat)

lemma prime_above_lower_bound: "prime_above n ≥ n"
  using prime_above_range
  by simp

lemma prime_above_upper_bound: "prime_above n ≤ 2*n+2"
  using prime_above_range
  by simp

end