Theory Frequency_Moments_Preliminary_Results

section ‹Preliminary Results›

theory Frequency_Moments_Preliminary_Results
  imports 
    "HOL.Transcendental" 
    "HOL-Computational_Algebra.Primes"
    "HOL-Library.Extended_Real"
    "HOL-Library.Multiset"
    "HOL-Library.Sublist"
    Prefix_Free_Code_Combinators.Prefix_Free_Code_Combinators
    Bertrands_Postulate.Bertrand
begin

text ‹This section contains various preliminary results.›

lemma card_ordered_pairs:
  fixes M :: "('a ::linorder) set" 
  assumes "finite M"
  shows "2 * card {(x,y)  M × M. x < y} = card M * (card M - 1)"
proof -
  have a: "finite (M × M)" using assms by simp

  have inj_swap: "inj (λx. (snd x, fst x))"
    by (rule inj_onI, simp add: prod_eq_iff)

  have "2 * card {(x,y)  M × M. x < y} =
    card {(x,y)  M × M. x < y} + card ((λx. (snd x, fst x))`{(x,y)  M × M. x < y})"
    by (simp add: card_image[OF inj_on_subset[OF inj_swap]])
  also have "... = card {(x,y)  M × M. x < y} + card {(x,y)  M × M. y < x}"
    by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff image_iff)
  also have "... = card ({(x,y)  M × M. x < y}  {(x,y)  M × M. y < x})"
    by (intro card_Un_disjoint[symmetric] a finite_subset[where B="M × M"] subsetI) auto
  also have "... = card ((M × M) - {(x,y)  M × M. x = y})"
    by (auto intro: arg_cong[where f="card"] simp add:set_eq_iff) 
  also have "... = card (M × M) - card {(x,y)  M × M. x = y}"
    by (intro card_Diff_subset a finite_subset[where B="M × M"] subsetI) auto
  also have "... = card M ^ 2 - card ((λx. (x,x)) ` M)"
    using assms
    by (intro arg_cong2[where f="(-)"] arg_cong[where f="card"])
      (auto simp:power2_eq_square set_eq_iff image_iff)
  also have "... = card M ^ 2 - card M"
    by (intro arg_cong2[where f="(-)"] card_image inj_onI, auto)
  also have "... = card M * (card M - 1)"
    by (cases "card M  0", auto simp:power2_eq_square algebra_simps)
  finally show ?thesis by simp
qed

lemma ereal_mono: "x  y  ereal x  ereal y"
  by simp

lemma log_mono: "a > 1  x  y  0 < x  log a x  log a y"
  by (subst log_le_cancel_iff, auto)

lemma abs_ge_iff: "((x::real)  abs y) = (x  y  x  -y)"
  by linarith

lemma count_list_gr_1:
  "(x  set xs) = (count_list xs x  1)"
  by (induction xs, simp, simp)

lemma count_list_append: "count_list (xs@ys) v = count_list xs v + count_list ys v"
  by (induction xs, simp, simp)

lemma count_list_lt_suffix:
  assumes "suffix a b"
  assumes "x  {b ! i| i. i <  length b - length a}"
  shows  "count_list a x < count_list b x"
proof -
  have "length a  length b" using assms(1) 
    by (simp add: suffix_length_le)
  hence "x  set (nths b {i. i < length b - length a})"
    using assms diff_commute by (auto simp add:set_nths) 
  hence a:"x  set (take (length b - length a) b)"
    by (subst (asm) lessThan_def[symmetric], simp)
  have "b = (take (length b - length a) b)@drop (length b - length a) b"
    by simp
  also have "... = (take (length b - length a) b)@a"
    using assms(1) suffix_take by auto 
  finally have b:"b = (take (length b - length a) b)@a" by simp

  have "count_list a x < 1 + count_list a x" by simp
  also have "...  count_list (take (length b - length a) b) x + count_list a x"
    using a count_list_gr_1
    by (intro add_mono, fast, simp)  
  also have "... = count_list b x"
    using b count_list_append by metis
  finally show ?thesis by simp
qed

lemma suffix_drop_drop:
  assumes "x  y"
  shows "suffix (drop x a) (drop y a)"
proof -
  have "drop y a = take (x - y) (drop y a)@drop (x- y) (drop y a)"
    by (subst append_take_drop_id, simp)
  also have " ... = take (x-y) (drop y a)@drop x a"
    using assms by simp
  finally have "drop y a = take (x-y) (drop y a)@drop x a" by simp
  thus ?thesis 
    by (auto simp add:suffix_def) 
qed

lemma count_list_card: "count_list xs x = card {k. k < length xs  xs ! k = x}"
proof -
  have "count_list xs x = length (filter ((=) x) xs)"
    by (induction xs, simp, simp)
  also have "... = card {k. k < length xs  xs ! k = x}"
    by (subst length_filter_conv_card, metis)
  finally show ?thesis by simp
qed

lemma card_gr_1_iff:
  assumes "finite S"  "x  S"  "y  S"  "x  y"
  shows "card S > 1"
  using assms card_le_Suc0_iff_eq leI by auto

lemma count_list_ge_2_iff:
  assumes "y < z"
  assumes "z < length xs"
  assumes "xs ! y = xs ! z"
  shows "count_list xs (xs ! y) > 1"
proof -
  have " 1 < card {k. k < length xs  xs ! k = xs ! y}"
    using assms by (intro card_gr_1_iff[where x="y" and y="z"], auto)

  thus ?thesis
    by (simp add: count_list_card)
qed

text ‹Results about multisets and sorting›

text ‹This is a induction scheme over the distinct elements of a multisets: 
We can represent each multiset as a sum like: 
@{text "replicate_mset n1 x1 + replicate_mset n2 x2 + ... + replicate_mset nk xk"} where the 
@{term "xi"} are distinct.›

lemma disj_induct_mset:
  assumes "P {#}"
  assumes "n M x. P M  ¬(x ∈# M)  n > 0  P (M + replicate_mset n x)"
  shows "P M"
proof (induction "size M" arbitrary: M rule:nat_less_induct)
  case 1
  show ?case
  proof (cases "M = {#}")
    case True
    then show ?thesis using assms by simp
  next
    case False
    then obtain x where x_def: "x ∈# M" using multiset_nonemptyE by auto
    define M1 where "M1 = M - replicate_mset (count M x) x"
    then have M_def: "M = M1 + replicate_mset (count M x) x"
      by (metis count_le_replicate_mset_subset_eq dual_order.refl subset_mset.diff_add)
    have "size M1 < size M"
      by (metis M_def x_def count_greater_zero_iff less_add_same_cancel1 size_replicate_mset size_union)
    hence "P M1" using 1 by blast
    then show "P M" 
      apply (subst M_def, rule assms(2), simp)
      by (simp add:M1_def x_def count_eq_zero_iff[symmetric])+
  qed
qed

lemma prod_mset_conv: 
  fixes f :: "'a  'b::{comm_monoid_mult}"
  shows "prod_mset (image_mset f A) = prod (λx. f x^(count A x)) (set_mset A)"
proof (induction A rule: disj_induct_mset)
  case 1
  then show ?case by simp
next
  case (2 n M x)
  moreover have "count M x = 0" using 2 by (simp add: count_eq_zero_iff)
  moreover have "y. y  set_mset M  y  x" using 2 by blast
  ultimately show ?case by (simp add:algebra_simps) 
qed

text ‹There is a version @{thm [source] sum_list_map_eq_sum_count} but it doesn't work
if the function maps into the reals.›

lemma sum_list_eval:
  fixes f :: "'a  'b::{ring,semiring_1}"
  shows "sum_list (map f xs) = (x  set xs. of_nat (count_list xs x) * f x)"
proof -
  define M where "M = mset xs"
  have "sum_mset (image_mset f M) = (x  set_mset M. of_nat (count M x) * f x)"
  proof (induction "M" rule:disj_induct_mset)
    case 1
    then show ?case by simp
  next
    case (2 n M x)
    have a:"y. y  set_mset M  y  x" using 2(2) by blast
    show ?case using 2 by (simp add:a  count_eq_zero_iff[symmetric])
  qed
  moreover have "x. count_list xs x = count (mset xs) x" 
    by (induction xs, simp, simp)
  ultimately show ?thesis
    by (simp add:M_def sum_mset_sum_list[symmetric])
qed

lemma prod_list_eval:
  fixes f :: "'a  'b::{ring,semiring_1,comm_monoid_mult}"
  shows "prod_list (map f xs) = (x  set xs. (f x)^(count_list xs x))"
proof -
  define M where "M = mset xs"
  have "prod_mset (image_mset f M) = (x  set_mset M. f x ^ (count M x))"
  proof (induction "M" rule:disj_induct_mset)
    case 1
    then show ?case by simp
  next
    case (2 n M x)
    have a:"y. y  set_mset M  y  x" using 2(2) by blast
    have b:"count M x = 0" using 2 by (subst  count_eq_zero_iff) blast 
    show ?case using 2  by (simp add:a b mult.commute)
  qed
  moreover have "x. count_list xs x = count (mset xs) x" 
    by (induction xs, simp, simp)
  ultimately show ?thesis
    by (simp add:M_def prod_mset_prod_list[symmetric])
qed

lemma sorted_sorted_list_of_multiset: "sorted (sorted_list_of_multiset M)"
  by (induction M, auto simp:sorted_insort) 

lemma count_mset: "count (mset xs) a = count_list xs a"
  by (induction xs, auto)

lemma swap_filter_image: "filter_mset g (image_mset f A) = image_mset f (filter_mset (g  f) A)"
  by (induction A, auto)

lemma list_eq_iff:
  assumes "mset xs = mset ys"
  assumes "sorted xs"
  assumes "sorted ys"
  shows "xs = ys" 
  using assms properties_for_sort by blast

lemma sorted_list_of_multiset_image_commute:
  assumes "mono f"
  shows "sorted_list_of_multiset (image_mset f M) = map f (sorted_list_of_multiset M)"
proof -
  have "sorted (sorted_list_of_multiset (image_mset f M))" 
    by (simp add:sorted_sorted_list_of_multiset)
  moreover have " sorted_wrt (λx y. f x  f y) (sorted_list_of_multiset M)"
    by (rule sorted_wrt_mono_rel[where P="λx y. x  y"]) 
      (auto intro: monoD[OF assms] sorted_sorted_list_of_multiset)
  hence "sorted (map f (sorted_list_of_multiset M))"
    by (subst sorted_wrt_map)
  ultimately show ?thesis
    by (intro list_eq_iff, auto)
qed

text ‹Results about rounding and floating point numbers›

lemma round_down_ge:
  "x  round_down prec x + 2 powr (-prec)"
  using round_down_correct by (simp, meson diff_diff_eq diff_eq_diff_less_eq)

lemma truncate_down_ge:
  "x  truncate_down prec x + abs x * 2 powr (-prec)"
proof (cases "abs x > 0")
  case True
  have "x  round_down (int prec - log 2 ¦x¦) x + 2 powr (-real_of_int(int prec - log 2 ¦x¦))"
    by (rule round_down_ge)
  also have "...  truncate_down prec x + 2 powr ( log 2 ¦x¦) * 2 powr (-real prec)"
    by (rule add_mono, simp_all add:powr_add[symmetric] truncate_down_def)
  also have "...  truncate_down prec x + ¦x¦ * 2 powr (-real prec)"
    using True
    by (intro add_mono mult_right_mono, simp_all add:le_log_iff[symmetric])
  finally show ?thesis by simp
next
  case False
  then show ?thesis by simp
qed

lemma truncate_down_pos:
  assumes "x  0"
  shows "x * (1 - 2 powr (-prec))  truncate_down prec x"
  by (simp add:right_diff_distrib diff_le_eq)
   (metis truncate_down_ge assms  abs_of_nonneg)

lemma truncate_down_eq:
  assumes "truncate_down r x = truncate_down r y"
  shows "abs (x-y)  max (abs x) (abs y) * 2 powr (-real r)"
proof - 
  have "x - y  truncate_down r x + abs x * 2 powr (-real r) - y"
    by (rule diff_right_mono, rule truncate_down_ge)
  also have "...  y + abs x * 2 powr (-real r) - y"
    using truncate_down_le
    by (intro diff_right_mono add_mono, subst assms(1), simp_all)
  also have "...  abs x * 2 powr (-real r)" by simp
  also have "...  max (abs x) (abs y) * 2 powr (-real r)" by simp
  finally have a:"x - y  max (abs x) (abs y) * 2 powr (-real r)" by simp

  have "y - x  truncate_down r y + abs y * 2 powr (-real r) - x"
    by (rule diff_right_mono, rule truncate_down_ge)
  also have "...  x + abs y * 2 powr (-real r) - x"
    using truncate_down_le
    by (intro diff_right_mono add_mono, subst assms(1)[symmetric], auto)
  also have "...  abs y * 2 powr (-real r)" by simp
  also have "...  max (abs x) (abs y) * 2 powr (-real r)" by simp
  finally have b:"y - x  max (abs x) (abs y) * 2 powr (-real r)" by simp

  show ?thesis
    using abs_le_iff a b by linarith
qed

definition rat_of_float :: "float  rat" where 
  "rat_of_float f = of_int (mantissa f) * 
    (if exponent f  0 then 2 ^ (nat (exponent f)) else 1 / 2 ^ (nat (-exponent f)))" 

lemma real_of_rat_of_float: "real_of_rat (rat_of_float x) = real_of_float x"
proof -
  have "real_of_rat (rat_of_float x) = mantissa x * (2 powr (exponent x))"
    by (simp add:rat_of_float_def of_rat_mult of_rat_divide of_rat_power powr_realpow[symmetric] powr_minus_divide)
  also have "... = real_of_float x"
    using mantissa_exponent by simp
  finally show ?thesis by simp 
qed

lemma log_est: "log 2 (real n + 1)  n"
proof -
  have "1 + real n = real (n + 1)"
    by simp
  also have "...  real (2 ^ n)"
    by (intro of_nat_mono suc_n_le_2_pow_n)
  also have "... = 2 powr (real n)"
    by (simp add:powr_realpow)
  finally have "1 + real n  2 powr (real n)"
    by simp
  thus ?thesis
    by (simp add: Transcendental.log_le_iff)
qed

lemma truncate_mantissa_bound:
  "abs (x * 2 powr (real r - real_of_int log 2 ¦x¦))  2 ^ (r+1)" (is "?lhs  _")
proof -
  define q where "q = x * 2 powr (real r - real_of_int (log 2 ¦x¦))"

  have "abs q  2 ^ (r + 1)" if a:"x > 0"
  proof -
    have "abs q = q"
      using a by (intro abs_of_nonneg, simp add:q_def)
    also have "...  x * 2 powr (real r - real_of_int log 2 ¦x¦)"
      unfolding q_def using of_int_floor_le by blast
    also have "... = x * 2 powr real_of_int (int r - log 2 ¦x¦)"
      by auto
    also have "... = 2 powr (log 2 x + real_of_int (int r - log 2 ¦x¦))"
      using a by (simp add:powr_add)
    also have "...  2 powr (real r + 1)"
      using a by (intro powr_mono, linarith+) 
    also have "... = 2 ^ (r+1)"
      by (subst powr_realpow[symmetric], simp_all add:add.commute)
    finally show "abs q  2 ^ (r+1)" 
      by (metis of_int_le_iff of_int_numeral of_int_power)
  qed
    
  moreover have "abs q  (2 ^ (r + 1))" if a: "x < 0"
  proof -
    have "-(2 ^ (r+1) + 1) = -(2 powr (real r + 1)+1)"
      by (subst powr_realpow[symmetric], simp_all add: add.commute)
    also have  "... < -(2 powr (log 2 (- x) + (r - log 2 ¦x¦)) + 1)"
      using a by (simp, linarith)
    also have "... = x * 2 powr (r - log 2 ¦x¦) - 1"
      using a by (simp add:powr_add)
    also have "...  q"
      by (simp add:q_def)
    also have "... = - abs q"
      using a
      by (subst abs_of_neg, simp_all add: mult_pos_neg2 q_def)
    finally have "-(2 ^ (r+1)+1) < - abs q" using of_int_less_iff by fastforce
    hence "-(2 ^ (r+1))  - abs q" by linarith
    thus "abs q  2^(r+1)" by linarith
  qed

  moreover have "x = 0  abs q  2^(r+1)"
    by (simp add:q_def)
  ultimately have "abs q  2^(r+1)"
    by fastforce
  thus ?thesis using q_def by blast
qed

lemma truncate_float_bit_count:
  "bit_count (Fe (float_of (truncate_down r x)))  10 + 4 * real r + 2*log 2 (2 + ¦log 2 ¦x¦¦)" 
  (is "?lhs  ?rhs")
proof -
  define m where "m = x * 2 powr (real r - real_of_int log 2 ¦x¦)"
  define e where "e = log 2 ¦x¦ - int r"

  have a: "(real_of_int log 2 ¦x¦ - real r) = e"
    by (simp add:e_def)
  have "abs m + 2  2 ^ (r + 1) + 2^1"
    using truncate_mantissa_bound
    by (intro add_mono, simp_all add:m_def)
  also have "...  2 ^ (r+2)"
    by simp
  finally have b:"abs m + 2  2 ^ (r+2)" by simp
  hence "real_of_int (¦m¦ + 2)  real_of_int (4 * 2 ^ r)" 
    by (subst of_int_le_iff, simp)
  hence "¦real_of_int m¦ + 2  4 * 2 ^ r" 
    by simp
  hence c:"log 2 (real_of_int (¦m¦ + 2))  r+2"
    by (simp add: Transcendental.log_le_iff powr_add powr_realpow)

  have "real_of_int (abs e + 1)  real_of_int ¦log 2 ¦x¦¦ +  real_of_int r + 1"
    by (simp add:e_def)
  also have "...  1 + abs (log 2 (abs x)) + real_of_int r + 1"
    by (simp add:abs_le_iff, linarith)
  also have "...  (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))"
    by (simp add:distrib_left distrib_right)
  finally have d:"real_of_int (abs e + 1)  (real_of_int r+ 1) * (2 + abs (log 2 (abs x)))" by simp

  have "log 2 (real_of_int (abs e + 1))  log 2 (real_of_int r + 1) + log 2 (2 + abs (log 2 (abs x)))"
    using d by (simp add: log_mult[symmetric])
  also have "...  r + log 2 (2 + abs (log 2 (abs x)))"
    using log_est by (intro add_mono, simp_all add:add.commute)
  finally have e: "log 2 (real_of_int (abs e + 1))  r + log 2 (2 + abs (log 2 (abs x)))" by simp

  have "?lhs =  bit_count (Fe (float_of (real_of_int m * 2 powr real_of_int e)))"
    by (simp add:truncate_down_def round_down_def m_def[symmetric] a)
  also have "...  ereal (6 + (2 * log 2 (real_of_int (¦m¦ + 2)) + 2 * log 2 (real_of_int (¦e¦ + 1))))"
    using float_bit_count_2 by simp
  also have "...  ereal (6 + (2 * real (r+2) + 2 * (r + log 2 (2 + abs (log 2 (abs x))))))"
    using c e
    by (subst ereal_less_eq, intro add_mono mult_left_mono, linarith+) 
  also have "... = ?rhs" by simp
  finally show ?thesis by simp
qed

definition prime_above :: "nat  nat" 
  where "prime_above n = (SOME x. x  {n..(2*n+2)}  prime x)"

text ‹The term @{term"prime_above n"} returns a prime between @{term "n::nat"} and @{term "2*(n::nat)+2"}.
Because of Bertrand's postulate there always is such a value. In a refinement of the algorithms, it may make sense to
replace this with an algorithm, that finds such a prime exactly or approximately.

The definition is intentionally inexact, to allow refinement with various algorithms, without modifying the
high-level mathematical correctness proof.›

lemma ex_subset:
  assumes "x  A. P x"
  assumes "A  B"
  shows "x  B. P x"
  using assms by auto

lemma
  shows prime_above_prime: "prime (prime_above n)"
  and prime_above_range: "prime_above n  {n..(2*n+2)}"
proof -
  define r where "r = (λx. x  {n..(2*n+2)}  prime x)"
  have "x. r x"
  proof (cases "n>2")
    case True
    hence "n-1 > 1" by simp
    hence "x  {(n-1)<..<(2*(n-1))}. prime x"
      using bertrand by simp
    moreover have "{n - 1<..<2 * (n - 1)}  {n..2 * n + 2}"
      by (intro subsetI, auto) 
    ultimately have "x  {n..(2*n+2)}. prime x"
      by (rule ex_subset)
    then show ?thesis by (simp add:r_def Bex_def)
  next
    case False
    hence "2  {n..(2*n+2)}" 
      by simp
    moreover have "prime (2::nat)" 
      using two_is_prime_nat by blast
    ultimately have "r 2"
      using r_def by simp
    then show ?thesis by (rule exI)
  qed
  moreover have "prime_above n = (SOME x. r x)"
    by (simp add:prime_above_def r_def)
  ultimately have a:"r (prime_above n)"
    using someI_ex by metis
  show "prime (prime_above n)"
    using a unfolding r_def by blast
  show "prime_above n  {n..(2*n+2)}"
    using a unfolding r_def by blast
qed

lemma prime_above_min:  "prime_above n  2"
  using prime_above_prime 
  by (simp add: prime_ge_2_nat)

lemma prime_above_lower_bound: "prime_above n  n"
  using prime_above_range
  by simp

lemma prime_above_upper_bound: "prime_above n  2*n+2"
  using prime_above_range
  by simp

end