Theory Source_Coding_Theorem

(* Title:       One Part of Shannon's Source Coding Theorem
   Author:      Quentin Hibon <qh225@cl.cam.ac.uk>, Lawrence Paulson <lp15@cam.ac.uk>, 2014
   Maintainer:  Quentin Hibon <qh225@cl.cam.ac.uk>
*)

theory Source_Coding_Theorem
imports "HOL-Probability.Information"
begin
section‹Basic types›

type_synonym bit = bool
type_synonym bword = "bit list"
type_synonym letter = nat
type_synonym 'b word = "'b list"

type_synonym 'b encoder = "'b word  bword"
type_synonym 'b decoder = "bword  'b word option"

section‹Locale for the source coding theorem›
locale source_code = information_space +
  fixes fi :: "'b  real"
  fixes X :: "'a  'b"

  assumes distr_i: "simple_distributed M X fi"
  assumes b_val: "b = 2"

  fixes enc::"'b encoder"
  fixes dec::"'b decoder"
  assumes real_code:
  "dec (enc x) = Some x"
  "enc w = []  w = []"
  "x  []  enc x = enc [hd x] @ enc (tl x)"

section‹Source coding theorem, direct: the entropy is a lower bound of the code rate›
context source_code
begin
subsection‹The letter set›

definition L :: "'b set" where
  "L  X ` space M"

lemma fin_L: "finite L"
    using L_def distr_i
    by auto

lemma emp_L: "L  {}"
    using L_def subprob_not_empty
    by auto

subsection‹Codes and words›

abbreviation real_word :: "'b word  bool" where
  "real_word w  (set w  L)"

abbreviation k_words :: "nat  ('b word) set" where
  "k_words k  {w. length w = k  real_word w}"

lemma rw_tail:
  assumes "real_word w"
shows "w = []  real_word (tl w)"
    by (meson assms list.set_sel(2) subset_code(1))

definition code_word_length :: "'e encoder  'e  nat" where
  "code_word_length e l = length (e [l])"

abbreviation cw_len :: "'b  nat" where
  "cw_len l  code_word_length enc l"

definition code_rate :: "'e encoder  ('a  'e)  real" where
  "code_rate e Xo = expectation (λa. (code_word_length e ((Xo) a)))"

lemma fi_pos: "i L  0  fi i"
    using simple_distributed_nonneg[OF distr_i] L_def by auto

lemma (in prob_space) simp_exp_composed:
  assumes X: "simple_distributed M X Px"
shows "expectation (λa. f (X a)) = (x  X`space M. f x * Px x)"
  using distributed_integral[OF simple_distributed[OF X], of f]
    simple_distributed_nonneg[OF X]
    lebesgue_integral_count_space_finite[OF simple_distributed_finite[OF X], of "λx. f x * Px x"]
  by (simp add:  ac_simps)

lemma cr_rw:
  "code_rate enc X = (i  X ` space M. fi i * cw_len i)"
    using simp_exp_composed[OF distr_i, of "cw_len"]
    by (simp add: mult.commute code_rate_def)

abbreviation cw_len_concat :: "'b word  nat" where
  "cw_len_concat w  foldr (λx s. (cw_len x) + s) w 0"

lemma cw_len_length: "cw_len_concat w = length (enc w)"
proof (induction w)
    case Nil
    show ?case using real_code by simp
    case (Cons a w)
    have "cw_len_concat (a # w) = cw_len a + cw_len_concat w" by simp
    thus ?case using code_word_length_def real_code Cons
      by (metis length_append list.distinct(1) list.sel(1) list.sel(3))
qed

lemma maj_fold:
  assumes "l. lL  f l  bound"
  assumes "real_word w"
shows "foldr (λx s. f x + s) w 0  length w * bound"
    using assms
    by(induction w) (simp,fastforce)

definition max_len :: "nat" where
  "max_len = Max ((λx. cw_len x) ` L)"

lemma max_cw:
  "l  L  cw_len l  max_len"
    by (simp add: max_len_def fin_L)

subsection‹Related to the Kraft theorem›
definition 𝒦 :: "real" where
  "𝒦 = (iL. 1 / b ^ (cw_len i))"

lemma pos_cw_len: "0 < 1 / b ^ cw_len i" using b_gt_1 by simp

lemma 𝒦_pos: "0 < 𝒦"
    using emp_L fin_L pos_cw_len sum_pos 𝒦_def
    by metis

lemma 𝒦_pow: "𝒦 = (iL. 1 / b powr cw_len i)"
    using powr_realpow b_gt_1
    by (simp add: 𝒦_def)

lemma k_words_rel:
  "k_words (Suc k) = {w. (hd w  L  tl w  k_words k  w  [])}"
proof
    fix k
    show "k_words (Suc k)  {w. (hd w  L  tl w  k_words k  w  [] )}" (is "?l  ?r")
  proof
      fix w
      assume w_kw: "w  k_words (Suc k)"
      hence "real_word w" by simp
      hence "hd w  L"
        by (metis (mono_tags) w_kw hd_in_set list.size(3) mem_Collect_eq nat.distinct(1) subset_code(1))
      moreover have "length w = Suc k" using w_kw by simp
      moreover hence "w  []" by auto
      moreover have "real_word (tl w)" using real_word w calculation(3) rw_tail by auto
      ultimately show "w  ?r" using w_kw by simp
  qed
next
    fix k
    show "k_words (Suc k)  {w. (hd w  L  tl w  k_words k  w  [])}"
  proof
      fix w
      assume asm: "w  {w. hd w  L  tl w  {w. length w = k  real_word w}  w  []}"
      hence "hd w  L  length (tl w) = k  real_word (tl w)" by simp
      hence "real_word w"
        by (metis empty_iff insert_subset list.collapse list.set(1) set_simps(2) subsetI)
      moreover hence "length w = Suc k" using asm by auto
      ultimately show "w  k_words (Suc k)" by simp
  qed
qed

lemma bij_k_words:
shows "bij_betw (λwi. Cons (fst wi) (snd wi)) (L × k_words k) (k_words (Suc k))"
    unfolding bij_betw_def
proof
    fix k
    let ?f = "(λwi. Cons (fst wi) (snd wi))"
    let ?S = "L × (k_words k)"
    let ?T = "k_words (Suc k)"
    show "inj_on ?f ?S" by (simp add: inj_on_def)
    show "?f`?S = ?T"
  proof (rule ccontr)
      assume "?f ` ?S  ?T"
      hence "w. w ?T  w  ?f`?S" by auto
      then obtain w where asm: "w ?T  w  ?f`?S" by blast
      hence "w = ?f (hd w,tl w)" using k_words_rel by simp
      moreover have "(hd w,tl w)  ?S" using k_words_rel asm by simp
      ultimately have "w  ?f`?S" by blast
      thus "False" using asm by simp
  qed
qed

lemma finite_k_words: "finite (k_words k)"
proof (induct k)
    case 0
    show ?case by simp
    case (Suc n)
    thus ?case using bij_k_words bij_betw_finite fin_L by blast
qed

lemma cartesian_product:
  fixes f::"('c  real)"
  fixes g::"('d  real)"
  assumes "finite A"
  assumes "finite B"
shows "(bB. g b) * (aA. f a) = (abA×B. f (fst ab) * g (snd ab))"
    using bilinear_times bilinear_sum[where h="(λx y. x * y)" and f="f" and g="g"] assms
    by (metis (erased, lifting) sum.cong split_beta' Groups.ab_semigroup_mult_class.mult.commute)

lemma 𝒦_power:
shows "𝒦^k = (w  (k_words k). 1 / b^(cw_len_concat w))"
proof (induct k)
    case 0
    have "k_words 0 = {[]}" by auto
    thus ?case by simp
next
    case (Suc n)
    have " 𝒦 ^Suc n = 𝒦 ^n * 𝒦 " by simp
    also have " = (w  k_words n. 1 / b^cw_len_concat w) * (iL. 1 / b^cw_len i)"
      using Suc.hyps 𝒦_def by auto
    also have " = (wi  L × k_words n. 1/b^cw_len (fst wi) * (1 / b^cw_len_concat (snd wi)))"
      using fin_L finite_k_words cartesian_product
      by blast
    also have " = (wi  L × k_words n. 1 / b^(cw_len_concat (snd wi) + cw_len (fst wi)))"
      by (metis (no_types, lifting) power_add add.commute power_one_over)
    also have " = (wi  L × k_words n. 1 / b^cw_len_concat (fst wi # snd wi))"
      by (metis (erased, lifting) add.commute comp_apply foldr.simps(2))
    also have " = (w  (k_words (Suc n)). 1 / b^(cw_len_concat w))"
      using sum.reindex_bij_betw [OF bij_k_words] by fastforce
    finally show ?case by simp
qed

lemma bound_len_concat:
shows "w  k_words k  cw_len_concat w  k * max_len"
    using max_cw maj_fold by blast

subsection‹Inequality of the kraft sum (source coding theorem, direct)›
subsubsection‹Sum manipulation lemmas and McMillan theorem›

lemma sum_vimage_proof:
  fixes g::"nat  real"
  assumes "w. f w < bd"
shows "finite S  (wS. g (f w)) = ( m=0..<bd. (card ((f-`{m})  S) )* g m)"
(is "_  _ = ( m=0..<bd. ?ff m S)")
proof (induct S rule: finite_induct)
    case empty
    show ?case by simp
next
    case (insert x F)
    let ?rr = "(m = 0..<bd. ?ff m (insert x F))"
    have "(f x)  {0..<bd}" using assms by simp
    hence "h::(nat  real). (m=0..<bd. h m) = (y({0..<bd} - {f x}).h y) + h (f x)"
      by (metis diff_add_cancel finite_atLeastLessThan sum_diff1)
    moreover hence
    "(m = 0..<bd. ?ff m (insert x F))
    = (m{0..<bd} - {f x}. ?ff m (insert x F)) + card (f -` {f x}  F) * g (f x) + g (f x)"
      by (simp add: semiring_normalization_rules(2), simp add: insert)
    ultimately have "(m = 0..<bd. ?ff m (insert x F)) = (m{0..<bd}. ?ff m F) + g (f x)"
      by fastforce
    thus ?case using insert by simp
qed

lemma sum_vimage:
  fixes g::"nat  real"
  assumes bounded: "w. w  S  f w < bd" and "0 < bd"
  assumes finite: "finite S"
shows "(wS. g (f w)) = ( m=0..<bd. (card ((f-`{m})  S) ) * g m)"
(is "?s1 = ?s2")
proof -
    let ?ff = "(λx. if xS then f x else 0)"
    let ?ss1 = "(wS. g (?ff w))"
    let ?ss2 = "( m=0..<bd. (card ((?ff-`{m})  S) ) * g m)"
    have "?s1 =?ss1" by simp
    moreover have"m. ?ff -`{m}  S = f-`{m}  S" by auto
    moreover hence "?s2 = ?ss2" by simp
    moreover have "w . ?ff w < bd" using assms by simp
    moreover hence "?ss1 = ?ss2" using sum_vimage_proof[of "?ff"] finite by blast
    ultimately show "?s1 = ?s2" by metis
qed

lemma 𝒦_rw:
  "(w  (k_words k). 1 / b^(cw_len_concat w)) = (m=0..<Suc (k*max_len). card (k_words k 
((cw_len_concat) -` {m})) * (1 / b^m))" (is "?L = ?R")
proof -
    have "w. w  k_words k  cw_len_concat w < Suc ( k * max_len)"
      by (simp add: bound_len_concat le_imp_less_Suc)
    moreover have
    "?R = (m = 0..<Suc (k * max_len).
  (card (cw_len_concat -` {m}  k_words k)) * (1 / b ^ m))"
      by (metis Int_commute)
    moreover have "0 < Suc (k*max_len)" by simp
    ultimately show ?thesis
      using finite_k_words
    sum_vimage[where f="cw_len_concat" and g = "λi. 1/ (b^i)"]
      by fastforce
qed

definition set_of_k_words_length_m :: "nat  nat  'b word set" where
  "set_of_k_words_length_m k m = {xk. xk  k_words k}  (cw_len_concat)-`{m}"

lemma am_inj_code: "inj_on enc ((cw_len_concat)-`{m})" (is "inj_on _ ?s")
  using inj_on_def[of enc "?s"] real_code
  by (metis option.inject)

lemma img_inc: "enc`cw_len_concat-`{m}  {bl. length bl = m}" using cw_len_length by auto

lemma bool_lists_card: "card {bl::bool list. length bl = m} = b^m"
 using card_lists_length_eq[of "UNIV::bool set"]
  by (simp add: b_val)

lemma bool_list_fin: "finite {bl::bool list. length bl = m}"
  using finite_lists_length_eq[of "UNIV::bool set"]
  by (simp add: b_val)

lemma set_of_k_words_bound:
shows "card (set_of_k_words_length_m k m)  b^m" (is "?c  ?b")
proof -
    have card_w_len_m_bound: "card (cw_len_concat-`{m})  b^m"
      by (metis (no_types, lifting) am_inj_code bool_list_fin bool_lists_card card_image card_mono
    img_inc of_nat_le_iff)
    have "set_of_k_words_length_m k m  (cw_len_concat)-`{m}"
      by (simp add: set_of_k_words_length_m_def)
    hence "card (set_of_k_words_length_m k m)  card ((cw_len_concat)-`{m})"
      by (metis (no_types, lifting) am_inj_code bool_list_fin card.infinite card_0_eq
    card_image card_mono empty_iff finite_subset img_inc inf_img_fin_dom)
    thus ?thesis using card_w_len_m_bound by simp
qed

lemma empty_set_k_words:
  assumes "0 < k"
shows "set_of_k_words_length_m k 0 = {}"
proof(rule ccontr)
    assume "¬ set_of_k_words_length_m k 0 = {}"
    hence "x. x  set_of_k_words_length_m k 0" by auto
    then obtain x where x_def: "x  set_of_k_words_length_m k 0" by auto
    hence "x  []" unfolding set_of_k_words_length_m_def using assms by auto
    moreover have "cw_len_concat (hd x#tl x) = cw_len_concat (tl x) + cw_len (hd x)"
      by (metis add.commute comp_apply foldr.simps(2))
    moreover have "enc [(hd x)]  []" using assms real_code by blast
    moreover hence "0 < cw_len (hd x)" unfolding code_word_length_def by simp
    ultimately have "x  set_of_k_words_length_m k 0" by (simp add:set_of_k_words_length_m_def)
    thus "False" using x_def by simp
qed

lemma 𝒦_rw2:
  assumes "0 < k"
shows "(m=0..<Suc (k * max_len). card (set_of_k_words_length_m k m)/ b^m)  (k * max_len)"
proof -
    have
    "(m=1..<Suc (k * max_len). card (set_of_k_words_length_m k m) / b^m)
     (m=1..<Suc(k * max_len). b^m / b^m)"
      using set_of_k_words_bound b_val
    Groups_Big.sum_mono[of "{1..<Suc(k * max_len)}"
    "(λm. (card (set_of_k_words_length_m k m))/b^m)" "λm. b^m /b^m"]
      by simp
    moreover have"(m=1..<Suc(k * max_len). b^m / b^m) = (m=1..<Suc(k *max_len). 1)"
      using b_gt_1 by simp
    moreover have "(m=1..<Suc(k * max_len). 1) = (k * max_len)"
      by simp
    ultimately have
    "(m = 1..<Suc (k * max_len). card (set_of_k_words_length_m k m) / b ^ m)  k * max_len"
      by (metis One_nat_def card_atLeastLessThan card_eq_sum diff_Suc_Suc real_of_card)
    thus ?thesis using empty_set_k_words assms
      by (simp add: sum_shift_lb_Suc0_0_upt split: if_split_asm)
qed

lemma 𝒦_power_bound :
  assumes "0 < k"
shows " 𝒦^k  k * max_len"
    using assms 𝒦_power 𝒦_rw 𝒦_rw2
    by (simp add: set_of_k_words_length_m_def)

theorem McMillan :
shows "𝒦  1"
proof -
    have ineq: "k. 0 < k  𝒦  root k k * root k max_len"
      using 𝒦_pos 𝒦_power_bound
      by (metis (no_types, opaque_lifting) not_less of_nat_0_le_iff of_nat_mult power_strict_mono real_root_mult real_root_pos_pos_le real_root_pos_unique real_root_power)
    hence "0 < max_len  (λk. root k k * root k max_len)  1"
      by (auto intro!: tendsto_eq_intros LIMSEQ_root LIMSEQ_root_const)
    moreover have "n1. 𝒦  root n n * root n max_len"
      using ineq by simp
    moreover have "max_len = 0  𝒦  1" using ineq by fastforce
    ultimately show " 𝒦  1" using LIMSEQ_le_const by blast
qed

lemma entropy_rw: "ℋ(X) = -(i  L. fi i * log b (fi i))"
    using entropy_simple_distributed[OF distr_i]
    by (simp add: L_def)

subsubsection‹Technical lemmas about the logarithm›
lemma log_mult_ext3:
  "0  x  0 < y  0 < z  x * log b (x*y*z) = x * log b (x*y) + x * log b z"
    by(cases "x=0")(simp add: log_mult_eq abs_of_pos distrib_left less_eq_real_def)+

lemma log_mult_ext2:
  "0  x  0 < y  x * log b (x*y) = x * log b x + x * log b y"
    using log_mult_ext3[where y=1] by simp

subsubsection ‹KL divergence and properties›
definition KL_div ::"'b set  ('b  real)  ('b  real)  real" where
  "KL_div S a d = ( i  S. a i * log b (a i / d i))"

lemma KL_div_mul:
  assumes "0 < d" "d  1"
  assumes "i. iS  0  a i"
  assumes "i. iS  0 < e i"
shows "KL_div S a e  KL_div S a (λi. e i / d)"
    unfolding KL_div_def
proof -
    {
    fix i
    assume "iS"
    hence "a i / (e i / d)  a i / e i" using assms
      by (metis (no_types) div_by_1 frac_le less_imp_triv not_less)
    hence "log b (a i / (e i / d))  log b (a i / e i)" using assms(1)
      by (metis (full_types) b_gt_1 divide_divide_eq_left inverse_divide le_less_linear log_le
    log_neg_const order_refl times_divide_eq_right zero_less_mult_iff)
    }
    thus "(iS. a i * log b (a i / (e i / d)))  (iS. a i * log b (a i / e i))"
      by (meson mult_left_mono assms sum_mono)
qed

lemma KL_div_pos:
  fixes a e::"'b  real"
  assumes fin: "finite S"
  assumes nemp: "S  {}"
  assumes non_null: "i. iS  0 < a i" "i. i S  0 < e i"
  assumes sum_a_one: "( i  S. a i) = 1"
  assumes sum_c_one: "( i  S. e i) = 1"
shows "0  KL_div S a e"
    unfolding KL_div_def
proof -
    let ?f = "λi. e i / a i"
    have f_pos: "i. iS  0 < ?f i"
      using non_null
      by simp
    have a_pos: "i. i S  0  a i"
      using non_null
      by (simp add: order.strict_implies_order)
    have "- log b (iS. a i * e i / a i)  (iS. a i * - log b (e i / a i))"
      using convex_on_sum[OF fin nemp  minus_log_convex[OF b_gt_1] convex_real_interval(3)
                             sum_a_one a_pos, of "λi. e i / a i"] f_pos by simp
    also have "-log b (iS. a i * e i / a i) = -log b (iS. e i)"
  proof -
      from non_null(1) have "i. i  S  a i * e i / a i = e i" by force
      thus ?thesis by simp
  qed
    finally have "0  (iS. a i * - log b (e i / a i))"
      by (simp add: sum_c_one)
    thus "0  (iS. a i * log b (a i / e i))"
      using b_gt_1 log_divide non_null
      by simp
qed

lemma KL_div_pos_emp:
  "0  KL_div {} a e" by (simp add: KL_div_def)

lemma KL_div_pos_gen:
  fixes a d::"'b  real"
  assumes fin: "finite S"
  assumes non_null: "i. iS  0 < a i" "i. i S  0 < d i"
  assumes sum_a_one: "( i  S. a i) = 1"
  assumes sum_d_one: "( i  S. d i) = 1"
shows "0  KL_div S a d"
    using KL_div_pos KL_div_pos_emp assms by metis

theorem KL_div_pos2:
  fixes a d::"'b  real"
  assumes fin: "finite S"
  assumes non_null: "i. iS  0  a i" "i. i S  0 < d i"
  assumes sum_a_one: "( i  S. a i) = 1"
  assumes sum_c_one: "( i  S. d i) = 1"
shows "0  KL_div S a d"
proof -
    have "S = (S  {i. 0 < a i})  (S  {i. 0 = a i})" using non_null(1) by fastforce
    moreover have "(S  {i. 0 < a i})  (S  {i. 0 = a i}) = {}" by auto
    ultimately have
    eq: "KL_div S a d = KL_div (S  {i. 0 < a i}) a d + KL_div (S  {i. 0 = a i}) a d"
      unfolding KL_div_def
      by (metis (mono_tags, lifting) fin finite_Un sum.union_disjoint)
    have "KL_div (S  {i. 0 = a i}) a d = 0" unfolding KL_div_def by simp
    hence "KL_div S a d = KL_div (S  {i. 0 < a i}) a d" using eq by simp
    moreover have "0  KL_div (S  {i. 0 < a i}) a d"
  proof(cases "(S  {i. 0 < a i}) = {}")
      case True
      thus ?thesis unfolding KL_div_def by simp
  next
      case False
      let ?c = "λi. d i / (j (S  {i. 0 < a i}). d j)"
      have 1: "(i. i  S  {i. 0 < a i}  0 < a i)" by simp
      have 2: "(i. i  S  {i. 0 < a i}  0 < ?c i)"
        by (metis False IntD1 divide_pos_pos fin finite_Int non_null(2) sum_pos)
      have 3: "(i (S  {i. 0 < a i}). a i) = 1"
        using sum.cong[of S, of S, of "(λx. if x  {i. 0 < a i} then a x else 0)", of a]
      sum.inter_restrict[OF fin, of a] non_null(1) sum_a_one
        by fastforce
      have "(iS  {j. 0 < a j}. ?c i) = (iS  {j. 0 < a j}. d i) / (iS  {j. 0 < a j}. d i)"
        by (metis sum_divide_distrib)
      hence 5: "(iS  {j. 0 < a j}. ?c i) = 1" using 2 False by force
      hence "0  KL_div (S  {j. 0 < a j}) a ?c"
        using KL_div_pos_gen[
      OF finite_Int[OF disjI1, of S, of "{j. 0 < a j}"], of a, of ?c
      ] 1 2 3
        by (metis fin)
      have fstdb: "0 < (iS  {i. 0 < a i}. d i)" using non_null(2) False
        by (metis Int_Collect fin finite_Int sum_pos)
      have 6: "0  KL_div (S  {i. 0 < a i}) a (λi. d i / (i(S  {i. 0 < a i}). d i))"
        using 2 3 5
        KL_div_pos_gen[
      OF finite_Int[OF disjI1, OF fin], of "{i. 0 < a i}", of "a", of "?c"
      ]
        by simp
      hence
      "KL_div (S  {j. 0 < a j}) a (λi. d i / (i(S  {i. 0 < a i}). d i))  KL_div (S  {j. 0 < a j}) a d"
        using non_null sum.inter_restrict[OF fin, of d, of "{i. 0 < a i}"]
        sum_mono[of S, of "(λx. if x  {i. 0 < a i} then d x else 0)", of d] non_null(2) sum_c_one
        non_null(2) fstdb KL_div_mul
        by force
      moreover have "0  KL_div (S  {j. 0 < a j}) a (λi. d i / (i(S  {i. 0 < a i}). d i))"
        using KL_div_pos_gen[ OF finite_Int[OF disjI1, OF fin]] using 2 3 5 by fastforce
      ultimately show "0  KL_div (S  {j. 0 < a j}) a d" by simp
  qed
    ultimately show ?thesis by simp
qed

lemma sum_div_1:
  fixes f::"'b  'c::field"
  assumes "(iA. f i)  0"
shows "(iA. f i / (jA. f j)) = 1"
    by (metis (no_types) assms right_inverse_eq sum_divide_distrib)

theorem rate_lower_bound:
shows "ℋ(X)  code_rate enc X"
proof -
    let ?cr = "code_rate enc X"
    let ?r = "(λi. 1 / ((b powr cw_len i) * 𝒦))"
    have pos_pi: "i. i  L  0  fi i" using fi_pos by simp
    {
    fix i
    assume "i  L"
    hence
    "fi i * (log b (1 / (1 / b powr (cw_len i))) + log b (fi i))
    = fi i * log b (fi i / (1 / b powr (cw_len i)))"
      using log_mult_ext2 [OF pos_pi, of i] b_gt_1
      by simp (simp add: algebra_simps)
    }
    hence eqpi:
    "i. i L  fi i * (log b (1 / (1 / b powr (cw_len i))) + log b (fi i))
    = fi i * log b (fi i / (1 / b powr (cw_len i)))"
      by simp
    have sum_one_L: "( i  L. fi i) = 1"
      using simple_distributed_sum_space[OF distr_i] by (simp add: L_def)
    {
    fix i
    assume "i  L"
    hence h1: "0  fi i" using pos_pi by blast
    have h2: "0 < 𝒦 / (1/b powr cw_len i)" using b_gt_1 𝒦_pos by auto
    have h3: "0 < 1 / 𝒦" using 𝒦_pos by simp
    have
    "fi i * log b (fi i * 𝒦 / (1/b powr cw_len i) * (1/ 𝒦)) =
    fi i * log b (fi i * 𝒦 / (1/b powr cw_len i)) + fi i * log b (1/ 𝒦)"
      using log_mult_ext3[OF h1 h2 h3]
      by (metis times_divide_eq_right)
    } hence big_eq:
    "i. i  L  fi i * log b (fi i * 𝒦 / (1/b powr cw_len i) * (1 / 𝒦)) =
    fi i * log b (fi i * 𝒦 / (1/b powr cw_len i)) + fi i * log b (1 / 𝒦)"
      by (simp add: inverse_eq_divide)
    have 1: "?cr - ℋ(X) = (i  L. fi i * cw_len i) + (i  L. fi i * log b (fi i))"
      using 𝒦_def entropy_rw cr_rw L_def by simp
    also have 2: "(iL. fi i * cw_len i) = (i  L. fi i * (-log b (1/(b powr (cw_len i)))))"
      using b_gt_1 log_divide by simp
    also have " = -1 * (i  L. fi i * (log b (1/(b powr (cw_len i)))))"
      using sum_distrib_left[of "-1" "(λi. fi i * (- 1 * log b (1 / b powr (cw_len i))))" L]
      by simp
    finally have
    "?cr - ℋ(X) = -(i  L. fi i * log b (1/b powr cw_len i)) + (i  L. fi i * log b (fi i))"
      by simp
    have "?cr - ℋ(X) = (i  L. fi i * ((log b (1/ (1/(b powr (cw_len i))))) + log b (fi i)))"
      using b_gt_1 1
      by (simp add: distrib_left sum.distrib)
    also have " = (i  L. fi i *((log b (fi i / (1/(b powr (cw_len i)))))))"
      using Finite_Cartesian_Product.sum_cong_aux[OF eqpi] by simp
    also from big_eq have
    " = (iL. fi i * (log b (fi i * 𝒦 / (1 / b powr (cw_len i))))) + (i  L. fi i) * log b (1/ 𝒦)"
      using 𝒦_pos
      by (simp add: sum_distrib_right sum.distrib)
    also have " = (iL. fi i * (log b (fi i * 𝒦 / (1 / b powr (cw_len i))))) - log b (𝒦)"
      using 𝒦_pos
      by (simp add: log_inverse_eq divide_inverse sum_one_L)
    also have " = ( i  L. fi i * log b (fi i / ?r i)) - log b (𝒦)"
      by (metis (mono_tags, opaque_lifting) divide_divide_eq_left divide_divide_eq_right)
    also have " = KL_div L fi ?r - log b ( 𝒦)"
      using b_gt_1 𝒦_pos log_inverse KL_div_def
      by simp
    also have " = KL_div L fi ?r + log b (1 / 𝒦)"
      using log_inverse b_val 𝒦_pos
      by (simp add: inverse_eq_divide)
    finally have code_ent_kl_log: "?cr - ℋ(X) = KL_div L fi ?r + log b (1 / 𝒦)" by simp
    have "(iL. ?r i) = 1"
      using sum_div_1[of "λi. 1 / (b powr (cw_len i))"] 𝒦_pos 𝒦_pow
      by simp
    moreover have "i. 0 < ?r i" using b_gt_1 𝒦_pos by simp
    moreover have "(iL. fi i) = 1" using sum_one_L by simp
    ultimately have "0  KL_div L fi ?r"
      using KL_div_pos2[OF fin_L fi_pos] by simp
    hence "log b (1 / 𝒦)  ?cr - ℋ(X)" using code_ent_kl_log by simp
    moreover from McMillan have "0  log b (1 / 𝒦)"
      using 𝒦_pos
      by (simp add: b_gt_1)
    ultimately show ?thesis by simp
qed

end

end