Theory Center_Selection

(* Author: Ujkan Sulejmani *)

section ‹Center Selection›

theory Center_Selection
  imports Complex_Main "HOL-Hoare.Hoare_Logic"
begin

text ‹The Center Selection (or metric k-center) problem. Given a set of \textit{sites} S›
in a metric space, find a subset C ⊆ S› that minimizes the maximal distance from any s ∈ S›
to some c ∈ C›. This theory presents a verified 2-approximation algorithm.
It is based on Section 11.2 in the book by Kleinberg and Tardos cite"KleinbergT06".
In contrast to the proof in the book, our proof is a standard invariant proof.›

locale Center_Selection =
  fixes S :: "('a :: metric_space) set"
    and k :: nat
  assumes finite_sites: "finite S"
  and     non_empty_sites: "S  {}"
and       non_zero_k: "k > 0"
begin

definition distance :: "('a::metric_space) set  ('a::metric_space)  real" where
"distance C s = Min (dist s ` C)"

definition radius :: "('a :: metric_space) set  real" where
"radius C = Max (distance C ` S)"

lemma distance_mono:
assumes "C1  C2" and "C1  {}" and "finite C2"
shows "distance C1 s  distance C2 s"
by (simp add: Min.subset_imp assms distance_def image_mono)

lemma finite_distances: "finite (distance C ` S)"
  using finite_sites by simp

lemma non_empty_distances: "distance C ` S  {}"
  using non_empty_sites by simp

lemma radius_contained: "radius C  distance C ` S"
  using finite_distances non_empty_distances Max_in radius_def by simp

lemma radius_def2: "s  S. distance C s = radius C"
  using radius_contained image_iff by metis

lemma dist_lemmas_aux:
  assumes  "finite C"
      and  "C  {}"
  shows  "finite (dist s ` C)"
    and "finite (dist s ` C)  distance C s  dist s ` C"
    and "distance C s  dist s ` C  c  C. dist s c = distance C s"
and "c  C. dist s c = distance C s  distance C s  0"
proof
  show "finite C" using assms(1) by simp
next
  assume "finite (dist s ` C)"
  then show "distance C s  dist s ` C" using distance_def eq_Min_iff assms(2) by blast
next
  assume "distance C s  dist s ` C" 
  then show "c  C. dist s c = distance C s" by auto
next
  assume "c  C. dist s c = distance C s"
  then show "distance C s  0" by (metis zero_le_dist)
qed

lemma dist_lemmas:
  assumes "finite C"
      and "C  {}"
  shows "finite (dist s ` C)"
    and "distance C s  dist s ` C"
    and "c  C. dist s c = distance C s"
    and "distance C s  0"
  using dist_lemmas_aux assms by auto

lemma radius_max_prop: "(s  S. distance C s  r)  (radius C  r)"
  by (metis image_iff radius_contained)

lemma dist_ins:
assumes "c1  C. c2  C. c1  c2  x < dist c1 c2"
and "distance C s > x"
and "finite C"
and "C  {}"
shows "c1  (C  {s}). c2  (C  {s}). c1  c2  x < dist c1 c2"
proof (rule+)
  fix c1 c2
  assume local_assms: "c1C  {s}" "c2C  {s}" "c1  c2"
  then have "c1  C   c2  C   c1 C   c2 {s}  c2C   c1  {s}  c1  {s}  c2 {s}" by auto
  then show "x < dist c1 c2"
  proof (elim disjE)
    assume "c1 C   c2C"
    then show ?thesis using assms(1) local_assms(3) by simp
  next
    assume case_assm: "c1  C  c2  {s}"
    have "x < distance C c2" using assms(2) case_assm by simp
    also have " ...  dist c2 c1"
      using Min.coboundedI distance_def assms(3,4) dist_lemmas(1, 2) case_assm by simp
    also have " ... = dist c1 c2" using dist_commute by metis
    finally show ?thesis .
  next
    assume case_assm: "c2  C  c1  {s}"
    have "x < distance C c1" using assms(2) case_assm by simp
    also have " ...  dist c1 c2"
      using Min.coboundedI distance_def assms(3,4) dist_lemmas(1, 2) case_assm by simp
    finally show ?thesis .
  next
    assume "c1  {s}  c2  {s}" 
    then have False using local_assms by simp
    then show ?thesis by simp
  qed
qed

subsection ‹A Preliminary Algorithm and Proof›

text ‹This subsection verifies an auxiliary algorithm by Kleinberg and Tardos.
Our proof of the main algorithm does not does not rely on this auxiliary algorithm at all
but we do reuse part off its invariant proof later on.›

definition inv :: "('a :: metric_space) set  ('a :: metric_space set)  real  bool" where
"inv S' C r =
  ((s  (S - S'). distance C s  2*r)  S'  S  C  S 
   (c  C. s  S'. S'  {}  dist c s > 2 * r)  (S' = S  C  {}) 
   (c1  C. c2  C. c1  c2  dist c1 c2 > 2 * r))" 

lemma inv_init: "inv S {} r"
  unfolding inv_def non_empty_sites by simp
lemma inv_step:
  assumes "S'  {}"
and IH: "inv S' C r"
defines[simp]: "s  (SOME s. s  S')"
shows "inv (S' - {s' . s'  S'  dist s s'  2*r}) (C  {s}) r"
proof -
  have s_def: "s  S'" using assms(1) some_in_eq by auto

  have "finite (C  {s})" using IH finite_subset[OF _ finite_sites] by (simp add: inv_def)

  moreover

  have "(s'  (S - (S' - {s' . s'  S'  dist s s'  2*r})). distance (C  {s}) s'  2*r)"
  proof 
    fix s''
    assume "s''  S - (S' - {s' . s'  S'  dist s s'  2*r})"
    then have "s''  S - S'  s''  {s' . s'  S'  dist s s'  2*r}" by simp
    then show "distance (C  {s}) s''  2 * r"
    proof (elim disjE)
      assume local_assm: "s''  S - S'"
      have "S' = S  C  {}" using IH by (simp add: inv_def)
      then show ?thesis
      proof (elim disjE)
        assume "S' = S"
        then have "s''  {}" using local_assm by simp
        then show ?thesis by simp
      next
        assume C_not_empty: "C  {}"
        have "finite C" using IH finite_subset[OF _ finite_sites] by (simp add: inv_def)
        then have "distance (C  {s}) s''  distance C s''"
          using distance_mono C_not_empty by (meson Un_upper1 calculation)
        also have " ...   2 * r" using IH local_assm inv_def by simp
        finally show ?thesis .
      qed
    next
      assume local_assm: "s''  {s' . s'  S'  dist s s'  2*r}"
      then have "distance (C  {s}) s''  dist s'' s"
        using Min.coboundedI distance_def dist_lemmas calculation by auto
      also have " ...  2 * r" using local_assm by (smt dist_self dist_triangle2 mem_Collect_eq)
      finally show ?thesis .
    qed
  qed

  moreover

  have "S' - {s' . s'  S'  dist s s'  2*r}  S" using IH by (auto simp: inv_def)

  moreover
  {
    have "s  S" using IH inv_def s_def by auto
    then have "C  {s}  S" using IH by (simp add: inv_def)
  }
  moreover

  have "(cC  {s}. c2C  {s}. c  c2  2 * r < dist c c2)"
  proof (rule+)
    fix c1 c2
    assume local_assms: "c1  C  {s}" "c2  C  {s}" "c1  c2"
    then have "(c1  C  c2  C)  (c1 = s  c2  C)  (c1  C  c2 = s)  (c1 = s  c2 = s)"
      using assms by auto
    then show "2 * r < dist c1 c2"
    proof (elim disjE)
      assume "c1  C  c2  C"
      then show "2 * r < dist c1 c2" using IH inv_def local_assms by simp
    next
      assume case_assm: "c1 = s  c2  C"
      have "(c  C. sS'. S'  {}  2 * r < dist c s)" using IH inv_def by simp
      then show ?thesis by (smt case_assm s_def assms(1) dist_self dist_triangle3 singletonD)
    next
      assume case_assm: "c1  C  c2 = s"
      have "(c  C. sS'. S'  {}  2 * r < dist c s)" using IH inv_def by simp
      then show ?thesis by (smt case_assm s_def assms(1) dist_self dist_triangle3 singletonD)
    next
      assume "c1 = s  c2 = s"
      then have False using local_assms(3) by simp
      then show ?thesis by simp
    qed
  qed

  moreover

  have "(cC  {s}. s''  S' - {s'  S'. dist s s'  2 * r}.
           S' - {s'  S'. dist s s'  2 * r}  {}  2 * r < dist c s'')"
    using IH inv_def by fastforce

  moreover

  have "(S' - {s'  S'. dist s s'  2 * r} = S  C  {s}  {})" by simp

  ultimately show ?thesis unfolding inv_def by blast
qed

lemma inv_last_1: 
  assumes "s  (S - S'). distance C s  2*r"
    and "S' = {}"
  shows "radius C  2*r"
  by (metis Diff_empty assms image_iff radius_contained)

lemma inv_last_2: 
  assumes "finite C"
  and "card C > n"
  and "C  S"
  and "c1  C. c2  C. c1  c2  dist c1 c2 > 2*r"
  shows "C'. card C'  n  card C' > 0  radius C' > r" (is ?P)
proof (rule ccontr)
  assume "¬ ?P"
  then obtain C' where card_C': "card C'  n  card C' > 0" and radius_C': "radius C'  r" by auto
  have "c  C. (c'. c'  C'  dist c c'  r)"
  proof
    fix c
    assume "c  C"
    then have "c  S" using assms(3) by blast
    then have "distance C' c  radius C'" using finite_distances by (simp add: radius_def)
    then have "distance C' c  r" using radius_C' by simp
    then show "c'. c'  C'  dist c c'  r" using dist_lemmas
      by (metis card_C' card_gt_0_iff)
  qed
  then obtain f where f: "cC. f c  C'  dist c (f c)  r" by metis
  have "¬inj_on f C"
  proof
    assume "inj_on f C"
    then have "card C'  card C" using inj_on f C card_inj_on_le card_ge_0_finite card_C' f by blast
    then show False using card_C' n < card C by linarith
  qed
  then obtain c1 c2 where defs: "c1  C  c2  C  c1  c2  f c1 = f c2" using inj_on_def by blast
  then have *: "dist c1 (f c1)  r  dist c2 (f c1)  r" using f by auto

  have "2 * r < dist c1 c2" using assms defs by simp
  also have " ...  dist c1 (f c1) + dist (f c1) c2" by(rule dist_triangle)
  also have " ... = dist c1 (f c1) + dist c2 (f c1)" using dist_commute by simp
  also have " ...  2 * r" using * by simp
  finally show False by simp
qed

lemma inv_last:
  assumes "inv {} C r"
  shows "(card C  k  radius C  2*r)  (card C > k  (C'. card C' > 0  card C'  k  radius C' > r))"
  using assms inv_def inv_last_1 inv_last_2 finite_subset[OF _ finite_sites] by auto

theorem Center_Selection_r:
  "VARS (S' :: ('a :: metric_space) set) (C :: ('a :: metric_space) set) (r :: real) (s :: 'a)
  {True}
  S' := S;
  C := {};
  WHILE S'  {} INV {inv S' C r} DO
    s := (SOME s. s  S');
    C := C  {s};
    S' := S' - {s' . s'  S'  dist s s'  2*r}
    OD
  {(card C  k  radius C  2*r)  (card C > k  (C'. card C' > 0  card C'  k  radius C' > r))}"
proof (vcg, goal_cases)
  case (1 S' C r)
  then show ?case using inv_init by simp
next
  case (2 S' C r)
  then show ?case using inv_step by simp
next
  case (3 S' C r)
  then show ?case using inv_last by blast
qed


subsection ‹The Main Algorithm›

definition invar :: "('a :: metric_space) set  bool" where
"invar C = (C  {}  card C  k  C  S 
  (C'. (c1  C. c2  C. c1  c2  dist c1 c2 > 2 * radius C')
         (s  S. distance C s  2 * radius C')))"

abbreviation some where "some A  (SOME s. s  A)"

lemma invar_init: "invar {some S}"
proof -
  let ?s = "some S"
  have s_in_S: "?s  S" using some_in_eq non_empty_sites by blast

  have "{?s}  {}" by simp

  moreover

  have "{SOME s. s  S}  S" using s_in_S by simp

  moreover

  have "card {SOME s. s  S}  k" using non_zero_k by simp

  ultimately show ?thesis by (auto simp: invar_def)
qed

abbreviation furthest_from where
"furthest_from C  (SOME s. s  S  distance C s = Max (distance C ` S))" 

lemma invar_step:
assumes "invar C"
and "card C < k"
shows "invar (C  {furthest_from C})"
proof -
  have furthest_from_C_props: "furthest_from C  S  distance C (furthest_from C) = radius C "
    using someI_ex[of "λx. x  S  distance C x = radius C"] radius_def2 radius_def by auto
  have C_props: "finite C  C  {}"
    using finite_subset[OF _ finite_sites] assms(1) unfolding invar_def by blast
  {
    have "card (C  {furthest_from C})  card C + 1"
      using assms(1) C_props unfolding invar_def by (simp add: card_insert_if)
    then have "card (C  {furthest_from C}) < k + 1" using assms(2) by simp
    then have "card (C  {furthest_from C})  k" by simp
  }
  moreover

  have "C  {furthest_from C}  {}" by simp

  moreover

  have "(C  {furthest_from C})  S" using assms(1) furthest_from_C_props unfolding invar_def by simp

  moreover

  have "C'. (s  S. distance (C  {furthest_from C}) s  2 * radius C')
           (c1  C  {furthest_from C}. c2  C  {furthest_from C}. c1  c2  2 * radius C' < dist c1 c2)"
  proof 
    fix C'
    have "distance C (furthest_from C) > 2 * radius C'  distance C (furthest_from C)  2 * radius C'" by auto
    then show "(s  S. distance (C  {furthest_from C}) s  2 * radius C')
                (c1  C  {furthest_from C}. c2  C  {furthest_from C}. c1  c2  2 * radius C' < dist c1 c2)"
    proof (elim disjE)
      assume asm: "distance C (furthest_from C) > 2 * radius C'"
      then have "¬(s  S. distance C s  2 * radius C')" using furthest_from_C_props by force
      then have IH: "c1  C. c2  C. c1  c2  2 * radius C' < dist c1 c2"
        using assms(1) unfolding invar_def by blast
      have "(c1  C  {furthest_from C}. (c2  C  {furthest_from C}. c1  c2  2 * radius C' < dist c1 c2))"
        using dist_ins[of "C" "2 * radius C'" "furthest_from C"] IH C_props asm by simp
      then show ?thesis by simp
    next
      assume main_assm: "2 * radius C'  distance C (furthest_from C)"
      have "(s  S. distance (C  {furthest_from C}) s  2 * radius C')"
      proof
        fix s
        assume local_assm: "s  S"
        then show "distance (C  {furthest_from C}) s  2 * radius C'"
        proof -
          have "distance (C  {furthest_from C}) s  distance C s"
            using distance_mono[of C "C  {furthest_from C}"] C_props by auto
          also have " ...  distance C (furthest_from C)"
            using Max.coboundedI local_assm finite_distances radius_def furthest_from_C_props by auto
          also have " ...  2 * radius C'" using main_assm by simp
          finally show ?thesis .
        qed
      qed
      then show ?thesis by blast
    qed
  qed

  ultimately show ?thesis unfolding invar_def by blast
qed

lemma invar_last:
assumes "invar C" and "¬card C < k"
shows "card C = k" and "card C' > 0  card C'  k  radius C  2 * radius C'"
proof -
  show "card C = k" using assms(1, 2) unfolding invar_def by simp
next
  have C_props: "finite C  C  {}" using finite_sites assms(1) unfolding invar_def by (meson finite_subset)
  show "card C' > 0  card C'  k  radius C  2 * radius C'"
  proof (rule impI)
    assume C'_assms: "0 < card (C' :: 'a set)  card C'  k"
    let ?r = "radius C'"
    have "(c1  C. c2  C. c1  c2  2 * ?r < dist c1 c2)  (s  S. distance C s  2 * ?r)"
      using assms(1) unfolding invar_def by simp
    then show "radius C  2 * ?r"
    proof
      assume case_assm: "c1C. c2C. c1  c2  2 * ?r < dist c1 c2"
      obtain s where s_def: "radius C = distance C s  s  S" using radius_def2 by metis
      show ?thesis
      proof (rule ccontr)
        assume contr_assm: "¬ radius C  2 * ?r"
        then have s_prop: "distance C s > 2 * ?r" using s_def by simp
        then have c1  C  {s}. c2  C  {s}. c1  c2  dist c1 c2 > 2 * ?r
          using C_props dist_ins[of "C" "2*?r" "s"] case_assm by blast
        moreover
        {
          have "s  C"
          proof
            assume "s  C"
            then have "distance C s  dist s s" using Min.coboundedI[of "distance C ` S" "dist s s"] 
              by (simp add: distance_def C_props)
            also have " ... = 0" by simp
            finally have "distance C s = 0" using dist_lemmas(4) by (smt C_props)
            then have radius_le_zero: "2 * ?r < 0" using contr_assm s_def by simp
            obtain x where x_def: "?r = distance C' x" using radius_def2 by metis
            obtain l where l_def: "distance C' x = dist x l" using dist_lemmas(3) by (metis C'_assms card_gt_0_iff)
            then have "dist x l = ?r" by (simp add: x_def)
            also have "...  < 0" using C'_assms radius_le_zero by simp
            finally show False by simp
          qed
          then have "card (C  {s}) > k" using assms(1,2) C_props unfolding invar_def by simp
        }
        moreover
          have "C  {s}  S" using assms(1) s_def unfolding invar_def by simp
        moreover
          have "finite (C  {s})" using calculation(3) finite_subset finite_sites by auto
        ultimately have "C. card C  k  card C > 0  radius C > ?r" using inv_last_2 by metis
        then have "?r > ?r" using C'_assms by blast
        then show False by simp
      qed
    next
      assume "sS. distance C s  2 * radius C'"
      then show ?thesis by (metis image_iff radius_contained)
    qed
  qed
qed

theorem Center_Selection: 
"VARS (C :: ('a :: metric_space) set) (s :: ('a :: metric_space))
  {k  card S}
  C := {some S};
  WHILE card C < k INV {invar C} DO
    C := C  {furthest_from C}
  OD
  {card C = k  (C'. card C' > 0  card C'  k  radius C  2 * radius C')}"
proof (vcg, goal_cases)
  case (1 C s)
  show ?case using invar_init by simp
next
  case (2 C s)
  then show ?case using invar_step by blast
next
  case (3 C s)
  then show ?case using invar_last by blast
qed

end
end