# Theory RemoveMax

(*  Title:      Sort.thy
Author:     Danijela Petrovi\'c, Facylty of Mathematics, University of Belgrade *)

section ‹Defining data structure and \\
key function remove\_max›

theory RemoveMax
imports Sort
begin

subsection ‹Describing data structure›

text‹
We have already said that we are going to formalize heap and selection
sort and to show connections between these two sorts.  However, one
can immediately notice that selection sort is using list and heap sort
is using heap during its work. It would be very difficult to show
equivalency between these two sorts if it is continued straightforward
and independently proved that they satisfy conditions of locale
\verb|Sort|. They work with different objects. Much better thing to do
is to stay on the abstract level and to add the new locale, one that
describes characteristics of both list and heap.
›

locale Collection =
fixes empty :: "'b"
― ‹-- Represents empty element of the object (for example, for list it is $[]$)›
fixes is_empty :: "'b ⇒ bool"
― ‹-- Function that checks weather the object is empty or not›
fixes of_list :: "'a list ⇒ 'b"
― ‹-- Function transforms given list to desired object (for example,
for heap sort, function {\em of\_list} transforms list to heap)›
fixes multiset :: "'b ⇒ 'a multiset"
― ‹-- Function makes a multiset from the given object. A multiset is a collection without order.›
assumes is_empty_inj: "is_empty e ⟹ e = empty"
― ‹-- It must be assured that the empty element is {\em empty}›
assumes is_empty_empty: "is_empty empty"
― ‹-- Must be satisfied that function {\em is\_empty} returns true for element {\em empty}›
assumes multiset_empty: "multiset empty = {#}"
― ‹-- Multiset of an empty object is empty multiset.›
assumes multiset_of_list: "multiset (of_list i) = mset i"
― ‹-- Multiset of an object gained by
applying function {\em of\_list} must be the same as the multiset of
the list. This, practically, means that function {\em of\_list} does
not delete or change elements of the starting list.›
begin
lemma is_empty_as_list: "is_empty e ⟹ multiset e = {#}"
using is_empty_inj multiset_empty
by auto

definition set :: "'b ⇒ 'a set" where
[simp]: "set l = set_mset (multiset l)"
end

subsection ‹Function remove\_max›

text‹
We wanted to emphasize that algorithms are same. Due to
the complexity of the implementation it usually happens that simple
properties are omitted, such as the connection between these two
sorting algorithms. This is a key feature that should be presented to
students in order to understand these algorithms. It is not unknown
that students usually prefer selection sort for its simplicity whereas
avoid heap sort for its complexity. However, if we can present them as
the algorithms that are same they may hesitate less in using the heap
sort. This is why the refinement is important. Using this technique we
were able to notice these characteristics. Separate verification would
not bring anything new. Being on the abstract level does not only
simplify the verifications, but also helps us to notice and to show
students important features. Even further, we can prove them formally
and completely justify our observation.
›

locale RemoveMax = Collection empty is_empty of_list  multiset for
empty :: "'b" and
is_empty :: "'b ⇒ bool" and
of_list :: "'a::linorder list ⇒ 'b" and
multiset :: "'b ⇒ 'a::linorder multiset" +
fixes remove_max :: "'b ⇒ 'a × 'b"
― ‹--- Function that removes maximum element from the object of type $'b$.
It returns maximum element and the object without that maximum element.›
fixes inv :: "'b ⇒ bool"
― ‹--- It checks weather the object is in required condition.
For example, if we expect to work with heap it checks weather the object
is heap. This is called {\em invariant condition}›
assumes of_list_inv: "inv (of_list x)"
― ‹--- This condition assures that function {\em of\_list}
made a object with desired property.›
assumes remove_max_max:
"⟦¬ is_empty l; inv l; (m, l') = remove_max l⟧ ⟹ m = Max (set l)"
― ‹--- First parameter of the return value of the
function {\em remove\_max} is the maximum element›
assumes remove_max_multiset:
"⟦¬ is_empty l; inv l; (m, l') = remove_max l⟧ ⟹
add_mset m (multiset l') = multiset l"
― ‹--- Condition for multiset, ensures that nothing new is added or nothing
is lost after applying {\em remove\_max} function.›
assumes remove_max_inv:
"⟦¬ is_empty l; inv l; (m, l') = remove_max l⟧ ⟹ inv l'"
― ‹--- Ensures that invariant condition is true after removing maximum element.
Invariant condition must be true in each step of sorting algorithm, for example
if we are sorting using heap than in each iteration we must have heap and function
{\em remove\_max} must not change that.›
begin

lemma remove_max_multiset_size:
"⟦¬ is_empty l; inv l; (m, l') = remove_max l⟧ ⟹
size (multiset l) > size (multiset l')"
using remove_max_multiset[of l m l']

lemma remove_max_set:
"⟦¬ is_empty l; inv l; (m, l') = remove_max l⟧ ⟹
set l' ∪ {m} = set l"
using remove_max_multiset[of l m l']
by (metis Un_insert_right local.set_def set_mset_add_mset_insert sup_bot_right)

text‹As it is said before
in each iteration invariant condition must be satisfied, so the {\em
inv l} is always true, e.g. before and after execution of any
function. This is also the reason why sort function must be defined as
partial. This function parameters stay the same in each step of
iteration -- list stays list, and heap stays heap. As we said before,
in Isabelle/HOL we can only define total function, but there is a
mechanism that enables total function to appear as partial one:›

partial_function (tailrec) ssort' where
"ssort' l sl =
(if is_empty l then
sl
else
let
(m, l') = remove_max l
in
ssort' l' (m # sl))"
declare ssort'.simps[code]
definition ssort :: "'a list ⇒ 'a list" where
"ssort l = ssort' (of_list l) []"

inductive ssort'_dom where
step: "⟦⋀m l'. ⟦¬ is_empty l; (m, l') = remove_max l⟧ ⟹
ssort'_dom (l', m # sl)⟧ ⟹ ssort'_dom (l, sl)"
lemma ssort'_termination:
assumes "inv (fst p)"
shows "ssort'_dom p"
using assms
proof (induct p rule: wf_induct[of "measure (λ(l, sl). size (multiset l))"])
let ?r = "measure (λ(l, sl). size (multiset l))"
fix p :: "'b × 'a list"
assume "inv (fst p)" and *:
"∀y. (y, p) ∈ ?r ⟶ inv (fst y) ⟶ ssort'_dom y"
obtain l sl where "p = (l, sl)"
by (cases p) auto
show "ssort'_dom p"
proof (subst ‹p = (l, sl)›, rule ssort'_dom.step)
fix m l'
assume "¬ is_empty l" "(m, l') = remove_max l"
show "ssort'_dom (l', m#sl)"
proof (rule *[rule_format])
show "((l', m#sl), p) ∈ ?r" "inv (fst (l', m#sl))"
using ‹p = (l, sl)› ‹inv (fst p)› ‹¬ is_empty l›
using ‹(m, l') = remove_max l›
using remove_max_inv[of l m l']
using remove_max_multiset_size[of l m l']
by auto
qed
qed
qed simp

lemma ssort'Induct:
assumes "inv l" "P l sl"
"⋀ l sl m l'.
⟦¬ is_empty l; inv l; (m, l') = remove_max l; P l sl⟧ ⟹ P l' (m # sl)"
shows "P empty (ssort' l sl)"
proof-
from ‹inv l› have "ssort'_dom (l, sl)"
using ssort'_termination
by auto
thus ?thesis
using assms
proof (induct "(l, sl)" arbitrary: l sl rule: ssort'_dom.induct)
case (step l sl)
show ?case
proof (cases "is_empty l")
case True
thus ?thesis
using step(4) step(5) ssort'.simps[of l sl] is_empty_inj[of l]
by simp
next
case False
let ?p = "remove_max l"
let ?m = "fst ?p" and ?l' = "snd ?p"
show ?thesis
using False step(2)[of ?m ?l'] step(3)
using step(4) step(5)[of l ?m ?l' sl] step(5)
using remove_max_inv[of l ?m ?l']
using ssort'.simps[of l sl]
by (cases ?p) auto
qed
qed
qed

lemma mset_ssort':
assumes "inv l"
shows "mset (ssort' l sl) = multiset l + mset sl"
using assms
proof-
have "multiset empty + mset (ssort' l sl) = multiset l + mset sl"
using assms
proof (rule ssort'Induct)
fix l1 sl1 m l'
assume "¬ is_empty l1"
"inv l1"
"(m, l') = remove_max l1"
"multiset l1 + mset sl1 = multiset l + mset sl"
thus "multiset l' + mset (m # sl1) = multiset l + mset sl"
using remove_max_multiset[of l1 m l']
qed simp
thus ?thesis
using multiset_empty
by simp
qed

lemma sorted_ssort':
assumes "inv l" "sorted sl ∧ (∀ x ∈ set l. (∀ y ∈ List.set sl. x ≤ y))"
shows "sorted (ssort' l sl)"
using assms
proof-
have "sorted (ssort' l sl) ∧
(∀ x ∈ set empty. (∀ y ∈ List.set (ssort' l sl). x ≤ y))"
using assms
proof (rule ssort'Induct)
fix l sl m l'
assume "¬ is_empty l"
"inv l"
"(m, l') = remove_max l"
"sorted sl ∧ (∀x∈set l. ∀y∈List.set sl. x ≤ y)"
thus "sorted (m # sl) ∧ (∀x∈set l'. ∀y∈List.set (m # sl). x ≤ y)"
using remove_max_set[of l m l'] remove_max_max[of l m l']
by (auto intro: Max_ge)
qed
thus ?thesis
by simp
qed

lemma sorted_ssort: "sorted (ssort i)"
unfolding ssort_def
using sorted_ssort'[of "of_list i" "[]"] of_list_inv
by auto

lemma permutation_ssort: "mset (ssort l) = mset l"
unfolding ssort_def
using mset_ssort'[of "of_list l" "[]"]
using multiset_of_list of_list_inv
by simp

end

text‹Using assumptions given in the definitions of the locales {\em
Collection} and {\em RemoveMax} for the functions {\em multiset},
{\em is\_empty}, {\em of\_list} and {\em remove\_max} it is no
difficulty to show:›

sublocale RemoveMax < Sort ssort
by (unfold_locales) (auto simp add: sorted_ssort permutation_ssort)

end