# Theory Gabow_SCC

section ‹Enumerating the SCCs of a Graph \label{sec:scc}›
theory Gabow_SCC
imports Gabow_Skeleton
begin

text ‹
As a first variant, we implement an algorithm that computes a list of SCCs
of a graph, in topological order. This is the standard variant described by
Gabow~\<^cite>‹"Gabow00"›.
›

section ‹Specification›
context fr_graph
begin
text ‹We specify a distinct list that covers all reachable nodes and
contains SCCs in topological order›

definition "compute_SCC_spec ≡ SPEC (λl.
distinct l ∧ ⋃(set l) = E⇧*V0 ∧ (∀U∈set l. is_scc E U)
∧ (∀i j. i<j ∧ j<length l ⟶ l!j × l!i ∩ E⇧* = {}) )"
end

section ‹Extended Invariant›

locale cscc_invar_ext = fr_graph G
for G :: "('v,'more) graph_rec_scheme" +
fixes l :: "'v set list" and D :: "'v set"
assumes l_is_D: "⋃(set l) = D" ― ‹The output contains all done CNodes›
assumes l_scc: "set l ⊆ Collect (is_scc E)" ― ‹The output contains only SCCs›
assumes l_no_fwd: "⋀i j. ⟦i<j; j<length l⟧ ⟹ l!j × l!i ∩ E⇧* = {}"
― ‹The output contains no forward edges›
begin
lemma l_no_empty: "{}∉set l" using l_scc by (auto simp: in_set_conv_decomp)
end

locale cscc_outer_invar_loc = outer_invar_loc G it D + cscc_invar_ext G l D
for G :: "('v,'more) graph_rec_scheme" and it l D
begin
lemma locale_this: "cscc_outer_invar_loc G it l D" by unfold_locales
lemma abs_outer_this: "outer_invar_loc G it D" by unfold_locales
end

locale cscc_invar_loc = invar_loc G v0 D0 p D pE + cscc_invar_ext G l D
for G :: "('v,'more) graph_rec_scheme" and v0 D0 and l :: "'v set list"
and p D pE
begin
lemma locale_this: "cscc_invar_loc G v0 D0 l p D pE" by unfold_locales
lemma invar_this: "invar_loc G v0 D0 p D pE" by unfold_locales
end

context fr_graph
begin
definition "cscc_outer_invar ≡ λit (l,D). cscc_outer_invar_loc G it l D"
definition "cscc_invar ≡ λv0 D0 (l,p,D,pE). cscc_invar_loc G v0 D0 l p D pE"
end

section ‹Definition of the SCC-Algorithm›

context fr_graph
begin
definition compute_SCC :: "'v set list nres" where
"compute_SCC ≡ do {
let so = ([],{});
(l,D) ← FOREACHi cscc_outer_invar V0 (λv0 (l,D0). do {
if v0∉D0 then do {
let s = (l,initial v0 D0);

(l,p,D,pE) ←
WHILEIT (cscc_invar v0 D0)
(λ(l,p,D,pE). p ≠ []) (λ(l,p,D,pE).
do {
― ‹Select edge from end of path›
(vo,(p,D,pE)) ← select_edge (p,D,pE);

ASSERT (p≠[]);
case vo of
Some v ⇒ do {
if v ∈ ⋃(set p) then do {
― ‹Collapse›
RETURN (l,collapse v (p,D,pE))
} else if v∉D then do {
― ‹Edge to new node. Append to path›
RETURN (l,push v (p,D,pE))
} else RETURN (l,p,D,pE)
}
| None ⇒ do {
― ‹No more outgoing edges from current node on path›
ASSERT (pE ∩ last p × UNIV = {});
let V = last p;
let (p,D,pE) = pop (p,D,pE);
let l = V#l;
RETURN (l,p,D,pE)
}
}) s;
ASSERT (p=[] ∧ pE={});
RETURN (l,D)
} else
RETURN (l,D0)
}) so;
RETURN l
}"
end

section ‹Preservation of Invariant Extension›
context cscc_invar_ext
begin
lemma l_disjoint:
assumes A: "i<j" "j<length l"
shows "l!i ∩ l!j = {}"
proof (rule disjointI)
fix u
assume "u∈l!i" "u∈l!j"
with l_no_fwd A show False by auto
qed

corollary l_distinct: "distinct l"
using l_disjoint l_no_empty
by (metis distinct_conv_nth inf_idem linorder_cases nth_mem)
end

context fr_graph
begin
definition "cscc_invar_part ≡ λ(l,p,D,pE). cscc_invar_ext G l D"

lemma cscc_invarI[intro?]:
assumes "invar v0 D0 PDPE"
assumes "invar v0 D0 PDPE ⟹ cscc_invar_part (l,PDPE)"
shows "cscc_invar v0 D0 (l,PDPE)"
using assms
unfolding initial_def cscc_invar_def invar_def
apply (simp split: prod.split_asm)
apply intro_locales
done

thm cscc_invarI[of v_0 D_0 s l]

lemma cscc_outer_invarI[intro?]:
assumes "outer_invar it D"
assumes "outer_invar it D ⟹ cscc_invar_ext G l D"
shows "cscc_outer_invar it (l,D)"
using assms
unfolding initial_def cscc_outer_invar_def outer_invar_def
apply (simp split: prod.split_asm)
apply intro_locales
done

lemma cscc_invar_initial[simp, intro!]:
assumes A: "v0∈it" "v0∉D0"
assumes INV: "cscc_outer_invar it (l,D0)"
shows "cscc_invar_part (l,initial v0 D0)"
proof -
from INV interpret cscc_outer_invar_loc G it l D0
unfolding cscc_outer_invar_def by simp

show ?thesis
unfolding cscc_invar_part_def initial_def
apply simp
by unfold_locales
qed

lemma cscc_invar_pop:
assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
assumes "invar v0 D0 (pop (p,D,pE))"
assumes NE[simp]: "p≠[]"
assumes NO': "pE ∩ (last p × UNIV) = {}"
shows "cscc_invar_part (last p # l, pop (p,D,pE))"
proof -
from INV interpret cscc_invar_loc G v0 D0 l p D pE
unfolding cscc_invar_def by simp

have AUX_l_scc: "is_scc E (last p)"
unfolding is_scc_pointwise
proof safe
{
assume "last p = {}" thus False
using p_no_empty by (cases p rule: rev_cases) auto
}

fix u v
assume "u∈last p" "v∈last p"
with p_sc[of "last p"] have "(u,v) ∈ (lvE ∩ last p × last p)⇧*" by auto
with lvE_ss_E show "(u,v)∈(E ∩ last p × last p)⇧*"
by (metis Int_mono equalityE rtrancl_mono_mp)

fix u'
assume "u'∉last p" "(u,u')∈E⇧*" "(u',v)∈E⇧*"

from ‹u'∉last p› ‹u∈last p› ‹(u,u')∈E⇧*›
and rtrancl_reachable_induct[OF order_refl lastp_un_D_closed[OF NE NO']]
have "u'∈D" by auto
with ‹(u',v)∈E⇧*› and rtrancl_reachable_induct[OF order_refl D_closed]
have "v∈D" by auto
with ‹v∈last p› p_not_D show False by (cases p rule: rev_cases) auto
qed

{
fix i j
assume A: "i<j" "j<Suc (length l)"
have "l ! (j - Suc 0) × (last p # l) ! i ∩ E⇧* = {}"
proof (rule disjointI, safe)
fix u v
assume "(u, v) ∈ E⇧*" "u ∈ l ! (j - Suc 0)" "v ∈ (last p # l) ! i"
from ‹u ∈ l ! (j - Suc 0)› A have "u∈⋃(set l)"
by (metis Ex_list_of_length Suc_pred UnionI length_greater_0_conv
less_nat_zero_code not_less_eq nth_mem)
with l_is_D have "u∈D" by simp
with rtrancl_reachable_induct[OF order_refl D_closed] ‹(u,v)∈E⇧*›
have "v∈D" by auto

show False proof cases
assume "i=0" hence "v∈last p" using ‹v ∈ (last p # l) ! i› by simp
with p_not_D ‹v∈D› show False by (cases p rule: rev_cases) auto
next
assume "i≠0" with ‹v ∈ (last p # l) ! i› have "v∈l!(i - 1)" by auto
with l_no_fwd[of "i - 1" "j - 1"]
and ‹u ∈ l ! (j - Suc 0)› ‹(u, v) ∈ E⇧*› ‹i≠0› A
show False by fastforce
qed
qed
} note AUX_l_no_fwd = this

show ?thesis
unfolding cscc_invar_part_def pop_def apply simp
apply unfold_locales
apply clarsimp_all
using l_is_D apply auto []

using l_scc AUX_l_scc apply auto []

apply (rule AUX_l_no_fwd, assumption+) []
done
qed

thm cscc_invar_pop[of v_0 D_0 l p D pE]

lemma cscc_invar_unchanged:
assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
shows "cscc_invar_part (l,p',D,pE')"
using INV unfolding cscc_invar_def cscc_invar_part_def cscc_invar_loc_def
by simp

corollary cscc_invar_collapse:
assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
shows "cscc_invar_part (l,collapse v (p',D,pE'))"
unfolding collapse_def

corollary cscc_invar_push:
assumes INV: "cscc_invar v0 D0 (l,p,D,pE)"
shows "cscc_invar_part (l,push v (p',D,pE'))"
unfolding push_def

lemma cscc_outer_invar_initial: "cscc_invar_ext G [] {}"
by unfold_locales auto

lemma cscc_invar_outer_newnode:
assumes A: "v0∉D0" "v0∈it"
assumes OINV: "cscc_outer_invar it (l,D0)"
assumes INV: "cscc_invar v0 D0 (l',[],D',pE)"
shows "cscc_invar_ext G l' D'"
proof -
from OINV interpret cscc_outer_invar_loc G it l D0
unfolding cscc_outer_invar_def by simp
from INV interpret inv: cscc_invar_loc G v0 D0 l' "[]" D' pE
unfolding cscc_invar_def by simp

show ?thesis
by unfold_locales

qed

lemma cscc_invar_outer_Dnode:
assumes "cscc_outer_invar it (l, D)"
shows "cscc_invar_ext G l D"
using assms

lemmas cscc_invar_preserve = invar_preserve
cscc_invar_initial
cscc_invar_pop cscc_invar_collapse cscc_invar_push cscc_invar_unchanged
cscc_outer_invar_initial cscc_invar_outer_newnode cscc_invar_outer_Dnode

text ‹On termination, the invariant implies the specification›
lemma cscc_finI:
assumes INV: "cscc_outer_invar {} (l,D)"
shows fin_l_is_scc: "⟦U∈set l⟧ ⟹ is_scc E U"
and fin_l_distinct: "distinct l"
and fin_l_is_reachable: "⋃(set l) = E⇧*  V0"
and fin_l_no_fwd: "⟦i<j; j<length l⟧ ⟹ l!j ×l!i ∩ E⇧* = {}"
proof -
from INV interpret cscc_outer_invar_loc G "{}" l D
unfolding cscc_outer_invar_def by simp

show "⟦U∈set l⟧ ⟹ is_scc E U" using l_scc by auto

show "distinct l" by (rule l_distinct)

show "⋃(set l) = E⇧*  V0"
using fin_outer_D_is_reachable[OF outer_invar_this] l_is_D
by auto

show "⟦i<j; j<length l⟧ ⟹ l!j ×l!i ∩ E⇧* = {}"
by (rule l_no_fwd)

qed

end

section ‹Main Correctness Proof›

context fr_graph
begin
lemma invar_from_cscc_invarI: "cscc_invar v0 D0 (L,PDPE) ⟹ invar v0 D0 PDPE"
unfolding cscc_invar_def invar_def
apply (simp split: prod.splits)
unfolding cscc_invar_loc_def by simp

lemma outer_invar_from_cscc_invarI:
"cscc_outer_invar it (L,D) ⟹outer_invar it D"
unfolding cscc_outer_invar_def outer_invar_def
apply (simp split: prod.splits)
unfolding cscc_outer_invar_loc_def by simp

text ‹With the extended invariant and the auxiliary lemmas, the actual
correctness proof is straightforward:›
theorem compute_SCC_correct: "compute_SCC ≤ compute_SCC_spec"
proof -
note [[goals_limit = 2]]
note [simp del] = Union_iff

show ?thesis
unfolding compute_SCC_def compute_SCC_spec_def select_edge_def select_def
apply (refine_rcg
WHILEIT_rule[where R="inv_image (abs_wf_rel v0) snd" for v0]
refine_vcg
)

apply (vc_solve
rec: cscc_invarI cscc_outer_invarI
solve: cscc_invar_preserve cscc_finI
intro: invar_from_cscc_invarI outer_invar_from_cscc_invarI
dest!: sym[of "pop A" for A]
simp: pE_fin'[OF invar_from_cscc_invarI] finite_V0
)
apply auto
done
qed

text ‹Simple proof, for presentation›
context
notes [refine]=refine_vcg
notes [[goals_limit = 1]]
begin
theorem "compute_SCC ≤ compute_SCC_spec"
unfolding compute_SCC_def compute_SCC_spec_def select_edge_def select_def
by (refine_rcg
WHILEIT_rule[where R="inv_image (abs_wf_rel v0) snd" for v0])
(vc_solve
rec: cscc_invarI cscc_outer_invarI solve: cscc_invar_preserve cscc_finI
intro: invar_from_cscc_invarI outer_invar_from_cscc_invarI
dest!: sym[of "pop A" for A]
simp: pE_fin'[OF invar_from_cscc_invarI] finite_V0, auto)
end

end

section ‹Refinement to Gabow's Data Structure›

context GS begin
definition "seg_set_impl l u ≡ do {
(_,res) ← WHILET
(λ(l,_). l<u)
(λ(l,res). do {
ASSERT (l<length S);
let x = S!l;
ASSERT (x∉res);
RETURN (Suc l,insert x res)
})
(l,{});

RETURN res
}"

lemma seg_set_impl_aux:
fixes l u
shows "⟦l<u; u≤length S; distinct S⟧ ⟹ seg_set_impl l u
≤ SPEC (λr. r = {S!j | j. l≤j ∧ j<u})"
unfolding seg_set_impl_def
apply (refine_rcg
WHILET_rule[where
I="λ(l',res). res = {S!j | j. l≤j ∧ j<l'} ∧ l≤l' ∧ l'≤u"
and R="measure (λ(l',_). u-l')"
]
refine_vcg)

apply (auto simp: less_Suc_eq nth_eq_iff_index_eq)
done

lemma (in GS_invar) seg_set_impl_correct:
assumes "i<length B"
shows "seg_set_impl (seg_start i) (seg_end i) ≤ SPEC (λr. r=p_α!i)"
apply (refine_rcg order_trans[OF seg_set_impl_aux] refine_vcg)

using assms
apply (simp_all add: seg_start_less_end seg_end_bound S_distinct) [3]

apply (auto simp: p_α_def assms seg_def) []
done

definition "last_seg_impl
≡ do {
ASSERT (length B - 1 < length B);
seg_set_impl (seg_start (length B - 1)) (seg_end (length B - 1))
}"

lemma (in GS_invar) last_seg_impl_correct:
assumes "p_α ≠ []"
shows "last_seg_impl ≤ SPEC (λr. r=last p_α)"
unfolding last_seg_impl_def
apply (refine_rcg order_trans[OF seg_set_impl_correct] refine_vcg)
using assms apply (auto simp add: p_α_def last_conv_nth)
done

end

context fr_graph
begin

definition "last_seg_impl s ≡ GS.last_seg_impl s"
lemmas last_seg_impl_def_opt =
last_seg_impl_def[abs_def, THEN opt_GSdef,
unfolded GS.last_seg_impl_def GS.seg_set_impl_def
GS.seg_start_def GS.seg_end_def GS_sel_simps]
(* TODO: Some potential for optimization here: the assertion
guarantees that length B - 1 + 1 = length B !*)

lemma last_seg_impl_refine:
assumes A: "(s,(p,D,pE))∈GS_rel"
assumes NE: "p≠[]"
shows "last_seg_impl s ≤ ⇓Id (RETURN (last p))"
proof -
from A have
[simp]: "p=GS.p_α s ∧ D=GS.D_α s ∧ pE=GS.pE_α s"
and INV: "GS_invar s"
by (auto simp add: GS_rel_def br_def GS_α_split)

show ?thesis
unfolding last_seg_impl_def[abs_def]
apply (rule order_trans[OF GS_invar.last_seg_impl_correct])
using INV NE
apply (simp_all)
done
qed

definition compute_SCC_impl :: "'v set list nres" where
"compute_SCC_impl ≡ do {
stat_start_nres;
let so = ([],Map.empty);
(l,D) ← FOREACHi (λit (l,s). cscc_outer_invar it (l,oGS_α s))
V0 (λv0 (l,I0). do {
if ¬is_done_oimpl v0 I0 then do {
let ls = (l,initial_impl v0 I0);

(l,(S,B,I,P))←WHILEIT (λ(l,s). cscc_invar v0 (oGS_α I0) (l,GS.α s))
(λ(l,s). ¬path_is_empty_impl s) (λ(l,s).
do {
― ‹Select edge from end of path›
(vo,s) ← select_edge_impl s;

case vo of
Some v ⇒ do {
if is_on_stack_impl v s then do {
s←collapse_impl v s;
RETURN (l,s)
} else if ¬is_done_impl v s then do {
― ‹Edge to new node. Append to path›
RETURN (l,push_impl v s)
} else do {
― ‹Edge to done node. Skip›
RETURN (l,s)
}
}
| None ⇒ do {
― ‹No more outgoing edges from current node on path›
scc ← last_seg_impl s;
s←pop_impl s;
let l = scc#l;
RETURN (l,s)
}
}) (ls);
RETURN (l,I)
} else RETURN (l,I0)
}) so;
stat_stop_nres;
RETURN l
}"

lemma compute_SCC_impl_refine: "compute_SCC_impl ≤ ⇓Id compute_SCC"
proof -
note [refine2] = bind_Let_refine2[OF last_seg_impl_refine]

have [refine2]: "⋀s' p D pE l' l v' v. ⟦
(s',(p,D,pE))∈GS_rel;
(l',l)∈Id;
(v',v)∈Id;
v∈⋃(set p)
⟧ ⟹ do { s'←collapse_impl v' s'; RETURN (l',s') }
≤ ⇓(Id ×⇩r GS_rel) (RETURN (l,collapse v (p,D,pE)))"
apply (refine_rcg order_trans[OF collapse_refine] refine_vcg)
apply assumption+
apply (auto simp add: pw_le_iff refine_pw_simps)
done

note [[goals_limit = 1]]
show ?thesis
unfolding compute_SCC_impl_def compute_SCC_def
apply (refine_rcg
bind_refine'
select_edge_refine push_refine
pop_refine
(*collapse_refine*)
initial_refine
oinitial_refine
(*last_seg_impl_refine*)
prod_relI IdI
inj_on_id
)

apply refine_dref_type
apply (vc_solve (nopre) solve: asm_rl I_to_outer
simp: GS_rel_def br_def GS.α_def oGS_rel_def oGS_α_def
is_on_stack_refine path_is_empty_refine is_done_refine is_done_orefine
)

done
qed

end

end