Theory Global_Invariants

(*<*)
(*
 * Copyright 2015, NICTA
 *
 * This software may be distributed and modified according to the terms of
 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
 * See "LICENSE_BSD2.txt" for details.
 *
 * @TAG(NICTA_BSD)
 *)

theory Global_Invariants
imports
  Proofs_Basis
begin

(*>*)
section‹Global Invariants \label{sec:global-invariants}›


subsection‹The valid references invariant›

text‹

The key safety property of a GC is that it does not free objects that
are reachable from mutator roots. The GC also requires that there are
objects for all references reachable from grey objects.

›

definition valid_refs_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
 "valid_refs_inv = (m x. mut_m.reachable m x  grey_reachable x  valid_ref x)"

text‹

The remainder of the invariants support the inductive argument that
this one holds.

›


subsection‹The strong-tricolour invariant \label{sec:strong-tricolour-invariant} ›

text‹

As the GC algorithm uses both insertion and deletion barriers, it
preserves the \emph{strong tricolour-invariant}:

›

abbreviation points_to_white :: "'ref  'ref  ('field, 'mut, 'payload, 'ref) lsts_pred" (infix "points'_to'_white" 51) where
  "x points_to_white y  x points_to y  white y"

definition strong_tricolour_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "strong_tricolour_inv = (b w. black b  ¬b points_to_white w)"

text‹

Intuitively this invariant says that there are no pointers from
completely processed objects to the unexplored space; i.e., the grey
references properly separate the two. In contrast the weak tricolour
invariant allows such pointers, provided there is a grey reference
that protects the unexplored object.

›

definition has_white_path_to :: "'ref  'ref  ('field, 'mut, 'payload, 'ref) lsts_pred" (infix "has'_white'_path'_to" 51) where
  "x has_white_path_to y = (λs. (λx y. (x points_to_white y) s)** x y)"

definition grey_protects_white :: "'ref  'ref  ('field, 'mut, 'payload, 'ref) lsts_pred" (infix "grey'_protects'_white" 51) where
  "g grey_protects_white w = (grey g  g has_white_path_to w)"

definition weak_tricolour_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "weak_tricolour_inv =
     (b w. black b  b points_to_white w  (g. g grey_protects_white w))"

lemma "strong_tricolour_inv s  weak_tricolour_inv s"
by (clarsimp simp: strong_tricolour_inv_def weak_tricolour_inv_def grey_protects_white_def) (* FIXME elide *)

text‹

The key invariant that the mutators establish as they perform get_roots›: they protect their white-reachable references with grey
objects.

›

definition in_snapshot :: "'ref  ('field, 'mut, 'payload, 'ref) lsts_pred" where
  "in_snapshot r = (black r  (g. g grey_protects_white r))"

definition (in mut_m) reachable_snapshot_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "reachable_snapshot_inv = (r. reachable r  in_snapshot r)"


subsection‹Phase invariants \label{sec:phase-invariants}›

text (in mut_m) ‹

The phase structure of this GC algorithm greatly complicates this
safety proof. The following assertions capture this structure in
several relations.

We begin by relating the mutators' @{const
"mut_ghost_hs_phase"} to @{const "sys_ghost_hs_phase"},
which tracks the GC's. Each mutator can be at most one handshake step
behind the GC. If any mutator is behind then the GC is stalled on a
pending handshake. We include the handshake type as
get_work› can occur any number of times.

›

definition hp_step_rel :: "(bool × hs_type × hs_phase × hs_phase) set" where
  "hp_step_rel =
  { True }  × ({ (ht_NOOP, hp, hp) |hp. hp  {hp_Idle, hp_IdleInit, hp_InitMark, hp_Mark} }
             { (ht_GetRoots, hp_IdleMarkSweep, hp_IdleMarkSweep)
              , (ht_GetWork,  hp_IdleMarkSweep, hp_IdleMarkSweep) })
 { False } × { (ht_NOOP,     hp_Idle,          hp_IdleMarkSweep)
              , (ht_NOOP,     hp_IdleInit,      hp_Idle)
              , (ht_NOOP,     hp_InitMark,      hp_IdleInit)
              , (ht_NOOP,     hp_Mark,          hp_InitMark)
              , (ht_GetRoots, hp_IdleMarkSweep, hp_Mark)
              , (ht_GetWork,  hp_IdleMarkSweep, hp_IdleMarkSweep) }"

definition handshake_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "handshake_phase_inv = (m.
     sys_ghost_hs_in_sync m  sys_hs_type  sys_ghost_hs_phase  mut_m.mut_ghost_hs_phase m  hp_step_rel
   (sys_hs_pending m  ¬sys_ghost_hs_in_sync m))"

text ‹

In some phases we need to know that the insertion and deletion
barriers are installed, in order to preserve the snapshot. These can
ignore TSO effects as the process doing the marking holds the TSO lock
until the mark is committed to the shared memory (see
\S\ref{def:valid_W_inv}).

Note that it is not easy to specify precisely when the snapshot (of
objects the GC will retain) is taken due to the raggedness of the
initialisation.

Read the following as ``when mutator m› is past the
specified handshake, and has yet to reach the next one, ... holds.''

›

abbreviation marked_insertion :: "('field, 'payload, 'ref) mem_store_action  ('field, 'mut, 'payload, 'ref) lsts_pred" where
  "marked_insertion w  λs. case w of mw_Mutate r f (Some r')  marked r' s | _  True"

abbreviation marked_deletion :: "('field, 'payload, 'ref) mem_store_action  ('field, 'mut, 'payload, 'ref) lsts_pred" where
  "marked_deletion w  λs. case w of mw_Mutate r f opt_r'  obj_at_field_on_heap (λr'. marked r' s) r f s | _  True"

context mut_m
begin

definition marked_insertions :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "marked_insertions = (w. tso_pending_store (mutator m) w  marked_insertion w)"

definition marked_deletions :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "marked_deletions = (w. tso_pending_store (mutator m) w  marked_deletion w)"

primrec mutator_phase_inv_aux :: "hs_phase  ('field, 'mut, 'payload, 'ref) lsts_pred" where
  "mutator_phase_inv_aux hp_Idle          = True"
| "mutator_phase_inv_aux hp_IdleInit      = no_black_refs"
| "mutator_phase_inv_aux hp_InitMark      = marked_insertions"
| "mutator_phase_inv_aux hp_Mark          = (marked_insertions  marked_deletions)"
| "mutator_phase_inv_aux hp_IdleMarkSweep = (marked_insertions  marked_deletions  reachable_snapshot_inv)"

abbreviation mutator_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "mutator_phase_inv  mutator_phase_inv_aux $ mut_ghost_hs_phase"

end

abbreviation mutators_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "mutators_phase_inv  (m. mut_m.mutator_phase_inv m)"

text‹

This is what the GC guarantees. Read this as ``when the GC is at or
past the specified handshake, ... holds.''

›

primrec sys_phase_inv_aux :: "hs_phase  ('field, 'mut, 'payload, 'ref) lsts_pred" where
  "sys_phase_inv_aux hp_Idle          = ( (If sys_fA = sys_fM Then black_heap Else white_heap)  no_grey_refs )"
| "sys_phase_inv_aux hp_IdleInit      = no_black_refs"
| "sys_phase_inv_aux hp_InitMark      = (sys_fA  sys_fM  no_black_refs)"
| "sys_phase_inv_aux hp_Mark          = True"
| "sys_phase_inv_aux hp_IdleMarkSweep = ( (sys_phase = ph_Idle  tso_pending_store gc (mw_Phase ph_Idle))  no_grey_refs )"

abbreviation sys_phase_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "sys_phase_inv  sys_phase_inv_aux $ sys_ghost_hs_phase"


subsubsection‹Writes to shared GC variables›

text‹

Relate @{const "sys_ghost_hs_phase"}, @{const "gc_phase"},
@{const "sys_phase"} and writes to the phase in the GC's TSO buffer.

The first relation treats the case when the GC's TSO buffer does not
contain any writes to the phase.

The second relation exhibits the data race on the phase variable: we
need to precisely track the possible states of the GC's TSO buffer.

›

definition handshake_phase_rel :: "hs_phase  bool  gc_phase  bool" where
  "handshake_phase_rel hp in_sync ph =
     (case hp of
       hp_Idle           ph = ph_Idle
     | hp_IdleInit       ph = ph_Idle  (in_sync  ph = ph_Init)
     | hp_InitMark       ph = ph_Init  (in_sync  ph = ph_Mark)
     | hp_Mark           ph = ph_Mark
     | hp_IdleMarkSweep  ph = ph_Mark  (in_sync  ph  { ph_Idle, ph_Sweep }))"

definition phase_rel :: "(bool × hs_phase × gc_phase × gc_phase × ('field, 'payload, 'ref) mem_store_action list) set" where
  "phase_rel =
     ({ (in_sync, hp, ph, ph, []) |in_sync hp ph. handshake_phase_rel hp in_sync ph }
     ({True} × { (hp_IdleInit, ph_Init, ph_Idle, [mw_Phase ph_Init]),
                  (hp_InitMark, ph_Mark, ph_Init, [mw_Phase ph_Mark]),
                  (hp_IdleMarkSweep, ph_Sweep, ph_Mark, [mw_Phase ph_Sweep]),
                  (hp_IdleMarkSweep, ph_Idle, ph_Mark, [mw_Phase ph_Sweep, mw_Phase ph_Idle]),
                  (hp_IdleMarkSweep, ph_Idle, ph_Sweep, [mw_Phase ph_Idle]) }))"

definition phase_rel_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "phase_rel_inv = ((m. sys_ghost_hs_in_sync m)  sys_ghost_hs_phase  gc_phase  sys_phase  tso_pending_phase gc  phase_rel)"

text‹

Similarly we track the validity of @{const "sys_fM"} (respectively,
@{const "sys_fA"}) wrt @{const "gc_fM"} (@{const "sys_fA"}) and the
handshake phase. We also include the TSO lock to rule out the GC
having any pending marks during the @{const "hp_Idle"} handshake
phase.

›

definition fM_rel :: "(bool × hs_phase × gc_mark × gc_mark × ('field, 'payload, 'ref) mem_store_action list × bool) set" where
  "fM_rel =
      { (in_sync, hp, fM, fM, [], l) |fM hp in_sync l. hp = hp_Idle  ¬in_sync }
     { (in_sync, hp_Idle, fM, fM', [], l) |fM fM' in_sync l. in_sync }
     { (in_sync, hp_Idle, ¬fM, fM, [mw_fM (¬fM)], False) |fM in_sync. in_sync }"

definition fM_rel_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "fM_rel_inv = ((m. sys_ghost_hs_in_sync m)  sys_ghost_hs_phase  gc_fM  sys_fM  tso_pending_fM gc  (sys_mem_lock = Some gc)  fM_rel)"

definition fA_rel :: "(bool × hs_phase × gc_mark × gc_mark × ('field, 'payload, 'ref) mem_store_action list) set" where
  "fA_rel =
      { (in_sync, hp_Idle,          fA,  fM, []) |fA fM in_sync. ¬in_sync  fA = fM }
     { (in_sync, hp_IdleInit,      fA, ¬fA, []) |fA in_sync. True }
     { (in_sync, hp_InitMark,      fA, ¬fA, [mw_fA (¬fA)]) |fA in_sync. in_sync }
     { (in_sync, hp_InitMark,      fA,  fM, []) |fA fM in_sync. ¬in_sync  fA  fM }
     { (in_sync, hp_Mark,          fA,  fA, []) |fA in_sync. True }
     { (in_sync, hp_IdleMarkSweep, fA,  fA, []) |fA in_sync. True }"

definition fA_rel_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "fA_rel_inv = ((m. sys_ghost_hs_in_sync m)  sys_ghost_hs_phase  sys_fA  gc_fM  tso_pending_fA gc  fA_rel)"


subsection‹Worklist invariants \label{def:valid_W_inv}›

text‹

The worklists track the grey objects. The following invariant asserts
that grey objects are marked on the heap except for a few steps near
the end of @{const "mark_object_fn"}, the processes' worklists and
@{const "ghost_honorary_grey"}s are disjoint, and that pending marks
are sensible.

The safety of the collector does not to depend on disjointness; we
include it as proof that the single-threading of grey objects in the
implementation is sound.

Note that the phase invariants of \S\ref{sec:phase-invariants} limit
the scope of this invariant.

›

definition valid_W_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "valid_W_inv =
    ((p r. r in_W p  (sys_mem_lock  Some p  r in_ghost_honorary_grey p)  marked r)
   (p q. p  q  WL p  WL q = {})
   (p q r. ¬(r in_ghost_honorary_grey p  r in_W q))
   (EMPTY sys_ghost_honorary_grey)
   (p r fl. tso_pending_store p (mw_Mark r fl)
        fl = sys_fM
          r in_ghost_honorary_grey p
          tso_locked_by p
          white r
          tso_pending_mark p = [mw_Mark r fl] ))"


subsection‹Coarse invariants about the stores a process can issue›

abbreviation gc_writes :: "('field, 'payload, 'ref) mem_store_action  bool" where
  "gc_writes w  case w of mw_Mark _ _  True | mw_Phase _  True | mw_fM _  True | mw_fA _  True | _  False"

abbreviation mut_writes :: "('field, 'payload, 'ref) mem_store_action  bool" where
  "mut_writes w  case w of mw_Mutate _ _ _  True | mw_Mark _ _  True | _  False"

definition tso_store_inv :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "tso_store_inv =
    ((w.   tso_pending_store gc          w  gc_writes w)
    (m w. tso_pending_store (mutator m) w  mut_writes w))"


subsection‹The global invariants collected›

definition invs :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "invs =
   (handshake_phase_inv
   phase_rel_inv
   strong_tricolour_inv
   sys_phase_inv
   tso_store_inv
   valid_refs_inv
   valid_W_inv
   mutators_phase_inv
   fA_rel_inv  fM_rel_inv)"


subsection‹Initial conditions \label{sec:initial-conditions}›

text‹

We ask that the GC and system initially agree on some things:
\begin{itemize}

\item All objects on the heap are marked (have their flags equal to
  @{const "sys_fM"}, and there are no grey references, i.e. the heap
  is uniformly black.

\item The GC and system have the same values for @{term "fA"}, @{term
  "fM"}, etc. and the phase is @{term "Idle"}.

\item No process holds the TSO lock and all write buffers are empty.

\item All root-reachable references are backed by objects.

\end{itemize}
Note that these are merely sufficient initial conditions and can be
weakened.

›

locale gc_system =
  fixes initial_mark :: gc_mark
begin

definition gc_initial_state :: "('field, 'mut, 'payload, 'ref) lst_pred" where
  "gc_initial_state s =
    (fM s = initial_mark
    phase s = ph_Idle
    ghost_honorary_grey s = {}
    W s = {})"

definition mut_initial_state :: "('field, 'mut, 'payload, 'ref) lst_pred" where
  "mut_initial_state s =
    (ghost_hs_phase s = hp_IdleMarkSweep
    ghost_honorary_grey s = {}
    ghost_honorary_root s = {}
    W s = {})"

definition sys_initial_state :: "('field, 'mut, 'payload, 'ref) lst_pred" where
  "sys_initial_state s =
    ((m. ¬hs_pending s m  ghost_hs_in_sync s m)
    ghost_hs_phase s = hp_IdleMarkSweep  hs_type s = ht_GetRoots
    obj_mark ` ran (heap s)  {initial_mark}
    fA s = initial_mark
    fM s = initial_mark
    phase s = ph_Idle
    ghost_honorary_grey s = {}
    W s = {}
    (p. mem_store_buffers s p = [])
    mem_lock s = None)"

abbreviation
  "root_reachable y  m x. x  mut_m.mut_roots m  x reaches y"

definition valid_refs :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "valid_refs = (y. root_reachable y  valid_ref y)"

definition gc_system_init :: "('field, 'mut, 'payload, 'ref) lsts_pred" where
  "gc_system_init =
      ((λs. gc_initial_state (s gc))
      (λs. m. mut_initial_state (s (mutator m)))
      (λs. sys_initial_state (s sys))
      valid_refs)"

text‹

The system consists of the programs and these constraints on the initial state.

›

abbreviation gc_system :: "('field, 'mut, 'payload, 'ref) gc_system" where
  "gc_system  PGMs = gc_coms, INIT = gc_system_init, FAIR = True" (* FIXME add fairness hypotheses *)

end

(*<*)

end
(*>*)