Theory Compiler2

(*  Title:       A Shorter Compiler Correctness Proof for Language IMP
    Author:      Pasquale Noce
                 Software Engineer at HID Global, Italy
                 pasquale dot noce dot lavoro at gmail dot com
                 pasquale dot noce at hidglobal dot com
*)

section "Compiler correctness"

theory Compiler2
  imports Compiler
begin


subsection "Preliminary definitions and lemmas"

text ‹
Now everything is ready for the compiler correctness proof. First, two predicates are introduced,
@{text execl} and @{text execl_all}, both taking as inputs a program, i.e. a list of instructions,
@{text P} and a list of program configurations @{text cfs}, and respectively denoted using notations
@{text "P ⊨ cfs"} and @{text "P ⊨ cfs□"}. In more detail:

  ▪ @{text "P ⊨ cfs"} means that program @{text P} \emph{may} transform each configuration within
@{text cfs} into the subsequent one, if any (word \emph{may} reflects the fact that programs can be
non-deterministic in this case study).
\\Thus, @{text execl} formalizes the notion of a \emph{small-step} program execution.

  ▪ @{text "P ⊨ cfs□"} reinforces @{text "P ⊨ cfs"} by additionally requiring that @{text cfs} be
nonempty, the initial program counter be zero (viz. execution starts from the first instruction in
@{text P}), and the final program counter falls outside @{text P} (viz. execution terminates).
\\Thus, @{text execl_all} formalizes the notion of a \emph{complete small-step} program execution,
so that assumptions @{text "acomp a ⊨ cfs□"}, @{text "bcomp x ⊨ cfs□"}, @{text "ccomp c ⊨ cfs□"}
will be used in the compiler correctness proofs for arithmetic/boolean expressions and commands.

Moreover, predicates @{text apred}, @{text bpred}, and @{text cpred} are defined to capture the link
between the initial and the final configuration upon the execution of an arithmetic expression, a
boolean expression, and a whole program, respectively, and abbreviation @{text off} is introduced as
a commodity to shorten the subsequent formal text.

\null
›

fun execl :: "instr list ⇒ config list ⇒ bool" (infix ‹⊨› 55) where
"P ⊨ cf # cf' # cfs = (P ⊢ cf → cf' ∧ P ⊨ cf' # cfs)" |
"P ⊨ _ = True"

definition execl_all :: "instr list ⇒ config list ⇒ bool" (‹(_/ ⊨/ _□)› 55) where
"P ⊨ cfs□ ≡ P ⊨ cfs ∧ cfs ≠ [] ∧
  fst (cfs ! 0) = 0 ∧ fst (cfs ! (length cfs - 1)) ∉ {0..<size P}"

definition apred :: "aexp ⇒ config ⇒ config ⇒ bool" where
"apred ≡ λa (pc, s, stk) (pc', s', stk').
  pc' = pc + size (acomp a) ∧ s' = s ∧ stk' = aval a s # stk"

definition bpred :: "bexp × bool × int ⇒ config ⇒ config ⇒ bool" where
"bpred ≡ λ(b, f, i) (pc, s, stk) (pc', s', stk').
  pc' = pc + size (bcomp (b, f, i)) + (if bval b s = f then i else 0) ∧
    s' = s ∧ stk' = stk"

definition cpred :: "com ⇒ config ⇒ config ⇒ bool" where
"cpred ≡ λc (pc, s, stk) (pc', s', stk').
  pc' = pc + size (ccomp c) ∧ (c, s) ⇒ s' ∧ stk' = stk"

abbreviation off :: "instr list ⇒ config ⇒ config" where
"off P cf ≡ (fst cf - size P, snd cf)"

text ‹
\null

Next, some lemmas about @{const [source] execl} and @{const execl_all} are proven. In more detail,
given a program @{text P} and a list of configurations @{text cfs} such that @{prop "P ⊨ cfs"}:

  ▪ Lemma @{text execl_next} states that for any configuration in @{text cfs} but the last one, the
subsequent configuration must result from the execution of the referenced instruction of @{text P}
in that configuration.
\\Thus, @{text execl_next} permits to reproduce the execution of a single instruction.

  ▪ Lemma @{text execl_last} states that a configuration in @{text cfs} whose program counter falls
outside @{text P} must be the last one in @{text cfs}.
\\Thus, @{text execl_last} permits to infer the completion of program execution.

  ▪ Lemma @{text execl_drop} states that @{prop "P ⊨ drop n cfs"} for any natural number @{text n},
and will be used to prove compiler correctness for loops by induction over the length of the list of
configurations @{text cfs}.

Furthermore, some other lemmas enabling to prove compiler correctness automatically for constructors
@{const N}, @{const V} (arithmetic expressions), @{const Bc} (boolean expressions) and @{const SKIP}
(commands) are also proven.

\null
›

lemma iexec_offset_aux:
 "(i :: int) + 1 - j = i - j + 1 ∧ i + j - k + 1 = i - k + j + 1"
by simp

lemma iexec_offset [intro]:
 "(ins, pc, s, stk) ↦ (pc', s', stk') ⟹
    (ins, pc - i, s, stk) ↦ (pc' - i, s', stk')"
by (erule iexec.cases, auto simp: iexec_offset_aux)

lemma execl_next:
 "⟦P ⊨ cfs; k < length cfs; k ≠ length cfs - 1⟧ ⟹
    (P !! fst (cfs ! k), cfs ! k) ↦ cfs ! Suc k"
by (induction cfs arbitrary: k rule: execl.induct, auto
 simp: nth_Cons exec1_def split: nat.split)

lemma execl_last:
 "⟦P ⊨ cfs; k < length cfs; fst (cfs ! k) ∉ {0..<size P}⟧ ⟹
    length cfs - 1 = k"
by (induction cfs arbitrary: k rule: execl.induct, auto
 simp: nth_Cons exec1_def split: nat.split_asm)

lemma execl_drop:
 "P ⊨ cfs ⟹ P ⊨ drop n cfs"
by (induction cfs arbitrary: n rule: execl.induct,
 simp_all add: drop_Cons split: nat.split)

lemma execl_all_N [simplified, intro]:
 "[LOADI i] ⊨ cfs□ ⟹ apred (N i) (cfs ! 0) (cfs ! (length cfs - 1))"
by (clarsimp simp: execl_all_def apred_def, cases "cfs ! 0",
 subgoal_tac "length cfs - 1 = 1", frule_tac [!] execl_next,
 ((rule ccontr)?, fastforce, (rule execl_last)?)+)

lemma execl_all_V [simplified, intro]:
 "[LOAD x] ⊨ cfs□ ⟹ apred (V x) (cfs ! 0) (cfs ! (length cfs - 1))"
by (clarsimp simp: execl_all_def apred_def, cases "cfs ! 0",
 subgoal_tac "length cfs - 1 = 1", frule_tac [!] execl_next,
 ((rule ccontr)?, fastforce, (rule execl_last)?)+)

lemma execl_all_Bc [simplified, intro]:
 "⟦if v = f then [JMP i] else [] ⊨ cfs□; 0 ≤ i⟧ ⟹
    bpred (Bc v, f, i) (cfs ! 0) (cfs ! (length cfs - 1))"
by (clarsimp simp: execl_all_def bpred_def split: if_split_asm, cases "cfs ! 0",
 subgoal_tac "length cfs - 1 = 1", frule_tac [1-2] execl_next,
 ((rule ccontr)?, force, (rule execl_last)?)+, rule execl.cases [of "([], cfs)"],
 auto simp: exec1_def)

lemma execl_all_SKIP [simplified, intro]:
 "[] ⊨ cfs□ ⟹ cpred SKIP (cfs ! 0) (cfs ! (length cfs - 1))"
by (rule execl.cases [of "([], cfs)"], auto simp: execl_all_def exec1_def cpred_def)

text ‹
\null

Next, lemma @{text execl_all_sub} is proven. It states that, if @{prop "P @ P' x @ P'' ⊨ cfs□"},
configuration @{text cf} within @{text cfs} refers to the start of program @{text "P' x"}, and the
initial and the final configuration in every complete execution of @{text "P' x"} satisfy predicate
@{text "Q x"}, then there exists a configuration @{text cf'} in @{text cfs} such that @{text cf} and
@{text cf'} satisfy @{text "Q x"}.
\\Thus, this lemma permits to reproduce the execution of a subprogram, particularly:

  ▪ a compiled arithmetic expression @{text a}, where @{prop "Q = apred"} and @{prop "x = a"},

  ▪ a compiled boolean expression @{text b}, where @{prop "Q = bpred"} and @{prop "x = (b, f, i)"}
(given a flag @{text f} and a jump offset @{text i}), and

  ▪ a compiled command @{text c}, where @{prop "Q = cpred"} and @{prop "x = c"}.

Furthermore, lemma @{text execl_all_sub2} is derived from @{text execl_all_sub} to enable a shorter
symbolical execution of two consecutive subprograms.

\null
›

lemma execl_sub_aux:
 "⟦⋀m n. ∀k ∈ {m..<n}. Q P' (((pc, s, stk) # cfs) ! k) ⟹ P' ⊨
    map (off P) (case m of 0 ⇒ (pc, s, stk) # take n cfs | Suc m ⇒ F cfs m n);
    ∀k ∈ {m..<n+m+length cfs'}. Q P' ((cfs' @ (pc, s, stk) # cfs) ! (k-m))⟧ ⟹
  P' ⊨ (pc - size P, s, stk) # map (off P) (take n cfs)"
  (is "⟦⋀_ _. ∀k ∈ _. Q P' (?F k) ⟹ _; ∀k ∈ ?A. Q P' (?G k)⟧ ⟹ _")
by (subgoal_tac "∀k ∈ {0..<n}. Q P' (?F k)", fastforce, subgoal_tac
 "∀k ∈ {0..<n}. k + m + length cfs' ∈ ?A ∧ ?F k = ?G (k + m + length cfs')",
 fastforce, simp add: nth_append)

lemma execl_sub:
 "⟦P @ P' @ P'' ⊨ cfs; ∀k ∈ {m..<n}.
     size P ≤ fst (cfs ! k) ∧ fst (cfs ! k) - size P < size P'⟧ ⟹
   P' ⊨ map (off P) (drop m (take (Suc n) cfs))"
  (is "⟦_; ∀k ∈ _. ?P P' cfs k⟧ ⟹ P' ⊨ map _ (?F cfs m (Suc n))")
proof (induction cfs arbitrary: m n rule: execl.induct [of _ P'], auto
 simp: take_Cons drop_Cons exec1_def split: nat.split, force, force, force,
 erule execl_sub_aux [where m = 0], subst append_Cons [of _ "[]"], simp,
 erule execl_sub_aux [where m = "Suc 0" and cfs' = "[]"], simp)
  fix P' pc s stk cfs m n
  let ?cf = "(pc, s, stk) :: config"
  assume "⋀m n. ∀k ∈ {m..<n}. ?P P' (?cf # cfs) k ⟹ P' ⊨
    map (off P) (case m of 0 ⇒ ?cf # take n cfs | Suc m ⇒ ?F cfs m n)"
  moreover assume "∀k ∈ {Suc (Suc m)..<Suc n}. ?P P' cfs (k - Suc (Suc 0))"
  hence "∀k ∈ {Suc m..<n}. ?P P' (?cf # cfs) k"
    by force
  ultimately show "P' ⊨ map (off P) (?F cfs m n)"
    by fastforce
qed

lemma execl_all_sub [rule_format]:
  assumes
    A: "P @ P' x @ P'' ⊨ cfs□" and
    B: "k < length cfs" and
    C: "fst (cfs ! k) = size P" and
    D: "∀cfs. P' x ⊨ cfs□ ⟶ Q x (cfs ! 0) (cfs ! (length cfs - 1))"
  shows "∃k' < length cfs. Q x (off P (cfs ! k)) (off P (cfs ! k'))"
proof -
  let ?P = "λk. size P ≤ fst (cfs ! k) ∧ fst (cfs ! k) - size P < size (P' x)"
  let ?A = "{k'. k' ∈ {k..<length cfs} ∧ ¬ ?P k'}"
  have E: "Min ?A ∈ ?A"
    using A and B by (rule_tac Min_in, simp_all add: execl_all_def,
     rule_tac exI [of _ "length cfs - 1"], auto)
  hence "map (off P) (drop k (take (Suc (Min ?A)) cfs)) ! 0 = off P (cfs ! k)"
    (is "?cfs ! _ = _")
    by auto
  moreover have "length cfs ≤ Suc (Min ?A) ⟶ Min ?A = length cfs - 1"
    using E by auto
  with A and B and E have F: "?cfs ! (length ?cfs - 1) = off P (cfs ! Min ?A)"
    by (subst nth_map, auto simp: min_def execl_all_def, arith)
  hence "?cfs ≠ [] ∧ fst (?cfs ! (length ?cfs - 1)) ∉ {0..<size (P' x)}"
    using E by (auto simp: min_def)
  moreover have "¬ (∃k'. k' ∈ {k'. k' ∈ {k..<Min ?A} ∧ ¬ ?P k'})"
    by (rule notI, erule exE, frule rev_subsetD [of _ _ ?A], rule subsetI,
     insert E, simp, subgoal_tac "finite ?A", drule Min_le, force+)
  hence "P' x ⊨ ?cfs"
    using A by (subst (asm) execl_all_def, rule_tac execl_sub, blast+)
  ultimately have "Q x (?cfs ! 0) (?cfs ! (length ?cfs - 1))"
    using C and D by (auto simp: execl_all_def)
  thus ?thesis
    using E and F by (rule_tac exI [of _ "Min ?A"], auto)
qed

lemma execl_all_sub2:
  assumes
    A: "P x @ P' x' @ P'' ⊨ cfs□"
      (is "?P ⊨ _□") and
    B: "⋀cfs. P x ⊨ cfs□ ⟹ (λ(pc, s, stk) (pc', s', stk').
      pc' = pc + size (P x) + I s ∧ Q s s' ∧ stk' = F s stk)
        (cfs ! 0) (cfs ! (length cfs - 1))"
      (is "⋀cfs. _ ⟹ ?Q x (cfs ! 0) (cfs ! (length cfs - 1))") and
    C: "⋀cfs. P' x' ⊨ cfs□ ⟹ (λ(pc, s, stk) (pc', s', stk').
      pc' = pc + size (P' x') + I' s ∧ Q' s s' ∧ stk' = F' s stk)
        (cfs ! 0) (cfs ! (length cfs - 1))"
      (is "⋀cfs. _ ⟹ ?Q' x' (cfs ! 0) (cfs ! (length cfs - 1))") and
    D: "I (fst (snd (cfs ! 0))) = 0"
  shows "∃k < length cfs. ∃t. (λ(pc, s, stk) (pc', s', stk').
    pc = 0 ∧ pc' = size (P x) + size (P' x') + I' t ∧ Q s t ∧ Q' t s' ∧
      stk' = F' t (F s stk)) (cfs ! 0) (cfs ! k)"
by (subgoal_tac "[] @ ?P ⊨ cfs□", drule execl_all_sub [where k = 0 and Q = ?Q],
 insert A B, (clarsimp simp: execl_all_def)+, insert A C D, drule execl_all_sub
 [where Q = ?Q'], simp+, clarify, rule exI, rule conjI, simp, rule exI, auto)


subsection "Main theorem"

text ‹
It is time to prove compiler correctness. First, lemmas @{text acomp_acomp}, @{text bcomp_bcomp} are
derived from @{thm [source] execl_all_sub2} to reproduce the execution of two consecutive compiled
arithmetic expressions (possibly generated by both @{const acomp} and @{const bcomp}) and boolean
expressions (possibly generated by @{const bcomp}), respectively. Subsequently, the correctness of
@{const acomp} and @{const bcomp} is proven in lemmas @{text acomp_correct}, @{text bcomp_correct}.

\null
›

lemma acomp_acomp:
 "⟦acomp a⇩₁ @ acomp a⇩₂ @ P ⊨ cfs□;
    ⋀cfs. acomp a⇩₁ ⊨ cfs□ ⟹ apred a⇩₁ (cfs ! 0) (cfs ! (length cfs - 1));
    ⋀cfs. acomp a⇩₂ ⊨ cfs□ ⟹ apred a⇩₂ (cfs ! 0) (cfs ! (length cfs - 1))⟧ ⟹
  case cfs ! 0 of (pc, s, stk) ⇒ pc = 0 ∧ (∃k < length cfs. cfs ! k =
    (size (acomp a⇩₁ @ acomp a⇩₂), s, aval a⇩₂ s # aval a⇩₁ s # stk))"
by (drule execl_all_sub2 [where I = "λs. 0" and I' = "λs. 0" and Q = "λs s'. s' = s"
 and Q' = "λs s'. s' = s" and F = "λs stk. aval a⇩₁ s # stk"
 and F' = "λs stk. aval a⇩₂ s # stk"], auto simp: apred_def)

lemma bcomp_bcomp:
 "⟦bcomp (b⇩₁, f⇩₁, i⇩₁) @ bcomp (b⇩₂, f⇩₂, i⇩₂) ⊨ cfs□;
    ⋀cfs. bcomp (b⇩₁, f⇩₁, i⇩₁) ⊨ cfs□ ⟹
      bpred (b⇩₁, f⇩₁, i⇩₁) (cfs ! 0) (cfs ! (length cfs - 1));
    ⋀cfs. bcomp (b⇩₂, f⇩₂, i⇩₂) ⊨ cfs□ ⟹
      bpred (b⇩₂, f⇩₂, i⇩₂) (cfs ! 0) (cfs ! (length cfs - 1))⟧ ⟹
  case cfs ! 0 of (pc, s, stk) ⇒ pc = 0 ∧ (bval b⇩₁ s ≠ f⇩₁ ⟶
    (∃k < length cfs. cfs ! k = (size (bcomp (b⇩₁, f⇩₁, i⇩₁) @ bcomp (b⇩₂, f⇩₂, i⇩₂)) +
      (if bval b⇩₂ s = f⇩₂ then i⇩₂ else 0), s, stk)))"
by (clarify, rule conjI, simp add: execl_all_def, rule impI, subst (asm) append_Nil2
 [symmetric], drule execl_all_sub2 [where I = "λs. if bval b⇩₁ s = f⇩₁ then i⇩₁ else 0"
 and I' = "λs. if bval b⇩₂ s = f⇩₂ then i⇩₂ else 0" and Q = "λs s'. s' = s"
 and Q' = "λs s'. s' = s" and F = "λs stk. stk" and F' = "λs stk. stk"],
 auto simp: bpred_def)

lemma acomp_correct [simplified, intro]:
 "acomp a ⊨ cfs□ ⟹ apred a (cfs ! 0) (cfs ! (length cfs - 1))"
proof (induction a arbitrary: cfs, simp_all, frule_tac [3] acomp_acomp, auto)
  fix a⇩₁ a⇩₂ cfs s stk k
  assume A: "acomp a⇩₁ @ acomp a⇩₂ @ [ADD] ⊨ cfs□"
    (is "?ca⇩₁ @ ?ca⇩₂ @ ?i ⊨ _□")
  assume B: "k < length cfs" and
    C: "cfs ! k = (size ?ca⇩₁ + size ?ca⇩₂, s, aval a⇩₂ s # aval a⇩₁ s # stk)"
  hence "cfs ! Suc k = (size (?ca⇩₁ @ ?ca⇩₂ @ ?i), s, aval (Plus a⇩₁ a⇩₂) s # stk)"
    using A by (insert execl_next [of "?ca⇩₁ @ ?ca⇩₂ @ ?i" cfs k],
     simp add: execl_all_def, drule_tac impI, auto)
  thus "apred (Plus a⇩₁ a⇩₂) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using A and B and C by (insert execl_last [of "?ca⇩₁ @ ?ca⇩₂ @ ?i" cfs "Suc k"],
     simp add: execl_all_def apred_def, drule_tac impI, auto)
qed

lemma bcomp_correct [simplified, intro]:
 "⟦bcomp x ⊨ cfs□; 0 ≤ snd (snd x)⟧ ⟹ bpred x (cfs ! 0) (cfs ! (length cfs - 1))"
proof (induction x arbitrary: cfs rule: bcomp.induct, simp_all add: Let_def,
 frule_tac [4] acomp_acomp, frule_tac [3] bcomp_bcomp, auto, force simp: bpred_def)
  fix b⇩₁ b⇩₂ f i cfs s stk
  assume A: "bcomp (b⇩₁, False, size (bcomp (b⇩₂, f, i)) + (if f then 0 else i)) @
    bcomp (b⇩₂, f, i) ⊨ cfs□"
    (is "bcomp (_, _, ?n + ?i) @ ?cb ⊨ _□")
  moreover assume B: "cfs ! 0 = (0, s, stk)" and
   "⋀cb cfs. ⟦cb = ?cb; bcomp (b⇩₁, False, ?n + ?i) ⊨ cfs□⟧ ⟹
      bpred (b⇩₁, False, ?n + ?i) (cfs ! 0) (cfs ! (length cfs - Suc 0))"
  ultimately have "∃k < length cfs. bpred (b⇩₁, False, ?n + ?i)
    (off [] (cfs ! 0)) (off [] (cfs ! k))"
    by (rule_tac execl_all_sub, auto simp: execl_all_def)
  moreover assume C: "¬ bval b⇩₁ s"
  ultimately obtain k where D: "k < length cfs" and
    E: "cfs ! k = (size (bcomp (b⇩₁, False, ?n + ?i)) + ?n + ?i, s, stk)"
    using B by (auto simp: bpred_def)
  assume "0 ≤ i"
  thus "bpred (And b⇩₁ b⇩₂, f, i) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using A and C and D and E by (insert execl_last, auto simp:
     execl_all_def bpred_def Let_def)
next
  fix b⇩₁ b⇩₂ f i cfs s stk k
  assume A: "bcomp (b⇩₁, False, size (bcomp (b⇩₂, f, i)) + (if f then 0 else i)) @
    bcomp (b⇩₂, f, i) ⊨ cfs□"
    (is "?cb⇩₁ @ ?cb⇩₂ ⊨ _□")
  assume "k < length cfs" and "0 ≤ i" and "bval b⇩₁ s" and
   "cfs ! k = (size ?cb⇩₁ + size ?cb⇩₂ + (if bval b⇩₂ s = f then i else 0), s, stk)"
  thus "bpred (And b⇩₁ b⇩₂, f, i) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using A by (insert execl_last, auto simp: execl_all_def bpred_def Let_def)
next
  fix a⇩₁ a⇩₂ f i cfs s stk k
  assume A: "acomp a⇩₁ @ acomp a⇩₂ @
    (if f then [JMPLESS i] else [JMPGE i]) ⊨ cfs□"
    (is "?ca⇩₁ @ ?ca⇩₂ @ ?i ⊨ _□")
  assume B: "k < length cfs" and
    C: "cfs ! k = (size ?ca⇩₁ + size ?ca⇩₂, s, aval a⇩₂ s # aval a⇩₁ s # stk)"
  hence D: "cfs ! Suc k =
    (size (?ca⇩₁ @ ?ca⇩₂ @ ?i) + (if bval (Less a⇩₁ a⇩₂) s = f then i else 0), s, stk)"
    using A by (insert execl_next [of "?ca⇩₁ @ ?ca⇩₂ @ ?i" cfs k],
     simp add: execl_all_def, drule_tac impI, auto split: if_split_asm)
  assume "0 ≤ i"
  with A and B and C and D have "length cfs - 1 = Suc k"
    by (rule_tac execl_last, auto simp: execl_all_def, simp split: if_split_asm)
  thus "bpred (Less a⇩₁ a⇩₂, f, i) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using D by (simp add: bpred_def)
qed

text ‹
\null

Next, lemmas @{text bcomp_ccomp}, @{text ccomp_ccomp} are derived to reproduce the execution of a
compiled boolean expression followed by a compiled command and of two consecutive compiled commands,
respectively (possibly generated by @{const ccomp}). Then, compiler correctness for loops and for
all commands is proven in lemmas @{text while_correct} and @{text ccomp_correct}, respectively by
induction over the length of the list of configurations and by structural induction over commands.

\null
›

lemma bcomp_ccomp:
 "⟦bcomp (b, f, i) @ ccomp c @ P ⊨ cfs□; 0 ≤ i;
    ⋀cfs. ccomp c ⊨ cfs□ ⟹ cpred c (cfs ! 0) (cfs ! (length cfs - 1))⟧ ⟹
  case cfs ! 0 of (pc, s, stk) ⇒ pc = 0 ∧ (bval b s ≠ f ⟶
    (∃k < length cfs. case cfs ! k of (pc', s', stk') ⇒
      pc' = size (bcomp (b, f, i) @ ccomp c) ∧ (c, s) ⇒ s' ∧ stk' = stk))"
by (clarify, rule conjI, simp add: execl_all_def, rule impI, drule execl_all_sub2
 [where I = "λs. if bval b s = f then i else 0" and I' = "λs. 0"
 and Q = "λs s'. s' = s" and Q' = "λs s'. (c, s) ⇒ s'" and F = "λs stk. stk"
 and F' = "λs stk. stk"], insert bcomp_correct [of "(b, f, i)"],
 auto simp: bpred_def cpred_def)

lemma ccomp_ccomp:
 "⟦ccomp c⇩₁ @ ccomp c⇩₂ ⊨ cfs□;
    ⋀cfs. ccomp c⇩₁ ⊨ cfs□ ⟹ cpred c⇩₁ (cfs ! 0) (cfs ! (length cfs - 1));
    ⋀cfs. ccomp c⇩₂ ⊨ cfs□ ⟹ cpred c⇩₂ (cfs ! 0) (cfs ! (length cfs - 1))⟧ ⟹
  case cfs ! 0 of (pc, s, stk) ⇒ pc = 0 ∧ (∃k < length cfs. ∃t.
    case cfs ! k of (pc', s', stk') ⇒ pc' = size (ccomp c⇩₁ @ ccomp c⇩₂) ∧
      (c⇩₁, s) ⇒ t ∧ (c⇩₂, t) ⇒ s' ∧ stk' = stk)"
by (subst (asm) append_Nil2 [symmetric], drule execl_all_sub2 [where I = "λs. 0"
 and I' = "λs. 0" and Q = "λs s'. (c⇩₁, s) ⇒ s'" and Q' = "λs s'. (c⇩₂, s) ⇒ s'"
 and F = "λs stk. stk" and F' = "λs stk. stk"], auto simp: cpred_def, force)

lemma while_correct [simplified, intro]:
 "⟦bcomp (b, False, size (ccomp c) + 1) @ ccomp c @
    [JMP (- (size (bcomp (b, False, size (ccomp c) + 1) @ ccomp c) + 1))] ⊨ cfs□;
    ⋀cfs. ccomp c ⊨ cfs□ ⟹ cpred c (cfs ! 0) (cfs ! (length cfs - 1))⟧ ⟹
  cpred (WHILE b DO c) (cfs ! 0) (cfs ! (length cfs - Suc 0))"
  (is "⟦?cb @ ?cc @ [JMP (- ?n)] ⊨ _□; ⋀_. _ ⟹ _⟧ ⟹ ?Q cfs")
proof (induction cfs rule: length_induct, frule bcomp_ccomp, auto)
  fix cfs s stk
  assume A: "?cb @ ?cc @ [JMP (- size ?cb - size ?cc - 1)] ⊨ cfs□"
  hence "∃k < length cfs. bpred (b, False, size (ccomp c) + 1)
    (off [] (cfs ! 0)) (off [] (cfs ! k))"
    by (rule_tac execl_all_sub, auto simp: execl_all_def)
  moreover assume B: "¬ bval b s" and "cfs ! 0 = (0, s, stk)"
  ultimately obtain k where "k < length cfs" and "cfs ! k = (?n, s, stk)"
    by (auto simp: bpred_def)
  thus "cpred (WHILE b DO c) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using A and B by (insert execl_last, auto simp: execl_all_def cpred_def Let_def)
next
  fix cfs s s' stk k
  assume A: "?cb @ ?cc @ [JMP (- size ?cb - size ?cc - 1)] ⊨ cfs□"
    (is "?P ⊨ _□")
  assume B: "k < length cfs" and "cfs ! k = (size ?cb + size ?cc, s', stk)"
  moreover from this have C: "k ≠ length cfs - 1"
    using A by (rule_tac notI, simp add: execl_all_def)
  ultimately have D: "cfs ! Suc k = (0, s', stk)"
    using A by (insert execl_next [of ?P cfs k], auto simp: execl_all_def)
  moreover have E: "Suc k + (length (drop (Suc k) cfs) - 1) = length cfs - 1"
    (is "Suc k + (length ?cfs - 1) = _")
    using B and C by simp
  ultimately have "?P ⊨ ?cfs□"
    using A and B and C by (auto simp: execl_all_def intro: execl_drop)
  moreover assume "∀cfs'. length cfs' < length cfs ⟶ ?P ⊨ cfs'□ ⟶ ?Q cfs'"
  hence "length ?cfs < length cfs ⟶ ?P ⊨ ?cfs□ ⟶ ?Q ?cfs" ..
  ultimately have "cpred (WHILE b DO c) (cfs ! Suc k) (cfs ! (length cfs - 1))"
    using B and C and E by simp
  moreover assume "bval b s" and "(c, s) ⇒ s'"
  ultimately show "cpred (WHILE b DO c) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using D by (auto simp: cpred_def)
qed

lemma ccomp_correct:
 "ccomp c ⊨ cfs□ ⟹ cpred c (cfs ! 0) (cfs ! (length cfs - 1))"
proof (induction c arbitrary: cfs, simp_all add: Let_def, frule_tac [4] bcomp_ccomp,
 frule_tac [3] ccomp_ccomp, auto)
  fix a x cfs
  assume A: "acomp a @ [STORE x] ⊨ cfs□"
  hence "∃k < length cfs. apred a (off [] (cfs ! 0)) (off [] (cfs ! k))"
    by (rule_tac execl_all_sub, auto simp: execl_all_def)
  moreover obtain s stk where B: "cfs ! 0 = (0, s, stk)"
    using A by (cases "cfs ! 0", simp add: execl_all_def)
  ultimately obtain k where C: "k < length cfs" and
    D: "cfs ! k = (size (acomp a), s, aval a s # stk)"
    by (auto simp: apred_def)
  hence "cfs ! Suc k = (size (acomp a) + 1, s(x := aval a s), stk)"
    using A by (insert execl_next [of "acomp a @ [STORE x]" cfs k],
     simp add: execl_all_def, drule_tac impI, auto)
  thus "cpred (x ::= a) (cfs ! 0) (cfs ! (length cfs - Suc 0))"
    using A and B and C and D by (insert execl_last [of "acomp a @ [STORE x]"
     cfs "Suc k"], simp add: execl_all_def cpred_def, drule_tac impI, auto)
next
  fix c⇩₁ c⇩₂ cfs s s' t stk k
  assume "ccomp c⇩₁ @ ccomp c⇩₂ ⊨ cfs□" and "k < length cfs" and
   "cfs ! k = (size (ccomp c⇩₁) + size (ccomp c⇩₂), s', stk)"
  moreover assume "(c⇩₁, s) ⇒ t" and "(c⇩₂, t) ⇒ s'"
  ultimately show "cpred (c⇩₁;; c⇩₂) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    by (insert execl_last, auto simp: execl_all_def cpred_def)
next
  fix b c⇩₁ c⇩₂ cfs s stk
  assume A: "bcomp (b, False, size (ccomp c⇩₁) + 1) @ ccomp c⇩₁ @
    JMP (size (ccomp c⇩₂)) # ccomp c⇩₂ ⊨ cfs□"
    (is "bcomp ?x @ ?cc⇩₁ @ _ # ?cc⇩₂ ⊨ _□")
  let ?P = "bcomp ?x @ ?cc⇩₁ @ [JMP (size ?cc⇩₂)]"
  have "∃k < length cfs. bpred ?x (off [] (cfs ! 0)) (off [] (cfs ! k))"
    using A by (rule_tac execl_all_sub, auto simp: execl_all_def)
  moreover assume B: "¬ bval b s" and "cfs ! 0 = (0, s, stk)"
  ultimately obtain k where C: "k < length cfs" and D: "cfs ! k = (size ?P, s, stk)"
    by (force simp: bpred_def)
  assume "⋀cfs. ?cc⇩₂ ⊨ cfs□ ⟹ cpred c⇩₂ (cfs ! 0) (cfs ! (length cfs - Suc 0))"
  hence "∃k' < length cfs. cpred c⇩₂ (off ?P (cfs ! k)) (off ?P (cfs ! k'))"
    using A and C and D by (rule_tac execl_all_sub [where P'' = "[]"], auto)
  then obtain k' where "k' < length cfs" and "case cfs ! k' of (pc', s', stk') ⇒
    pc' = size (?P @ ?cc⇩₂) ∧ (c⇩₂, s) ⇒ s' ∧ stk' = stk"
    using D by (fastforce simp: cpred_def)
  thus "cpred (IF b THEN c⇩₁ ELSE c⇩₂) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using A and B by (insert execl_last, auto simp: execl_all_def cpred_def Let_def)
next
  fix b c⇩₁ c⇩₂ cfs s s' stk k
  assume A: "bcomp (b, False, size (ccomp c⇩₁) + 1) @ ccomp c⇩₁ @
    JMP (size (ccomp c⇩₂)) # ccomp c⇩₂ ⊨ cfs□"
    (is "?cb @ ?cc⇩₁ @ ?i # ?cc⇩₂ ⊨ _□")
  assume B: "k < length cfs" and C: "cfs ! k = (size ?cb + size ?cc⇩₁, s', stk)"
  hence D: "cfs ! Suc k = (size (?cb @ ?cc⇩₁ @ ?i # ?cc⇩₂), s', stk)"
    (is "_ = (size ?P, _, _)")
    using A by (insert execl_next [of ?P cfs k], simp add: execl_all_def,
     drule_tac impI, auto)
  assume "bval b s" and "(c⇩₁, s) ⇒ s'"
  thus "cpred (IF b THEN c⇩₁ ELSE c⇩₂) (0, s, stk) (cfs ! (length cfs - Suc 0))"
    using A and B and C and D by (insert execl_last [of ?P cfs "Suc k"],
     simp add: execl_all_def cpred_def Let_def, drule_tac impI, auto)
next
  fix c⇩₁ c⇩₂ cfs
  assume A: "JMPND (size (ccomp c⇩₁) + 1) # ccomp c⇩₁ @
    JMP (size (ccomp c⇩₂)) # ccomp c⇩₂ ⊨ cfs□"
    (is "JMPND (?n⇩₁ + 1) # ?cc⇩₁ @ JMP ?n⇩₂ # ?cc⇩₂ ⊨ _□")
  let ?P = "JMPND (?n⇩₁ + 1) # ?cc⇩₁ @ [JMP ?n⇩₂]"
  assume
    B: "⋀cfs. ?cc⇩₁ ⊨ cfs□ ⟹ cpred c⇩₁ (cfs ! 0) (cfs ! (length cfs - Suc 0))" and
    C: "⋀cfs. ?cc⇩₂ ⊨ cfs□ ⟹ cpred c⇩₂ (cfs ! 0) (cfs ! (length cfs - Suc 0))"
  from A obtain s stk where D: "cfs ! 0 = (0, s, stk)"
    by (cases "cfs ! 0", simp add: execl_all_def)
  with A have "cfs ! 1 = (1, s, stk) ∨ cfs ! 1 = (?n⇩₁ + 2, s, stk)"
    by (insert execl_next [of "?P @ ?cc⇩₂" cfs 0], simp add: execl_all_def,
     drule_tac impI, auto)
  moreover {
    assume E: "cfs ! 1 = (1, s, stk)"
    hence "∃k < length cfs. cpred c⇩₁ (off [hd ?P] (cfs ! 1)) (off [hd ?P] (cfs ! k))"
      using A and B by (rule_tac execl_all_sub, auto simp: execl_all_def)
    then obtain k where "k < length cfs" and "case cfs ! k of (pc', s', stk') ⇒
      pc' = ?n⇩₁ + 1 ∧ (c⇩₁, s) ⇒ s' ∧ stk' = stk"
      using E by (fastforce simp: cpred_def)
    moreover from this have "case cfs ! Suc k of (pc', s', stk') ⇒
      pc' = ?n⇩₁ + ?n⇩₂ + 2 ∧ (c⇩₁, s) ⇒ s' ∧ stk' = stk"
      using A by (insert execl_next [of "?P @ ?cc⇩₂" cfs k], simp add: execl_all_def,
       drule_tac impI, auto)
    ultimately have "cpred (c⇩₁ OR c⇩₂) (cfs ! 0) (cfs ! (length cfs - Suc 0))"
      using A and D by (insert execl_last [of "?P @ ?cc⇩₂" cfs "Suc k"],
       simp add: execl_all_def cpred_def split_def Let_def, drule_tac impI, auto)
  }
  moreover {
    assume E: "cfs ! 1 = (?n⇩₁ + 2, s, stk)"
    with A and C have "∃k<length cfs. cpred c⇩₂ (off ?P (cfs!1)) (off ?P (cfs!k))"
      by (rule_tac execl_all_sub [where P'' = "[]"], auto simp: execl_all_def)
    then obtain k where "k < length cfs" and "case cfs ! k of (pc', s', stk') ⇒
      pc' = ?n⇩₁ + ?n⇩₂ + 2 ∧ (c⇩₂, s) ⇒ s' ∧ stk' = stk"
      using E by (fastforce simp: cpred_def)
    with A and D have "cpred (c⇩₁ OR c⇩₂) (cfs ! 0) (cfs ! (length cfs - Suc 0))"
      by (insert execl_last, auto simp: execl_all_def cpred_def Let_def)
  }
  ultimately show "cpred (c⇩₁ OR c⇩₂) (cfs ! 0) (cfs ! (length cfs - Suc 0))" ..
qed

text ‹
\null

Finally, the main compiler correctness theorem, expressed using predicate @{const exec}, is proven.
First, @{prop "P ⊢ cf →* cf'"} is shown to imply the existence of a nonempty list of configurations
@{text cfs} such that @{prop "P ⊨ cfs"}, whose initial and final configurations match @{text cf}
and @{text cf'}, respectively. Then, the main theorem is derived as a straightforward consequence of
this lemma and of the previous lemma @{thm [source] ccomp_correct}.

\null
›

lemma exec_execl [dest!]:
 "P ⊢ cf →* cf' ⟹ ∃cfs. P ⊨ cfs ∧ cfs ≠ [] ∧ hd cfs = cf ∧ last cfs = cf'"
by (erule star.induct, force, erule exE, rule list.exhaust, blast,
 simp del: last.simps, rule exI, subst execl.simps(1), simp)

theorem ccomp_exec:
 "ccomp c ⊢ (0, s, stk) →* (size (ccomp c), s', stk') ⟹ (c, s) ⇒ s' ∧ stk' = stk"
by (insert ccomp_correct, force simp: hd_conv_nth last_conv_nth execl_all_def cpred_def)

end