Theory KMP
theory KMP
  imports Refine_Imperative_HOL.IICF
    "HOL-Library.Sublist"
begin
declare len_greater_imp_nonempty[simp del] min_absorb2[simp]
no_notation Ref.update (‹_ := _› 62)
section‹Specification›text_raw‹\label{sec:spec}›
subsection‹Sublist-predicate with a position check›
subsubsection‹Definition›
text ‹One could define›
definition "sublist_at' xs ys i ≡ take (length xs) (drop i ys) = xs"  
text‹However, this doesn't handle out-of-bound indexes uniformly:›
value[nbe] "sublist_at' [] [a] 5"
value[nbe] "sublist_at' [a] [a] 5"
value[nbe] "sublist_at' [] [] 5"
text‹Instead, we use a recursive definition:›
fun sublist_at :: "'a list ⇒ 'a list ⇒ nat ⇒ bool" where
  "sublist_at (x#xs) (y#ys) 0 ⟷ x=y ∧ sublist_at xs ys 0" |
  "sublist_at xs (y#ys) (Suc i) ⟷ sublist_at xs ys i" |
  "sublist_at [] ys 0 ⟷ True" |
  "sublist_at _ [] _ ⟷ False"
text‹In the relevant cases, both definitions agree:›
lemma "i ≤ length ys ⟹ sublist_at xs ys i ⟷ sublist_at' xs ys i"
  unfolding sublist_at'_def
  by (induction xs ys i rule: sublist_at.induct) auto
text‹However, the new definition has some reasonable properties:›
subsubsection‹Properties›
lemma sublist_lengths: "sublist_at xs ys i ⟹ i + length xs ≤ length ys"
  by (induction xs ys i rule: sublist_at.induct) auto
lemma Nil_is_sublist: "sublist_at ([] :: 'x list) ys i ⟷ i ≤ length ys"
  by (induction "[] :: 'x list" ys i rule: sublist_at.induct) auto
text‹Furthermore, we need:›
lemma sublist_step[intro]:
  "⟦i + length xs < length ys; sublist_at xs ys i; ys!(i + length xs) = x⟧ ⟹ sublist_at (xs@[x]) ys i"
  apply (induction xs ys i rule: sublist_at.induct)
      apply auto
  using sublist_at.elims(3) by fastforce
lemma all_positions_sublist:
"⟦i + length xs ≤ length ys; ∀jj<length xs. ys!(i+jj) = xs!jj⟧ ⟹ sublist_at xs ys i"
proof (induction xs rule: rev_induct)
  case Nil
  then show ?case by (simp add: Nil_is_sublist)
next
  case (snoc x xs)
  from ‹i + length (xs @ [x]) ≤ length ys› have "i + length xs ≤ length ys" by simp
  moreover have "∀jj<length xs. ys!(i + jj) = xs!jj"
    by (simp add: nth_append snoc.prems(2))
  ultimately have "sublist_at xs ys i"
    using snoc.IH by simp
  then show ?case
    using snoc.prems by auto
qed
lemma sublist_all_positions: "sublist_at xs ys i ⟹ ∀jj<length xs. ys!(i+jj) = xs!jj"
  by (induction xs ys i rule: sublist_at.induct) (auto simp: nth_Cons')
text‹It also connects well to theory @{theory "HOL-Library.Sublist"} (compare @{thm[source] sublist_def}):›
lemma sublist_at_altdef:
  "sublist_at xs ys i ⟷ (∃ps ss. ys = ps@xs@ss ∧ i = length ps)"
proof (induction xs ys i rule: sublist_at.induct)
  case (2 ss t ts i)
  show "sublist_at ss (t#ts) (Suc i) ⟷ (∃xs ys. t#ts = xs@ss@ys ∧ Suc i = length xs)"
    (is "?lhs ⟷ ?rhs")
  proof
    assume ?lhs
    then have "sublist_at ss ts i" by simp
    with "2.IH" obtain xs where "∃ys. ts = xs@ss@ys ∧ i = length xs" by auto
    then have "∃ys. t#ts = (t#xs)@ss@ys ∧ Suc i = length (t#xs)" by simp
    then show ?rhs by blast
  next
    assume ?rhs
    then obtain xs where "∃ys. t#ts = xs@ss@ys ∧ length xs = Suc i"
      by (blast dest: sym)
    then have "∃ys. ts = (tl xs)@ss@ys ∧ i = length (tl xs)"
      by (auto simp add: length_Suc_conv)
    then have "∃xs ys. ts = xs@ss@ys ∧ i = length xs" by blast
    with "2.IH" show ?lhs by simp
  qed
qed auto
corollary sublist_iff_sublist_at: "Sublist.sublist xs ys ⟷ (∃i. sublist_at xs ys i)"
  by (simp add: sublist_at_altdef Sublist.sublist_def)
subsection‹Sublist-check algorithms›
text‹
  We use the Isabelle Refinement Framework (Theory @{theory Refine_Monadic.Refine_Monadic}) to
  phrase the specification and the algorithm. 
›
text‹@{term s} for "searchword" / "searchlist", @{term t} for "text"›
definition "kmp_SPEC s t = SPEC (λ
  None ⇒ ∄i. sublist_at s t i |
  Some i ⇒ sublist_at s t i ∧ (∀ii<i. ¬sublist_at s t ii))"
lemma is_arg_min_id: "is_arg_min id P i ⟷ P i ∧ (∀ii<i. ¬P ii)"
  unfolding is_arg_min_def by auto
lemma kmp_result: "kmp_SPEC s t =
  RETURN (if sublist s t then Some (LEAST i. sublist_at s t i) else None)"
  unfolding kmp_SPEC_def sublist_iff_sublist_at
  apply (auto intro: LeastI dest: not_less_Least split: option.splits)
  by (meson LeastI nat_neq_iff not_less_Least)
corollary weak_kmp_SPEC: "kmp_SPEC s t ≤ SPEC (λpos. pos≠None ⟷ Sublist.sublist s t)"
  by (simp add: kmp_result)
lemmas kmp_SPEC_altdefs =
  kmp_SPEC_def[folded is_arg_min_id]
  kmp_SPEC_def[folded sublist_iff_sublist_at]
  kmp_result
section‹Naive algorithm›
text‹Since KMP is a direct advancement of the naive "test-all-starting-positions" approach, we provide it here for comparison:›
subsection‹Invariants›
definition "I_out_na s t ≡ λ(i,j,pos).
  (∀ii<i. ¬sublist_at s t ii) ∧
  (case pos of None ⇒ j = 0
    | Some p ⇒ p=i ∧ sublist_at s t i)"
definition "I_in_na s t i ≡ λ(j,pos).
  case pos of None ⇒ j < length s ∧ (∀jj<j. t!(i+jj) = s!(jj))
    | Some p ⇒ sublist_at s t i"
subsection‹Algorithm›
text‹The following definition is taken from Helmut Seidl's lecture on algorithms and data structures\<^cite>‹GAD› except that we
▪ output the identified position @{term ‹pos :: nat option›} instead of just @{const True}
▪ use @{term ‹pos :: nat option›} as break-flag to support the abort within the loops
▪ rewrite @{prop ‹i ≤ length t - length s›} in the first while-condition to @{prop ‹i + length s ≤ length t›} to avoid having to use @{typ int} for list indexes (or the additional precondition @{prop ‹length s ≤ length t›})
›
definition "naive_algorithm s t ≡ do {
  let i=0;
  let j=0;
  let pos=None;
  (_,_,pos) ← WHILEIT (I_out_na s t) (λ(i,_,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do {
    (_,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,_). do {
      let j=j+1;
      if j=length s then RETURN (j,Some i) else RETURN (j,None)
    }) (j,pos);
    if pos=None then do {
      let i = i + 1;
      let j = 0;
      RETURN (i,j,None)
    } else RETURN (i,j,Some i)
  }) (i,j,pos);
  RETURN pos
}"
subsection‹Correctness›
text‹The basic lemmas on @{const sublist_at} from the previous chapter together with @{theory Refine_Monadic.Refine_Monadic}'s verification condition generator / solver suffice:›
lemma "s ≠ [] ⟹ naive_algorithm s t ≤ kmp_SPEC s t"
  unfolding naive_algorithm_def kmp_SPEC_def I_out_na_def I_in_na_def
  apply (refine_vcg
    WHILEIT_rule[where R="measure (λ(i,_,pos). length t - i + (if pos = None then 1 else 0))"]
    WHILEIT_rule[where R="measure (λ(j,_::nat option). length s - j)"]
    )
                 apply (vc_solve solve: asm_rl)
  subgoal by (metis add_Suc_right all_positions_sublist less_antisym)
  subgoal using less_Suc_eq by blast
  subgoal by (metis less_SucE sublist_all_positions)
  subgoal by (auto split: option.splits) (metis sublist_lengths add_less_cancel_right leI le_less_trans)
  done
text‹Note that the precondition cannot be removed without an extra branch: If @{prop ‹s = []›}, the inner while-condition accesses out-of-bound memory. This will apply to KMP, too.›
section‹Knuth--Morris--Pratt algorithm›
text‹Just like our templates\<^cite>‹KMP77›\<^cite>‹GAD›, we first verify the main routine and discuss the computation of the auxiliary values @{term ‹𝔣 s›} only in a later section.›
subsection‹Preliminaries: Borders of lists›
definition "border xs ys ⟷ prefix xs ys ∧ suffix xs ys"
definition "strict_border xs ys ⟷ border xs ys ∧ length xs < length ys"
definition "intrinsic_border ls ≡ ARG_MAX length b. strict_border b ls"
subsubsection‹Properties›