Theory Border_Array

(*  Title:      Border Array
    File:       Combinatorics_Words.Border_Array
    Author:     Štěpán Holub, Charles University

Part of Combinatorics on Words Formalized. See https://gitlab.com/formalcow/combinatorics-on-words-formalized/
*)

theory Border_Array

imports
 CoWBasic

begin

subsection ‹Auxiliary lemmas on suffix and border extension›

lemma border_ConsD: assumes "b#x ≤b a#w"
  shows "a = b" and
        "x ≠ ε ⟹ x ≤b w" and
        border_ConsD_neq: "x ≠ w" and
        border_ConsD_pref: "x ≤p w" and
        border_ConsD_suf: "x ≤s w"
proof-
  show "a = b"
    using borderD_pref[OF assms] by force
  show "x ≠ w" and "x ≤p w" and "x ≤s w"
    using borderD_neq[OF assms, unfolded ‹a = b›]
      borderD_pref[OF assms, unfolded Cons_prefix_Cons]
      suffix_ConsD2[OF borderD_suf[OF assms]] by force+
  thus "x ≠ ε ⟹ x ≤b w"
    unfolding border_def  by blast
qed

lemma ext_suf_Cons:
  "Suc i + ❙|u❙| = ❙|w❙| ⟹ u ≤s w ⟹ (w!i)#u ≤s (w!i)#w"
proof-
  assume "Suc i + ❙|u❙| = ❙|w❙|" and "u ≤s w"
  hence "u = drop (Suc i) w"
    unfolding suffix_def using ‹Suc i + ❙|u❙| = ❙|w❙|› by auto
  have "i < ❙|w❙|"
    using ‹Suc i + ❙|u❙| = ❙|w❙|› by auto
  from id_take_nth_drop[OF this, folded ‹u = drop (Suc i) w›]
  show "w ! i # u ≤s w ! i # w"
    using suffix_ConsI triv_suf by metis
qed


lemma ext_suf_Cons_take_drop: assumes "take k (drop (Suc i) w) ≤s drop (Suc i) w" and "w ! i = w ! (❙|w❙| - Suc k)"
  shows "take (Suc k) (drop i w) ≤s drop i w"
proof (cases "(Suc k) + i < ❙|w❙|", simp_all)
  assume "Suc (k + i) < ❙|w❙|"

  hence "i < ❙|w❙|"
    by simp

  have "Suc (❙|w❙| - Suc i - Suc k) = ❙|w❙| - Suc(i+k)"
    using Suc_diff_Suc ‹Suc (k + i) < ❙|w❙|›
    by (simp add: Suc_diff_Suc)

  have "❙|take k (drop (Suc i) w)❙| = k"
    using ‹Suc (k + i) < ❙|w❙|› by fastforce

  have "Suc (❙|w❙| - Suc i - Suc k) + ❙|take k (drop (Suc i) w)❙| = ❙|drop (Suc i) w❙|"
    unfolding ‹❙|take k (drop (Suc i) w)❙| = k› ‹Suc (❙|w❙| - Suc i - Suc k) = ❙|w❙| - Suc(i+k)›
    using  ‹Suc (k + i) < ❙|w❙|› by simp

  hence "❙|drop (Suc (❙|w❙| - Suc i - k)) (drop i w)❙| = k"
    using  ‹i < ❙|w❙|› by fastforce
  have "❙|w❙| - Suc i - k < ❙|drop i w❙|"
    by (metis Suc_diff_Suc ‹i < ❙|w❙|› diff_less_Suc length_drop)

  have "(drop i w)!(❙|w❙| - Suc i - k) = w ! i"
    using ‹Suc (k + i) < ❙|w❙|› ‹w ! i = w ! (❙|w❙| - Suc k)› by auto

  have "take (Suc k) (drop i w) = w!i#take k (drop (Suc i) w)"
    using Cons_nth_drop_Suc[OF ‹i < ❙|w❙|›] take_Suc_Cons[of k "w!i" "drop (Suc i) w"] by argo

  have "drop (Suc (❙|w❙| - Suc i - k)) (drop i w) = drop (❙|w❙| - Suc i - k) (drop (Suc i) w)"
    by auto
  hence "drop (Suc (❙|w❙| - Suc i - k)) (drop i w) = take k (drop (Suc i) w)"
    using ‹❙|take k (drop (Suc i) w)❙| = k›
      ‹take k (drop (Suc i) w) ≤s drop (Suc i) w› suf_drop_conv length_drop by metis

  with
    id_take_nth_drop[OF ‹❙|w❙| - Suc i - k < ❙|drop i w❙|›]
  show ?thesis
    unfolding ‹(drop i w)!(❙|w❙| - Suc i - k) = w ! i›
      ‹take (Suc k) (drop i w) = w!i#take k (drop (Suc i) w)›
    unfolding suffix_def by auto
qed

lemma ext_border_Cons:
  "Suc i + ❙|u❙| = ❙|w❙| ⟹ u ≤b w ⟹ (w!i)#u ≤b (w!i)#w"
  unfolding border_def using ext_suf_Cons Cons_prefix_Cons list.discI list.inject by metis

lemma border_add_Cons_len: assumes "max_borderP u w" and "v ≤b (a#w)" shows "❙|v❙| ≤ Suc ❙|u❙|"
proof-
  have "v ≠ ε"
    using ‹v ≤b (a#w)› by simp
  then obtain v' where "v = a#v'"
    using  borderD_pref[OF ‹v ≤b (a#w)›, unfolded prefix_Cons] by blast
  show "❙|v❙| ≤ Suc ❙|u❙|"
  proof (cases "v' = ε")
    assume "v' ≠ ε"
    have "w ≠ ε"
      using  borderedI[OF ‹v ≤b (a#w)›] sing_not_bordered[of a] by blast
    have "v' ≤b w"
      using border_ConsD(2)[OF ‹v ≤b (a#w)›[unfolded ‹v = a # v'›] ‹v' ≠ ε›].
    thus "❙|v❙| ≤ Suc ❙|u❙|"
      unfolding ‹v = a # v'› length_Cons Suc_le_mono
      using ‹max_borderP u w›[unfolded max_borderP_def]
        prefix_length_le by blast
  qed (simp add: ‹v = a#v'›)
qed

section ‹Computing the Border Array›

text‹The computation is a special case of the Knuth-Morris-Pratt algorithm.›

text‹
▪ KMP w arr bord pos
▪ w: processed word does not change; it is processed starting from the last letter
▪ pos: actually examined pos-th letter; that is, it is w!(pos-1)
▪ arr: already calculated suffix-border-array of w;
       that is, the length of array is (|w| - pos)
       and arr!(|w| - pos - bord) is the max border length of the suffix of w of length bord
▪ bord: length of the current max border length candidate
       to see whether it can be extended we compare: w!(pos-1) ?= w!(|w| - (Suc bord));
       (Suc bord) is the length of the max border if the comparison is succesful
▪      if the comparison fails we move to the max border of the suffix of length bord;
       its max border length is stored in arr!(|w| - pos - bord)
▪      if bord was 0 and the comparison failed, the word is unbordered
›

fun   KMP_arr :: "'a list ⇒ nat list ⇒ nat ⇒ nat ⇒ nat list"
  where    "KMP_arr _ arr _ 0 = arr" |
    "KMP_arr w arr bord (Suc i) =
          (if w!i = w!(❙|w❙| - (Suc bord))
          then  (Suc bord) # arr
          else (if bord = 0
                then  0#arr
                else (if (arr!(❙|w❙| - (Suc i) - bord)) < bord ― ‹always True, for sake of termination›
                      then arr
                      else  undefined#arr ― ‹else: dummy termination condition›
                      )
                )
          )"

fun KMP_bord :: "'a list ⇒ nat list ⇒ nat ⇒ nat ⇒ nat"
  where     "KMP_bord _ _ bord 0 = bord" |
    "KMP_bord w arr bord (Suc i) =
          (if w!i = w!(❙|w❙| - (Suc bord))
          then Suc bord
          else (if bord = 0
                 then  0
                 else (if (arr!(❙|w❙| - (Suc i) - bord)) < bord ― ‹always True, for sake of termination›
                       then arr!(❙|w❙| - (Suc i) - bord)
                       else  0 ― ‹dummy termination condition›
                      )
                 )
          )"

fun KMP_pos :: "'a list ⇒ nat list ⇒ nat ⇒ nat ⇒ nat"
  where
    "KMP_pos _ _ _ 0 = 0" |
    "KMP_pos w arr bord (Suc i) =
          (if w!i = w!(❙|w❙| - (Suc bord))
          then i
          else (if bord = 0
                 then  i
                 else (if (arr!(❙|w❙| - (Suc i) - bord)) < bord ― ‹always True, for sake of termination›
                       then Suc i
                       else  i ― ‹else: dummy termination condition›
                      )
                 )
          )"

thm prod_cases
    nat.exhaust
    prod.exhaust
    prod_cases3

function KMP :: "'a list ⇒ nat list ⇒ nat ⇒ nat ⇒ nat list" where
  "KMP w arr bord 0 = arr"  |
  "KMP w arr bord (Suc i) = KMP w (KMP_arr w arr bord (Suc i)) (KMP_bord w arr bord (Suc i)) (KMP_pos w arr bord (Suc i))"
  using not0_implies_Suc by (force+)
termination
  by (relation "measures [λ(_, _ , compar, pos). pos,λ(_, _ , compar, pos). compar]", simp_all)

lemma KMP_len: "❙|KMP w arr bord pos❙| = ❙|arr❙| + pos"
  by (induct w arr bord pos rule: KMP.induct, auto)

value[nbe] "KMP [a] [0] 0 0"

value "KMP [ 0::nat] [0] 0 0"
value "KMP [5,4,5,3,5,5::nat] [0] 0 5"
value "KMP [5,4::nat,5,3,5,5] [1,0] 1 4"
value "KMP [0,1,1,0::nat,0,0,1,1,1] [0] 0 8"
value "KMP [0::nat,1] [0] 0 1"

subsection ‹Verification of the computation›

definition KMP_valid :: "'a list ⇒ nat list ⇒ nat ⇒ nat ⇒ bool"
  where "KMP_valid w arr bord pos = (❙|arr❙| + pos = ❙|w❙|  ∧
                                    ― ‹bord is the length of a border of (drop pos w), or 0›
                                    pos + bord < ❙|w❙| ∧
                                    take bord (drop pos w) ≤p (drop pos w) ∧
                                    take bord (drop pos w) ≤s (drop pos w) ∧
                                    ― ‹... and no longer border can be extended›
                                    (∀ v. v ≤b w!(pos - 1)#(drop pos w) ⟶ ❙|v❙| ≤ Suc bord) ∧
                                    ― ‹the array gives maximal border lengths of corresponding suffixes›
                                    (∀ k < ❙|arr❙|. max_borderP (take (arr!k) (drop (pos + k) w)) (drop (pos + k) w))
                                    )"

lemma " KMP_valid w arr bord pos  ⟹ w ≠ ε"
  unfolding KMP_valid_def
  using le_antisym less_imp_le_nat less_not_refl2 take_Nil take_all_iff by metis

lemma KMP_valid_base: assumes "w ≠ ε" shows "KMP_valid w [0] 0 (❙|w❙|-1)"
proof (unfold KMP_valid_def, intro conjI)
  show "❙|[0]❙| + (❙|w❙| - 1) = ❙|w❙|"
    by (simp add: assms)
  show "❙|w❙| - 1 + 0 < ❙|w❙|"
    using ‹w ≠ ε›  by simp
  show "take 0 (drop (❙|w❙| - 1) w) ≤p drop (❙|w❙| - 1) w"
    by simp
  show "take 0 (drop (❙|w❙| - 1) w) ≤s drop (❙|w❙| - 1) w"
    by simp
  show "∀v. v ≤b w ! (❙|w❙| - 1 - 1) # drop (❙|w❙| - 1) w ⟶ ❙|v❙| ≤ Suc 0"
  proof (rule allI, rule impI)
     fix v assume b: "v ≤b w ! (❙|w❙| - 1 - 1) # drop (❙|w❙| - 1) w"
    have "❙|w ! (❙|w❙| - 1 - 1) # drop (❙|w❙| - 1) w❙| = Suc (Suc 0)"
      using ‹❙|[0]❙| + (❙|w❙| - 1) = ❙|w❙|› by auto
    from border_len(3)[OF b, unfolded this]
    show "❙|v❙| ≤ Suc 0"
      using border_len(3)[OF b] by simp
  qed
  have  "❙|w❙| - Suc 0 = ❙|butlast w❙|"
    by simp
  have all: "∀v. v ≤b [last w] ⟶ v ≤p ε"
      by (meson borderedI sing_not_bordered)
  have "butlast w ⋅ [last w] = w"
    by (simp add: assms)
  hence last:  "drop (❙|w❙| - Suc 0) w = [last w]"
    unfolding ‹❙|w❙| - Suc 0 = ❙|butlast w❙|› using drop_pref by metis
  hence "max_borderP ε (drop (❙|w❙| - Suc 0) w)"
    unfolding max_borderP_def using all by simp
  thus "∀k<❙|[0]❙|. max_borderP (take ([0] ! k) (drop (❙|w❙| - 1 + k) w)) (drop (❙|w❙| - 1 + k) w)"
    by simp
qed

lemma KMP_valid_step: assumes "KMP_valid w arr bord (Suc i)"
  shows "KMP_valid  w (KMP_arr w arr bord (Suc i)) (KMP_bord w arr bord (Suc i)) (KMP_pos w arr bord (Suc i))"
proof-
  ― ‹Consequences of the assumption›
  have all_k: "∀k<❙|arr❙|. max_borderP (take (arr ! k) (drop (Suc i + k) w)) (drop (Suc i + k) w)"
    using assms[unfolded KMP_valid_def] by blast
  have "❙|arr❙| + Suc i = ❙|w❙|" and
    "Suc i + bord < ❙|w❙|" and
    bord_pref: "take bord (drop (Suc i) w) ≤p drop (Suc i) w" and
    bord_suf: "take bord (drop (Suc i) w) ≤s drop (Suc i) w" and
    up_bord: "⋀ v. v ≤b w!i#(drop (Suc i) w) ⟹ ❙|v❙| ≤ Suc bord" and
    all_k_neq0: "⋀ k. k < ❙|arr❙| ⟹ take (arr ! k) (drop (Suc i + k) w) = drop (Suc i + k) w ⟶ drop (Suc i + k) w = ε" and
    all_k_pref: "⋀ k. k < ❙|arr❙| ⟹ take (arr ! k) (drop (Suc i + k) w) ≤p drop (Suc i + k) w" and
    all_k_suf: "⋀ k. k < ❙|arr❙| ⟹ take (arr ! k) (drop (Suc i + k) w) ≤s drop (Suc i + k) w" and
    all_k_v: "⋀ k v. k < ❙|arr❙| ⟹ v ≤b drop (Suc i + k) w ⟹ v ≤p take (arr ! k) (drop (Suc i + k) w)"
    using assms[unfolded KMP_valid_def max_borderP_def diff_Suc_1] by blast+
  have all_k_neq: "⋀ k. k < ❙|arr❙| ⟹ take (arr ! k) (drop (Suc i + k) w) ≠ drop (Suc i + k) w"
    using ‹Suc i + bord < ❙|w❙|› ‹❙|arr❙| + Suc i = ❙|w❙|› all_k_neq0
    add.commute add_le_imp_le_left drop_all_iff le_antisym less_imp_le_nat less_not_refl2 by metis

  have "w ≠ ε"
    using ‹❙|arr❙| + Suc i = ❙|w❙|› by auto
  have "Suc i < ❙|w❙|"
    using ‹Suc i + bord < ❙|w❙|›  by simp
  have pop_i: "drop i w = (w!i)# (drop (Suc i) w)"
    by (simp add: Cons_nth_drop_Suc Suc_lessD ‹Suc i < ❙|w❙|›)
  have "drop (Suc i) w ≠ ε"
    using ‹Suc i < ❙|w❙|› by fastforce
  have "Suc i + (❙|w❙| - Suc i - bord) = ❙|w❙| - bord"
    unfolding diff_right_commute[of _ _ bord] using ‹Suc i + bord < ❙|w❙|›  by linarith

  show "KMP_valid  w (KMP_arr w arr bord (Suc i)) (KMP_bord w arr bord (Suc i)) (KMP_pos w arr bord (Suc i))"
  proof (cases "w ! i = w ! (❙|w❙| - Suc bord)")
    assume match: "w ! i = w ! (❙|w❙| - Suc bord)" ― ‹The current candidate is extendable›
    show ?thesis
    proof (unfold KMP_valid_def KMP_arr.simps KMP_bord.simps KMP_pos.simps if_P[OF match], intro conjI)
      show "❙|Suc bord # arr❙| + i = ❙|w❙|"
        using ‹❙|arr❙| + Suc i = ❙|w❙|› by auto
      show "i + Suc bord < ❙|w❙|"
        using ‹Suc i + bord < ❙|w❙|› by auto
      show "take (Suc bord) (drop i w) ≤p drop i w"
        using take_is_prefix by auto
      show "take (Suc bord) (drop i w) ≤s drop i w"
        using ‹take bord (drop (Suc i) w) ≤s drop (Suc i) w› ext_suf_Cons_take_drop match by blast
          ― ‹The new border array is correct›
      show all_k_new: "∀k<❙|Suc bord # arr❙|. max_borderP (take ((Suc bord # arr) ! k) (drop (i + k) w)) (drop (i + k) w)"
      proof (rule allI, rule impI)
        fix k assume "k < ❙|Suc bord # arr❙|"
        show "max_borderP (take ((Suc bord # arr) ! k) (drop (i + k) w)) (drop (i + k) w)"
        proof (cases "0 < k")
          assume "0 < k" ― ‹old entries are valid:›
          thus ?thesis using all_k
            by (metis Suc_less_eq ‹k < ❙|Suc bord # arr❙|› add.right_neutral add_Suc_shift gr0_implies_Suc list.size(4) nth_Cons_Suc)
        next
          assume "¬ 0 < k" hence "k = 0" by simp
          show ?thesis ― ‹the extended border is maximal:›
            unfolding max_borderP_def ‹k = 0›
          proof (intro conjI)
            show "take ((Suc bord # arr) ! 0) (drop (i + 0) w) = drop (i + 0) w ⟶ drop (i + 0) w = ε"
              using ‹i + Suc bord < ❙|w❙|› by fastforce
            show "take ((Suc bord # arr) ! 0) (drop (i + 0) w) ≤p drop (i + 0) w"
              using ‹take (Suc bord) (drop i w) ≤p drop i w› by auto
            show "take ((Suc bord # arr) ! 0) (drop (i + 0) w) ≤s drop (i + 0) w"
              by simp fact
            show "∀v. v ≤b drop (i + 0) w ⟶ v ≤p take ((Suc bord # arr) ! 0) (drop (i + 0) w)"
            proof (rule allI, rule impI)
              fix v assume "v ≤b drop (i + 0) w" hence "v ≤b drop i w" by simp
              from borderD_pref[OF this] up_bord[OF this[unfolded pop_i]]
              show "v ≤p take ((Suc bord # arr) ! 0) (drop (i + 0) w)"
                 unfolding prefix_def by force
            qed
          qed
        qed
      qed
        ― ‹the extended border is the longest candidate:›
      have "max_borderP (take (Suc bord) (drop i w)) (drop i w)"
        using  all_k_new[rule_format, of 0, unfolded length_Cons nth_Cons_0 add_0_right, OF zero_less_Suc].
      from border_add_Cons_len[OF this] max_borderP_D_max[OF this] max_borderP_D_neq[OF _ this]
      show "∀v. v ≤b w ! (i - 1) # drop i w ⟶ ❙|v❙| ≤ Suc (Suc bord)"
        using nat_le_linear take_all take_len list.discI pop_i by metis
    qed
  next
    assume mismatch: "w ! i ≠ w ! (❙|w❙| - Suc bord)" ― ‹The current candidate is not extendable›
    show ?thesis
    proof (cases "bord = 0")
      assume  "bord ≠ 0" ― ‹Recursion: try the maximal border of the current candidate...›
        let ?k  =  "❙|w❙| - Suc i - bord" and
            ?w' = "drop (Suc i) w"
        have "?k < ❙|arr❙|"
        using ‹Suc i + bord < ❙|w❙|› ‹❙|arr❙| + Suc i = ❙|w❙|› ‹bord ≠ 0› by linarith
      from all_k_neq[OF this]
      have "arr ! ?k < bord" ― ‹... which is stored in the array, and is shorter›
        by (simp add: ‹take (arr ! ?k) (drop (Suc i + ?k) w) ≠ drop (Suc i + ?k) w› ‹Suc i + ?k = ❙|w❙| - bord› ‹Suc i + bord < ❙|w❙|› add_diff_inverse_nat diff_add_inverse2 gr0I less_diff_conv nat_diff_split_asm )
        let ?old_pref = "take bord ?w'" and
            ?old_suf =  "drop (❙|w❙| - bord) w" and
            ?new_pref = "take (arr ! ?k) ?w'"
      show ?thesis
      proof (unfold KMP_valid_def KMP_arr.simps KMP_bord.simps KMP_pos.simps if_not_P[OF mismatch] if_not_P[OF ‹bord ≠ 0›] if_P[OF ‹arr ! ?k < bord›] diff_Suc_1, intro conjI)
        show "❙|arr❙| + Suc i = ❙|w❙|"
          using ‹❙|arr❙| + Suc i = ❙|w❙|› by auto
        show "Suc i + arr ! ?k < ❙|w❙|"
          using ‹Suc i + bord < ❙|w❙|› ‹arr ! ?k < bord› by linarith
        show "take (arr ! ?k) (drop (Suc i) w) ≤p drop (Suc i) w"
          using take_is_prefix by blast

        ― ‹Next goal: the new border is a suffix›

        have "?old_suf ≤s ?w'"
          by (meson ‹Suc i + bord < ❙|w❙|› le_suf_drop less_diff_conv nat_less_le)
        have "❙|?old_pref❙| = bord"
          using ‹Suc i + bord < ❙|w❙|› take_len len_after_drop nat_less_le by blast
        also have "... = ❙|?old_suf❙|"
          using ‹Suc i + bord < ❙|w❙|›  by simp
        ultimately have eq1: "?old_pref = ?old_suf" ― ‹bord defines a border›
          using ‹take bord (drop (Suc i) w) ≤s drop (Suc i) w› ‹drop (❙|w❙| - bord) w ≤s drop (Suc i) w› suf_ruler_eq_len by metis


        have "❙|?new_pref❙| = arr!?k"
          using take_len ‹Suc i + bord < ❙|w❙|› ‹arr ! ?k < bord› diff_diff_left by force
        have "take (arr ! ?k) ?old_suf ≤p ?old_pref"
          using take_is_prefix ‹?old_pref = ?old_suf› by metis
        from pref_take[OF pref_trans[OF this take_is_prefix], unfolded ‹❙|?new_pref❙| = arr!?k›, symmetric]
        have "take (arr ! ?k) ?old_suf = take (arr ! ?k) ?w'"
          using take_len ‹arr ! ?k < bord› ‹bord = ❙|drop (❙|w❙| - bord) w❙|› less_imp_le_nat by metis
        from all_k_suf[OF ‹?k < ❙|arr❙|›, unfolded ‹Suc i + ?k = ❙|w❙| - bord›] this
        have "take (arr ! ?k) ?w' ≤s ?old_suf" by simp ― ‹The new prefix is a suffix of the old suffix›

        with ‹?old_pref ≤s ?w'›[unfolded ‹?old_pref = ?old_suf›]
        show "take (arr ! ?k) ?w' ≤s ?w'"
          using suf_trans by blast

        ― ‹Key facts about borders of the w'›
            have "?old_pref ≠ ε"
              using ‹bord ≠ 0› ‹❙|?old_pref❙| = bord› by force
            moreover have "?old_pref ≠ ?w'"
              using ‹Suc i + bord < ❙|w❙|›
              by (intro lenarg_not, unfold length_drop ‹❙|take bord ?w'❙| = bord›, linarith)
            ultimately have "?old_pref ≤b ?w'" ― ‹bord is the length of a border›
              by (intro borderI[OF bord_pref bord_suf])

        ― ‹We want to prove that the new border is the longest candidate›
           show "∀v. v ≤b w !i # ?w' ⟶ ❙|v❙| ≤ Suc (arr ! ?k)"
             proof (rule allI,rule impI)
               have extendable: "w ! i # v' ≤b w ! i # ?w' ⟹ v' ≠ ε ⟹ ❙|v'❙| ≤ arr ! ?k" for v' ― ‹First consider a border of w', which is extendable›
               proof-
                 assume "w!i # v' ≤b w!i # ?w'" and "v' ≠ ε"
                 from suf_trans[OF borderD_suf[OF ‹w!i # v' ≤b w ! i # ?w'›, folded pop_i] suffix_drop]
                 have "w!i # v' ≤s w".
                 from this[unfolded suf_drop_conv, THEN nth_via_drop] mismatch
                 have "❙|w!i # v'❙| ≠ Suc bord"
                   by force
                 with up_bord[OF ‹w!i # v' ≤b w ! i # ?w'›]
                 have "❙|v'❙| < bord"  ― ‹It is shorter than the old candidate border›
                   by simp
                 from border_ConsD(2)[OF ‹w!i # v' ≤b w ! i # ?w'› ‹v' ≠ ε›]
                 have "v' ≤b ?w'".
                 from borders_compare[OF ‹?old_pref ≤b ?w'› this, unfolded  ‹❙|?old_pref❙| = bord›, unfolded ‹?old_pref = ?old_suf›, OF ‹❙|v'❙| < bord›]
                 have "v' ≤b ?old_suf". ― ‹... and therefore its border›
                 from prefix_length_le[OF max_borderP_D_max[OF all_k[rule_format, OF ‹?k < ❙|arr❙|›], unfolded ‹Suc i + ?k = ❙|w❙| - bord›, OF this]]
                 show "❙|v'❙| ≤ arr!?k" ― ‹... and hence short›
                   using len_take1[of "arr!?k", of w] by simp
               qed
            fix v assume "v ≤b w!i # ?w'"  ― ‹Now consider a border of the extended word›
            show "❙|v❙| ≤ Suc (arr ! ?k)"
            proof (cases "❙|v❙| ≤ Suc 0")
              assume "¬ ❙|v❙| ≤ Suc 0" hence "Suc 0 < ❙|v❙|" by simp
              from hd_tl_longE[OF this]
              obtain a v' where "v = a#v'" and "v' ≠ ε"
                by blast
              with borderD_pref[OF ‹v ≤b w!i # ?w'›, unfolded prefix_Cons]
              have "v = w!i#v'"
                by simp
              from extendable[OF ‹v ≤b w!i # ?w'›[unfolded ‹v = w!i#v'›] ‹v' ≠ ε›]
              show ?thesis
                by (simp add: ‹v = a # v'›)
          qed simp
        qed
        show " ∀k<❙|arr❙|. max_borderP (take (arr ! k) (drop (Suc i + k) w)) (drop (Suc i + k) w)"
          using all_k by blast
      qed
    next
      assume "bord = 0" ― ‹End of recursion.›
      show ?thesis
      proof (unfold KMP_valid_def KMP_arr.simps KMP_bord.simps KMP_pos.simps if_not_P[OF mismatch] if_P[OF ‹bord = 0›], intro conjI)
        show "❙|0 # arr❙| + i = ❙|w❙|"
          using ‹❙|arr❙| + Suc i = ❙|w❙|› by auto
        show "i + 0 < ❙|w❙|"
          by (simp add: Suc_lessD ‹Suc i < ❙|w❙|›)
        show "take 0 (drop i w) ≤p drop i w"
          by simp
        show "take 0 (drop i w) ≤s drop i w"
          using ext_suf_Cons_take_drop by simp
            ― ‹The extension is unbordered›
        have "max_borderP ε (drop i w)"
        proof(rule ccontr)
          assume "¬ max_borderP ε (drop i w)"
          then obtain a t where "max_borderP (a#t) (drop i w)"
            unfolding pop_i  using max_border_ex[of "w ! i # drop (Suc i) w"] neq_Nil_conv by metis
          from up_bord[OF max_borderP_border[OF this list.simps(3), unfolded pop_i], unfolded ‹bord = 0›]
          have "t = ε" by simp
          from max_borderP_border[OF ‹max_borderP (a#t) (drop i w)›[unfolded this] list.simps(3)]
          have "[a] ≤b drop i w".
          from borderD_pref[OF this]
          have "w!i = a"
            by (simp add: pop_i)
          moreover have "w!(❙|w❙| - 1) = a"
            using  borderD_suf[OF ‹[a] ≤b drop i w›] nth_via_drop sing_len suf_drop_conv suf_share_take suffix_drop suffix_length_le by metis
          ultimately show False
            using mismatch[unfolded ‹bord = 0›] by simp
        qed
        thus "∀v. v ≤b w ! (i - 1) # drop i w ⟶ ❙|v❙| ≤ Suc 0"
          by (metis border_add_Cons_len list.size(3))
            ― ‹The array is valid: old values from assumption, the first 0 since the extension is unbordered›
        show "∀k<❙|0 # arr❙|. max_borderP (take ((0 # arr) ! k) (drop (i + k) w)) (drop (i + k) w)"
        proof (rule allI, rule impI)
          fix k assume "k < ❙|0 # arr❙|"
          show "max_borderP (take ((0 # arr) ! k) (drop (i + k) w)) (drop (i + k) w)"
          proof (cases "0 < k")
            assume "0 < k"
            thus ?thesis using all_k
              by (metis Suc_less_eq ‹k < ❙|0 # arr❙|› add.right_neutral add_Suc_shift gr0_implies_Suc list.size(4) nth_Cons_Suc)
          next
            assume "¬ 0 < k" hence "k = 0" by simp
            thus ?thesis
              using ‹max_borderP ε (drop i w)› by auto
          qed
        qed
      qed
    qed
  qed
qed

lemma KMP_valid_max: assumes  "KMP_valid w arr bord pos" "k < ❙|w❙|"
  shows "max_borderP (take ((KMP w arr bord pos)!k) (drop k w)) (drop k w)"
  using assms
proof (induct w arr bord pos arbitrary: k rule: KMP.induct)
  case (2 w arr bord i)
  then show ?case
    unfolding KMP.simps using KMP_valid_step  by blast
qed (simp add: KMP_valid_def)

section ‹Border array›

fun border_array :: "'a list ⇒ nat list" where
  "border_array ε = ε"
| "border_array (a#w) = rev (KMP (rev (a#w)) [0] 0 (❙|a#w❙|-1))"

lemma border_array_len: "❙|border_array w❙| = ❙|w❙|"
  by (induct w, simp_all add: KMP_len)

theorem bord_array: assumes "Suc k ≤ ❙|w❙|" shows "(border_array w)!k = ❙|max_border (take (Suc k) w)❙|"
proof-
  define m where "m = ❙|w❙| - Suc k"
  hence "m < ❙|rev w❙|"
    by (simp add: Suc_diff_Suc assms less_eq_Suc_le)
  have "rev w ≠ ε" and "k < ❙|rev w❙|"
    using ‹Suc k ≤ ❙|w❙|› by auto
  hence "w = hd w#tl w"
    by simp
  from arg_cong[OF border_array.simps(2)[of "hd w" "tl w", folded this], of rev, unfolded rev_rev_ident]
  have "rev (border_array w) =  (KMP (rev w) [0] 0 (❙|w❙|-1))".
  hence "max_borderP (take (rev (border_array w)!m) (drop m (rev w))) (drop m (rev w))"
    using KMP_valid_max[rule_format, OF KMP_valid_base[OF ‹rev w ≠ ε›] ‹m < ❙|rev w❙|›] by simp
  hence  "max_border (drop m (rev w)) = take (rev (border_array w)!m) (drop m (rev w))"
    using max_borderP_max_border by blast
  hence  "❙|max_border (drop m (rev w))❙| =  rev (border_array w)!m"
    by (metis ‹m < ❙|rev w❙|› drop_all_iff leD max_border_nemp_neq nat_le_linear take_all take_len)
  thus ?thesis
    using m_def
    by (metis Suc_diff_Suc ‹k < ❙|rev w❙|› ‹m < ❙|rev w❙|› border_array_len diff_diff_cancel drop_rev length_rev less_imp_le_nat max_border_len_rev rev_nth)
qed

lemma max_border_comp [code]: "max_border w = take ((border_array w)!(❙|w❙|-1)) w"
proof (cases "w = ε")
  assume "w = ε"
  thus "max_border w = take ((border_array w)!(❙|w❙|-1)) w"
    using max_bord_take take_Nil by metis
next
  assume "w ≠ ε"
  hence "Suc (❙|w❙| - 1) ≤ ❙|w❙|" by simp
  from bord_array[OF this]
  have "(border_array w)!(❙|w❙|-1) = ❙|max_border w❙|"
    by (simp add: ‹w ≠ ε›)
  thus "max_border w = take ((border_array w)!(❙|w❙|-1)) w"
    using max_bord_take by auto
qed

value[nbe] "primitive [a,b,a]"

value "primitive [0,1,0::nat]"

value "border_array [5,4,5,3,5,5,5,4,5::nat]"

value "primitive [5,4,5,3,5,5,5,4,5::nat]"

value "primitive [5,4,5,3,5,5,5,4,5::nat]"

value[nbe] "bordered []"

value "border_array [0,1,1,0,0,0,1,1,1,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,1,1,0,0,0,1,1,1,0,0,1,0::nat]"

value[nbe] "border_array ε"

value "border_array [1,0,1,0,1,1,0,0::nat]"

value "max_border [1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,0,1,0,1,0,0,1::nat]"

thm max_border_comp ― ‹code for @{term max_border}, based on @{term border_array}›

value "bordered [1,0::nat,1,0,1,1,0,1]"

value "π [1::nat,0,1,0,1,1,0,1]"

thm  min_per_root_take ― ‹code for @{term π}, based on @{term max_border}›

value "❙|π [1::nat,0,1,0,1,1,0,1]❙|"

value "ρ [1::nat,0,1,1,0,1,1,0,1]"

thm primroot_code  ― ‹code for @{term ρ}, based on @{term π}›

value "ρ [1::nat,0,1,1,0,1,1,0]"


value[nbe] "π ε"


end