Theory NN_Layers_Matrix_Main

(***********************************************************************************
 * Copyright (c) University of Exeter, UK
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 ***********************************************************************************)


subsection‹Neural Network as Sequential Layers using Vector Spaces (\thy)›

theory
  NN_Layers_Matrix_Main
  imports
    NN_Lipschitz_Continuous
    NN_Layers
    Matrix_Utils
    Properties_Matrix
begin


text‹\label{sec:matrix}
  In this theory, we model feed-forward neural networks as ``computational layers'' following 
  the structure of TensorFlow~\<^cite>‹"abadi.ea:tensorflow:2015"› closely.
›



definition ‹valid_activation_tab⇩_m tab = (∀ f ∈ ran tab. ∀ xs. dim_vec xs = dim_vec (f xs))›

lemma valid_activation_preserves_dim: 
  assumes ‹valid_activation_tab⇩_m t›
  assumes ‹t n = Some f›
  shows ‹dim_vec xs = dim_vec (f xs)›
  using assms unfolding valid_activation_tab⇩_m_def
  using ranI by metis 


fun layer_consistent⇩_m :: "('a vec, 'b, 'c mat) neural_network_seq_layers ⇒ nat ⇒ ('a vec, 'b, 'c mat) layer ⇒ bool" 
  where
    ‹layer_consistent⇩_m _  nc  (In l) = (0 < units l ∧  nc = units l)›
  | ‹layer_consistent⇩_m _ nc (Out l) = (0 < units l ∧ nc = units l)›
  | ‹layer_consistent⇩_m N nc (Activation l) = ( (0 < units l ∧ nc = units l)
                                            ∧ ( ((activation_tab N) (φ l)) ≠ None  ))›
  | ‹layer_consistent⇩_m N nc (Dense l) = (0 < units l ∧ 0 < nc 
                                     ∧ dim_vec (β l) = units l 
                                     ∧ dim_col (ω l) = units l
                                     ∧ dim_row (ω l) = nc
                                     ∧ ( ((activation_tab N) (φ l)) ≠ None  ))›

fun layers_consistent⇩_m where
  ‹layers_consistent⇩_m  N _ [] = True›
|‹layers_consistent⇩_m  N w (l#ls) = ((layer_consistent⇩_m N w l) ∧ (layers_consistent⇩_m N (out_deg_layer l)  ls))›

lemma layer_consistent⇩_m_activation_tab_const: 
  ‹layer_consistent⇩_m N nc l = layer_consistent⇩_m ⦇layers = ls, activation_tab = activation_tab N⦈ nc l›
  by(cases l, simp_all) 

lemma layers_consistent⇩_m_activation_tab_const: 
  ‹layers_consistent⇩_m N nc ls = layers_consistent⇩_m ⦇layers = ls', activation_tab = activation_tab N⦈ nc ls›
proof(induction ls arbitrary: nc)
  case Nil
  then show ?case by simp
next
  case (Cons a ls)
  then show ?case 
    by(simp add: layer_consistent⇩_m_activation_tab_const[symmetric])
qed

lemma layers_consistent⇩_mAll: 
  assumes ‹layers_consistent⇩_m N inputs (layers N)›
  shows ‹∀ l ∈ set (layers N). ∃ n . layer_consistent⇩_m N n l›
proof(cases N)
  case (fields layers activation_tab) note i = this
  then show ?thesis 
    apply(insert assms, simp add: i, safe)
    apply(thin_tac "N = ⦇layers = layers, activation_tab = activation_tab⦈")
    apply(subst layer_consistent⇩_m_activation_tab_const[of _ _ _ "[]"])
    apply(induction "layers" arbitrary:inputs activation_tab)
    apply(simp)
    using layers_consistent⇩_m_activation_tab_const layer_consistent⇩_m_activation_tab_const[of _ _ _ "[]"] 
    apply(clarsimp)[1] 
    using layer_consistent⇩_m_activation_tab_const by fastforce
qed


lemma layers_consistent⇩_mAll': 
  assumes ‹layers_consistent⇩_m N (in_deg_NN N) (layers N)›
  shows ‹∀ l ∈ set (layers N). ∃ n . layer_consistent⇩_m N n l›
  using assms layers_consistent⇩_mAll by blast

locale neural_network_sequential_layers⇩_m = 
  fixes N::‹('a::comm_ring Matrix.vec, 'b, 'a Matrix.mat) neural_network_seq_layers› 
  assumes head_is_In:  ‹isIn (hd (layers N))›
    and     last_is_Out: ‹isOut (last (layers N))›
    and     layer_internal: ‹list_all isInternal  ((tl o butlast) (layers N))›
    and     activation_tab_valid: ‹valid_activation_tab⇩_m (activation_tab N)›  
    and     layer_valid:  ‹layers_consistent⇩_m N (in_deg_NN N) (layers N)› 
begin

lemma layers_nonempty: ‹layers N ≠ []›
  by (metis hd_Nil_eq_last head_is_In isIn.elims(2) isOut.simps(2) last_is_Out) 

lemma min_length_layers_two: ‹1 < length (layers N)›
  by (metis (no_types, lifting) One_nat_def add_0 append_Nil append_butlast_last_id 
      append_eq_append_conv head_is_In isIn.simps(2) isOut.elims(2) last_is_Out 
      layers_nonempty length_0_conv less_one linorder_neqE_nat list.sel(1) list.size(4)) 

lemma layers_structure: ‹∃ il ol ls. layers N = (In il)#ls@[Out ol]›
  by (metis (no_types, lifting) append_butlast_last_id head_is_In isIn.elims(2) isOut.elims(2) 
      last.simps last_is_Out layer.distinct(1) layers_nonempty list.exhaust list.sel(1)) 
end 


fun predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m::‹('a::comm_ring Matrix.vec, 'b, 'a Matrix.mat) neural_network_seq_layers ⇒ ('a Matrix.vec) option ⇒ ('a Matrix.vec, 'b, 'a Matrix.mat) layer ⇒ ('a Matrix.vec) option› where
  ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N (Some vs) (In l) = (if layer_consistent⇩_m N (dim_vec vs) (In l) then Some vs else None) ›
| ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N (Some vs) (Out l) = (if layer_consistent⇩_m N (dim_vec vs) (Out l) then Some vs else None) ›
| ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N (Some vs) (Dense pl) = (if layer_consistent⇩_m N (dim_vec vs) (Dense pl) then
                                             (case activation_tab N (φ pl) of
                                               None ⇒ None
                                             | Some f ⇒ Some (f ((vs ⇩_v*  ω pl) + β pl) )
                                            ) else None )›
| ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N (Some vs) (Activation pl) = (if layer_consistent⇩_m N (dim_vec vs) (Activation pl) then
                                                 (case activation_tab N (φ pl) of
                                                   None ⇒ None
                                                 | Some f ⇒ Some (f vs)
                                               ) else None )›
| ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m _ None _ =  None ›

fun  
  predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl ::‹('a::{comm_ring} Matrix.vec, 'b, 'a Matrix.mat) neural_network_seq_layers ⇒ 'a Matrix.vec ⇒ ('a Matrix.vec, 'b, 'a Matrix.mat) layer ⇒ 'a Matrix.vec›  
  where
    ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N vs (In l)  = vs›
  | ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N vs (Out l) = vs›
  | ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N vs (Dense pl) = ((the (activation_tab N (φ pl))) ((vs ⇩_v*  ω pl) + β pl))›
  | ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N vs (Activation pl) = ( the (activation_tab N (φ pl)) vs)›


lemma predict_layer_Some:
  assumes ‹(layer_consistent⇩_m N (dim_vec xs) l)› 
  shows ‹(predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N (Some xs) l ≠ None) ›  
proof(cases l)
  case (In x1)
  then show ?thesis using assms by(simp)
next
  case (Out x2)
  then show ?thesis using assms by simp 
next
  case (Dense x3)
  then show ?thesis using assms by force
next
  case (Activation x4)
  then show ?thesis using assms by force
qed

definition ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N inputs = foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N) (Some inputs) (layers N)›
definition ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_impl N inputs = foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N) inputs (layers N)›

definition ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' N inputs = map_option list_of_vec (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N (vec_of_list inputs))›

lemma predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_l_impl_activation_tab_const: ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N = predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl ⦇layers = l, activation_tab = activation_tab N⦈›
  apply(rule ext)+
  using neural_network_seq_layers.select_convs predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl.elims predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl.simps
  by (smt (verit))


lemma layers_consistent⇩_m_layersN_const: 
  ‹layers_consistent⇩_m N = layers_consistent⇩_m ⦇layers = ls', activation_tab = activation_tab N⦈›
  using layers_consistent⇩_m_activation_tab_const by blast 


lemma predit⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl_eq:
  assumes ‹layer_consistent⇩_m N (dim_vec inputs) l›
  shows   ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N (Some inputs) l = Some (predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl N inputs l)›
proof(cases "l")
  case (In x1)
  then show ?thesis using assms by(simp)
next
  case (Out x2)
  then show ?thesis using assms by(simp)
next
  case (Dense x3)
  then show ?thesis 
    using Dense assms by(force)
next
  case (Activation x4)
  then show ?thesis using Activation assms by(force)
qed

lemma valid_activation_preserves_length: 
  assumes ‹valid_activation_tab⇩_m t›
  assumes ‹t n = Some f›
  shows ‹dim_vec xs = dim_vec (f xs)›
  using assms unfolding valid_activation_tab⇩_m_def
  by (simp add: ranI) 


lemma fold_predict_m_strict: ‹(foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N) None ls) = None›
  by(induction "ls", simp_all)

lemmas [nn_layer] = predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m.simps predict_layer_Some fold_predict_m_strict

lemma predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_activation_tab: assumes "activation_tab N = activation_tab N'" shows
  ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N x xs = predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N' x xs›
proof(cases xs)
  case (In x1)
  then show ?thesis 
    by (metis layer_consistent⇩_m.simps(1) option.collapse predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m.simps(1) predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m.simps(5)) 
next
  case (Out x2)
  then show ?thesis 
    by (metis layer_consistent⇩_m.simps(2) option.exhaust predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m.simps(2) predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m.simps(5)) 
next
  case (Dense x3)
  then show ?thesis proof(cases x)
    case None
    then show ?thesis by simp
  next
    case (Some a)
    then show ?thesis using Dense assms by force  
  qed 
next
  case (Activation x4)
  then show ?thesis proof(cases x)
    case None
    then show ?thesis by simp
  next
    case (Some a)
    then show ?thesis using Activation assms by force  
  qed 
qed

lemma predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_activation_tab_const: ‹predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m N = predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m ⦇layers = l, activation_tab = activation_tab N⦈›
  apply(rule ext)+
  by (metis neural_network_seq_layers.select_convs(2) predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_activation_tab) 

context neural_network_sequential_layers⇩_m begin 
lemma img_None_1: assumes ‹((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N xs) ≠ None)› shows ‹(dim_vec xs = (in_deg_NN N))›
proof - 
  have ‹((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N xs) ≠ None) ⟶ (dim_vec xs = (in_deg_NN N))›
  proof(cases N)
    case (fields layers' activation_tab') note i = this
    then show ?thesis 
      unfolding predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def
      using i layers_nonempty
    proof(induction layers')
      case Nil
      then show ?case by simp
    next
      case (Cons a layers')
      then show ?case proof(cases a)
        case (In x1)
        then show ?thesis using Cons 
          by (simp add: fold_predict_m_strict in_deg_NN_def) 
      next
        case (Out x2)
        then show ?thesis using Cons 
          by (simp add: fold_predict_m_strict in_deg_NN_def) 
      next
        case (Dense x3)
        then show ?thesis using Cons neural_network_sequential_layers⇩_m_axioms i
          apply(simp add: fold_predict_m_strict in_deg_NN_def)
          using Cons by (metis isIn.simps(3) list.sel(1) neural_network_seq_layers.select_convs(1) 
              neural_network_sequential_layers⇩_m.head_is_In) 
      next
        case (Activation x4)
        then show ?thesis using Cons neural_network_sequential_layers⇩_m_axioms i
          by(simp add: fold_predict_m_strict in_deg_NN_def)
      qed
    qed
  qed
  then show ?thesis using assms by simp 
qed

lemma img_None_2': 
  assumes  a0: ‹layers' ≠ [] › 
    and a4: ‹valid_activation_tab⇩_m activation_tab'›
    and a1:  ‹layers_consistent⇩_m ⦇layers = [], activation_tab=activation_tab' ⦈ (dim_vec xs) layers'› 
  shows ‹foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m ⦇layers = [], activation_tab = activation_tab'⦈) (Some xs) layers' ≠ None›
proof(insert assms, induction "layers'" arbitrary: xs rule:list_nonempty_induct)
  case (single l)
  then show ?case 
  proof(cases l)
    case (In x1)
    then show ?thesis 
      using single by(simp)
  next
    case (Out x2)
    then show ?thesis 
      using single by(force)
  next
    case (Dense x3)
    then show ?thesis 
      using single by(force)
  next
    case (Activation x4)
    then show ?thesis 
      using single by(force)
  qed
next
  case (cons l ls)
  then show ?case 
  proof(cases l)
    case (In x1)
    then show ?thesis using cons by force 
  next
    case (Out x2)
    then show ?thesis using cons by force 
  next
    case (Dense x3)
    then show ?thesis 
      using cons assms apply(clarsimp)[1]
      apply(subst cons.IH[simplified], simp_all)
      using valid_activation_preserves_dim by force 
  next
    case (Activation x4)
    then show ?thesis 
      using cons assms apply(clarsimp)[1]
      apply(subst cons.IH[simplified], simp_all) 
      by (metis valid_activation_preserves_dim) 
  qed
qed

lemma img_None_2: 
  assumes ‹dim_vec xs = in_deg_NN N› 
  shows ‹((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N xs) ≠ None)›
proof(cases N)
  case (fields layers activation_tab)
  then show ?thesis 
    using assms layer_valid apply(simp)
    unfolding neural_network_sequential_layers⇩_m_def  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def
    apply(subst         predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_activation_tab_const[of _ "[]"]) 
    apply(simp)
    apply(subst img_None_2'[of layers activation_tab xs, simplified], simp_all)
    using layers_nonempty apply force 
    using activation_tab_valid apply fastforce 
    using assms layer_valid unfolding in_deg_NN_def apply(simp) 
    using layers_consistent⇩_m_activation_tab_const by fastforce 
qed

lemma img_None: ‹((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N xs) ≠ None) = (dim_vec xs = in_deg_NN N)›
  using img_None_1 img_None_2 by blast 

lemma img_Some: ‹(∃ y. (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N xs) = Some y) = (dim_vec xs = in_deg_NN N)›
  using img_None by simp 

lemma img_deg: ‹(∃ y. ((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N xs) = Some y) ⟶ (dim_vec y = out_deg_NN N))›
proof(cases N)
  case (fields layers' activation_tab')
  then show ?thesis 
    unfolding fields predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def
    using fields layers_nonempty
  proof(induction layers' arbitrary: xs rule:rev_induct)
    case Nil
    then show ?case by simp
  next
    case (snoc a layers')
    then show ?case proof(cases a)
      case (In x1)
      then show ?thesis using snoc
        using dim_vec by blast
    next
      case (Out x2)
      then show ?thesis using snoc
        apply (simp add: fold_predict_m_strict out_deg_NN_def neural_network_sequential_layers⇩_m_def) 
        using dim_vec_first by blast 
    next
      case (Dense x3)
      then show ?thesis using Cons neural_network_sequential_layers⇩_m_axioms
        apply(simp add: fold_predict_m_strict in_deg_NN_def)
        using snoc 
        by (simp add: neural_network_sequential_layers⇩_m_def)
    next
      case (Activation x4)
      then show ?thesis using Cons neural_network_sequential_layers⇩_m_axioms
        apply(simp add: fold_predict_m_strict in_deg_NN_def)
        using snoc
        using snoc 
        by (simp add: neural_network_sequential_layers⇩_m_def)
    qed
  qed
qed

lemma aux_length: " 0 < units x3 ⟹
         0 < dim_vec inputs ⟹
         dim_vec (β x3) = units x3 ⟹
         dim_col (ω x3) = units x3 ⟹
         dim_row (ω x3) = dim_vec inputs ⟹
         layers_consistent⇩_m ⦇layers = [], activation_tab = atab⦈ (units x3) xs ⟹
         atab (φ x3) = Some y ⟹ valid_activation_tab⇩_m atab ⟹ layers_consistent⇩_m ⦇layers = [], activation_tab = atab⦈ (dim_vec (y (inputs ⇩_v* ω x3 + β x3))) xs"
  using valid_activation_preserves_length[of atab "(φ x3)" "y", symmetric] by simp

lemma pred_mat_impl_aux': 
  assumes ‹ls ≠ []›
    and     layer_valid:  ‹layers_consistent⇩_m ⦇layers = [], activation_tab = atab⦈ (dim_vec inputs) ls› 
    and     activation_tab_valid: ‹valid_activation_tab⇩_m atab›  
  shows ‹
    foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m ⦇layers = [], activation_tab = atab⦈) (Some inputs) ls =
    Some (foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl ⦇layers = [], activation_tab = atab⦈) inputs ls)
  ›
proof(insert assms(1) assms(2), induction "ls"  arbitrary:inputs rule:list_nonempty_induct)
  case (single x)
  then show ?case proof(cases x)
    case (In x1)
    then show ?thesis 
      using In single by force 
  next
    case (Out x2)
    then show ?thesis 
      using Out single.prems by force
  next
    case (Dense x3)
    then show ?thesis 
      using Dense single by force
  next
    case (Activation x4)
    then show ?thesis 
      using Activation single by force
  qed
next
  case (cons x xs)
  then show ?case proof(cases x)
    case (In x1)
    then show ?thesis 
      by (metis (no_types, lifting) cons.IH cons.prems foldl_Cons layer_consistent⇩_m.simps(1) layers_consistent⇩_m.simps(2) 
          layers_consistent⇩_m_layersN_const neural_network_seq_layers.select_convs(2) out_deg_layer.simps(1) predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m.simps(1) predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl.simps(1)) 
  next
    case (Out x2)
    then show ?thesis 
      apply(clarsimp simp add:cons assms)[1] 
     using cons.IH cons.prems layers_consistent⇩_m.simps(2) out_deg_layer.simps(2) 
       cons.prems cons(2,3) 
     by fastforce 
  next
    case (Dense x3)
    then show ?thesis 
      apply(thin_tac "x = Dense x3")
      using cons.prems apply(clarsimp simp add:Dense)[1]
      apply(subst cons.IH, simp_all)
      apply(insert assms(3))
      by(subst aux_length, simp_all)  
  next
    case (Activation x4)
    then show ?thesis 
      apply(clarsimp simp add:cons assms)[1]
      using assms(3) cons.IH cons.prems layers_consistent⇩_m.simps(2) 
            layers_consistent⇩_m_layersN_const neural_network_seq_layers.select_convs(2) out_deg_layer.simps(3) 
            valid_activation_preserves_length 
        by (smt (z3) layer_consistent⇩_m.simps(3) neq0_conv option.case_eq_if option.sel) 
     
  qed
qed


lemma pred_mat_impl_aux:
  assumes layer_valid:  ‹layers_consistent⇩_m ⦇layers = ls, activation_tab = atab⦈ (dim_vec inputs) ls› 
    and     activation_tab_valid: ‹valid_activation_tab⇩_m atab›  
  shows ‹
    foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m ⦇layers = ls, activation_tab = atab⦈) (Some inputs) ls =
    Some (foldl (predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_impl ⦇layers = ls, activation_tab = atab⦈) inputs ls)
  ›
proof(cases ls)
  case Nil
  then show ?thesis by simp
next
  case (Cons a list)
  then show ?thesis 
    using pred_mat_impl_aux' assms layers_consistent⇩_m_activation_tab_const list.discI 
      neural_network_seq_layers.select_convs(2) predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_activation_tab_const
    by (metis predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_activation_tab_const predict⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_l_impl_activation_tab_const) 
qed

lemma predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_code [code]:
  assumes ‹in_deg_NN N = dim_vec inputs›
  shows ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N inputs = Some (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_impl N inputs)›
proof(cases N)
  case (fields ls atab)
  then show ?thesis 
    unfolding predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_impl_def
    using assms apply(simp,subst pred_mat_impl_aux)
    using layer_valid apply force
    using activation_tab_valid apply force
    by simp
qed

lemma predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_code' [code]:
  assumes ‹in_deg_NN N = dim_vec inputs›
  shows ‹the (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m N inputs) = predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_impl N inputs›
  by (simp add: assms predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r_⇩_m_code)

end 


ML‹
  val layer_config_matrix =  {
         InC    = @{const "NN_Layers.layer.In"(‹real Matrix.vec›, ‹activation⇩_m⇩_u⇩_l⇩_t⇩_i›, ‹real Matrix.mat›)},
         OutC   = @{const "NN_Layers.layer.Out"(‹real Matrix.vec›, ‹activation⇩_m⇩_u⇩_l⇩_t⇩_i›, ‹real Matrix.mat›)},
         DenseC = @{const "NN_Layers.layer.Dense"(‹activation⇩_m⇩_u⇩_l⇩_t⇩_i›,‹real Matrix.vec›, ‹real Matrix.mat›)},
         InOutRecordC = @{const "NN_Layers.InOutRecord.InOutRecord_ext"(‹(activation⇩_m⇩_u⇩_l⇩_t⇩_i, (real Matrix.vec, real Matrix.mat, unit)LayerRecord_ext) ActivationRecord_ext›)},
         LayerRecordC = @{const "LayerRecord_ext"(‹real Matrix.vec›, ‹real Matrix.mat›, ‹unit›)},
         ActivationRecordC = @{const "ActivationRecord_ext" (‹activation⇩_m⇩_u⇩_l⇩_t⇩_i›,‹(real Matrix.vec, real Matrix.mat, unit) LayerRecord_ext›)},
         biasT_conv = (fn x => @{const "vec_of_list"(‹real›)} $ x),
         weightsT_conv = (fn x => fn dim => @{const "mat_of_cols_list"(‹real›)} $ HOLogic.mk_number \<^typ>‹nat› dim $ x),
         ltype = @{typ ‹(real Matrix.vec, activation⇩_m⇩_u⇩_l⇩_t⇩_i, real Matrix.mat) layer›},
         activation_term = Activation_Term.MultiMatrix,
         layersT = @{typ ‹ ((real Matrix.vec, activation⇩_m⇩_u⇩_l⇩_t⇩_i, real Matrix.mat) layer) list›},
         phiT    = @{typ ‹activation⇩_m⇩_u⇩_l⇩_t⇩_i ⇒ (real Matrix.vec ⇒ real Matrix.vec) option›}, 
         layer_extC = @{const ‹NN_Layers.neural_network_seq_layers.neural_network_seq_layers_ext› (‹real Matrix.vec›,‹activation⇩_m⇩_u⇩_l⇩_t⇩_i›, ‹real Matrix.mat›, ‹unit›)},
         layer_def = @{thm neural_network_sequential_layers⇩_m_def},
         valid_activation_tab = @{thm valid_activation_tab⇩_m_def},
         locale_name = "neural_network_sequential_layers⇩_m" 
  }:Convert_TensorFlow_Seq_Layer.layer_config
  val _ = Theory.setup
           (Convert_TensorFlow_Symtab.add_encoding("seq_layer_matrix", 
                    Convert_TensorFlow_Seq_Layer.def_seq_layer_nn layer_config_matrix))
›

end