Theory NN_Layers_List_Main

(***********************************************************************************
 * Copyright (c) University of Exeter, UK
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 ***********************************************************************************)

subsection‹Neural Network as Sequential Layers using Lists (\thy)›
theory 
  NN_Layers_List_Main
  imports
    Main
    NN_Layers
    "HOL-Library.Interval"
    Properties
begin 
text‹\label{sec:list}›

definition ‹valid_activation_tabl tab = (∀ f ∈ ran tab. ∀ xs. length xs = length (f xs))›

lemma valid_activation_preserves_length: 
  assumes ‹valid_activation_tabl t›
  assumes ‹t n = Some f›
  shows ‹length xs = length (f xs)›
  using assms unfolding valid_activation_tabl_def
  by (simp add: ranI) 
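
(* A minimal, hypothetical example (sketch only, not part of the development): an
   activation table mapping a single key ''relu'' to an elementwise ReLU,

     atab = [''relu'' ↦ (λxs. map (λx::real. max 0 x) xs)],

   satisfies valid_activation_tabl, since map preserves the length of its argument. *)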

fun  layer_consistentl :: "('a list, 'b, 'a list list) neural_network_seq_layers ⇒ nat ⇒ ('a list, 'b , 'a list list) layer ⇒ bool"  where
  ‹layer_consistentl _ nc (In l) = (0 < units l ∧ nc = units l)›
| ‹layer_consistentl _ nc (Out l) = (0 < units l ∧ nc = units l)›
| ‹layer_consistentl N nc (Activation l) = ( (0 < units l ∧ nc =  units l) ∧
                                              ( ((activation_tab N) (φ l)) ≠ None  ))›
| ‹layer_consistentl N nc (Dense l) =  (0 < units l ∧ 0 < nc ∧
                                              length (β l) = units l ∧
                                              length (ω l) = units l ∧
                                              (∀r∈set (ω l). length r = nc) ∧
                                              ( ((activation_tab N) (φ l)) ≠ None  ))›

fun layers_consistentl where
  ‹layers_consistentl N  _ [] = True›
 |‹layers_consistentl  N w (l#ls) = ((layer_consistentl N w l) ∧ (layers_consistentl N (out_deg_layer l)  ls))›
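
(* Intuition (informal sketch): layers_consistentl N w ls checks the head of ls against
   the incoming width w and then recurses with the output degree of that layer.  For
   instance, a Dense layer with 3 units (and weight rows of length w) can only be
   followed consistently by a layer expecting exactly 3 inputs, since out_deg_layer of
   the Dense layer, i.e. 3, becomes the width passed on to the remaining layers. *)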


lemma layer_consistentl_in_deg_layer:
  assumes "layer_consistentl N nc l " 
  shows "in_deg_layer l = nc "
proof(cases l)
  case (In x1)
  then show ?thesis using assms by simp
next
  case (Out x2)
  then show ?thesis using assms by simp
next
  case (Dense x3)
  then show ?thesis using assms by simp
next
  case (Activation x4)
  then show ?thesis using assms by simp
qed

lemma layers_consistentl_in_deg:
  assumes "(layers_consistentl N nc (l#ls'))"
shows "in_deg_layer l = nc"
proof(insert assms, cases l)
  case (In x1)
  then show ?thesis 
    using assms by simp
next
  case (Out x2)
  then show ?thesis using assms by simp
next
  case (Dense x3)
  then show ?thesis using assms by simp
next
  case (Activation x4)
  then show ?thesis using assms by simp
qed


lemma layer_consistentl_activation_tab_const:
  ‹layer_consistentl N nc l = layer_consistentl ⦇layers = ls, activation_tab = activation_tab N⦈ nc l›
  by(cases l, simp_all) 


lemma layers_consistentl_activation_tab_const: 
  ‹layers_consistentl N nc ls = layers_consistentl ⦇layers = ls', activation_tab = activation_tab N⦈ nc ls›
proof(induction ls arbitrary: nc)
  case Nil
  then show ?case by simp
next
  case (Cons a ls)
  then show ?case 
    by(simp add: layer_consistentl_activation_tab_const[symmetric])
qed

lemma layers_consistentl_layersN_const: 
  ‹layers_consistentl N = layers_consistentl ⦇layers = ls', activation_tab = activation_tab N⦈›
  using layers_consistentl_activation_tab_const by blast 

lemma layers_consistentlAll:
  assumes ‹layers_consistentl N inputs (layers N)›
  shows ‹∀ l ∈ set (layers N). ∃ n . layer_consistentl N n l›
proof(cases N)
  case (fields layers activation_tab)
  then have "lset layers. n. layer_consistentl layers = layers, activation_tab = activation_tab n l"
  proof(insert assms[simplified fields, simplified], induction "layers" arbitrary:inputs activation_tab N)
    case Nil
    then show ?case by simp
  next
    case (Cons a layers)
    then show ?case 
      apply(simp)
      using Cons fields layers_consistentl_activation_tab_const layer_consistentl_activation_tab_const
        neural_network_seq_layers.select_convs(2) 
      by metis
  qed
  then 
  show ?thesis
    using fields by simp 
qed

lemma layers_consistentlAll': 
  assumes ‹layers_consistentl N (in_deg_NN N) (layers N)›
  shows ‹∀ l ∈ set (layers N). ∃ n . layer_consistentl N n l›
  using assms layers_consistentlAll by blast

lemma layers_consistentl_layer_consistentl_Dense:
  assumes ‹layers_consistentl N (in_deg_NN N) (layers N)› 
    and "(Dense x3) ∈ set (layers N )"
  shows ‹layer_consistentl N (length (ω x3 ! 0)) (Dense x3)›
  using assms layer_consistentl.simps(4)[of N "(length (ω x3 ! 0))" x3] 
  by (metis layer_consistentl.simps(4) layers_consistentlAll' nth_mem)

lemma layers_consistentl_layer_consistentl_Activation:
  assumes ‹layers_consistentl N (in_deg_NN N) (layers N)› 
    and "(Activation x3) ∈ set (layers N )"
  shows ‹layer_consistentl N (units x3) (Activation x3)›
  using assms layer_consistentl.simps(3)[of N _ x3] 
  by (meson layer_consistentl.simps(3) layers_consistentlAll)


locale neural_network_sequential_layersl = 
  fixes N::"('a::comm_ring list, 'b, 'a list list) neural_network_seq_layers"
  assumes head_is_In:  ‹isIn (hd (layers N))›
    and     last_is_Out: ‹isOut (last (layers N))›
    and     layer_internal: ‹list_all isInternal  ((tl o butlast) (layers N))›
    and     activation_tab_valid: ‹valid_activation_tabl (activation_tab N)›  
    and     layer_valid:  ‹layers_consistentl N (in_deg_NN N) (layers N)› 
begin              
lemma layers_nonempty: ‹layers N ≠ []›
  by (metis hd_Nil_eq_last head_is_In isIn.elims(2) isOut.simps(2) last_is_Out) 

lemma min_length_layers_two: ‹1 < length (layers N)›
  by (metis (no_types, lifting) One_nat_def add_0 append_Nil append_butlast_last_id 
      append_eq_append_conv head_is_In isIn.simps(2) isOut.elims(2) last_is_Out 
      layers_nonempty length_0_conv less_one linorder_neqE_nat list.sel(1) list.size(4)) 

lemma layers_structure:  ‹∃ il ol ls. layers N = (In il)#ls@[Out ol]› 
  by (metis (no_types, lifting) append_butlast_last_id head_is_In isIn.elims(2) isOut.elims(2) 
      last.simps last_is_Out layer.distinct(1) layers_nonempty list.exhaust list.sel(1)) 

end 


text ‹We use locales (i.e., Isabelle's mechanism for parametric theories) to capture
fundamental concepts that are shared between different models of neural
networks. 

We start by defining a locale @{term "neural_network_sequential_layersl"} to
describe the common concepts of all neural network models that use layers as their
core building blocks. For our representation to be a well-formed sequential model, we
require that the first layer is an input layer, the last layer is an output layer, all
layers in between are internal layers, the activation table contains only
length-preserving functions, and the layer list is consistent with the input degree
of the network.›
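
(* A minimal, hypothetical example (sketch only, not used in the development): assuming
   ''relu'' is a key of the activation table, the layer list

     [In ⦇name = ''i'', units = 2⦈,
      Dense ⦇name = ''d'', units = 2, ActivationRecord.φ = ''relu'',
             LayerRecord.β = [0, 0], ω = [[1, 0], [0, 1]]⦈,
      Out ⦇name = ''o'', units = 2⦈]

   meets the structural requirements of the locale: its head is an In layer, its last
   element is an Out layer, and the single layer in between is an internal (Dense)
   layer whose dimensions are consistent with an input of length 2. *)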



fun  predictlayer_l ::‹('a::{monoid_add,times} list, 'b, 'a list list) neural_network_seq_layers ⇒ ('a list) option ⇒ ('a list, 'b, 'a list list) layer ⇒ ('a list ) option›  
  where
    ‹predictlayer_l N (Some vs) (In l)  = (if layer_consistentl N (length vs) (In l) then Some vs else None)›
  | ‹predictlayer_l N (Some vs) (Out l) = (if layer_consistentl N (length vs) (Out l) then Some vs else None)›
  | ‹predictlayer_l N (Some vs) (Dense pl) = (if layer_consistentl N (length vs) (Dense pl)  then 
                                              (let
                                                in_w_pairs = map (λ e. zip vs e) (ω pl);
                                                wsums      = map (λ vs'. ∑(x,y)←vs'. x*y) in_w_pairs;
                                                wsum_bias  = map (λ (s,b). s+b) (zip wsums (β pl))
                                              in 
                                              (case activation_tab N (φ pl) of 
                                                  None   ⇒ None 
                                                | Some f ⇒ Some (f wsum_bias )))
                                            else None)›
  | ‹predictlayer_l N (Some vs) (Activation pl) = (if layer_consistentl N (length vs) (Activation pl) then
                                                  (case activation_tab N (φ pl) of
                                                     None  ⇒ None
                                                   | Some f ⇒ Some (f vs))
                                                else None)›
  | ‹predictlayer_l _ None _ = None›
lemma length_out: ‹predictlayer_l N' vs (Out l) = Some res ⟹ length(res) = (units l)›
  by(cases vs, auto split:if_splits)

fun  
  predictlayer_l_impl ::‹('a::{monoid_add,times} list, 'b, 'a list list) neural_network_seq_layers ⇒ 'a list ⇒ ('a list, 'b, 'a list list) layer ⇒ 'a list›  
  where
    ‹predictlayer_l_impl N vs (In l)  = vs›
  | ‹predictlayer_l_impl N vs (Out l) = vs›
  | ‹predictlayer_l_impl N vs (Dense pl) = (let
                                             in_w_pairs = map (λ e. zip vs e) (ω pl);
                                             wsums      = map (λ vs'. ∑(x,y)←vs'. x*y) in_w_pairs;
                                             wsum_bias  = map (λ (s,b). s+b) (zip wsums (β pl));
                                             φl = the (activation_tab N (φ pl)) 
                                           in 
                                             φl wsum_bias)›

| ‹predictlayer_l_impl N vs (Activation pl) = (let
                                             φl = the (activation_tab N (φ pl)) 
                                           in 
                                             φl vs)›

definition ‹predictseq_layer_l N inputs = foldl (predictlayer_l N) (Some inputs) (layers N)›
definition ‹predictseq_layer_l_impl N inputs = foldl (predictlayer_l_impl N) inputs (layers N)›


lemma predict_layer_Some:
  assumes ‹(layer_consistentl N (length xs) l)› 
  shows ‹(predictlayer_l N (Some xs) l ≠ None)›  
proof(cases l)
  case (In x1)
  then show ?thesis using assms by(simp)
next
  case (Out x2)
  then show ?thesis using assms by simp 
next
  case (Dense x3)
  then show ?thesis using assms by force
next
  case (Activation x4)
  then show ?thesis using assms by force
qed

text ‹The input and output layers of our network pass the inputs directly on to the
next layer without performing any calculation; this can be seen in the first two
cases of the @{const "predictlayer_l"} function. The dense layer of the network is where 
the weighted sum is calculated, case three in @{const "predictlayer_l"}: each row of the 
weight matrix (@{term "ω pl"}) is zipped with the input values
(@{term "in_w_pairs"}), before calculating the weighted sums (@{term "wsums"}), adding the bias (@{term "wsum_bias"}), 
and finally applying the activation function to the result, producing the output of a
single dense layer. To calculate the prediction of the network for a given list of
inputs, we then fold @{const "predictlayer_l"} over the layers of the network from left 
to right (@{const "foldl"}) in @{const "predictseq_layer_l"}.›

lemma fold_predict_l_strict: ‹(foldl (predictlayer_l N) None ls) = None›
  by(induction "ls", simp_all)


lemmas [nn_layer] = predictlayer_l.simps predict_layer_Some fold_predict_l_strict

lemma predictlayer_l_activation_tab: assumes "activation_tab N = activation_tab N'" shows
  ‹predictlayer_l N x xs = predictlayer_l N' x xs›
proof(cases xs)
  case (In x1)
  then show ?thesis proof(cases "x")
    case None
    then show ?thesis using In assms by(simp) 
  next
    case (Some a)
    then show ?thesis using In assms by(simp) 
  qed  
next
  case (Out x2)
  then show ?thesis proof(cases "x")
    case None
    then show ?thesis using Out assms by(simp) 
  next
    case (Some a)
    then show ?thesis using Out assms by(simp) 
  qed  
next
  case (Dense x3)
  then show ?thesis proof(cases x)
    case None
    then show ?thesis by simp
  next
    case (Some a)
    then show ?thesis by(auto simp add:assms Dense)
  qed
next
  case (Activation x4)
  then show ?thesis proof(cases "x")
    case None
    then show ?thesis using Activation assms by(simp) 
  next
    case (Some a)
    then show ?thesis using Activation assms by(simp) 
  qed  
qed

lemma predictlayer_l_activation_tab_const: ‹predictlayer_l N = predictlayer_l ⦇layers = l, activation_tab = activation_tab N⦈›
  apply(rule ext)+
  by (metis neural_network_seq_layers.select_convs(2) predictlayer_l_activation_tab)

lemma input_layer:
  assumes ‹y = length i› and ‹0 < y›
  shows ‹predictlayer_l N (Some i) (In ⦇ name = x, units = y⦈) = (Some i)›
  using assms 
  by simp

lemma output_layer:
  assumes ‹y = length i› and ‹0 < y›
  shows ‹predictlayer_l N (Some i) (Out ⦇ name = x, units = y⦈) = (Some i)›
  using assms 
  by simp

lemma dense_layer:
  shows ‹predictlayer_l N (Some i) (Dense ⦇name = x, units = y, ActivationRecord.φ = p, LayerRecord.β = b, ω = w⦈) = 
        (if layer_consistentl N (length i) (Dense ⦇name = x, units = y, ActivationRecord.φ = p, LayerRecord.β = b, ω = w⦈) then 
          (let in_w_pairs = map (λ e. zip i e) w;
               wsums      = map (λ vs'. ∑(x,y)←vs'. x*y) in_w_pairs;
               wsum_bias  = map (λ (s,b). s+b) (zip wsums b)
          in 
           (case activation_tab N p of 
                  None   ⇒ None
                | Some f ⇒ Some (f wsum_bias )))
           else None)›
  by simp 


lemma dense_layer':
  assumes ‹activation_tab N p = Some a›
  shows ‹predictlayer_l N (Some i) (Dense ⦇name = x, units = y, ActivationRecord.φ = p, LayerRecord.β = b, ω = w⦈) = 
        (if layer_consistentl N (length i) (Dense ⦇name = x, units = y, ActivationRecord.φ = p, LayerRecord.β = b, ω = w⦈) then 
          (let in_w_pairs = map (λ e. zip i e) w;
               wsums      = map (λ vs'. ∑(x,y)←vs'. x*y) in_w_pairs;
               wsum_bias  = map (λ (s,b). s+b) (zip wsums b)
          in Some (a wsum_bias))
         else None)›
  using assms
  by simp

lemma activation_layer:
  assumes ‹y = length i›
  shows ‹predictlayer_l N (Some i) (Activation ⦇ name = x, units = y, ActivationRecord.φ = p⦈) = 
        (if layer_consistentl N (length i) (Activation ⦇name = x, units = y, ActivationRecord.φ = p⦈) then 
              (case activation_tab N p of None ⇒ None | Some f ⇒ Some (f i))
        else None)›
  using assms
  by simp

lemma activation_layer':
  assumes ‹y = length i›
    and ‹activation_tab N p = Some a›
  shows ‹predictlayer_l N (Some i) (Activation ⦇ name = x, units = y, ActivationRecord.φ = p⦈) = 
        (if layer_consistentl N (length i) (Activation ⦇name = x, units = y, ActivationRecord.φ = p⦈) then Some (a i) else None)›
  using assms
  by simp

lemma predictlayer_l_impl_activation_tab_const: ‹predictlayer_l_impl N = predictlayer_l_impl ⦇layers = l, activation_tab = activation_tab N⦈›
  apply(rule ext)+
  using neural_network_seq_layers.select_convs predictlayer_l_activation_tab predictlayer_l_impl.elims predictlayer_l_impl.simps
  by (smt (verit))

context neural_network_sequential_layersl begin 

lemma img_None_1: assumes ‹((predictseq_layer_l N xs) ≠ None)› shows ‹(length xs = (in_deg_NN N))›
proof - 
  have 1: ‹((predictseq_layer_l N xs) ≠ None) ⟶ (length xs = (in_deg_NN N))›
  proof(cases N)
    case (fields layers' activation_tab') note i = this
    then show ?thesis 
      unfolding predictseq_layer_l_def 
      using i layers_nonempty 
    proof(induction layers')
      case Nil
      then show ?case by simp
    next
      case (Cons a layers')
      then show ?case proof(cases a)
        case (In x1)
        then show ?thesis using Cons 
          by (simp add: fold_predict_l_strict in_deg_NN_def) 
      next
        case (Out x2)
        then show ?thesis using Cons neural_network_sequential_layersl_axioms
          by (simp add: fold_predict_l_strict in_deg_NN_def) 
      next
        case (Dense x3)
        then show ?thesis using Cons neural_network_sequential_layersl_axioms i
          apply(simp add: fold_predict_l_strict in_deg_NN_def)
          using Cons   by (metis isIn.simps(3) list.sel(1) neural_network_seq_layers.select_convs(1) 
              neural_network_sequential_layersl.head_is_In) 
      next
        case (Activation x4)
        then show ?thesis using Cons neural_network_sequential_layersl_axioms i
          by(simp add: fold_predict_l_strict in_deg_NN_def)
      qed
    qed
  qed 
  show ?thesis using assms 1 by simp 
qed

lemma img_None_2':
  assumes  a0: ‹layers' ≠ []› 
    and a4: ‹valid_activation_tabl activation_tab'›
    and a1:  ‹layers_consistentl ⦇layers = [], activation_tab=activation_tab'⦈  (length xs) layers'› 
  shows ‹foldl (predictlayer_l ⦇layers = [], activation_tab = activation_tab'⦈) (Some xs) layers' ≠ None›
proof(insert assms, induction "layers'" arbitrary: xs rule:list_nonempty_induct)
  case (single l)
  then show ?case 
  proof(cases l)
    case (In x1)
    then show ?thesis 
      using single by(simp)
  next
    case (Out x2)
    then show ?thesis 
      using single by(simp)
  next
    case (Dense x3)
    then show ?thesis 
      using single by(force)
  next
    case (Activation x4)
    then show ?thesis 
      using single by(force)
  qed
next
  case (cons l ls)
  then show ?case 
  proof(cases l)
    case (In x1)
    then show ?thesis using cons by simp 
  next
    case (Out x2)
    then show ?thesis using cons by simp 
  next
    case (Dense x3)
    then show ?thesis 
     using cons assms
     apply clarsimp
     apply (subst cons.IH[simplified], simp_all)
     using valid_activation_preserves_length by force
  next
    case (Activation x4)
    then show ?thesis 
      using cons assms apply clarsimp
      apply(subst cons.IH[simplified], simp_all)
      by (metis valid_activation_preserves_length) 
  qed
qed

lemma img_None_2: 
  assumes ‹length xs = in_deg_NN N› 
  shows ‹(predictseq_layer_l N xs) ≠ None›
proof(cases N)
  case (fields layers activation_tab)
  then show ?thesis 
    using assms layer_valid apply simp
    unfolding neural_network_sequential_layersl_def  predictseq_layer_l_def
    apply(subst         predictlayer_l_activation_tab_const[of _ "[]"]) 
    apply simp
    apply(subst img_None_2'[of layers activation_tab xs, simplified], simp_all)
    using layers_nonempty apply force 
    using activation_tab_valid apply fastforce 
    using assms layer_valid unfolding in_deg_NN_def apply simp
    using layers_consistentl_activation_tab_const by fastforce 
qed


lemma img_None: ‹((predictseq_layer_l N xs) ≠ None) = (length xs = in_deg_NN N)›
  using img_None_1 img_None_2 by blast 

lemma img_Some: ‹(∃ y. (predictseq_layer_l N xs) = Some y) = (length xs = in_deg_NN N)›
  using img_None by simp 

lemma img_length: ‹(∃ y. ((predictseq_layer_l N xs) = Some y) ⟶ (length y = out_deg_NN N))›
proof(cases N)
  case (fields layers' activation_tab') note i = this
  then show ?thesis 
    unfolding predictseq_layer_l_def i
  proof(induction layers' arbitrary: xs rule:rev_induct)
    case Nil
    then show ?case by auto
  next
    case (snoc a layers')
    then show ?case proof(cases a)
      case (In x1)
      then show ?thesis using snoc
        using Ex_list_of_length by blast 
    next
      case (Out x2)
      then show ?thesis using snoc
        apply (simp add: fold_predict_l_strict out_deg_NN_def neural_network_sequential_layersl_def) 
        using Ex_list_of_length by blast
    next
      case (Dense x3)
      then show ?thesis using Cons neural_network_sequential_layersl_axioms i
        apply(simp add: fold_predict_l_strict in_deg_NN_def)
        using snoc
        by (smt (verit, ccfv_threshold) Ex_list_of_length)
    next
      case (Activation x4)
      then show ?thesis using Cons neural_network_sequential_layersl_axioms i
        apply(simp add: fold_predict_l_strict in_deg_NN_def)
        using snoc 
        by (smt (verit, ccfv_threshold) Ex_list_of_length)
    qed
  qed
qed 

lemma preditlayer_l_impl_eq:
  assumes ‹layer_consistentl N (length inputs) l›
  shows   ‹predictlayer_l N (Some inputs) l = Some (predictlayer_l_impl N inputs l)›
proof(cases "l")
  case (In x1)
  then show ?thesis using assms by(simp)
next
  case (Out x2)
  then show ?thesis using assms by(simp)
next
  case (Dense x3)
  then show ?thesis 
    using Dense assms by(force)
next
  case (Activation x4)
  then show ?thesis using Activation assms by(force)
qed


lemma aux_length: ‹0 < units x3 ⟹ valid_activation_tabl atab ⟹ 
         inputs ≠ [] ⟹
         length (β x3) = units x3 ⟹
         length (ω x3) = units x3 ⟹
         ∀r∈set (ω x3). length r = length inputs ⟹
         atab (φ x3) = Some y ⟹
         (length (y (map2 (+) (map ((λvs'. ∑(x, y)←vs'. x * y) ∘ zip inputs) (ω x3)) (β x3)))) = units x3›
  using valid_activation_preserves_length[of atab "(φ x3)" "y", symmetric] by simp

lemma pred_list_impl_aux': 
  assumes ‹ls ≠ []›
    and     layer_valid:  ‹layers_consistentl ⦇layers = [], activation_tab = atab⦈ (length inputs) ls› 
    and     activation_tab_valid: ‹valid_activation_tabl atab›  
  shows ‹foldl (predictlayer_l ⦇layers = [], activation_tab = atab⦈) (Some inputs) ls =
    Some (foldl (predictlayer_l_impl ⦇layers = [], activation_tab = atab⦈) inputs ls)›
proof(insert assms(1) assms(2), induction "ls"  arbitrary:inputs rule:list_nonempty_induct)
  case (single x)
  then show ?case proof(cases x)
    case (In x1)
    then show ?thesis 
      using In single by force 
  next
    case (Out x2)
    then show ?thesis 
      using Out single.prems by force
  next
    case (Dense x3)
    then show ?thesis 
      using Dense single by force
  next
    case (Activation x4)
    then show ?thesis 
      using Activation single by force
  qed
next
  case (cons x xs)
  then show ?case proof(cases x)
    case (In x1)
    then show ?thesis 
      by (metis (no_types, lifting) cons.IH cons.prems foldl_Cons layer_consistentl.simps(1) layers_consistentl.simps(2) 
          layers_consistentl_layersN_const neural_network_seq_layers.select_convs(2) out_deg_layer.simps(1) predictlayer_l.simps(1) predictlayer_l_impl.simps(1)) 
  next
    case (Out x2)
    then show ?thesis 
      apply(simp add:cons assms)
       using cons.prems
       cons.IH cons.prems layers_consistentl.simps(2) out_deg_layer.simps(2) 
        by simp
  next
    case (Dense x3)
    then show ?thesis 
      apply(thin_tac "x = Dense x3")
      using cons.prems apply(clarsimp simp add:Dense)
      apply(subst cons.IH, simp_all)
      apply(insert assms(3))
      apply(subst aux_length, simp_all)  
      unfolding valid_activation_tabl_def ran_def by auto
  next
    case (Activation x4)
    then show ?thesis 
      apply(simp add:cons assms)
      using cons.prems assms(3) cons.IH cons.prems layers_consistentl.simps(2) 
            layers_consistentl_layersN_const neural_network_seq_layers.select_convs(2) out_deg_layer.simps(3) 
            valid_activation_preserves_length 
      by (smt (verit, del_insts) layer_consistentl.simps(3) neq0_conv option.sel option.simps(5)) 
  qed
qed

lemma pred_list_impl_aux:
  assumes layer_valid:  ‹layers_consistentl ⦇layers = ls, activation_tab = atab⦈ (length inputs) ls› 
    and     activation_tab_valid: ‹valid_activation_tabl atab›  
  shows ‹foldl (predictlayer_l ⦇layers = ls, activation_tab = atab⦈) (Some inputs) ls =
    Some (foldl (predictlayer_l_impl ⦇layers = ls, activation_tab = atab⦈) inputs ls)›
proof(cases ls)
  case Nil
  then show ?thesis by simp
next
  case (Cons a list)
  then show ?thesis 
    using pred_list_impl_aux' assms layers_consistentl_activation_tab_const list.discI 
      neural_network_seq_layers.select_convs(2) predictlayer_l_activation_tab_const
    by (metis predictlayer_l_activation_tab_const predictlayer_l_impl_activation_tab_const) 
qed


lemma predictseq_layer_l_code [code]:
  assumes ‹in_deg_NN N = length inputs›
  shows ‹predictseq_layer_l N inputs = Some (predictseq_layer_l_impl N inputs)›
proof(cases N)
  case (fields ls atab)
  then show ?thesis 
    unfolding predictseq_layer_l_def predictseq_layer_l_impl_def
    using assms apply(simp,subst pred_list_impl_aux)
    using layer_valid apply force
    using activation_tab_valid apply force
    by simp
qed

lemma predictseq_layer_l_code' [code]:
  assumes ‹in_deg_NN N = length inputs›
  shows ‹the (predictseq_layer_l N inputs) = predictseq_layer_l_impl N inputs›
  by (simp add: assms predictseq_layer_l_code)


end

ML‹val layer_config_list =  {
         InC    = @{const "NN_Layers.layer.In"(real list, activationmulti, real list list)},
         OutC   = @{const "NN_Layers.layer.Out"(real list, activationmulti, real list list)},
         DenseC = @{const "NN_Layers.layer.Dense"(activationmulti,real list, real list list)},
         InOutRecordC = @{const "NN_Layers.InOutRecord.InOutRecord_ext"((activationmulti, (real list,real list list, unit)LayerRecord_ext) ActivationRecord_ext)},
         LayerRecordC = @{const "LayerRecord_ext"(real list, real list list, unit)},
         ActivationRecordC = @{const "ActivationRecord_ext" (activationmulti,(real list, real list list, unit) LayerRecord_ext)},
         biasT_conv = (fn x => x),
         weightsT_conv = (fn x => fn _ => x),
         ltype =  @{typ ‹(real list, activationmulti, real list list) layer›},
         activation_term = Activation_Term.MultiList,
         layersT = @{typ ‹((real list, activationmulti, real list list ) layer) list›},
         phiT    = @{typ ‹activationmulti ⇒ (real list ⇒ real list) option›},   
         layer_extC =  @{const ‹NN_Layers.neural_network_seq_layers.neural_network_seq_layers_ext›(real list,      activationmulti, real list list,  unit)},
         layer_def = @{thm neural_network_sequential_layersl_def},
         valid_activation_tab = @{thm valid_activation_tabl_def},
         locale_name = "neural_network_sequential_layersl"
  }:Convert_TensorFlow_Seq_Layer.layer_config
  val _ = Theory.setup
           (Convert_TensorFlow_Symtab.add_encoding("seq_layer_list", 
                    Convert_TensorFlow_Seq_Layer.def_seq_layer_nn layer_config_list))›
end