
(***********************************************************************************
 * Copyright (c) University of Exeter, UK
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 ***********************************************************************************)

subsection‹Digraphs as Layers (\thy)›	
theory
  NN_Digraph_Layers
  imports   
  NN_Digraph
  "HOL-Combinatorics.Permutations" 
begin

definition layer_equiv :: "('a list ⇒ 'b list) ⇒ ('a list ⇒ 'b list) ⇒ bool" ("_ ≈⇩l _")
  where 
"layer_equiv f g = (∃p p'. ∀xs. f xs = permute_list p' (f (permute_list p xs)))"

lemma mset_eq_layer_equiv:
  assumes "mset xs = mset ys"
      and "mset (f xs) = mset (g ys)"
    shows "f ≈⇩l g"
  unfolding layer_equiv_def using assms 
  by (metis permute_list_id) 
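
text ‹As a small sanity check, the relation is reflexive: choosing the identity permutation
twice makes both sides coincide. The proof mirrors the one above.›

lemma layer_equiv_refl: "f ≈⇩l f"
  unfolding layer_equiv_def by (metis permute_list_id)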

 

fun output_neuron where
   "output_neuron (In nid) = False"
 | "output_neuron (Out nid) = True"
 | "output_neuron (Neuron n) = False"

fun input_neuron where
   "input_neuron (In nid) = True"
 | "input_neuron (Out nid) = False"
 | "input_neuron (Neuron n) = False"

fun hidden_neuron where
   "hidden_neuron (In nid) = False"
 | "hidden_neuron (Out nid) = False"
 | "hidden_neuron (Neuron n) = True"
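
text ‹Every vertex falls into exactly one of these three kinds. The following small sanity
lemma is a sketch, assuming the vertex datatype has exactly the three constructors matched
above:›

lemma "input_neuron v ∨ output_neuron v ∨ hidden_neuron v"
  by (cases v) simp_all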

subsubsection ‹Defining layer types as lists of edges›

text ‹This subsection details definitions which allow for the easy creation of common layer types.
The Activation and Dense layer types map to the layer types used by TensorFlow 
(see  🌐‹https://www.tensorflow.org/api_docs/python/tf/keras/layers›) ›

paragraph ‹Edge construction functions›

definition mk_edge :: "('a::{one}, 'b, 'c) neural_network ⇒ 'a ⇒ 'b ⇒ 'a ⇒ 'a ⇒ id ⇒ id ⇒ ('a, 'b) edge" 
  where
 "mk_edge N ω' φ' α' β' id' nid = ⦇ ω = ω', 
                                    tl = (the_elem {n . n ∈ neurons N ∧ uid n = id'}), 
                                    hd = Neuron ⦇ φ = φ', α = α', β = β', uid = nid ⦈ ⦈"

definition mk_out_edge :: "('a::{one}, 'b, 'c) neural_network ⇒ id ⇒ id ⇒ ('a, 'b) edge" 
  where
 "mk_out_edge N id' nid' = ⦇ ω = 1, 
                             tl = (the_elem {n . n ∈ neurons N ∧ uid n = id'}),  
                             hd = Out nid' ⦈"

definition mk_new_ids :: "('a::{one}, 'b, 'c) neural_network ⇒ nat list"  
  where
 "mk_new_ids N = upt (Max (uids (graph N)) + 1) 
                     (Max (uids (graph N)) + card (output_layer_ids N) + 1)"

text ‹@{const "mk_new_ids"} creates a list of fresh ids whose length matches the size of the 
current last layer of a given network, starting above the current maximum id in the network. It 
is used in the activation and out functions to generate the new neurons in the edges. To 
validate that @{const "mk_new_ids"} returns a list of the correct size and that the ids are 
unique in the network, the following lemmas are provided.›
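
text ‹For illustration: if the current maximal id is ‹3› and the output layer contains three
neurons, the generated id window is ‹[4, 5, 6]› (a small concrete check of the ‹upt› arithmetic):›

lemma "upt 4 7 = [4, 5, 6]"
  by eval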

lemma new_id_len: "length (mk_new_ids N) = length (sorted_list_of_set (output_layer_ids N))"
  by (simp add: mk_new_ids_def)

lemma new_id_len_card: "length (mk_new_ids N) = card (output_layer_ids N)"
  by (simp add: mk_new_ids_def)

lemma new_id_distinct: "distinct (mk_new_ids N)"
  by (metis distinct_upt mk_new_ids_def)

lemma new_id_greater:
  assumes "card (output_layer_ids N) > 0"
  shows "Min (set (mk_new_ids N)) > Max (uids (graph N))"
  using assms
  by (simp add: mk_new_ids_def)

lemma new_id_sorted: 
  shows "sorted (mk_new_ids N)"
  by (metis mk_new_ids_def sorted_upt)
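
text ‹The generated ids form exactly the integer interval above the current maximal id; a small
derived fact that follows directly from the definition:›

lemma new_id_set:
  "set (mk_new_ids N) =
     {Max (uids (graph N)) + 1 ..< Max (uids (graph N)) + card (output_layer_ids N) + 1}"
  by (simp add: mk_new_ids_def)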

lemma new_ids_unique:
  assumes new_ids_finite:   "finite (set(mk_new_ids N))"
  and current_ids_finite:   "finite (uids (graph N))"
  and MinMax:     "Max (uids (graph N)) < Min (set(mk_new_ids N))"
  shows "uids (graph N)  set(mk_new_ids N) = {}"
  using assms
  by (metis Min_gr_iff  disjnt_def disjnt_ge_max disjoint_iff_not_equal equals0D) 

text‹Or by rewriting disjointness:›

lemma new_ids_unique': 
  assumes new_ids_finite:   "finite (set(mk_new_ids N))"
  and current_ids_finite:   "finite (uids (graph N))"
  and MinMax:     "Max (uids (graph N)) < Min (set(mk_new_ids N))"
  shows " x  set(mk_new_ids N). x  uids (graph N)"
  using assms
  by (meson Max_ge Min_le linorder_not_le order_trans)

paragraph ‹Template layer types as list of edges›

definition dense :: "('a::{one}, 'b, 'c) neural_network ⇒ nat ⇒ 'a list ⇒ 'b ⇒ 'a ⇒ 'a ⇒ ('a, 'b) edge list" 
  where
 "dense N n ω' φ' α' β' = (if length ω' = n then
                            (let nids = upt (Max (uids (graph N)) + 1) (Max (uids (graph N)) + n + 1)
                             in concat (map (λw. (concat (map 
                                           (λb. map (λa. mk_edge N w φ' α' β' a b) 
                                           (sorted_list_of_set (output_layer_ids N))) nids))) ω')) 
                            else [])"

text ‹In @{const "dense"} we additionally take a list of weights with which the dense layer is 
initialised (requiring a further map operation).›
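
text ‹A small worked example (a sketch, with hypothetical weights ‹w1, w2›): for a network whose
current output layer has ids ‹{1, 2, 3}› and maximal id ‹3›, the call ‹dense N 2 [w1, w2] φ' α' β'›
uses the new ids ‹[4, 5]› and produces one edge per combination of weight, new id, and predecessor
id, i.e. ‹2 * 2 * 3 = 12› edges.›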

definition out :: "('a::{one}, 'b, 'c) neural_network ⇒ ('a, 'b) edge list" 
  where
 "out N = (let nids = mk_new_ids N;
               nedges = map (λa. mk_out_edge N (fst a) (snd a)) 
                            (zip (sorted_list_of_set (output_layer_ids N)) nids)
            in (if distinct nedges then nedges else []))"

definition activation :: "('a::{one}, 'b, 'c) neural_network ⇒ 'b ⇒ 'a ⇒ 'a ⇒ ('a, 'b) edge list" 
  where
 "activation N φ' α' β' = (let nids = mk_new_ids N;
                               nedges = map (λa. mk_edge N 1 φ' α' β' (fst a) (snd a))
                                            (zip (sorted_list_of_set (output_layer_ids N)) nids)
                            in (if distinct nedges then nedges else []))"

text ‹Here we call @{const "mk_edge"} with the weight @{term "ω"} set to @{term "1"}, as we do not 
want to change the output of the previous layer: we simply apply the activation function.›

definition "add_edges N edge_list = foldr (λa b. add_nn_edge b a) edge_list (graph N)"
definition "add_out N = add_edges N (out N)"
definition "add_dense N n ω' φ' α' β' = add_edges N (dense N n ω' φ' α' β')"
definition "add_activation N φ' α' β' = add_edges N (activation N φ' α' β')"

text ‹The definitions @{const "add_edges"}, @{const "add_out"}, @{const "add_dense"}, and
@{const "add_activation"} allow for the easy addition of TensorFlow layer types to an existing 
neural network.›
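
text ‹A hypothetical usage sketch (the base network ‹N0›, the activation function ‹relu›, and
the weight list ‹ws› are assumptions, not constants of this theory):›

(* Stacking layers on top of an existing network N0 (sketch, not checked):
     add_dense N0 4 ws relu α β    — extends the graph of N0 by a dense layer
     add_activation N1 relu α β    — extends the graph of N1 by an activation layer
     add_out N2                    — extends the graph of N2 by an output layer
   Note that add_edges returns a graph rather than a neural_network, so each
   intermediate graph has to be repackaged into a network (N1, N2 above) before
   the next layer can be added. *)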
                                                             
subsubsection ‹Defining Layers in the Digraph Model›

fun layersdigraph :: "nat ⇒ ('a::{zero,linorder,numeral}, 'b, 'c) neural_network 
                     ⇒ ('a, 'b) edge ⇒ ('a × error)"
  where 
  "layersdigraph _ N ⦇ω=_, tl=_, hd=In _⦈       = (0, ERROR)"
| "layersdigraph _ N ⦇ω=_, tl=Out _, hd=_⦈      = (0, ERROR)"
| "layersdigraph _ N ⦇ω=_, tl=In uidin, hd=_⦈   = (0, OK)"
| "layersdigraph n N e = (if 0 < n then  
                          (let 
                              tl'   = (case (tl e) of (Neuron t) ⇒ t);
                              E'    = incoming_arcs N (Neuron.uid tl');
                              lvals = ((λe'. (case layersdigraph (n-1) N e' of  
                                                    (_, ERROR) ⇒ ((0,0), ERROR) 
                                                  | (v, OK)    ⇒ ((v+1, uid (tl e')), OK))) ` E')
                          in  
                          (Max ((λa. fst (fst a)) ` {n. n ∈ lvals ∧ snd n = OK}), OK))
                      else (0, ERROR))"

text ‹Layers are defined via the path from the output node; this allows all dependencies to be 
calculated before prediction. In @{const "layersdigraph"} the layer is calculated using the edges.›
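
text ‹A small worked example (a sketch, assuming a minimal chain ‹In 0 ⟶ Neuron 1 ⟶ Out 2› with
edges ‹e1› from the input and ‹e2› into the output): ‹layersdigraph n N e1› returns ‹(0, OK)›,
since the tail of ‹e1› is an input vertex, and ‹layersdigraph n N e2› returns ‹(1, OK)›, i.e. the
maximum of the incoming layer values plus one.›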

fun layersdigraph_neuron :: "nat ⇒ ('a::{zero,linorder,numeral}, 'b, 'c) neural_network 
                     ⇒ ('a, 'b) neuron ⇒ ('a × error)" 
  where
  "layersdigraph_neuron _ N (In uidin) = (0, OK)"
| "layersdigraph_neuron n N (Out uidout) = (if 0 < n then 
                                         (let
                                            E' = tl ` (incoming_arcs N uidout);
                                            lvals = ((λn'. (case layersdigraph_neuron (n-1) N n' of
                                                           (_, ERROR) ⇒ ((0,0), ERROR)
                                                         | (v, OK) ⇒ ((v+1, uid n'), OK))) ` E')
                                          in (Max ((λa. fst (fst a)) ` {n. n ∈ lvals ∧ snd n = OK}), OK))
                                       else (0, ERROR))"
| "layersdigraph_neuron n N (Neuron a) = (if 0 < n then 
                                         (let
                                            E' = tl ` (incoming_arcs N (Neuron.uid a));
                                            lvals = ((λn'. (case layersdigraph_neuron (n-1) N n' of
                                                           (_, ERROR) ⇒ ((0,0), ERROR)
                                                         | (v, OK) ⇒ ((v+1, uid n'), OK))) ` E')
                                          in (Max ((λa. fst (fst a)) ` {n. n ∈ lvals ∧ snd n = OK}), OK))
                                       else (0, ERROR))"

text ‹In @{const "layersdigraph_neuron"} the layer is calculated using the neurons instead. This 
is more intuitive, as it is the neurons that are arranged in layers.›
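
text ‹On the chain from the example above, @{const "layersdigraph_neuron"} assigns layer ‹0› to
the input vertex, ‹1› to the hidden neuron, and ‹2› to the output vertex (again a sketch, not a
checked computation).›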

paragraph ‹Defining the behaviour of layers ›

fun layersedges :: "'a ⇒ 'a ⇒ ('a::{zero,numeral,linorder}, 'b, 'c) neural_network 
                    ⇒ ('a, 'b) edge set" where
   "layersedges l l' N = (let nall  = neurons N;
                              layer = (λn. ((layersdigraph_neuron (card nall) N n), uid n)) ` nall;
                              nin   = snd ` {n . n ∈ layer ∧ fst (fst n) = l};
                              nout  = snd ` {n . n ∈ layer ∧ fst (fst n) = l'}
                          in {e . e ∈ edges N ∧ uid (tl e) ∈ nin ∧ uid (hd e) ∈ nout})"
text ‹@{const "layersedges"} yields all edges between layer ‹l› and layer ‹l'›, i.e., between 
layers ‹n› and ‹n+1› for consecutive layers.›

subparagraph ‹Predicates to distinguish different layer types›

text ‹The following four functions test whether sets of edges correspond to the correct type of 
connections for Dense, Activation, Input and Output layers.›

definition isDenses :: "('a, 'b) edge set ⇒ bool" where
          "isDenses e = (∀n' ∈ tl ` e. ∀n'' ∈ hd ` e. ∃e' ∈ e. tl e' = n' ∧ hd e' = n'')"

definition isActivations :: "('a, 'b) edge set ⇒ bool" where
          "isActivations e = ((∀n' ∈ tl ` e. ∃! e' ∈ e. tl e' = n') ∧ 
                              (∀n'' ∈ hd ` e. ∃! e'' ∈ e. hd e'' = n''))"

definition isInputs :: "('a, 'b) edge set ⇒ bool" where
          "isInputs e = (isDenses e ∧ (∀n ∈ hd ` e. input_neuron n))"

definition isOutputs :: "('a, 'b) edge set ⇒ bool" where
          "isOutputs e = (isActivations e ∧ (∀n''' ∈ hd ` e. output_neuron n'''))"


text ‹The following four functions test whether lists of edges correspond to the correct type of 
connections for Dense, Activation, Input and Output layers. 
We provide these definitions over both lists and sets in order to allow us to use whichever is 
more efficient in a specific situation.›

definition isDensel :: "('a, 'b) edge list ⇒ bool" where
          "isDensel e = (let t = (map tl e); h = (map hd e) in 
                        (∀n' ∈ set t. ∀n'' ∈ set h. 
                         filter (λe'. tl e' = n' ∧ hd e' = n'') e ≠ []))" 

definition isInputl :: "('a, 'b) edge list ⇒ bool" where
          "isInputl e = (isDensel e ∧ foldr (∧) (map input_neuron (map hd e)) True)" 

definition isActivationl :: "('a, 'b) edge list ⇒ bool" where
          "isActivationl e = (let t = (map tl e); h = (map hd e) in 
                             distinct t ∧ distinct h ∧ length t = length h ∧ length e = length h ∧
                             length t = length e)"  

definition isOutputl :: "('a, 'b) edge list ⇒ bool" where
           "isOutputl e = (isActivationl e ∧ foldr (∧) (map (output_neuron ∘ hd) e) True)" 
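
text ‹Both list characterisations hold vacuously for the empty edge list; a small sanity check:›

lemma "isDensel [] ∧ isActivationl []"
  by (simp add: isDensel_def isActivationl_def Let_def)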

text‹We prove that the list and set definitions of our layers describe the same behaviour, i.e., 
it does not matter whether @{const "isActivationl"} or @{const "isActivations"} is used: the same 
connections are ensured.›

lemma allOutput:
  shows "foldr (∧) (map (output_neuron ∘ hd) e) True = (∀n' ∈ hd ` set e. output_neuron n')"
proof (induction "e")
  case Nil
  then show ?case by simp
next
  case (Cons a e)
  then show ?case by simp 
qed

lemma allInput:
  shows "foldr (∧) (map (input_neuron ∘ hd) e) True = (∀n' ∈ hd ` set e. input_neuron n')"
proof (induction "e")
  case Nil
  then show ?case by simp
next
  case (Cons a e)
  then show ?case by simp
qed

lemma forAll:
  "(∀n' ∈ set (map tl e). ∀n'' ∈ set (map hd e). filter (λe'. tl e' = n' ∧ hd e' = n'') e ≠ []) =
   (∀n' ∈ tl ` set e. ∀n'' ∈ hd ` set e. ∃e' ∈ set e. tl e' = n' ∧ hd e' = n'')"
proof (induction "e")
  case Nil
  then show ?case by simp 
next
  case (Cons a e)
  then show ?case by (smt (verit, del_insts) empty_filter_conv list.set_map) 
qed

lemma isDensel_isDenses_equivalence: "isDensel E = isDenses (set E)"
  apply (safe)
  subgoal apply (simp add: isDensel_def isDenses_def)
    subgoal apply (metis (mono_tags, lifting) empty_filter_conv) done
    done
  subgoal apply (simp add: isDensel_def isDenses_def)
    subgoal apply (smt (verit, ccfv_threshold) empty_filter_conv) done
    done
  done

lemma isInputl_isInputs_equivalence: "isInputl E = isInputs (set E)"
  apply(safe)
  subgoal apply (simp add: isInputl_def isInputs_def)
    using allInput isDensel_isDenses_equivalence by auto
  subgoal apply (simp add: isInputl_def isInputs_def)
    using allInput isDensel_isDenses_equivalence by auto
  done

lemma isActivationl_isActivations_equivalence: 
  assumes distinct: "distinct E"
  shows "isActivationl E = isActivations (set E)"
  using assms
  apply(safe)
  subgoal apply(simp add: isActivationl_def isActivations_def) 
    by (metis distinct_map inj_onD)
  subgoal apply(simp add: isActivationl_def isActivations_def)
    using distinct_map inj_on_def by fastforce
  done

lemma isOutputl_isOutputs_equivalence:
  assumes distinct: "distinct E"
  shows "isOutputl E = isOutputs (set E)" 
  using assms
  apply (safe)
  subgoal 
    apply (simp add: isOutputl_def isOutputs_def)
    using allOutput isActivationl_isActivations_equivalence by blast
  subgoal 
    apply (simp add: isOutputl_def isOutputs_def) 
    using allOutput isActivationl_isActivations_equivalence by blast
  done

text ‹We currently support the following four types of layers:›

definition "layersinput l l' N = isInputs (layersedges l l' N)"
definition "layersoutput l l' N = isOutputs (layersedges l l' N)"
definition "layersdense l l' N = isDenses (layersedges l l' N)"
definition "layersactivation l l' N = isActivations (layersedges l l' N)"
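
text ‹For example, checking that the edges between the first two layers of a network ‹N› form a
dense connection amounts to evaluating ‹layersdense 0 1 N› (illustrative only; layer indices
start at ‹0› for the input vertices).›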
 

subsubsection ‹Conversion of layer types›

paragraph ‹Helper lemmas›

text ‹The following helper lemmas are needed to prove that tails are unique within the edge lists.›

context neural_network_digraph begin

lemma "nn_pregraph (graph N)"
  by (meson neural_network_digraph_axioms neural_network_digraph_def nn_graph.axioms(1)) 

lemma uid_is_singleton: "x ∈ NN_Digraph.uid ` (neurons N) 
    ⟹ is_singleton {n ∈ neurons N. NN_Digraph.uid n = x}"
  using neurons_def o_def nn_pregraph.id_vert_inj
  by (smt (verit, best) empty_iff image_iff inj_onD is_singletonI' 
      mem_Collect_eq neural_network_digraph_axioms neural_network_digraph_def nn_graph.id_vert_inj) 

lemma distinct_elem:
  assumes a1: "distinct X"
  and a2: "set X ⊆ uid ` (neurons N)"
shows "distinct (map (λx. the_elem {n ∈ neurons N. NN_Digraph.uid n = x}) X)"
  by (smt (verit, best) a1 a2 distinct_map image_iff inj_on_def is_singleton_the_elem 
                        mem_Collect_eq subset_code(1) uid_is_singleton) 

lemma output_layer_ids_subset_neuron_ids: "output_layer_ids N ⊆ uid ` (neurons N)"
  unfolding image_def neurons_def output_layer_ids_def o_def output_layer_def output_verts_def
  by auto

end

paragraph ‹Activation layer proofs›

lemma distinct_activation_edges: "distinct (activation N φ' α' β')"
  apply (simp add: activation_def)
  by (smt (verit) distinct.simps(1))

lemma output_activation_layer_length_equal:
   assumes notEmptyNeurons: "neurons N ≠ {}" 
   and notEmptyActivationLayer: "length (activation N φ' α' β') ≠ 0"
   shows "card (output_layer_ids N) = length (activation N φ' α' β')"
   using assms
   apply (simp add: activation_def mk_new_ids_def mk_out_edge_def output_layer_ids_def)
   apply auto
   done

lemma new_ids_activation_layer_length_equal: 
  assumes notEmptyNeurons: "neurons N ≠ {}" 
  and notEmptyActivationLayer: "length (activation N φ' α' β') ≠ 0" 
  and notEmptyNewIds: "length (mk_new_ids N) ≠ 0"
  shows "length (mk_new_ids N) = length (activation N φ' α' β')"
  using assms 
  apply (simp add: out_def mk_new_ids_def)
  apply (metis assms(2) output_activation_layer_length_equal)
  done

lemma map_neuron_hd_id: 
  "(map (λx. Neuron ⦇φ = φ', α = α', β = β', uid = f x⦈) X) =  
   (map (λx. Neuron ⦇φ = φ', α = α', β = β', uid = x⦈) (map f X))"
  by simp 

lemma map_neuron_tl_id:
  "(map (λx. the_elem {n ∈ neurons N. NN_Digraph.uid n = f x}) X) = 
   (map (λx. the_elem {n ∈ neurons N. NN_Digraph.uid n = x}) (map f X))"
  by simp


context nn_pregraph begin

lemma distinct_head_activation: "distinct (map hd (activation N φ' α' β'))"
  apply (simp add: activation_def Let_def mk_edge_def o_def) 
  apply (simp only: map_neuron_hd_id[of _ _ _ snd _] map_snd_zip new_id_len)
  using new_id_distinct
  apply (simp add: distinct_conv_nth)
  by auto

end 

context neural_network_digraph begin

lemma distinct_tail_activation: "distinct (map tl (activation N φ' α' β'))"
  apply (simp add: activation_def Let_def mk_edge_def o_def)
  apply (simp only: map_neuron_tl_id[of _ fst] map_fst_zip new_id_len)
  using distinct_elem[of "sorted_list_of_set (output_layer_ids N)"]
         distinct_sorted_list_of_set[of "output_layer_ids N"]
         output_layer_ids_subset_neuron_ids set_sorted_list_of_set
  by (metis bot.extremum set_empty sorted_list_of_set.fold_insort_key.infinite) 

lemma activation_is_activation: "isActivationl (activation N φ' α' β')"
  apply (simp add: isActivationl_def)
  apply (safe)
  subgoal apply (rule distinct_tail_activation) done
  subgoal apply (rule nn_pregraph.distinct_head_activation) 
          apply (rule NN_Digraph.nn_pregraph_mk ) done
  done

end 

paragraph ‹Output layer proofs›

lemma output_output_layer_length_equal:
   assumes notEmptyNeurons: "neurons N ≠ {}" 
   and notEmptyOutputLayer: "length (out N) ≠ 0"
   shows "card (output_layer_ids N) = length (out N)"
   using assms
   apply (simp add: out_def mk_new_ids_def mk_out_edge_def output_layer_ids_def)
   by auto

lemma new_ids_output_layer_length_equal: 
  assumes notEmptyNeurons: "neurons N ≠ {}" 
  and notEmptyOutputLayer: "length (out N) ≠ 0" 
  shows "length (mk_new_ids N) = length (out N)"
  using assms 
  apply (simp add: out_def)
  using output_output_layer_length_equal new_id_len 
  apply (simp add: new_id_len notEmptyNeurons out_def output_output_layer_length_equal)
  done

lemma distinct_output_edges: "distinct (out N)"
  apply (smt (verit, best) out_def card.empty card_distinct list.set(1) list.size(3))
  done

lemma map_out_neuron_hd_id: "(map (λx. Out (f x)) X) = (map (λx. Out x) (map f X))"
  by simp

context nn_pregraph begin 

lemma distinct_head_output: "distinct (map hd (out N))"
  apply (simp add: out_def Let_def mk_out_edge_def o_def) 
  apply (simp only: map_out_neuron_hd_id[of snd _] map_snd_zip new_id_len)
  using new_id_distinct
  apply (simp add: distinct_conv_nth)
  by auto

end 

lemma fold_and_map: "foldr (∧) (map (λx. True) X) True" 
proof (induction "X")
  case Nil
  then show ?case by simp
next
  case (Cons a X)
  then show ?case by simp 
qed

lemma head_output_neurons: "foldr (∧) (map (output_neuron ∘ edge.hd) (out N)) True" 
  apply (simp add: o_def out_def Let_def mk_out_edge_def)
  using fold_and_map 
  by(auto)

context neural_network_digraph begin 

lemma distinct_tail_output: "distinct (map tl (out N))"
  apply (simp add: out_def Let_def mk_out_edge_def o_def) 
  apply (simp only: map_neuron_tl_id[of _ fst _] map_fst_zip new_id_len)
  using distinct_sorted_list_of_set distinct_elem
  by (metis (mono_tags, lifting) distinct.simps(1) list.simps(8) output_layer_ids_subset_neuron_ids 
            sorted_list_of_set.fold_insort_key.infinite sorted_list_of_set.set_sorted_key_list_of_set)

lemma output_is_output: "isOutputl (out N)"
  apply (simp add: isOutputl_def isActivationl_def)
  apply (safe)
  subgoal apply (rule distinct_tail_output)  done
  subgoal apply (rule nn_pregraph.distinct_head_output) apply(rule NN_Digraph.nn_pregraph_empty) done
  subgoal apply (rule head_output_neurons) done
  done

paragraph ‹Dense layer proofs›

lemma dense_is_dense: 
  assumes neuronsNotZero: "n > 0"
  and weightEqualNeurons: "length ω' = n"
  shows "isDenses (set (dense N n ω' φ' α' β'))"
  using assms
  apply (safe)
  apply (simp add: isDenses_def dense_def activation_def)
  apply (simp add: neurons_def output_layer_ids_def output_layer_def output_verts_def mk_edge_def)
  apply (force)
  done

end
end