
(***********************************************************************************
 * Copyright (c) University of Exeter, UK
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 ***********************************************************************************)

subsection‹Digraphs as Layers (\thy)›	
theory
  NN_Digraph_Layers
  imports   
  NN_Digraph
  "HOL-Combinatorics.Permutations" 
begin

definition layer_equiv :: "('a list ⇒ 'b list) ⇒ ('a list ⇒ 'b list) ⇒ bool" ("_ ≈⇩l _")
  where 
"layer_equiv f g = (∃p p'. ∀xs. f xs = permute_list p' (f (permute_list p xs)))"

lemma mset_eq_layer_equiv:
  assumes "mset xs = mset ys"
      and "mset (f xs) = mset (g ys)"
    shows "f ≈⇩l g"
  unfolding layer_equiv_def using assms 
  by (metis permute_list_id) 
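
text ‹As a small sanity check, the relation is reflexive: choosing the identity permutation
twice makes both sides coincide. The proof mirrors the one above.›

lemma layer_equiv_refl: "f ≈⇩l f"
  unfolding layer_equiv_def by (metis permute_list_id)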

 

fun output_neuron where
   "output_neuron (In nid) = False"
 | "output_neuron (Out nid) = True"
 | "output_neuron (Neuron n) = False"

fun input_neuron where
   "input_neuron (In nid) = True"
 | "input_neuron (Out nid) = False"
 | "input_neuron (Neuron n) = False"

fun hidden_neuron where
   "hidden_neuron (In nid) = False"
 | "hidden_neuron (Out nid) = False"
 | "hidden_neuron (Neuron n) = True"
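
text ‹Every vertex falls into exactly one of these three kinds. The following small sanity
lemma is a sketch, assuming the vertex datatype has exactly the three constructors matched
above:›

lemma "input_neuron v ∨ output_neuron v ∨ hidden_neuron v"
  by (cases v) simp_all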

subsubsection ‹Defining layer types as lists of edges›

text ‹This subsection details definitions which allow for the easy creation of common layer types.
The Activation and Dense layer types map to the layer types used by TensorFlow 
(see  🌐‹https://www.tensorflow.org/api_docs/python/tf/keras/layers›) ›

paragraph ‹Edge construction functions›

definition mk_edge :: "('a::{one}, 'b, 'c) neural_network ⇒ 'a ⇒ 'b ⇒ 'a ⇒ 'a ⇒ id ⇒ id ⇒ ('a, 'b) edge" 
  where
 "mk_edge N ω' φ' α' β' id' nid = ⦇ ω = ω', 
                                    tl = (the_elem {n . n ∈ neurons N ∧ uid n = id'}), 
                                    hd = Neuron ⦇ φ = φ', α = α', β = β', uid = nid ⦈ ⦈"

definition mk_out_edge :: "('a::{one}, 'b, 'c) neural_network ⇒ id ⇒ id ⇒ ('a, 'b) edge" 
  where
 "mk_out_edge N id' nid' = ⦇ ω = 1, 
                             tl = (the_elem {n . n ∈ neurons N ∧ uid n = id'}),  
                             hd = Out nid' ⦈"

definition mk_new_ids :: "('a::{one}, 'b, 'c) neural_network ⇒ nat list"  
  where
 "mk_new_ids N = upt (Max (uids (graph N)) + 1) 
                     (Max (uids (graph N)) + card (output_layer_ids N) + 1)"

text ‹@{const "mk_new_ids"} creates a list of fresh ids whose length matches the size of the 
current last layer of a given network, starting above the current maximum id in the network. It 
is used in the activation and out functions to generate the new neurons in the edges. To 
validate that @{const "mk_new_ids"} returns a list of the correct size and that the ids are 
unique in the network, the following lemmas are provided.›
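
text ‹For illustration: if the current maximal id is ‹3› and the output layer contains three
neurons, the generated id window is ‹[4, 5, 6]› (a small concrete check of the ‹upt› arithmetic):›

lemma "upt 4 7 = [4, 5, 6]"
  by eval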

lemma new_id_len: "length (mk_new_ids N) = length (sorted_list_of_set (output_layer_ids N))"
  by (simp add: mk_new_ids_def)

lemma new_id_len_card: "length (mk_new_ids N) = card (output_layer_ids N)"
  by (simp add: mk_new_ids_def)

lemma new_id_distinct: "distinct (mk_new_ids N)"
  by (metis distinct_upt mk_new_ids_def)

lemma new_id_greater:
  assumes "card (output_layer_ids N) > 0"
  shows "Min (set (mk_new_ids N)) > Max (uids (graph N))"
  using assms
  by (simp add: mk_new_ids_def)

lemma new_id_sorted: 
  shows "sorted (mk_new_ids N)"
  by (metis mk_new_ids_def sorted_upt)
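
text ‹The generated ids form exactly the integer interval above the current maximal id; a small
derived fact that follows directly from the definition:›

lemma new_id_set:
  "set (mk_new_ids N) =
     {Max (uids (graph N)) + 1 ..< Max (uids (graph N)) + card (output_layer_ids N) + 1}"
  by (simp add: mk_new_ids_def)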

lemma new_ids_unique:
  assumes new_ids_finite:   "finite (set(mk_new_ids N))"
  and current_ids_finite:   "finite (uids (graph N))"
  and MinMax:     "Max (uids (graph N)) < Min (set(mk_new_ids N))"
  shows "uids (graph N)  set(mk_new_ids N) = {}"
  using assms
  by (metis Min_gr_iff  disjnt_def disjnt_ge_max disjoint_iff_not_equal equals0D) 

text‹Or by rewriting disjointness:›

lemma new_ids_unique': 
  assumes new_ids_finite:   "finite (set(mk_new_ids N))"
  and current_ids_finite:   "finite (uids (graph N))"
  and MinMax:     "Max (uids (graph N)) < Min (set(mk_new_ids N))"
  shows " x  set(mk_new_ids N). x  uids (graph N)"
  using assms
  by (meson Max_ge Min_le linorder_not_le order_trans)

paragraph ‹Template layer types as list of edges›

definition dense :: "('a::{one}, 'b, 'c) neural_network ⇒ nat ⇒ 'a list ⇒ 'b ⇒ 'a ⇒ 'a ⇒ ('a, 'b) edge list" 
  where
 "dense N n ω' φ' α' β' = (if length ω' = n then
                            (let nids = upt (Max (uids (graph N)) + 1) (Max (uids (graph N)) + n + 1)
                             in concat (map (λw. (concat (map 
                                           (λb. map (λa. mk_edge N w φ' α' β' a b) 
                                           (sorted_list_of_set (output_layer_ids N))) nids))) ω')) 
                            else [])"

text ‹In @{const "dense"} we additionally take a list of weights with which the dense layer is 
initialised (requiring a further map operation).›
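
text ‹A small worked example (a sketch, with hypothetical weights ‹w1, w2›): for a network whose
current output layer has ids ‹{1, 2, 3}› and maximal id ‹3›, the call ‹dense N 2 [w1, w2] φ' α' β'›
uses the new ids ‹[4, 5]› and produces one edge per combination of weight, new id, and predecessor
id, i.e. ‹2 * 2 * 3 = 12› edges.›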

definition out :: "('a::{one}, 'b, 'c) neural_network ⇒ ('a, 'b) edge list" 
  where
 "out N = (let nids = mk_new_ids N;
               nedges = map (λa. mk_out_edge N (fst a) (snd a)) 
                            (zip (sorted_list_of_set (output_layer_ids N)) nids)
            in (if distinct nedges then nedges else []))"

definition activation :: "('a::{one}, 'b, 'c) neural_network ⇒ 'b ⇒ 'a ⇒ 'a ⇒ ('a, 'b) edge list" 
  where
 "activation N φ' α' β' = (let nids = mk_new_ids N;
                               nedges = map (λa. mk_edge N 1 φ' α' β' (fst a) (snd a))
                                            (zip (sorted_list_of_set (output_layer_ids N)) nids)
                            in (if distinct nedges then nedges else []))"

text ‹Here we call @{const "mk_edge"} with the weight @{term "ω"} set to @{term "1"}, as we do not 
want to change the output of the previous layer: we simply apply the activation function.›

definition "add_edges N edge_list = foldr (λa b. add_nn_edge b a) edge_list (graph N)"
definition "add_out N = add_edges N (out N)"
definition "add_dense N n ω' φ' α' β' = add_edges N (dense N n ω' φ' α' β')"
definition "add_activation N φ' α' β' = add_edges N (activation N φ' α' β')"

text ‹The definitions @{const "add_edges"}, @{const "add_out"}, @{const "add_dense"}, and
@{const "add_activation"} allow for the easy addition of TensorFlow layer types to an existing 
neural network.›
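
text ‹A hypothetical usage sketch (the base network ‹N0›, the activation function ‹relu›, and
the weight list ‹ws› are assumptions, not constants of this theory):›

(* Stacking layers on top of an existing network N0 (sketch, not checked):
     add_dense N0 4 ws relu α β    — extends the graph of N0 by a dense layer
     add_activation N1 relu α β    — extends the graph of N1 by an activation layer
     add_out N2                    — extends the graph of N2 by an output layer
   Note that add_edges returns a graph rather than a neural_network, so each
   intermediate graph has to be repackaged into a network (N1, N2 above) before
   the next layer can be added. *)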
                                                             
subsubsection ‹Defining Layers in the Digraph Model›

fun layersdigraph :: "nat ⇒ ('a::{zero,linorder,numeral}, 'b, 'c) neural_network 
                     ⇒ ('a, 'b) edge ⇒ ('a × error)"
  where 
  "layersdigraph _ N ⦇ω=_, tl=_, hd=In _⦈       = (0, ERROR)"
| "layersdigraph _ N ⦇ω=_, tl=Out _, hd=_⦈      = (0, ERROR)"
| "layersdigraph _ N ⦇ω=_, tl=In uidin, hd=_⦈   = (0, OK)"
| "layersdigraph n N e = (if 0 < n then  
                          (let 
                              tl'   = (case (tl e) of (Neuron t) ⇒ t);
                              E'    = incoming_arcs N (Neuron.uid tl');
                              lvals = ((λe'. (case layersdigraph (n-1) N e' of  
                                                    (_, ERROR) ⇒ ((0,0), ERROR) 
                                                  | (v, OK)    ⇒ ((v+1, uid (tl e')), OK))) ` E')
                          in  
                          (Max ((λa. fst (fst a)) ` {n. n ∈ lvals ∧ snd n = OK}), OK))
                      else (0, ERROR))"

text ‹Layers are defined via the path from the output node; this allows all dependencies to be 
calculated before prediction. In @{const "layersdigraph"} the layer is calculated using the edges.›
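
text ‹A small worked example (a sketch, assuming a minimal chain ‹In 0 ⟶ Neuron 1 ⟶ Out 2› with
edges ‹e1› from the input and ‹e2› into the output): ‹layersdigraph n N e1› returns ‹(0, OK)›,
since the tail of ‹e1› is an input vertex, and ‹layersdigraph n N e2› returns ‹(1, OK)›, i.e. the
maximum of the incoming layer values plus one.›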

fun layersdigraph_neuron :: "nat ⇒ ('a::{zero,linorder,numeral}, 'b, 'c) neural_network 
                     ⇒ ('a, 'b) neuron ⇒ ('a × error)" 
  where
  "layersdigraph_neuron _ N (In uidin) = (0, OK)"
| "layersdigraph_neuron n N (Out uidout) = (if 0 < n then 
                                         (let
                                            E' = tl ` (incoming_arcs N uidout);
                                            lvals = ((λn'. (case layersdigraph_neuron (n-1) N n' of
                                                           (_, ERROR) ⇒ ((0,0), ERROR)
                                                         | (v, OK) ⇒ ((v+1, uid n'), OK))) ` E')
                                          in (Max ((λa. fst (fst a)) ` {n. n ∈ lvals ∧ snd n = OK}), OK))
                                       else (0, ERROR))"
| "layersdigraph_neuron n N (Neuron a) = (if 0 < n then 
                                         (let
                                            E' = tl ` (incoming_arcs N (Neuron.uid a));
                                            lvals = ((λn'. (case layersdigraph_neuron (n-1) N n' of
                                                           (_, ERROR) ⇒ ((0,0), ERROR)
                                                         | (v, OK) ⇒ ((v+1, uid n'), OK))) ` E')
                                          in (Max ((λa. fst (fst a)) ` {n. n ∈ lvals ∧ snd n = OK}), OK))
                                       else (0, ERROR))"

text ‹In @{const "layersdigraph_neuron"} the layer is calculated using the neurons instead. This 
is more intuitive, as it is the neurons that are arranged in layers.›
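
text ‹On the chain from the example above, @{const "layersdigraph_neuron"} assigns layer ‹0› to
the input vertex, ‹1› to the hidden neuron, and ‹2› to the output vertex (again a sketch, not a
checked computation).›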

paragraph ‹Defining the behaviour of layers ›

fun layersedges :: "'a ⇒ 'a ⇒ ('a::{zero,numeral,linorder}, 'b, 'c) neural_network 
                    ⇒ ('a, 'b) edge set" where
   "layersedges l l' N = (let nall  = neurons N;
                              layer = (λn. ((layersdigraph_neuron (card nall) N n), uid n)) ` nall;
                              nin   = snd ` {n . n ∈ layer ∧ fst (fst n) = l};
                              nout  = snd ` {n . n ∈ layer ∧ fst (fst n) = l'}
                          in {e . e ∈ edges N ∧ uid (tl e) ∈ nin ∧ uid (hd e) ∈ nout})"
text ‹@{const "layersedges"} yields all edges between layer ‹l› and layer ‹l'›, i.e., between 
layers ‹n› and ‹n+1› for consecutive layers.›

subparagraph ‹Predicates to distinguish different layer types›

text ‹The following four functions test whether sets of edges correspond to the correct type of 
connections for Dense, Activation, Input and Output layers.›

definition isDenses :: "('a, 'b) edge set ⇒ bool" where
          "isDenses e = (∀n' ∈ tl ` e. ∀n'' ∈ hd ` e. ∃e' ∈ e. tl e' = n' ∧ hd e' = n'')"

definition isActivations :: "('a, 'b) edge set ⇒ bool" where
          "isActivations e = ((∀n' ∈ tl ` e. ∃! e' ∈ e. tl e' = n') ∧ 
                              (∀n'' ∈ hd ` e. ∃! e'' ∈ e. hd e'' = n''))"

definition isInputs :: "('a, 'b) edge set ⇒ bool" where
          "isInputs e = (isDenses e ∧ (∀n ∈ hd ` e. input_neuron n))"

definition isOutputs :: "('a, 'b) edge set ⇒ bool" where
          "isOutputs e = (isActivations e ∧ (∀n''' ∈ hd ` e. output_neuron n'''))"


text ‹The following four functions test whether lists of edges correspond to the correct type of 
connections for Dense, Activation, Input and Output layers. 
We provide these definitions over both lists and sets in order to allow us to use whichever is 
more efficient in a specific situation.›

definition isDensel :: "('a, 'b) edge list ⇒ bool" where
          "isDensel e = (let t = (map tl e); h = (map hd e) in 
                        (∀n' ∈ set t. ∀n'' ∈ set h. 
                         filter (λe'. tl e' = n' ∧ hd e' = n'') e ≠ []))" 

definition isInputl :: "('a, 'b) edge list ⇒ bool" where
          "isInputl e = (isDensel e ∧ foldr (∧) (map input_neuron (map hd e)) True)" 

definition isActivationl :: "('a, 'b) edge list ⇒ bool" where
          "isActivationl e = (let t = (map tl e); h = (map hd e) in 
                             distinct t ∧ distinct h ∧ length t = length h ∧ length e = length h ∧
                             length t = length e)"  

definition isOutputl :: "('a, 'b) edge list ⇒ bool" where
           "isOutputl e = (isActivationl e ∧ foldr (∧) (map (output_neuron ∘ hd) e) True)" 
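
text ‹Both list characterisations hold vacuously for the empty edge list; a small sanity check:›

lemma "isDensel [] ∧ isActivationl []"
  by (simp add: isDensel_def isActivationl_def Let_def)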

text‹We prove that the list and set definitions of our layers describe the same behaviour, i.e., 
it does not matter whether @{const "isActivationl"} or @{const "isActivations"} is used: the same 
connections are ensured.›

lemma allOutput:
  shows "foldr (∧) (map (output_neuron ∘ hd) e) True = (∀n' ∈ hd ` set e. output_neuron n')"
proof (induction "e")
  case Nil
  then show ?case by simp
next
  case (Cons a e)
  then show ?case by simp 
qed

lemma allInput:
  shows "foldr (∧) (map (input_neuron ∘ hd) e) True = (∀n' ∈ hd ` set e. input_neuron n')"
proof (induction "e")
  case Nil
  then show ?case by simp
next
  case (Cons a e)
  then show ?case by simp
qed

lemma forAll:
  "(∀n' ∈ set (map tl e). ∀n'' ∈ set (map hd e). filter (λe'. tl e' = n' ∧ hd e' = n'') e ≠ []) =
   (∀n' ∈ tl ` set e. ∀n'' ∈ hd ` set e. ∃e' ∈ set e. tl e' = n' ∧ hd e' = n'')"
proof (induction "e")
  case Nil
  then show ?case by simp 
next
  case (Cons a e)
  then show ?case by (smt (verit, del_insts) empty_filter_conv list.set_map) 
qed

lemma isDensel_isDenses_equivalence: "isDensel E = isDenses (set E)"
  apply (safe)
  subgoal apply (simp add: isDensel_def isDenses_def)
    subgoal apply (metis (mono_tags, lifting) empty_filter_conv) done
    done
  subgoal apply (simp add: isDensel_def isDenses_def)
    subgoal apply (smt (verit, ccfv_threshold) empty_filter_conv) done
    done
  done

lemma isInputl_isInputs_equivalence: "isInputl E = isInputs (set E)"
  apply(safe)
  subgoal apply (simp add: isInputl_def isInputs_def)
    using allInput isDensel_isDenses_equivalence by auto
  subgoal apply (simp add: isInputl_def isInputs_def)
    using allInput isDensel_isDenses_equivalence by auto
  done

lemma isActivationl_isActivations_equivalence: 
  assumes distinct: "distinct E"
  shows "isActivationl E = isActivations (set E)"
  using assms
  apply(safe)
  subgoal apply(simp add: isActivationl_def isActivations_def) 
    by (metis distinct_map inj_onD)
  subgoal apply(simp add: isActivationl_def isActivations_def)
    using distinct_map inj_on_def by fastforce
  done

lemma isOutputl_isOutputs_equivalence:
  assumes distinct: "distinct E"
  shows "isOutputl E = isOutputs (set E)" 
  using assms
  apply (safe)
  subgoal 
    apply (simp add: isOutputl_def isOutputs_def)
    using allOutput isActivationl_isActivations_equivalence by blast
  subgoal 
    apply (simp add: isOutputl_def isOutputs_def) 
    using allOutput isActivationl_isActivations_equivalence by blast
  done

text ‹We currently support the following four types of layers:›

definition "layersinput l l' N = isInputs (layersedges l l' N)"
definition "layersoutput l l' N = isOutputs (layersedges l l' N)"
definition "layersdense l l' N = isDenses (layersedges l l' N)"
definition "layersactivation l l' N = isActivations (layersedges l l' N)"
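
text ‹For example, checking that the edges between the first two layers of a network ‹N› form a
dense connection amounts to evaluating ‹layersdense 0 1 N› (illustrative only; layer indices
start at ‹0› for the input vertices).›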
 

subsubsection ‹Conversion of layer types›

paragraph ‹Helper lemmas›

text ‹The following helper lemmas are needed to prove that tails are unique within the edge lists.›

context neural_network_digraph begin

lemma "nn_pregraph (graph N)"
  by (meson neural_network_digraph_axioms neural_network_digraph_def nn_graph.axioms(1)) 

lemma uid_is_singleton: "x ∈ NN_Digraph.uid ` (neurons N) 
    ⟹ is_singleton {n ∈ neurons N. NN_Digraph.uid n = x}"
  using neurons_def o_def nn_pregraph.id_vert_inj
  by (smt (verit, best) empty_iff image_iff inj_onD is_singletonI' 
      mem_Collect_eq neural_network_digraph_axioms neural_network_digraph_def nn_graph.id_vert_inj) 

lemma distinct_elem:
  assumes a1: "distinct X"
  and a2: "set X ⊆ uid ` (neurons N)"
shows "distinct (map (λx. the_elem {n ∈ neurons N. NN_Digraph.uid n = x}) X)"
  by (smt (verit, best) a1 a2 distinct_map image_iff inj_on_def is_singleton_the_elem 
                        mem_Collect_eq subset_code(1) uid_is_singleton) 

lemma output_layer_ids_subset_neuron_ids: "output_layer_ids N ⊆ uid ` (neurons N)"
  unfolding image_def neurons_def output_layer_ids_def o_def output_layer_def output_verts_def
  by auto

end

paragraph ‹Activation layer proofs›

lemma distinct_activation_edges: "distinct (activation N φ' α' β')"
  apply (simp add: activation_def)
  by (smt (verit) distinct.simps(1))

lemma output_activation_layer_length_equal:
   assumes notEmptyNeurons: "neurons N ≠ {}" 
   and notEmptyActivationLayer: "length (activation N φ' α' β') ≠ 0"
   shows "card (output_layer_ids N) = length (activation N φ' α' β')"
   using assms
   apply (simp add: activation_def mk_new_ids_def mk_out_edge_def output_layer_ids_def)
   apply auto
   done

lemma new_ids_activation_layer_length_equal: 
  assumes notEmptyNeurons: "neurons N ≠ {}" 
  and notEmptyActivationLayer: "length (activation N φ' α' β') ≠ 0" 
  and notEmptyNewIds: "length (mk_new_ids N) ≠ 0"
  shows "length (mk_new_ids N) = length (activation N φ' α' β')"
  using assms 
  apply (simp add: out_def mk_new_ids_def)
  apply (metis assms(2) output_activation_layer_length_equal)
  done

lemma map_neuron_hd_id: 
  "(map (λx. Neuron ⦇φ = φ', α = α', β = β', uid = f x⦈) X) =  
   (map (λx. Neuron ⦇φ = φ', α = α', β = β', uid = x⦈) (map f X))"
  by simp 

lemma map_neuron_tl_id:
  "(map (λx. the_elem {n ∈ neurons N. NN_Digraph.uid n = f x}) X) = 
   (map (λx. the_elem {n ∈ neurons N. NN_Digraph.uid n = x}) (map f X))"
  by simp


context nn_pregraph begin

lemma distinct_head_activation: "distinct (map hd (activation N φ' α' β'))"
  apply (simp add: activation_def Let_def mk_edge_def o_def) 
  apply (simp only: map_neuron_hd_id[of _ _ _ snd _] map_snd_zip new_id_len)
  using new_id_distinct
  apply (simp add: distinct_conv_nth)
  by auto

end 

context neural_network_digraph begin

lemma distinct_tail_activation: "distinct (map tl (activation N φ' α' β'))"
  apply (simp add: activation_def Let_def mk_edge_def o_def)
  apply (simp only: map_neuron_tl_id[of _ fst] map_fst_zip new_id_len)
  using distinct_elem[of "sorted_list_of_set (output_layer_ids N)"]
         distinct_sorted_list_of_set[of "output_layer_ids N"]
         output_layer_ids_subset_neuron_ids set_sorted_list_of_set
  by (metis bot.extremum set_empty sorted_list_of_set.fold_insort_key.infinite) 

lemma activation_is_activation: "isActivationl (activation N φ' α' β')"
  apply (simp add: isActivationl_def)
  apply (safe)
  subgoal apply (rule distinct_tail_activation) done
  subgoal apply (rule nn_pregraph.distinct_head_activation) 
          apply (rule NN_Digraph.nn_pregraph_mk ) done
  done

end 

paragraph ‹Output layer proofs›

lemma output_output_layer_length_equal:
   assumes notEmptyNeurons: "neurons N ≠ {}" 
   and notEmptyOutputLayer: "length (out N) ≠ 0"
   shows "card (output_layer_ids N) = length (out N)"
   using assms
   apply (simp add: out_def mk_new_ids_def mk_out_edge_def output_layer_ids_def)
   by auto

lemma new_ids_output_layer_length_equal: 
  assumes notEmptyNeurons: "neurons N ≠ {}" 
  and notEmptyOutputLayer: "length (out N) ≠ 0" 
  shows "length (mk_new_ids N) = length (out N)"
  using assms 
  apply (simp add: out_def)
  using output_output_layer_length_equal new_id_len 
  apply (simp add: new_id_len notEmptyNeurons out_def output_output_layer_length_equal)
  done

lemma distinct_output_edges: "distinct (out N)"
  apply (smt (verit, best) out_def card.empty card_distinct list.set(1) list.size(3))
  done

lemma map_out_neuron_hd_id: "(map (λx. Out (f x)) X) = (map (λx. Out x) (map f X))"
  by simp

context nn_pregraph begin 

lemma distinct_head_output: "distinct (map hd (out N))"
  apply (simp add: out_def Let_def mk_out_edge_def o_def) 
  apply (simp only: map_out_neuron_hd_id[of snd _] map_snd_zip new_id_len)
  using new_id_distinct
  apply (simp add: distinct_conv_nth)
  by auto

end 

lemma fold_and_map: "foldr (∧) (map (λx. True) X) True" 
proof (induction "X")
  case Nil
  then show ?case by simp
next
  case (Cons a X)
  then show ?case by simp 
qed

lemma head_output_neurons: "foldr (∧) (map (output_neuron ∘ edge.hd) (out N)) True" 
  apply (simp add: o_def out_def Let_def mk_out_edge_def)
  using fold_and_map 
  by(auto)

context neural_network_digraph begin 

lemma distinct_tail_output: "distinct (map tl (out N))"
  apply (simp add: out_def Let_def mk_out_edge_def o_def) 
  apply (simp only: map_neuron_tl_id[of _ fst _] map_fst_zip new_id_len)
  using distinct_sorted_list_of_set distinct_elem
  by (metis (mono_tags, lifting) distinct.simps(1) list.simps(8) output_layer_ids_subset_neuron_ids 
            sorted_list_of_set.fold_insort_key.infinite sorted_list_of_set.set_sorted_key_list_of_set)

lemma output_is_output: "isOutputl (out N)"
  apply (simp add: isOutputl_def isActivationl_def)
  apply (safe)
  subgoal apply (rule distinct_tail_output)  done
  subgoal apply (rule nn_pregraph.distinct_head_output) apply(rule NN_Digraph.nn_pregraph_empty) done
  subgoal apply (rule head_output_neurons) done
  done

paragraph ‹Dense layer proofs›

lemma dense_is_dense: 
  assumes neuronsNotZero: "n > 0"
  and weightEqualNeurons: "length ω' = n"
  shows "isDenses (set (dense N n ω' φ' α' β'))"
  using assms
  apply (safe)
  apply (simp add: isDenses_def dense_def activation_def)
  apply (simp add: neurons_def output_layer_ids_def output_layer_def output_verts_def mk_edge_def)
  apply (force)
  done

end
end