(***********************************************************************************
 * Copyright (c) University of Exeter, UK
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 ***********************************************************************************)

subsection‹Sequential Layers (\thy)›
theory 
  NN_Layers
  imports
  Activation_Functions
begin 

text‹
  In this theory, we model feed-forward neural networks as ``computational layers'', following 
  the structure of TensorFlow~\<^cite>‹"abadi.ea:tensorflow:2015"› closely.
›

record InOutRecord = 
       name  :: String.literal
       units :: nat


record ('b) ActivationRecord = InOutRecord +
       φ :: 'b


record ('a, 'b, 'c) LayerRecord = ('b) ActivationRecord + 
       β :: 'a
       ω :: 'c 

datatype ('a, 'b, 'c) layer = In InOutRecord                    
                        | Out InOutRecord  
                        | Dense ('a,'b, 'c) LayerRecord 
                        | Activation ('b) ActivationRecord 

fun unitsl where 
   "unitsl (In l) = units l"
 | "unitsl (Out l) = units l"
 | "unitsl (Dense l) = units l"
 | "unitsl (Activation l) = units l"

lemmas [nn_layer] = InOutRecord.simps ActivationRecord.simps LayerRecord.simps layer.simps unitsl.simps
                        
text‹
  The datatype @{type "layer"} models the currently supported layer types.
›

text ‹As we represent a network as a list of layers, we also
support different layer types and their computations. Currently, our sequential
layers model supports four layer types: @{term "In (input::InOutRecord)"} (input layer), 
@{term "Out (output::InOutRecord)"} (output layer), 
@{term "Dense (dense_layer::('a, 'b, 'c) LayerRecord)"} (dense layer), and 
@{term "Activation (activation_layer::'b ActivationRecord)"} (activation layer).
As we allow for the abstraction of activation functions, we abstract from the actual type
of the activation function (modelled by the type variable @{typ "'b"}) and from the 
actual types of bias and weights (modelled by the type variables @{typ "'a"} and @{typ "'c"}, 
respectively).

Therefore, we do not need to model TensorFlow's Lambda layer explicitly
(which is TensorFlow's mechanism for supporting custom activation functions).›

text ‹Each @{type "LayerRecord"} contains the activation function, bias, and weights of a layer 
(@{term "φ"}, @{term "β"}, and @{term "ω"}, respectively), while an @{type "ActivationRecord"} 
only contains the abstracted activation function.›
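
text ‹As a minimal sketch of how these records can be instantiated (the layer name, the 
activation key @{term "STR ''relu''"}, and the concrete instantiation of the type variables 
with @{typ "real list"} for the bias and @{typ "real list list"} for the weights are purely 
illustrative), a dense layer with two inputs and three units could be modelled as follows:›

definition example_dense :: "(real list, String.literal, real list list) layer" where
  "example_dense = Dense ⦇ name  = STR ''dense_1'', 
                           units = 3, 
                           φ     = STR ''relu'', 
                           β     = [0, 0, 0], 
                           ω     = [[1, 0], [0, 1], [1, 1]] ⦈"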

fun isIn where
    "isIn (In _) = True"
  | "isIn _      = False"

fun isOut where
    "isOut (Out _) = True"
  | "isOut _       = False"

fun isInternal where
    "isInternal (Out _) = False"
  | "isInternal (In _)  = False"
  | "isInternal _       = True"

lemma isInternal': "isInternal n = (¬ (isIn n) ∧ ¬ (isOut n))"
  by (metis isIn.elims(2) isIn.simps(1) isInternal.elims(3) isInternal.simps(1) isInternal.simps(2) 
            isOut.elims(2) isOut.simps(1)) 

record ('a, 'b, 'c) neural_network_seq_layers = 
  layers :: "('a, 'b, 'c) layer list"
  activation_tab :: "'b ⇒ (('a ⇒ 'a) option)"

lemmas [nn_layer] = neural_network_seq_layers.simps

text ‹For this encoding of a neural network, we mostly follow TensorFlow's Sequential model 
\<^cite>‹"abadi.ea:tensorflow:2015"› and represent our network as a sequential list of layers 
together with an abstract table of activation functions, allowing for extensible and customisable 
functionality. The record @{typ "('a, 'b, 'c) neural_network_seq_layers"} represents our network, 
where @{typ "'a"} is the type variable representing the type of our bias, @{typ "'b"} is the type 
variable representing the type of the activation function, and @{typ "'c"} is the type variable 
representing the type of our weights.›
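
text ‹Continuing the illustrative sketch from above (all names are hypothetical), a complete 
network with one dense layer can be obtained by combining an input layer, the dense layer, and 
an output layer with an activation table that maps the key @{term "STR ''relu''"} to a 
component-wise ReLU on real vectors:›

definition example_nn :: "(real list, String.literal, real list list) neural_network_seq_layers" 
where
  "example_nn = ⦇ layers = [ In  ⦇ name = STR ''in'',  units = 2 ⦈,
                             example_dense,
                             Out ⦇ name = STR ''out'', units = 3 ⦈ ],
                  activation_tab = (λk. if k = STR ''relu'' 
                                        then Some (map (max 0)) 
                                        else None) ⦈"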

fun out_deg_layer 
where 
   "out_deg_layer (In l) = (units l)"
 | "out_deg_layer (Out l) = (units l)"
 | "out_deg_layer (Activation l) = units l"
 | "out_deg_layer (Dense l) = units l"

fun units_layer where
   "units_layer (In l) = units l"
 | "units_layer (Out l) = units l"
 | "units_layer (Activation l) = units l" 
 | "units_layer (Dense l) = units l"

fun φ_layer where
   "φ_layer (In l) = None"
 | "φ_layer (Out l) = None"
 | "φ_layer (Activation l) = Some (φ l)" 
 | "φ_layer (Dense l) = Some (φ l)"

fun in_deg_layer where
  "in_deg_layer (In l) = units l"
| "in_deg_layer (Out l) = units l"
| "in_deg_layer (Activation l) =  units l" 
| "in_deg_layer (Dense l) = length (ω l ! 0)"

lemmas [nn_layer] = out_deg_layer.simps units_layer.simps φ_layer.simps
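
text ‹Note that, by the definition of @{term "in_deg_layer"}, the input degree of a dense layer 
is the length of a row of @{term "ω"}, so a weight matrix is stored row-wise with one row per 
unit. For the illustrative @{term "example_dense"} from above, with three rows of length two, 
this yields:›

value "in_deg_layer example_dense"  ― ‹evaluates to 2, the length of a row of the weight matrix›
value "out_deg_layer example_dense" ― ‹evaluates to 3, the number of units›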

definition 
  "out_deg_NN N = (if layers N = [] then 0 else (units_layer ∘ last ∘ layers) N)"

definition 
  "in_deg_NN N = (if layers N = [] then 0 else (units_layer ∘ hd ∘ layers) N)"
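
text ‹For the illustrative @{term "example_nn"} from above, the degrees of the network are 
determined by its first and last layer:›

value "in_deg_NN example_nn"  ― ‹evaluates to 2, the units of the input layer›
value "out_deg_NN example_nn" ― ‹evaluates to 3, the units of the output layer›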


ML‹
signature CONVERT_TENSORFLOW_SEQ_LAYER = 
  sig
    type layer_config = 
      { ActivationRecordC: term, DenseC: term, InC: term, InOutRecordC: term, 
        LayerRecordC: term, OutC: term, 
        activation_term: Activation_Term.mode, 
        biasT_conv: term -> term, 
        layer_def: thm, 
        layer_extC: term, 
        layersT: typ, 
        locale_name: string, 
        ltype: typ, 
        phiT: typ, 
        valid_activation_tab: thm, 
        weightsT_conv: term -> int -> term }
    val def_seq_layer_nn: layer_config -> bstring -> typ -> typ -> Nano_Json_Type.json 
                          -> local_theory -> Proof.context
  end
›

ML_file‹Tools/Convert_TensorFlow_Seq_Layer.ML› 

end