Theory Grid_Layers_Matrix

(***********************************************************************************
 * Copyright (c) University of Exeter, UK
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 ***********************************************************************************)

subsection‹Layer-based Modelling using List Types (\thy)›

theory
  Grid_Layers_Matrix
  imports
  Grid_Layers
  NN_Layers_Matrix_Main
  Jordan_Normal_Form.Matrix_Impl (* improves performance of methods relying on code generation, e.g., normalization *)
begin

declare[[nn_proof_mode = "eval"]]
import_TensorFlow grid file "model/trained-model_binary-step_linear/model.json"
  as seq_layer_matrix
declare[[nn_proof_mode = "nbe"]]

text ‹
Our new Isabelle/Isar command @{term "import_TensorFlow"} encodes the neural network model stored in 
the file model.json as sequence of layers, i.e., the formal encoding we developed. Our datatype 
package also proves that the imported model complies with the requirements of our formal model as 
well as proves various auxiliary properties (e.g., conversion between different representations)
that can be useful during interactive verification.›

import_data_file "model/trained-model_binary-step_linear/input_small.txt"
  defining inputs_small
import_data_file "model/trained-model_binary-step_linear/expectations_small.txt" 
  defining expectations_small

import_data_file "model/trained-model_binary-step_linear/input.txt"        
  defining inputs
import_data_file "model/trained-model_binary-step_linear/expectations.txt" 
  defining expectations
import_data_file "model/trained-model_binary-step_linear/predictions.txt" 
  defining predictions

text ‹To ensure that our formalisation is a faithful representation of the neural 
networks that we defined in TensorFlow, we provide a framework that supports 
the import of trained TensorFlow networks and their test data. We can then 
use this to evaluate our Isabelle network and validate that the output is the 
same, hence providing confidence that our formalisation is accurate.

We can import text files containing NumPy arrays of our test inputs, 
expectations and predictions from our trained TensorFlow network.›


thm grid.Layers_def
thm grid.Layers.dense_input_def
thm grid.Layers.OUTPUT_def
thm grid.layer_defs
thm grid.Layers_def
thm grid.φ_grid.simps
thm grid.NeuralNet_def
thm inputs_def
thm predictions_def

lemmas grid_defs = grid.Layers_def grid.layer_defs grid.NeuralNet_def
lemmas activation_defs = identity_def binary_step_def


subsubsection ‹Proving using the matrix prediction function.›

lemma grid_closed_mat [simp]: 
      ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list xs) = (case xs of  
      [x3, x2, x1, x0] ⇒ let y = 2 * (x3 + (x2 * 2 + (x1 * 4 + x0 * 8))) 
       in Some (
         vec_of_list[(if y - 7 ≤ 0 then 0 else 1) +
            ((if y - 11 ≤ 0 then 0 else 1) + ((if y - 21 ≤ 0 then 0 else 1) 
               + ((if y - 25 ≤ 0 then 0 else 1) - (if y - 23 ≤ 0 then 0 else 1)) 
                 - (if y - 19 ≤ 0 then 0 else 1)) - (if y - 9 ≤ 0 then 0 else 1)) 
                    - (if y - 5 ≤ 0 then 0 else 1) + 1,
          (if y - 5 ≤ 0 then 0 else 1) + ((if y - 23 ≤ 0 then 0 else 1) 
              - (if y - 25 ≤ 0 then 0 else 1) - (if y - 7 ≤ 0 then 0 else 1)),
          (if y - 9 ≤ 0 then 0 else 1) + ((if y - 19 ≤ 0 then 0 else 1) 
              - (if y - 21 ≤ 0 then 0 else 1) - (if y - 11 ≤ 0 then 0 else 1))]
       )
       | _ ⇒ None)›
  apply (auto simp add: predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def grid_defs activation_defs split:list.split)[1]
  apply(normalization)
  by (simp_all add: grid_defs predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def activation_defs dom_def dim_col_mat_of_col_list dim_row_mat_of_col_list)

lemma grid_img_defined_mat: ‹((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list xs)) ≠ None) = (dim_vec (vec_of_list xs) = 4)›
  by(auto simp add:Let_def split:list.split)

lemma grid_img_defined_mat': ‹(∃ y. (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list xs)) = Some (vec_of_list y)) = (dim_vec (vec_of_list xs) = 4)›
proof(cases "(dim_vec (vec_of_list xs)) = 4")
  case True
  then show ?thesis
    by (metis grid_img_defined_mat option.exhaust vec_list) 
next
  case False
  then show ?thesis 
    using grid_img_defined_mat by blast
qed
   
lemma grid_image_mat:
  assumes ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list xs) = Some y›
  shows ‹y ∈ { vec_of_list [0, 0, 1], vec_of_list [0, 1, 0], vec_of_list [1, 0, 0]}›
  using assms grid_img_defined_mat 
  apply(simp split:list.splits)
  unfolding Let_def vec_of_list_def vCons_def
  apply(simp)
  by auto

lemma ran_aux:          
  assumes ‹∀ x . f x ≠ None ⟶ the (f x) ∈ Y›
  shows ‹ran (f) ⊆ Y›
  unfolding ran_def using assms   
  by force

lemma grid_image_approx_mat:
 ‹ran (λ x . predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list x)) 
              ⊆ {vec_of_list [0, 0, 1], vec_of_list [0, 1, 0], vec_of_list [1, 0, 0]}›
  using grid_image_mat ran_aux
  by (metis (no_types, lifting) option.exhaust_sel)
  

text ‹The lemma @{term "grid_image_approx"} shows that the output of the classification
is never ambiguous (i.e., two or more classification output having the value 1). ›

lemma grid_dom_mat: ‹dom ( λ x . predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet  (vec_of_list x)) = {a. length a = 4}›
  by (simp add: grid_defs predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def activation_defs dom_def dim_col_mat_of_col_list dim_row_mat_of_col_list)

definition "range_of x = (if x = (0::real) then {0..0.04::real} else {0.96..1})"

lemma ‹x3 ∈ {0.96..1.00} ∧ x2 ∈ {0.96..1.00} 
     ∧ x1 ∈ {0.00..0.04} ∧ x0 ∈ {0.00..0.04} ⟹  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list [x3, x2, x1, x0]) = Some(vec_of_list [0, 1, 0])›
    by(simp add: Let_def, normalization, argo)
lemma ‹x3 ∈ {0.00..0.04} ∧ x2 ∈ {0.00..0.04}
     ∧ x1 ∈ {0.96..1.00} ∧ x0 ∈ {0.96..1.00} ⟹ predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list [x3, x2, x1, x0]) = Some(vec_of_list [0, 1, 0])›
    by(simp add: Let_def, normalization, argo)
lemma ‹x3 ∈ {0.95..1.00} ∧ x2 ∈ {0.00..0.05} 
     ∧ x1 ∈ {0.95..1.00} ∧ x0 ∈ {0.00..0.05} ⟹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list [x3, x2, x1, x0]) = Some(vec_of_list [0, 0, 1])›
  by(simp add: Let_def, normalization, argo)
lemma ‹x3 ∈ {0.00..0.1} ∧ x2 ∈ {0.96..1.00} 
     ∧ x1 ∈ {0.00..0.1} ∧ x0 ∈ {0.96..1.00} ⟹  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet (vec_of_list [x3, x2, x1, x0]) = Some(vec_of_list [0, 0, 1])›
   by(simp add: Let_def, normalization, argo)

text ‹A common definition of safety in neural networks is the requirement that small changes to an 
input should not change the classification. For this grid example, we express such a verification 
goal as shown above, where we set a small bound of noise on the input, and verify that the output 
classification remains constant.›

subsubsection ‹Proving using the list to matrix prediction›

text ‹The following proofs on the grid example use our wrapper function that converts lists to 
vectors, uses the matrix based prediction function and converts the output back into a list›

lemma grid_closed' [simp]: 
      ‹predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet xs = (case xs of  
      [x3, x2, x1, x0] ⇒ let y = 2 * (x3 + (x2 * 2 + (x1 * 4 + x0 * 8))) 
       in Some (
         [(if y - 7 ≤ 0 then 0 else 1) +
            ((if y - 11 ≤ 0 then 0 else 1) + ((if y - 21 ≤ 0 then 0 else 1) 
               + ((if y - 25 ≤ 0 then 0 else 1) - (if y - 23 ≤ 0 then 0 else 1)) 
                 - (if y - 19 ≤ 0 then 0 else 1)) - (if y - 9 ≤ 0 then 0 else 1)) 
                    - (if y - 5 ≤ 0 then 0 else 1) + 1,
          (if y - 5 ≤ 0 then 0 else 1) + ((if y - 23 ≤ 0 then 0 else 1) 
              - (if y - 25 ≤ 0 then 0 else 1) - (if y - 7 ≤ 0 then 0 else 1)),
          (if y - 9 ≤ 0 then 0 else 1) + ((if y - 19 ≤ 0 then 0 else 1) 
              - (if y - 21 ≤ 0 then 0 else 1) - (if y - 11 ≤ 0 then 0 else 1))]
       )
       | _ ⇒ None)›
   apply (auto simp add: predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m'_def predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def grid_defs activation_defs split:list.split)[1]
       apply (normalization)
       apply(simp add:dim_col_mat_of_col_list dim_row_mat_of_col_list)+
done

lemma grid_img_defined': ‹((predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet xs) ≠ None) = (length xs = 4)›
  by(auto simp add:Let_def split:list.split)

lemma grid_img_defined: ‹(∃ y. (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet xs) = Some y) = (length xs = 4)›
  using grid_img_defined' by auto 

lemma grid_image:
  assumes ‹(predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet xs) ≠ None›
  shows ‹the (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet xs) ∈ { [0, 0, 1], 
                                                    [0, 1, 0],
                                                    [1, 0, 0]}›
  using assms grid_img_defined' apply simp 
  by(auto simp add:Let_def split:list.split) 

lemma grid_image_approx:
 ‹ran (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet) ⊆ {[0, 0, 1], [0, 1, 0], [1, 0, 0]}›
  apply(simp only:ran_def) 
  using grid_image by force

text ‹The lemma @{term "grid_image_approx"} shows that the output of the classification
is never ambiguous (i.e., two or more classification output having the value 1).›

lemma grid_dom': ‹dom (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet) = {a. length a = 4} ›
  unfolding predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m'_def   predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def
  by (smt (verit, best) Collect_cong dom_def grid_img_defined' predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m'_def predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def) 

lemma grid_dom_mat': ‹dom ( λ x . predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m grid.NeuralNet  (vec_of_list x)) = {a. length a = 4}›
  by (simp add: grid_defs predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m_def activation_defs dom_def dim_col_mat_of_col_list dim_row_mat_of_col_list)


lemma ‹x3 ∈ {0.96..1.00} ∧ x2 ∈ {0.96..1.00} 
     ∧ x1 ∈ {0.00..0.04} ∧ x0 ∈ {0.00..0.04} ⟹  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet [x3, x2, x1, x0] = Some [0, 1, 0]›
  by(simp add: Let_def)
lemma ‹x3 ∈ {0.00..0.04} ∧ x2 ∈ {0.00..0.04} 
     ∧ x1 ∈ {0.96..1.00} ∧ x0 ∈ {0.96..1.00} ⟹  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet [x3, x2, x1, x0] = Some [0, 1, 0]›
  by(simp add: Let_def)
lemma ‹x3 ∈ {0.95..1.00} ∧ x2 ∈ {0.00..0.05} 
     ∧ x1 ∈ {0.95..1.00} ∧ x0 ∈ {0.00..0.05} ⟹  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet [x3, x2, x1, x0] = Some [0, 0, 1]›
  by(simp add: Let_def)
lemma ‹x3 ∈ {0.00..0.1} ∧ x2 ∈ {0.96..1.00} 
     ∧ x1 ∈ {0.00..0.1} ∧ x0 ∈ {0.96..1.00} ⟹  predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet [x3, x2, x1, x0] = Some [0, 0, 1]›
  by(simp add: Let_def)

text ‹A common definition of safety in neural networks is the requirement that small changes to an 
input should not change the classification. For this grid example, we express such a verification 
goal as shown above, where we set a small bound of noise on the input, and verify that the output 
classification remains constant.›

lemma grid_meets_predictions: 
      ‹⊨⇩_i⇩_l {inputs} (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet) {intervals_of_l 0.000001 predictions}›
  by(simp add: ensure_testdata_interval_list_def upper_Interval lower_Interval predictions_def 
               intervals_of_l_def inputs_def in_set_interval) 
 
lemma grid_meets_expectations_max_classifier:
      ‹⊨⇩_l {inputs_small} (predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet) {expectations_small}›
  by (simp add: ensure_testdata_max_list_def expectations_small_def inputs_small_def)

lemma grid_min_delta_classifier: 
      ‹1.0 ⊨ predict⇩_s⇩_e⇩_q⇩_{_}⇩_l⇩_a⇩_y⇩_e⇩_r⇩_{_}⇩_m' grid.NeuralNet› 
  unfolding ensure_delta_min_def Prediction_Utils.δ⇩_m⇩_i⇩_n_def max⇩_l⇩_i⇩_s⇩_t_def
  using grid_image_approx
  by auto

text ‹The lemmas @{thm [source] "grid_meets_predictions"},  @{thm [source] "grid_meets_expectations_max_classifier"}
and @{thm [source] "grid_min_delta_classifier"} show that our definition of the grid neural network computes 
the same prediction as the TensorFlow trained network. ›

end