Theory First_Order_Conditions

section ‹Minimizer Implications›

theory First_Order_Conditions
  imports Minimizers_Definition 
begin

notation norm ("_")

subsection ‹Implications for a Given Minimizer Type›

lemma strict_local_minimizer_imp_local_minimizer:
  assumes "strict_local_minimizer f x_star"
  shows "local_minimizer f x_star"
  by (smt (verit) Diff_iff assms local_minimizer_def singletonD strict_local_minimizer_def strict_local_minimizer_on_def)

lemma isolated_local_minimizer_imp_strict:
  assumes "isolated_local_minimizer f x_star"
  shows "strict_local_minimizer f x_star"
proof -
  ― ‹From isolated\_local\_minimizer we obtain an open set $U$
        such that $x^\star $ is the \emph{only} local minimizer.›
  from assms obtain U where iso_props:
    "isolated_local_minimizer_on f x_star U"
    unfolding isolated_local_minimizer_def
    using isolated_local_minimizer_on_def by blast 

  ― ‹Unpack \texttt{isolated\_local\_minimizer\_on}: 
  \(x^\star\) is a \texttt{local\_minimizer\_on} \(U\), and \(x^\star\) is unique.›

  from iso_props have lm_on: "local_minimizer_on f x_star U"
    unfolding isolated_local_minimizer_on_def using local_minimizer_on_def by presburger
  moreover from iso_props have unique_min: "{x  U. local_minimizer f x} = {x_star}"
    unfolding isolated_local_minimizer_on_def by auto

  ― ‹From \texttt{local\_minimizer\_on}, we have: 
  \(U\) open, \(x^\star \in U\), and \(\forall x \in U.\; f(x^\star) \le f(x)\).›

  from lm_on have open_U: "open U" and x_in_U: "x_star  U" and le_prop: "x  U. f x_star  f x"
    unfolding local_minimizer_on_def by auto

  ― ‹Assume, for contradiction, that \(x^\star\) is not a strict local minimizer.  
      Then there exists \(y \in U \setminus \{x^\star\}\) with \(f(y) \le f(x^\star)\).›

  show "strict_local_minimizer f x_star"
  proof (rule ccontr)
    assume "¬ strict_local_minimizer f x_star"
    then obtain y where y_props:
      "y  U - {x_star}" and "f y  f x_star"
      unfolding strict_local_minimizer_def strict_local_minimizer_on_def
      by (smt (verit, ccfv_SIG) open_U x_in_U)

    from y_props have "y  U" and "y  x_star"
      by auto

    ― ‹We already have \(f(x^\star) \le f(y)\) from @{thm le_prop} and \(y \in U\).  
      Together with \(f(y) \le f(x^\star)\), this yields \(f(x^\star) = f(y)\).›

    from le_prop `y  U` have "f x_star  f y"
      by auto
    with `f y  f x_star` have "f x_star = f y"
      by auto

    ― ‹Now we show that \(y\) is also a local minimizer, contradicting the uniqueness of \(x^\star\).  
      To prove this, we must exhibit an open set \(V\) around \(y\) such that  
      \(f(y) \le f(x)\) for all \(x \in V\).›

    have "local_minimizer f y"
    proof -
      ― ‹Since \(U\) is open and \(y \in U\), there exists an open set \(V \subseteq U\) containing \(y\).›
      obtain V where "open V" and "y  V" and "V  U"
        using `open U` `y  U` open_subset by auto

      ― ‹On this subset, \(f(y) = f(x^\star) \le f(x)\) for all \(x \in V\) (since \(V \subseteq U\)).›

      moreover from le_prop and `f x_star = f y` have "x  V. f y  f x"
        using calculation(3) by auto

      ultimately show "local_minimizer f y"
        unfolding local_minimizer_def local_minimizer_on_def by auto
    qed

    ― ‹Since \(y\) is a local minimizer and \(y \in U\), we have 
      \(y \in \{x \in U.\ \texttt{local\_minimizer}\ f\ x\}\).  
      By uniqueness, \(\{x \in U.\ \texttt{local\_minimizer}\ f\ x\} = \{x^\star\}\),  
      hence \(y = x^\star\), contradicting \(y \neq x^\star\).›

    hence "y  {x  U. local_minimizer f x}"
      by (simp add: y  U)
    with unique_min have "y = x_star" by auto
    thus False using `y  x_star` by contradiction
  qed
  ― ‹Having reached a contradiction under the assumption that \(x^\star\) is not a strict local minimizer,  
      it follows that \(x^\star\) must indeed be a strict local minimizer.›
qed

subsection ‹Characterization of Non-Isolated Minimizers›

lemma not_isolated_minimizer_def:
  assumes "local_minimizer f x_star"
  shows "(x_seq::nat  real. (n. local_minimizer f (x_seq n)  x_seq n  x_star)  ((x_seq  x_star) at_top)) = (¬ isolated_local_minimizer f x_star)"         
proof(safe)
  show "x_seq. isolated_local_minimizer f x_star  n. local_minimizer f (x_seq n)  x_seq n  x_star  x_seq  x_star  False"
  proof - 
    fix x_seq :: "nat   real"
    assume x_star_isolated_minimizer: "isolated_local_minimizer f x_star"
    assume with_sequence_of_local_miniziers: "n. local_minimizer f (x_seq n)  x_seq n  x_star"
    assume converging_to_x_star: "x_seq  x_star"
    have open_ball_with_unique_min: "N > 0. x  ball x_star N. (local_minimizer f x  x = x_star)"
      by (simp add: isolated_local_minimizer_def2 x_star_isolated_minimizer)
    then obtain N where N_pos: "N > 0" and N_prop: "x  ball x_star N. (local_minimizer f x  x = x_star)"
      by blast
    ― ‹Use convergence to show \(x_{\mathrm{seq}}\) eventually lies in \(\mathrm{ball}(x^\star, N)\).›
    from converging_to_x_star have "M. n  M. x_seq n  ball x_star N"
      by (metis LIMSEQ_iff_nz N_pos dist_commute mem_ball)
    then obtain M where M_def: "n  M. x_seq n  ball x_star N"
      by auto
    then show False
      by (meson N_prop linorder_not_le order_less_irrefl with_sequence_of_local_miniziers)
  qed
next
  show "¬ isolated_local_minimizer f x_star  x_seq. (n. local_minimizer f (x_seq n)  x_seq n  x_star)  x_seq  x_star"
  proof(rule ccontr) 
    assume not_isolated_minimizer: "¬isolated_local_minimizer f x_star"
    assume BWOC: "x_seq. (n. local_minimizer f (x_seq n)  x_seq n  x_star)  x_seq  x_star"

    have "N > 0. x. dist x x_star < N  f x_star  f x"
      by (simp add: assms local_minimizer_def2)
    then obtain N where N_pos: "(N::nat) > 0" and x_star_min_on_N_ball: "x. dist x x_star < 1/ real N  f x_star  f x"
      by (metis dual_order.strict_trans ex_inverse_of_nat_less inverse_eq_divide)
    
    obtain S_n :: "nat  real set" where S_n_def: "S_n = (λn. {x. dist x x_star < 1 /  (real n + N)  x  x_star  local_minimizer f x})"
      by blast

    from not_isolated_minimizer
    have non_isolated: "U. local_minimizer_on f x_star U  (y  U. y  x_star  local_minimizer f y)"
      by (smt (verit, best) Collect_cong assms isolated_local_minimizer_def local_minimizer_on_def singleton_conv2)

    have "n::nat. x. x  S_n n"
    proof (intro allI)
      fix n::nat
      have pos_radius: "1 / (real n + N) > 0"
        using N_pos by simp

      obtain U where U_def: "U = ball x_star (1 / (real n + N))"  and open_U: "open U" and U_contains_x_star: "x_star  U"
        using pos_radius by auto

      have U_contained_in_Inverse_N_Ball: "x  U. dist x x_star < 1 / N"
      proof(safe)
        fix x:: real
        assume x_in_U: "x  U"
        then have "dist x x_star < (1 / (real n + N))"
          by (simp add: U_def dist_commute)
        also have  "...  1 / real N"
          by (simp add: N_pos frac_le)
        finally show "dist x x_star < 1 / real N".
      qed

      have ball_non_empty: "y  U. y  x_star  local_minimizer f y"
      proof -
        have "local_minimizer_on f x_star U"
          by (simp add: U_contains_x_star U_contained_in_Inverse_N_Ball local_minimizer_on_def open_U x_star_min_on_N_ball)
        then show "yU. y  x_star  local_minimizer f y"
          by (simp add: non_isolated)
      qed
      then obtain y where y_in_ball: "y  U" and "y  x_star" and "local_minimizer f y"
        by blast
      then show "x. x  S_n n"
        by (smt (verit, best) S_n_def U_def dist_commute mem_Collect_eq mem_ball)
    qed
    then obtain x_seq where x_seq_def: "n. x_seq n  S_n n"
      by metis
    have x_seq_converges_to_x_star: "x_seq  x_star"
    proof (rule LIMSEQ_I)
      fix r :: real
      assume r_pos: "0 < r"
      obtain n_min where n_min_def: "1 / (real n_min + N) < r"
        using real_arch_inverse N_pos r_pos
        by (smt (verit, ccfv_SIG) frac_le inverse_eq_divide inverse_positive_iff_positive)
      show "no. nno. norm (x_seq n - x_star) < r"
      proof (intro exI allI impI)
        fix n
        assume "n  n_min"
        then have n_large_enough: "1 / (real n + N)  1 / (real n_min + N)"
          using N_pos by (subst frac_le, simp_all)
        have "dist (x_seq n) x_star < 1 / (real n + N)"
          using x_seq_def S_n_def by auto
        also have "...  1 / (real n_min + N)"
          using n_large_enough by auto
        also have "... < r"
          using n_min_def by auto
        finally show "norm (x_seq n - x_star) < r"
          by (simp add: dist_real_def)
      qed
    qed
    have "x_seq. (n. local_minimizer f (x_seq n)  x_seq n  x_star)  x_seq  x_star"
      using S_n_def x_seq_converges_to_x_star x_seq_def by blast
    then show False
      using BWOC by auto
  qed
qed

subsection ‹First-Order Condition›

theorem Fermat's_theorem_on_stationary_points:
  fixes f :: "real  real"
  assumes "(f has_derivative f') (at x_min)"
  assumes "local_minimizer f x_min"
  shows "(deriv f) x_min = 0"
  by (metis assms has_derivative_imp differential_zero_maxmin local_minimizer_def)

definition stand_basis_vector :: "'n::finite  real^'n"  ― ‹the i-th standard basis vector›
  where   "stand_basis_vector i = (χ j. if j = i then 1 else 0)"

lemma stand_basis_vector_index[simp]:   "(stand_basis_vector i) $ j = (if j = i then (1::real) else 0)"
  by (simp add: stand_basis_vector_def)

lemma stand_basis_vector_nonzero[simp]: "stand_basis_vector i  0"
  by (smt (verit, del_insts) stand_basis_vector_index zero_index)

lemma norm_stand_basis_vector[simp]:    "norm (stand_basis_vector i) = 1"
  by (smt (verit, best) axis_nth component_le_norm_cart norm_axis_1 norm_le_componentwise_cart real_norm_def stand_basis_vector_index)

lemma inner_stand_basis_vector[simp]:   "inner (stand_basis_vector i) (stand_basis_vector j) = (if i = j then 1 else 0)"
  by (metis axis_nth cart_eq_inner_axis norm_eq_1 norm_stand_basis_vector stand_basis_vector_index vector_eq)

lemma Basis_characterisation:
  "stand_basis_vector i  (Basis :: (real^'n) set)"   and
  "b(Basis::(real^'n)set). i. b = stand_basis_vector i"
  by (metis (no_types, lifting) Basis_real_def axis_in_Basis_iff cart_eq_inner_axis 
      inner_stand_basis_vector insert_iff norm_axis_1 norm_eq_1 stand_basis_vector_index vector_eq,
      metis axis_index axis_nth cart_eq_inner_axis inner_stand_basis_vector stand_basis_vector_index vector_eq)

lemma stand_basis_expansion:
  fixes x :: "real^'n"
  shows "x = ( jUNIV. (x $ j) *R stand_basis_vector j)"
proof -
  have "( jUNIV. (x $ j) *R stand_basis_vector j) $ k = x $ k" for k
  proof -
    have "( jUNIV. (x $ j) *R stand_basis_vector j) $ k
          = ( jUNIV. (x $ j) * (stand_basis_vector j $ k))"
      by simp
    also have " = ( jUNIV. (x $ j) * (if j = k then 1 else 0))"
      by (smt (verit, best) stand_basis_vector_index sum.cong)
    also have " = ( jUNIV. (if j = k then x $ j else 0))"
      by (smt (verit, best) mult_cancel_left1 mult_cancel_right1 sum.cong)
    also have " = x $ k"
      by (subst sum.delta, simp_all)
    finally show ?thesis.
  qed
  thus ?thesis
    by (simp add: vec_eq_iff)
qed

lemma has_derivative_affine:
  fixes a v :: "'a::real_normed_vector"
  shows "((λt. a + t *R v) has_derivative (λh. h *R v)) (at x)"
  unfolding has_derivative_def
proof safe
  have "a + y *R v - (a + netlimit (at x) *R v) - (y - netlimit (at x)) *R v = 0"  if "y  netlimit (at x)" for y
    by (simp add: cross3_simps(32))
  then show "(λy. (a + y *R v - (a + netlimit (at x) *R v) - (y - netlimit (at x)) *R v) /R y - netlimit (at x)) x 0"
    by (simp add: scaleR_left_diff_distrib)
  show "bounded_linear (λh. h *R v)"
    by (simp add: bounded_linearI' vector_space_assms(2))
qed

theorem Fermat's_theorem_on_stationary_points_mult:
  fixes f :: "real ^ 'n  real"
  assumes der_f: "(f has_derivative f') (at x_min)"
  assumes min_f: "local_minimizer f x_min"
  shows "GDERIV f x_min :> 0"
proof - 
 ― ‹Show that \(f'\) kills every standard-basis vector.›

  {
    fix i :: 'n
    ― ‹Define the 1‑D slice \(g_i(t) = f(x_{\min} + t \cdot e_i)\).›
    let ?g = "λt::real. f (x_min + t *R stand_basis_vector i)"

    ― ‹Chain rule gives \(g_i'(0) = f'(e_i)\).›
    from has_derivative_affine have g_der:
      "((λt. f (x_min + t *R stand_basis_vector i))
      has_derivative (λh. f' (h *R stand_basis_vector i))) (at 0)"
      by (metis (no_types) arithmetic_simps(50) der_f has_derivative_compose scaleR_simps(1))

    ― ‹\(0\) is a local minimizer of \(g_i\) because \(x_{\min}\) is one for \(f\).›
    have g_min: "local_minimizer ?g 0"      
    proof(rule local_minimizer_from_neighborhood)
      obtain δ where δ_pos: "δ > 0"
        and mono: "x. dist x_min x < δ  f x  f x_min"
        by (metis assms(2) dist_commute local_minimizer_def2)

      have "x. ¦x - 0¦ < δ  f (x_min + 0 *R stand_basis_vector i)  f (x_min + x *R stand_basis_vector i)"
        using mono by (simp add: dist_norm)
      then show "δ>0. x. ¦x - 0¦ < δ  f (x_min + 0 *R stand_basis_vector i)  f (x_min + x *R stand_basis_vector i)"
        using δ_pos by blast
    qed

    ― ‹Apply the 1-D Fermat lemma to \(g_i\).›
    from Fermat's_theorem_on_stationary_points 
    have "f' (stand_basis_vector i) = 0"
      using g_der g_min  by (metis has_derivative_imp scale_one)      
  }

    ― ‹Collecting the result for every \(i\):›
  hence zero_on_basis: "i. f' (stand_basis_vector i) = 0".

  ― ‹Use linearity and the coordinate expansion to show \(f' = 0\) everywhere.›
  {
    fix v :: "real^'n"
    ― ‹Expand \(v = \sum_j v_j \cdot e_j\) and push \(f'\) through the finite sum.›
    have "f' v = 0"
    proof -
      have "f' v = f' (jUNIV. (v $ j) *R stand_basis_vector j)"
        by (metis stand_basis_expansion)
      also have " = (jUNIV. (v $ j) *R f' (stand_basis_vector j))"
        by (smt (verit) assms differential_zero_maxmin local_minimizer_def scale_eq_0_iff sum.neutral)
      also have " = 0"
        using zero_on_basis by simp
      finally show ?thesis.
    qed
  }
  hence f'_zero: "f' = (λ_. 0)"
    by (simp add: fun_eq_iff)

  ― ‹Translate \(f' = 0\) into the gradient statement.›
  have "(f has_derivative (λh. 0)) (at x_min)"
    using der_f f'_zero by simp
  hence "GDERIV f x_min :> (0::real^'n)"
    by (simp add: gderiv_def)
  thus ?thesis.
qed

end