Theory Asymptotic_Qualitative_Properties
section ‹Asymptotic and Qualitative Properties›
theory Asymptotic_Qualitative_Properties
imports Derivative_Identities_Smoothness
begin
subsection ‹Limits at Infinity of Sigmoid and its Derivative›
text ‹
--- Asymptotic Behaviour ---
We have
\[
\lim_{x\to+\infty} \sigma(x) = 1,
\quad
\lim_{x\to-\infty} \sigma(x) = 0.
\]
›
lemma lim_sigmoid_infinity: "((λx. sigmoid x) ⤏ 1) at_top"
unfolding sigmoid_def by real_asymp
lemma lim_sigmoid_minus_infinity: "(sigmoid ⤏ 0) at_bot"
unfolding sigmoid_def by real_asymp
lemma sig_deriv_lim_at_top: "(deriv sigmoid ⤏ 0) at_top"
proof (subst tendsto_at_top_epsilon_def, clarify)
fix ε :: real
assume ε_pos: "0 < ε"
text ‹Using the fact that \(\sigma(x) \to 1\) as \(x \to +\infty\).›
obtain N where N_def: "∀x ≥ N. ¦sigmoid x - 1¦ < ε / 2"
using lim_sigmoid_infinity[unfolded tendsto_at_top_epsilon_def] ε_pos
by (metis half_gt_zero)
have deriv_bound: "∀x ≥ N. ¦deriv sigmoid x¦ ≤ ¦sigmoid x - 1¦"
proof (clarify)
fix x
assume "x ≥ N"
hence "¦deriv sigmoid x¦ = ¦sigmoid x - 1 + 1¦ * ¦1 - sigmoid x¦"
by (simp add: abs_mult sigmoid_derivative)
also have "... ≤ ¦sigmoid x - 1¦"
by (smt (verit) mult_cancel_right1 mult_right_mono sigmoid_range)
finally show "¦deriv sigmoid x¦ ≤ ¦sigmoid x - 1¦".
qed
have "∀x ≥ N. ¦deriv sigmoid x¦ < ε"
proof (clarify)
fix x
assume "x ≥ N"
hence "¦deriv sigmoid x¦ ≤ ¦sigmoid x - 1¦"
using deriv_bound by simp
also have "... < ε / 2"
using `x ≥ N` N_def by simp
also have "... < ε"
using ε_pos by simp
finally show "¦deriv sigmoid x¦ < ε" .
qed
then show "∃N::real. ∀x≥N. ¦deriv sigmoid x - (0::real)¦ < ε"
by (metis diff_zero)
qed
lemma sig_deriv_lim_at_bot: "(deriv sigmoid ⤏ 0) at_bot"
proof (subst tendsto_at_bot_epsilon_def, clarify)
fix ε :: real
assume ε_pos: "0 < ε"
text ‹Using the fact that \(\sigma(x) \to 0\) as \(x \to -\infty\).›
obtain N where N_def: "∀x ≤ N. ¦sigmoid x - 0¦ < ε / 2"
using lim_sigmoid_minus_infinity[unfolded tendsto_at_bot_epsilon_def] ε_pos
by (meson half_gt_zero)
have deriv_bound: "∀x ≤ N. ¦deriv sigmoid x¦ ≤ ¦sigmoid x - 0¦"
proof (clarify)
fix x
assume "x ≤ N"
hence "¦deriv sigmoid x¦ = ¦sigmoid x - 0 + 0¦ * ¦1 - sigmoid x¦"
by (simp add: abs_mult sigmoid_derivative)
also have "... ≤ ¦sigmoid x - 0¦"
by (smt (verit, del_insts) mult_cancel_left2 mult_left_mono sigmoid_range)
finally show "¦deriv sigmoid x¦ ≤ ¦sigmoid x - 0¦".
qed
have "∀x ≤ N. ¦deriv sigmoid x¦ < ε"
proof (clarify)
fix x
assume "x ≤ N"
hence "¦deriv sigmoid x¦ ≤ ¦sigmoid x - 0¦"
using deriv_bound by simp
also have "... < ε / 2"
using `x ≤ N` N_def by simp
also have "... < ε"
using ε_pos by simp
finally show "¦deriv sigmoid x¦ < ε".
qed
then show "∃N::real. ∀x ≤ N. ¦deriv sigmoid x - (0::real)¦ < ε"
by (metis diff_zero)
qed
subsection ‹Curvature and Inflection›
lemma second_derivative_sigmoid_positive_on:
assumes "x < 0"
shows "Nth_derivative 2 sigmoid x > 0"
proof -
have "1 - 2 * sigmoid x > 0"
using assms sigmoid_left_dom_range by force
then show "Nth_derivative 2 sigmoid x > 0"
by (simp add: sigmoid_range sigmoid_second_derivative)
qed
lemma second_derivative_sigmoid_negative_on:
assumes "x > 0"
shows "Nth_derivative 2 sigmoid x < 0"
proof -
have "1 - 2 * sigmoid x < 0"
by (smt (verit) assms sigmoid_strictly_increasing sigmoid_symmetry)
then show "Nth_derivative 2 sigmoid x < 0"
by (simp add: mult_pos_neg sigmoid_range sigmoid_second_derivative)
qed
lemma sigmoid_inflection_point:
"Nth_derivative 2 sigmoid 0 = 0"
by (simp add: sigmoid_alt_def sigmoid_second_derivative)
subsection ‹Monotonicity and Bounds of the First Derivative›
lemma sigmoid_positive_derivative:
"deriv sigmoid x > 0"
by (simp add: sigmoid_derivative sigmoid_range)
lemma sigmoid_deriv_0:
"deriv sigmoid 0 = 1/4"
proof -
have f1: "1 / (1 + 1) = sigmoid 0"
by (simp add: sigmoid_def)
then have f2: "∀r. sigmoid 0 * (r + r) = r"
by simp
then have f3: "∀n. sigmoid 0 * numeral (num.Bit0 n) = numeral n"
by (metis (no_types) numeral_Bit0)
have f4: "∀r. sigmoid r * sigmoid (- r) = deriv sigmoid r"
using sigmoid_derivative sigmoid_symmetry by presburger
have "sigmoid 0 = 0 ⟶ deriv sigmoid 0 = 1 / 4"
using f1 by force
then show ?thesis
using f4 f3 f2 by (metis (no_types) add.inverse_neutral divide_divide_eq_right nonzero_mult_div_cancel_left one_add_one zero_neq_numeral)
qed
lemma deriv_sigmoid_increase_on_negatives:
assumes "x2 < 0"
assumes "x1 < x2"
shows "deriv sigmoid x1 < deriv sigmoid x2"
by(rule DERIV_pos_imp_increasing, simp add: assms(2), metis assms(1) deriv_sigmoid_has_deriv
dual_order.strict_trans linorder_not_le nle_le second_derivative_alt_def second_derivative_sigmoid_positive_on)
lemma deriv_sigmoid_decreases_on_positives:
assumes "0 < x1"
assumes "x1 < x2"
shows "deriv sigmoid x2 < deriv sigmoid x1"
by(rule DERIV_neg_imp_decreasing, simp add: assms(2), metis assms(1) deriv_sigmoid_has_deriv
dual_order.strict_trans linorder_not_le nle_le second_derivative_alt_def second_derivative_sigmoid_negative_on)
lemma sigmoid_derivative_upper_bound:
assumes "x≠ 0"
shows "deriv sigmoid x < 1/4"
proof(cases "x ≤ 0")
assume "x≤0"
then have neg_case: "x < 0"
using assms by linarith
then have "deriv sigmoid x < deriv sigmoid 0"
proof(rule DERIV_pos_imp_increasing_open)
show "⋀xa::real. x < xa ⟹ xa < 0 ⟹ ∃y::real. (deriv sigmoid has_real_derivative y) (at xa) ∧ 0 < y"
by (metis (no_types) deriv_sigmoid_has_deriv second_derivative_alt_def second_derivative_sigmoid_positive_on)
show "continuous_on {x..0::real} (deriv sigmoid)"
by (meson DERIV_atLeastAtMost_imp_continuous_on deriv_sigmoid_has_deriv)
qed
then show "deriv sigmoid x < 1/4"
by (simp add: sigmoid_deriv_0)
next
assume "¬ x ≤ 0"
then have "0 < x"
by linarith
then have "deriv sigmoid x < deriv sigmoid 0"
proof(rule DERIV_neg_imp_decreasing_open)
show "⋀xa::real. 0 < xa ⟹ xa < x ⟹ ∃y::real. (deriv sigmoid has_real_derivative y) (at xa) ∧ y < 0"
by (metis (no_types) deriv_sigmoid_has_deriv second_derivative_alt_def second_derivative_sigmoid_negative_on)
show "continuous_on {0..x::real} (deriv sigmoid)"
by (meson DERIV_atLeastAtMost_imp_continuous_on deriv_sigmoid_has_deriv)
qed
then show "deriv sigmoid x < 1/4"
by (simp add: sigmoid_deriv_0)
qed
corollary sigmoid_derivative_range:
"0 < deriv sigmoid x ∧ deriv sigmoid x ≤ 1/4"
by (smt (verit, best) sigmoid_deriv_0 sigmoid_derivative_upper_bound sigmoid_positive_derivative)
subsection ‹Sigmoidal and Heaviside Step Functions›
definition sigmoidal :: "(real ⇒ real) ⇒ bool" where
"sigmoidal f ≡ (f ⤏ 1) at_top ∧ (f ⤏ 0) at_bot"
lemma sigmoid_is_sigmoidal: "sigmoidal sigmoid"
unfolding sigmoidal_def
by (simp add: lim_sigmoid_infinity lim_sigmoid_minus_infinity)
definition heaviside :: "real ⇒ real" where
"heaviside x = (if x < 0 then 0 else 1)"
lemma heaviside_right: "x ≥ 0 ⟹ heaviside x = 1"
by (simp add: heaviside_def)
lemma heaviside_left: "x < 0 ⟹ heaviside x = 0"
by (simp add: heaviside_def)
lemma heaviside_mono: "x < y ⟹ heaviside x ≤ heaviside y"
by (simp add: heaviside_def)
lemma heaviside_limit_neg_infinity:
"(heaviside ⤏ 0) at_bot"
by(rule tendsto_eventually, subst eventually_at_bot_dense, meson heaviside_def)
lemma heaviside_limit_pos_infinity:
"(heaviside ⤏ 1) at_top"
by(rule tendsto_eventually, subst eventually_at_top_dense, meson heaviside_def order.asym)
lemma heaviside_is_sigmoidal: "sigmoidal heaviside"
by (simp add: heaviside_limit_neg_infinity heaviside_limit_pos_infinity sigmoidal_def)
subsection ‹Uniform Approximation by Sigmoids›
lemma sigmoidal_uniform_approximation:
assumes "sigmoidal σ"
assumes "(ε :: real) > 0" and "(h :: real) > 0"
shows "∃(ω::real)>0. ∀w≥ω. ∀k<length (xs :: real list).
(∀x. x - xs!k ≥ h ⟶ ¦σ (w * (x - xs!k)) - 1¦ < ε) ∧
(∀x. x - xs!k ≤ -h ⟶ ¦σ (w * (x - xs!k))¦ < ε)"
proof -
text ‹By the sigmoidal assumption, we extract the limits
\[
\lim_{x\to +\infty} \sigma(x) = 1
\quad\text{(limit at\_top)}
\quad\text{and}\quad
\lim_{x\to -\infty} \sigma(x) = 0
\quad\text{(limit at\_bot)}.
\]›
have lim_at_top: "(σ ⤏ 1) at_top"
using assms(1) unfolding sigmoidal_def by simp
then obtain Ntop where Ntop_def: "∀x ≥ Ntop. ¦σ x - 1¦ < ε"
using assms(2) tendsto_at_top_epsilon_def by blast
have lim_at_bot: "(σ ⤏ 0) at_bot"
using assms(1) unfolding sigmoidal_def by simp
then obtain Nbot where Nbot_def: "∀x ≤ Nbot. ¦σ x¦ < ε"
using assms(2) tendsto_at_bot_epsilon_def by fastforce
text ‹ Define \(\omega\) to control the approximation.›
obtain ω where ω_def: "ω = max (max 1 (Ntop / h)) (-Nbot / h)"
by blast
then have ω_pos: "0 < ω" using assms(2) by simp
text ‹ Show that \(\omega\) satisfies the required property.›
show ?thesis
proof (intro exI[where x = ω] allI impI conjI insert ω_pos)
fix w :: real and k :: nat and x :: real
assume w_ge_ω: "ω ≤ w"
assume k_bound: "k < length xs"
text ‹ Case 1: \(x - xs!k \ge h\).›
have "w * h ≥ Ntop"
using ω_def assms(3) pos_divide_le_eq w_ge_ω by auto
then show "x - xs!k ≥ h ⟹ ¦σ (w * (x - xs!k)) - 1¦ < ε"
using Ntop_def
by (smt (verit) ω_pos mult_less_cancel_left w_ge_ω)
text ‹ Case 2: \(x - xs!k \le -\,h\).›
have "-w * h ≤ Nbot"
using ω_def assms(3) pos_divide_le_eq w_ge_ω
by (smt (verit, ccfv_SIG) mult_minus_left)
then show "x - xs!k ≤ -h ⟹ ¦σ (w * (x - xs!k))¦ < ε"
using Nbot_def
by (smt (verit, best) ω_pos minus_mult_minus mult_less_cancel_left w_ge_ω)
qed
qed
end