task2
task2/common/__init__.py (new file, +25 lines)
@@ -0,0 +1,25 @@
# Common utilities for gradient descent optimization
from .functions import (
    Function1D,
    Function2D,
    TaskFunction1D,
    HimmelblauFunction,
    RavineFunction,
)
from .line_search import golden_section_search, armijo_step
from .gradient_descent import gradient_descent_1d, gradient_descent_2d, heavy_ball_1d, heavy_ball_2d

__all__ = [
    "Function1D",
    "Function2D",
    "TaskFunction1D",
    "HimmelblauFunction",
    "RavineFunction",
    "golden_section_search",
    "armijo_step",
    "gradient_descent_1d",
    "gradient_descent_2d",
    "heavy_ball_1d",
    "heavy_ball_2d",
]
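Not part of the commit: a minimal sketch of how these package-level re-exports might be consumed, assuming the repository root is on the import path so that the package resolves as task2.common.

# Hypothetical usage of the re-exports above (assumes `task2` is importable).
import numpy as np
from task2.common import HimmelblauFunction, gradient_descent_2d

f = HimmelblauFunction()
result = gradient_descent_2d(f, x0=np.array([0.0, 0.0]), step_method="armijo")
print(result.x_star, result.f_star, result.converged)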
task2/common/functions.py (new file, +147 lines)
@@ -0,0 +1,147 @@
"""Function definitions with their gradients for optimization."""

import math
from abc import ABC, abstractmethod
from typing import Tuple

import numpy as np


class Function1D(ABC):
    """Abstract base class for 1D functions."""

    name: str = "Abstract 1D Function"

    @abstractmethod
    def __call__(self, x: float) -> float:
        """Evaluate the function at x."""
        pass

    @abstractmethod
    def gradient(self, x: float) -> float:
        """Compute the gradient (derivative) at x."""
        pass

    @property
    @abstractmethod
    def domain(self) -> Tuple[float, float]:
        """Return the domain [a, b] for this function."""
        pass


class Function2D(ABC):
    """Abstract base class for 2D functions."""

    name: str = "Abstract 2D Function"

    @abstractmethod
    def __call__(self, x: np.ndarray) -> float:
        """Evaluate the function at point x = [x1, x2]."""
        pass

    @abstractmethod
    def gradient(self, x: np.ndarray) -> np.ndarray:
        """Compute the gradient at point x = [x1, x2]."""
        pass

    @property
    @abstractmethod
    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
        """Return bounds ((x1_min, x1_max), (x2_min, x2_max)) for plotting."""
        pass


class TaskFunction1D(Function1D):
    """
    f(x) = sqrt(x^2 + 9) / 4 + (5 - x) / 5

    Derivative: f'(x) = x / (4 * sqrt(x^2 + 9)) - 1/5
    """

    name = "f(x) = √(x² + 9)/4 + (5 - x)/5"

    def __call__(self, x: float) -> float:
        return math.sqrt(x**2 + 9) / 4 + (5 - x) / 5

    def gradient(self, x: float) -> float:
        return x / (4 * math.sqrt(x**2 + 9)) - 1 / 5

    @property
    def domain(self) -> Tuple[float, float]:
        return (-3.0, 8.0)


class HimmelblauFunction(Function2D):
    """
    Himmelblau's function:
    f(x, y) = (x^2 + y - 11)^2 + (x + y^2 - 7)^2

    Has 4 identical local minima at:
    - (3.0, 2.0)
    - (-2.805118, 3.131312)
    - (-3.779310, -3.283186)
    - (3.584428, -1.848126)

    Gradient:
    ∂f/∂x = 4x(x² + y - 11) + 2(x + y² - 7)
    ∂f/∂y = 2(x² + y - 11) + 4y(x + y² - 7)
    """

    name = "Himmelblau: (x² + y - 11)² + (x + y² - 7)²"

    def __call__(self, x: np.ndarray) -> float:
        x1, x2 = x[0], x[1]
        return (x1**2 + x2 - 11) ** 2 + (x1 + x2**2 - 7) ** 2

    def gradient(self, x: np.ndarray) -> np.ndarray:
        x1, x2 = x[0], x[1]
        df_dx1 = 4 * x1 * (x1**2 + x2 - 11) + 2 * (x1 + x2**2 - 7)
        df_dx2 = 2 * (x1**2 + x2 - 11) + 4 * x2 * (x1 + x2**2 - 7)
        return np.array([df_dx1, df_dx2])

    @property
    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
        return ((-5.0, 5.0), (-5.0, 5.0))


class RavineFunction(Function2D):
    """
    Ravine function (elliptic paraboloid):
    f(x, y) = x² + 20y²

    Minimum at (0, 0), f(0, 0) = 0

    Demonstrates the "ravine effect": the gradient is nearly perpendicular
    to the direction toward the minimum, which slows convergence.

    Gradient:
    ∂f/∂x = 2x
    ∂f/∂y = 40y
    """

    name = "Ravine: f(x,y) = x² + 20y²"

    def __call__(self, x: np.ndarray) -> float:
        x1, x2 = x[0], x[1]
        return x1**2 + 20 * x2**2

    def gradient(self, x: np.ndarray) -> np.ndarray:
        x1, x2 = x[0], x[1]
        df_dx1 = 2 * x1
        df_dx2 = 40 * x2
        return np.array([df_dx1, df_dx2])

    @property
    def plot_bounds(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
        return ((-2.0, 2.0), (-0.5, 0.5))


# Registry of available functions
FUNCTIONS_1D = {
    "task": TaskFunction1D,
}

FUNCTIONS_2D = {
    "himmelblau": HimmelblauFunction,
    "ravine": RavineFunction,
}
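As a quick sanity check (not part of the commit), the analytic gradients above can be compared against central finite differences; a minimal sketch assuming only NumPy, with the helper name check_gradient_2d chosen here for illustration:

import numpy as np

def check_gradient_2d(func, x, h=1e-6):
    """Return the max absolute gap between func.gradient(x) and a central finite-difference estimate."""
    num = np.zeros(2)
    for i in range(2):
        e = np.zeros(2)
        e[i] = h
        num[i] = (func(x + e) - func(x - e)) / (2 * h)
    return np.max(np.abs(num - func.gradient(x)))

# Example: the gap should be tiny at an arbitrary point.
# print(check_gradient_2d(HimmelblauFunction(), np.array([1.0, -2.0])))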
task2/common/gradient_descent.py (new file, +441 lines)
@@ -0,0 +1,441 @@
"""Gradient descent implementations."""

from dataclasses import dataclass, field
from typing import List, Literal, Optional
import numpy as np

from .functions import Function1D, Function2D
from .line_search import golden_section_search, armijo_step, armijo_step_1d


StepMethod = Literal["constant", "golden_section", "armijo"]


@dataclass
class IterationInfo1D:
    """Information about a single iteration of 1D gradient descent."""
    iteration: int
    x: float
    f_x: float
    grad: float
    step_size: float


@dataclass
class GradientDescentResult1D:
    """Result of 1D gradient descent."""
    x_star: float
    f_star: float
    iterations: List[IterationInfo1D]
    converged: bool
    method: str

    @property
    def trajectory(self) -> List[float]:
        return [it.x for it in self.iterations]


@dataclass
class IterationInfo2D:
    """Information about a single iteration of 2D gradient descent."""
    iteration: int
    x: np.ndarray
    f_x: float
    grad: np.ndarray
    step_size: float


@dataclass
class GradientDescentResult2D:
    """Result of 2D gradient descent."""
    x_star: np.ndarray
    f_star: float
    iterations: List[IterationInfo2D]
    converged: bool
    method: str

    @property
    def trajectory(self) -> List[np.ndarray]:
        return [it.x for it in self.iterations]


def gradient_descent_1d(
    func: Function1D,
    x0: float,
    step_method: StepMethod = "constant",
    step_size: float = 0.1,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult1D:
    """
    Gradient descent for a 1D function.

    Args:
        func: Function to minimize
        x0: Starting point
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule (d_init, epsilon, theta)
        golden_section_bounds: Search bounds for golden section (a, b)

    Returns:
        GradientDescentResult1D with trajectory and final result
    """
    x = x0
    iterations: List[IterationInfo1D] = []
    converged = False

    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            # Optimize phi(alpha) = f(x - alpha * grad) using golden section
            bounds = golden_section_bounds or (0.0, 2.0)
            phi = lambda a: func(x - a * grad)
            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step_1d(
                func, x, grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Add final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )


def gradient_descent_2d(
    func: Function2D,
    x0: np.ndarray,
    step_method: StepMethod = "constant",
    step_size: float = 0.01,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult2D:
    """
    Gradient descent for a 2D function.

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule
        golden_section_bounds: Search bounds for golden section

    Returns:
        GradientDescentResult2D with trajectory and final result
    """
    x = np.array(x0, dtype=float)
    iterations: List[IterationInfo2D] = []
    converged = False

    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Check if the gradient is too small
        if grad_norm < 1e-10:
            converged = True
            iterations.append(IterationInfo2D(
                iteration=k + 1,
                x=x.copy(),
                f_x=f_x,
                grad=grad.copy(),
                step_size=0.0,
            ))
            break

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            bounds = golden_section_bounds or (0.0, 1.0)
            phi = lambda a: func(x - a * grad)
            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step(
                func, x, grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Add final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )


def heavy_ball_1d(
    func: Function1D,
    x0: float,
    alpha: float = 0.1,
    beta: float = 0.9,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
) -> GradientDescentResult1D:
    """
    Heavy ball method for a 1D function.

    x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult1D with trajectory and final result
    """
    x = x0
    x_prev = x0  # No momentum on the first iteration
    iterations: List[IterationInfo1D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else 0.0

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x
            x = x_new
            converged = True
            break

        x_prev = x
        x = x_new

    # Add final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )


def heavy_ball_2d(
    func: Function2D,
    x0: np.ndarray,
    alpha: float = 0.01,
    beta: float = 0.9,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
) -> GradientDescentResult2D:
    """
    Heavy ball method for a 2D function.

    x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for x convergence
        eps_f: Tolerance for f convergence
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult2D with trajectory and final result
    """
    x = np.array(x0, dtype=float)
    x_prev = x.copy()  # No momentum on the first iteration
    iterations: List[IterationInfo2D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Check if the gradient is too small
        if grad_norm < 1e-10:
            converged = True
            iterations.append(IterationInfo2D(
                iteration=k + 1,
                x=x.copy(),
                f_x=f_x,
                grad=grad.copy(),
                step_size=0.0,
            ))
            break

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Update x
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x.copy()
            x = x_new
            converged = True
            break

        x_prev = x.copy()
        x = x_new

    # Add final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )
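For illustration only (not part of the commit): a sketch comparing plain gradient descent with the heavy ball method on the ravine function, where momentum is expected to help; the starting point and hyperparameters are arbitrary, and the import path assumes the package resolves as task2.common.

import numpy as np
from task2.common import RavineFunction, gradient_descent_2d, heavy_ball_2d

f = RavineFunction()
x0 = np.array([2.0, 0.5])

gd = gradient_descent_2d(f, x0, step_method="constant", step_size=0.02)
hb = heavy_ball_2d(f, x0, alpha=0.02, beta=0.8)

print(len(gd.iterations), gd.x_star)  # iteration count and final point for plain GD
print(len(hb.iterations), hb.x_star)  # heavy ball often needs fewer iterations here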
task2/common/line_search.py (new file, +139 lines)
@@ -0,0 +1,139 @@
"""Line search methods for step size selection."""

import math
from typing import Callable, Tuple
import numpy as np


def golden_section_search(
    phi: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-5,
    max_iters: int = 100,
) -> float:
    """
    Golden section search for 1D optimization.

    Finds argmin phi(alpha) on [a, b].

    Args:
        phi: Function to minimize (typically f(x - alpha * grad))
        a: Left bound of the search interval
        b: Right bound of the search interval
        tol: Tolerance for stopping
        max_iters: Maximum number of iterations

    Returns:
        Optimal step size alpha
    """
    # Golden ratio constants
    gr = (1 + math.sqrt(5)) / 2
    r = 1 / gr  # ~0.618
    c = 1 - r   # ~0.382

    y = a + c * (b - a)
    z = a + r * (b - a)
    fy = phi(y)
    fz = phi(z)

    for _ in range(max_iters):
        if b - a < tol:
            break

        if fy <= fz:
            b = z
            z = y
            fz = fy
            y = a + c * (b - a)
            fy = phi(y)
        else:
            a = y
            y = z
            fy = fz
            z = a + r * (b - a)
            fz = phi(z)

    return (a + b) / 2


def armijo_step(
    f: Callable[[np.ndarray], float],
    x: np.ndarray,
    grad: np.ndarray,
    d_init: float = 1.0,
    epsilon: float = 0.1,
    theta: float = 0.5,
    max_iters: int = 100,
) -> float:
    """
    Armijo rule for step size selection.

    Finds a step d such that:
    f(x - d * grad) <= f(x) - epsilon * d * ||grad||^2

    Note: using the descent direction s = -grad, the inner product <grad, s> = -||grad||^2.

    Args:
        f: Function to minimize
        x: Current point
        grad: Gradient at x
        d_init: Initial step size
        epsilon: Armijo parameter (0 < epsilon < 1)
        theta: Step reduction factor (0 < theta < 1)
        max_iters: Maximum number of reductions

    Returns:
        Step size satisfying the Armijo condition
    """
    d = d_init
    fx = f(x)
    grad_norm_sq = np.dot(grad, grad)

    for _ in range(max_iters):
        # Armijo condition: f(x - d*grad) <= f(x) - epsilon * d * ||grad||^2
        x_new = x - d * grad
        if f(x_new) <= fx - epsilon * d * grad_norm_sq:
            return d
        d *= theta

    return d


def armijo_step_1d(
    f: Callable[[float], float],
    x: float,
    grad: float,
    d_init: float = 1.0,
    epsilon: float = 0.1,
    theta: float = 0.5,
    max_iters: int = 100,
) -> float:
    """
    Armijo rule for step size selection (1D version).

    Args:
        f: Function to minimize
        x: Current point
        grad: Gradient (derivative) at x
        d_init: Initial step size
        epsilon: Armijo parameter (0 < epsilon < 1)
        theta: Step reduction factor (0 < theta < 1)
        max_iters: Maximum number of reductions

    Returns:
        Step size satisfying the Armijo condition
    """
    d = d_init
    fx = f(x)
    grad_sq = grad * grad

    for _ in range(max_iters):
        # Armijo condition: f(x - d*grad) <= f(x) - epsilon * d * grad^2
        x_new = x - d * grad
        if f(x_new) <= fx - epsilon * d * grad_sq:
            return d
        d *= theta

    return d
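A standalone sketch (not part of the commit) showing the two line searches on a simple quadratic, to illustrate the step sizes they return; the quadratic, point, and bounds are arbitrary choices for the example.

import numpy as np
from task2.common.line_search import golden_section_search, armijo_step

# Quadratic f(x) = x1^2 + 10*x2^2 with gradient [2*x1, 20*x2].
f = lambda x: x[0] ** 2 + 10 * x[1] ** 2
x = np.array([1.0, 1.0])
grad = np.array([2.0, 20.0])

# phi(a) = f(x - a*grad): golden section approximates its minimizer,
# while the Armijo rule returns the first backtracked step passing its test.
phi = lambda a: f(x - a * grad)
print(golden_section_search(phi, 0.0, 1.0))  # close to the exact line minimum (~0.05)
print(armijo_step(f, x, grad, d_init=1.0))   # first d with sufficient decrease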