"""Gradient descent implementations."""
|
||
|
||
from dataclasses import dataclass, field
|
||
from typing import List, Literal, Optional
|
||
import numpy as np
|
||
|
||
from .functions import Function1D, Function2D
|
||
from .line_search import golden_section_search, armijo_step, armijo_step_1d
|
||
|
||
|
||
StepMethod = Literal["constant", "golden_section", "armijo"]
|
||
|
||
|
||
@dataclass
|
||
class IterationInfo1D:
|
||
"""Information about a single iteration of 1D gradient descent."""
|
||
iteration: int
|
||
x: float
|
||
f_x: float
|
||
grad: float
|
||
step_size: float
|
||
|
||
|
||
@dataclass
|
||
class GradientDescentResult1D:
|
||
"""Result of 1D gradient descent."""
|
||
x_star: float
|
||
f_star: float
|
||
iterations: List[IterationInfo1D]
|
||
converged: bool
|
||
method: str
|
||
|
||
@property
|
||
def trajectory(self) -> List[float]:
|
||
return [it.x for it in self.iterations]
|
||
|
||
|
||
@dataclass
|
||
class IterationInfo2D:
|
||
"""Information about a single iteration of 2D gradient descent."""
|
||
iteration: int
|
||
x: np.ndarray
|
||
f_x: float
|
||
grad: np.ndarray
|
||
step_size: float
|
||
|
||
|
||
@dataclass
|
||
class GradientDescentResult2D:
|
||
"""Result of 2D gradient descent."""
|
||
x_star: np.ndarray
|
||
f_star: float
|
||
iterations: List[IterationInfo2D]
|
||
converged: bool
|
||
method: str
|
||
|
||
@property
|
||
def trajectory(self) -> List[np.ndarray]:
|
||
return [it.x for it in self.iterations]
|
||
|
||
|
||
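# Visualization sketch (assumes matplotlib is available; `result` and `func`
# are placeholders for a solver result and the minimized function):
#
#     import matplotlib.pyplot as plt
#     xs = result.trajectory                      # 1D case: List[float]
#     plt.plot(xs, [func(x) for x in xs], "o-")   # descent path on f
#     pts = np.array(result.trajectory)           # 2D case: stack points row-wise
#     plt.plot(pts[:, 0], pts[:, 1], "o-")        # path in the (x1, x2) plane
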
def gradient_descent_1d(
    func: Function1D,
    x0: float,
    step_method: StepMethod = "constant",
    step_size: float = 0.1,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult1D:
    """
    Gradient descent for a 1D function.

    Args:
        func: Function to minimize
        x0: Starting point
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule (d_init, epsilon, theta)
        golden_section_bounds: Search bounds (a, b) for golden section

    Returns:
        GradientDescentResult1D with the trajectory and final result
    """
    x = x0
    iterations: List[IterationInfo1D] = []
    converged = False

    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            # Minimize phi(alpha) = f(x - alpha * grad) over [a, b]
            bounds = golden_section_bounds or (0.0, 2.0)

            def phi(a: float) -> float:
                return func(x - a * grad)

            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step_1d(
                func, x, grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Take the descent step
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Record the final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )

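# Usage sketch for gradient_descent_1d. The solver only needs the function to
# be callable and to expose .gradient(x), so a duck-typed stand-in works;
# _Quad1D below is hypothetical, not part of .functions:
#
#     class _Quad1D:
#         def __call__(self, x: float) -> float:
#             return (x - 3.0) ** 2
#
#         def gradient(self, x: float) -> float:
#             return 2.0 * (x - 3.0)
#
#     res = gradient_descent_1d(_Quad1D(), x0=0.0, step_method="constant", step_size=0.1)
#     print(res.x_star, res.converged)  # x_star approaches the minimizer 3.0
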
def gradient_descent_2d(
    func: Function2D,
    x0: np.ndarray,
    step_method: StepMethod = "constant",
    step_size: float = 0.01,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult2D:
    """
    Gradient descent for a 2D function.

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule (d_init, epsilon, theta)
        golden_section_bounds: Search bounds (a, b) for golden section

    Returns:
        GradientDescentResult2D with the trajectory and final result
    """
    x = np.array(x0, dtype=float)
    iterations: List[IterationInfo2D] = []
    converged = False

    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Stop early if the gradient has essentially vanished; the terminal
        # point is recorded once by the post-loop append below, avoiding a
        # duplicate trajectory entry.
        if grad_norm < 1e-10:
            converged = True
            break

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            # Minimize phi(alpha) = f(x - alpha * grad) over [a, b]
            bounds = golden_section_bounds or (0.0, 1.0)

            def phi(a: float) -> float:
                return func(x - a * grad)

            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step(
                func, x, grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Take the descent step
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Record the final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )

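# Usage sketch for gradient_descent_2d on an ill-conditioned quadratic
# (hypothetical _Quad2D stand-in; golden-section step shown):
#
#     class _Quad2D:
#         def __call__(self, x: np.ndarray) -> float:
#             return float(x[0] ** 2 + 10.0 * x[1] ** 2)
#
#         def gradient(self, x: np.ndarray) -> np.ndarray:
#             return np.array([2.0 * x[0], 20.0 * x[1]])
#
#     res = gradient_descent_2d(_Quad2D(), np.array([4.0, 1.0]), step_method="golden_section")
#     print(res.x_star, res.converged)  # x_star approaches [0, 0]
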
def heavy_ball_1d(
    func: Function1D,
    x0: float,
    alpha: float = 0.1,
    beta: float = 0.9,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
) -> GradientDescentResult1D:
    """
    Heavy ball method for a 1D function.

    x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult1D with the trajectory and final result
    """
    x = x0
    x_prev = x0  # no momentum on the first iteration
    iterations: List[IterationInfo1D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else 0.0

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Take the momentum-augmented step
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x
            x = x_new
            converged = True
            break

        x_prev = x
        x = x_new

    # Record the final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )

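# Parameter note (Polyak's classical analysis, stated here as guidance, not as
# part of this module's API): for a strongly convex quadratic whose second
# derivative lies in [m, L], the optimal heavy-ball parameters are
#
#     alpha = 4 / (sqrt(L) + sqrt(m)) ** 2
#     beta  = ((sqrt(L) - sqrt(m)) / (sqrt(L) + sqrt(m))) ** 2
#
# A helper sketch (hypothetical name):
#
#     import math
#
#     def polyak_params(m: float, L: float) -> tuple:
#         s_m, s_L = math.sqrt(m), math.sqrt(L)
#         return 4.0 / (s_L + s_m) ** 2, ((s_L - s_m) / (s_L + s_m)) ** 2
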
def heavy_ball_2d(
    func: Function2D,
    x0: np.ndarray,
    alpha: float = 0.01,
    beta: float = 0.9,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
) -> GradientDescentResult2D:
    """
    Heavy ball method for a 2D function.

    x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult2D with the trajectory and final result
    """
    x = np.array(x0, dtype=float)
    x_prev = x.copy()  # no momentum on the first iteration
    iterations: List[IterationInfo2D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Stop early if the gradient has essentially vanished; the terminal
        # point is recorded once by the post-loop append below, avoiding a
        # duplicate trajectory entry.
        if grad_norm < 1e-10:
            converged = True
            break

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Take the momentum-augmented step
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x.copy()
            x = x_new
            converged = True
            break

        x_prev = x.copy()
        x = x_new

    # Record the final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )
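if __name__ == "__main__":
    # Smoke-test sketch: run as a module so the relative imports resolve
    # (e.g. `python -m <package>.gradient_descent`; package name assumed).
    # Duck-typed stand-ins avoid assuming the Function1D/Function2D constructors.

    class _Quad1D:
        def __call__(self, x: float) -> float:
            return (x - 3.0) ** 2

        def gradient(self, x: float) -> float:
            return 2.0 * (x - 3.0)

    class _Quad2D:
        def __call__(self, x: np.ndarray) -> float:
            return float(x[0] ** 2 + 10.0 * x[1] ** 2)

        def gradient(self, x: np.ndarray) -> np.ndarray:
            return np.array([2.0 * x[0], 20.0 * x[1]])

    r1 = heavy_ball_1d(_Quad1D(), x0=0.0, alpha=0.1, beta=0.5)
    print(r1.method, r1.x_star, r1.converged)

    r2 = heavy_ball_2d(_Quad2D(), np.array([4.0, 1.0]), alpha=0.05, beta=0.8)
    print(r2.method, r2.x_star, r2.converged)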