# optimization/task2/common/gradient_descent.py
"""Gradient descent implementations."""
from dataclasses import dataclass, field
from typing import List, Literal, Optional
import numpy as np
from .functions import Function1D, Function2D
from .line_search import golden_section_search, armijo_step, armijo_step_1d
StepMethod = Literal["constant", "golden_section", "armijo"]
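
# Step-size strategies used by the solvers below (a brief orientation; the
# exact conventions of golden_section_search and armijo_step live in
# .line_search):
#   - "constant":       a fixed step alpha = step_size on every iteration.
#   - "golden_section": alpha approximately minimizes phi(a) = f(x - a*grad)
#                       over a user-supplied bracket via golden-section search.
#   - "armijo":         backtracking from d_init, shrinking the step by a
#                       factor theta until a sufficient-decrease condition
#                       with parameter epsilon holds.
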
@dataclass
class IterationInfo1D:
"""Information about a single iteration of 1D gradient descent."""
iteration: int
x: float
f_x: float
grad: float
    step_size: float


@dataclass
class GradientDescentResult1D:
"""Result of 1D gradient descent."""
x_star: float
f_star: float
iterations: List[IterationInfo1D]
converged: bool
method: str
@property
def trajectory(self) -> List[float]:
        return [it.x for it in self.iterations]


@dataclass
class IterationInfo2D:
"""Information about a single iteration of 2D gradient descent."""
iteration: int
x: np.ndarray
f_x: float
grad: np.ndarray
    step_size: float


@dataclass
class GradientDescentResult2D:
"""Result of 2D gradient descent."""
x_star: np.ndarray
f_star: float
iterations: List[IterationInfo2D]
converged: bool
method: str
@property
def trajectory(self) -> List[np.ndarray]:
return [it.x for it in self.iterations]
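

# Illustrative only: a minimal sketch (made-up numbers) showing how the
# result records compose and how `trajectory` collects the visited points,
# e.g. for plotting.
def _example_trajectory_usage() -> GradientDescentResult1D:
    res = GradientDescentResult1D(
        x_star=1.0,
        f_star=0.0,
        iterations=[
            IterationInfo1D(iteration=1, x=0.0, f_x=1.0, grad=-2.0, step_size=0.5),
            IterationInfo1D(iteration=2, x=1.0, f_x=0.0, grad=0.0, step_size=0.0),
        ],
        converged=True,
        method="Constant step",
    )
    assert res.trajectory == [0.0, 1.0]
    return res
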
def gradient_descent_1d(
func: Function1D,
x0: float,
step_method: StepMethod = "constant",
step_size: float = 0.1,
eps_x: float = 0.05,
eps_f: float = 0.001,
max_iters: int = 100,
armijo_params: Optional[dict] = None,
golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult1D:
"""
Gradient descent for 1D function.
Args:
func: Function to minimize
x0: Starting point
step_method: Step selection method ("constant", "golden_section", "armijo")
step_size: Step size for constant method
eps_x: Tolerance for x convergence
eps_f: Tolerance for f convergence
max_iters: Maximum number of iterations
armijo_params: Parameters for Armijo rule (d_init, epsilon, theta)
golden_section_bounds: Search bounds for golden section (a, b)
Returns:
GradientDescentResult1D with trajectory and final result
"""
x = x0
iterations: List[IterationInfo1D] = []
converged = False
armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}
for k in range(max_iters):
f_x = func(x)
grad = func.gradient(x)
# Select step size
if step_method == "constant":
alpha = step_size
elif step_method == "golden_section":
            # Minimize phi(alpha) = f(x - alpha * grad) by golden-section search
bounds = golden_section_bounds or (0.0, 2.0)
phi = lambda a: func(x - a * grad)
alpha = golden_section_search(phi, bounds[0], bounds[1])
elif step_method == "armijo":
alpha = armijo_step_1d(
func, x, grad,
d_init=armijo_params.get("d_init", 1.0),
epsilon=armijo_params.get("epsilon", 0.1),
theta=armijo_params.get("theta", 0.5),
)
else:
raise ValueError(f"Unknown step method: {step_method}")
iterations.append(IterationInfo1D(
iteration=k + 1,
x=x,
f_x=f_x,
grad=grad,
step_size=alpha,
))
# Update x
x_new = x - alpha * grad
f_new = func(x_new)
# Check convergence
if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
x = x_new
converged = True
break
x = x_new
# Add final point
iterations.append(IterationInfo1D(
iteration=len(iterations) + 1,
x=x,
f_x=func(x),
grad=func.gradient(x),
step_size=0.0,
))
    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }
return GradientDescentResult1D(
x_star=x,
f_star=func(x),
iterations=iterations,
converged=converged,
method=method_names.get(step_method, step_method),
)
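

# Illustrative usage sketch: gradient_descent_1d only calls func(x) and
# func.gradient(x), so any duck-typed object with those two methods can
# stand in for Function1D here.
def _demo_gradient_descent_1d() -> GradientDescentResult1D:
    """Minimize the quadratic f(x) = (x - 3)^2, whose minimum is at x = 3."""

    class _Quadratic:
        def __call__(self, x: float) -> float:
            return (x - 3.0) ** 2

        def gradient(self, x: float) -> float:
            return 2.0 * (x - 3.0)

    return gradient_descent_1d(
        _Quadratic(), x0=0.0, step_method="constant", step_size=0.1
    )
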
def gradient_descent_2d(
func: Function2D,
x0: np.ndarray,
step_method: StepMethod = "constant",
step_size: float = 0.01,
eps_x: float = 1e-5,
eps_f: float = 1e-6,
max_iters: int = 1000,
armijo_params: Optional[dict] = None,
golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult2D:
"""
Gradient descent for 2D function.
Args:
func: Function to minimize
x0: Starting point [x1, x2]
step_method: Step selection method ("constant", "golden_section", "armijo")
step_size: Step size for constant method
eps_x: Tolerance for x convergence
eps_f: Tolerance for f convergence
max_iters: Maximum number of iterations
armijo_params: Parameters for Armijo rule
golden_section_bounds: Search bounds for golden section
Returns:
GradientDescentResult2D with trajectory and final result
"""
x = np.array(x0, dtype=float)
iterations: List[IterationInfo2D] = []
converged = False
armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}
for k in range(max_iters):
f_x = func(x)
grad = func.gradient(x)
grad_norm = np.linalg.norm(grad)
        # Stop early if the gradient is numerically zero; the final point
        # is appended once after the loop, avoiding a duplicate entry here.
        if grad_norm < 1e-10:
            converged = True
            break
# Select step size
if step_method == "constant":
alpha = step_size
elif step_method == "golden_section":
bounds = golden_section_bounds or (0.0, 1.0)
phi = lambda a: func(x - a * grad)
alpha = golden_section_search(phi, bounds[0], bounds[1])
elif step_method == "armijo":
alpha = armijo_step(
func, x, grad,
d_init=armijo_params.get("d_init", 1.0),
epsilon=armijo_params.get("epsilon", 0.1),
theta=armijo_params.get("theta", 0.5),
)
else:
raise ValueError(f"Unknown step method: {step_method}")
iterations.append(IterationInfo2D(
iteration=k + 1,
x=x.copy(),
f_x=f_x,
grad=grad.copy(),
step_size=alpha,
))
# Update x
x_new = x - alpha * grad
f_new = func(x_new)
# Check convergence
if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
x = x_new
converged = True
break
x = x_new
# Add final point
iterations.append(IterationInfo2D(
iteration=len(iterations) + 1,
x=x.copy(),
f_x=func(x),
grad=func.gradient(x),
step_size=0.0,
))
    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }
return GradientDescentResult2D(
x_star=x,
f_star=func(x),
iterations=iterations,
converged=converged,
method=method_names.get(step_method, step_method),
)
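

# Illustrative usage sketch (same duck-typing note as the 1D demo):
# gradient_descent_2d only needs func(x) and func.gradient(x) on arrays.
def _demo_gradient_descent_2d() -> GradientDescentResult2D:
    """Minimize the ill-conditioned bowl f(x) = x1^2 + 10*x2^2 (minimum at the origin)."""

    class _Bowl:
        def __call__(self, x: np.ndarray) -> float:
            return float(x[0] ** 2 + 10.0 * x[1] ** 2)

        def gradient(self, x: np.ndarray) -> np.ndarray:
            return np.array([2.0 * x[0], 20.0 * x[1]])

    return gradient_descent_2d(_Bowl(), x0=np.array([2.0, 1.0]))
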
def heavy_ball_1d(
func: Function1D,
x0: float,
alpha: float = 0.1,
beta: float = 0.9,
eps_x: float = 0.05,
eps_f: float = 0.001,
max_iters: int = 100,
) -> GradientDescentResult1D:
"""
Heavy Ball method for 1D function.
x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})
Args:
func: Function to minimize
x0: Starting point
alpha: Step size (learning rate)
beta: Momentum parameter (0 <= beta < 1)
eps_x: Tolerance for x convergence
eps_f: Tolerance for f convergence
max_iters: Maximum number of iterations
Returns:
GradientDescentResult1D with trajectory and final result
"""
x = x0
x_prev = x0 # For first iteration, no momentum
iterations: List[IterationInfo1D] = []
converged = False
for k in range(max_iters):
f_x = func(x)
grad = func.gradient(x)
# Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
momentum = beta * (x - x_prev) if k > 0 else 0.0
iterations.append(IterationInfo1D(
iteration=k + 1,
x=x,
f_x=f_x,
grad=grad,
step_size=alpha,
))
# Update x
x_new = x - alpha * grad + momentum
f_new = func(x_new)
# Check convergence
if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
x_prev = x
x = x_new
converged = True
break
x_prev = x
x = x_new
# Add final point
iterations.append(IterationInfo1D(
iteration=len(iterations) + 1,
x=x,
f_x=func(x),
grad=func.gradient(x),
step_size=0.0,
))
return GradientDescentResult1D(
x_star=x,
f_star=func(x),
iterations=iterations,
converged=converged,
method=f"Тяжёлый шарик (α={alpha}, β={beta})",
)
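

# Illustrative sketch: on the same quadratic as the 1D gradient-descent demo,
# the momentum term β(x_k - x_{k-1}) carries information from the previous
# step, which speeds convergence here compared with plain descent.
def _demo_heavy_ball_1d() -> GradientDescentResult1D:
    """Minimize f(x) = (x - 3)^2 with momentum."""

    class _Quadratic:
        def __call__(self, x: float) -> float:
            return (x - 3.0) ** 2

        def gradient(self, x: float) -> float:
            return 2.0 * (x - 3.0)

    return heavy_ball_1d(_Quadratic(), x0=0.0, alpha=0.1, beta=0.5)
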
def heavy_ball_2d(
func: Function2D,
x0: np.ndarray,
alpha: float = 0.01,
beta: float = 0.9,
eps_x: float = 1e-5,
eps_f: float = 1e-6,
max_iters: int = 1000,
) -> GradientDescentResult2D:
"""
Heavy Ball method for 2D function.
x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})
Args:
func: Function to minimize
x0: Starting point [x1, x2]
alpha: Step size (learning rate)
beta: Momentum parameter (0 <= beta < 1)
eps_x: Tolerance for x convergence
eps_f: Tolerance for f convergence
max_iters: Maximum number of iterations
Returns:
GradientDescentResult2D with trajectory and final result
"""
x = np.array(x0, dtype=float)
x_prev = x.copy() # For first iteration, no momentum
iterations: List[IterationInfo2D] = []
converged = False
for k in range(max_iters):
f_x = func(x)
grad = func.gradient(x)
grad_norm = np.linalg.norm(grad)
        # Stop early if the gradient is numerically zero; the final point
        # is appended once after the loop, avoiding a duplicate entry here.
        if grad_norm < 1e-10:
            converged = True
            break
# Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)
iterations.append(IterationInfo2D(
iteration=k + 1,
x=x.copy(),
f_x=f_x,
grad=grad.copy(),
step_size=alpha,
))
# Update x
x_new = x - alpha * grad + momentum
f_new = func(x_new)
# Check convergence
if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
x_prev = x.copy()
x = x_new
converged = True
break
x_prev = x.copy()
x = x_new
# Add final point
iterations.append(IterationInfo2D(
iteration=len(iterations) + 1,
x=x.copy(),
f_x=func(x),
grad=func.gradient(x),
step_size=0.0,
))
return GradientDescentResult2D(
x_star=x,
f_star=func(x),
iterations=iterations,
converged=converged,
method=f"Тяжёлый шарик (α={alpha}, β={beta})",
)
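

# Illustrative sketch mirroring the 2D gradient-descent demo; the heavy ball
# method is classically useful on ill-conditioned bowls like this one.
def _demo_heavy_ball_2d() -> GradientDescentResult2D:
    """Minimize f(x) = x1^2 + 10*x2^2 with momentum."""

    class _Bowl:
        def __call__(self, x: np.ndarray) -> float:
            return float(x[0] ** 2 + 10.0 * x[1] ** 2)

        def gradient(self, x: np.ndarray) -> np.ndarray:
            return np.array([2.0 * x[0], 20.0 * x[1]])

    return heavy_ball_2d(_Bowl(), x0=np.array([2.0, 1.0]), alpha=0.01, beta=0.5)


if __name__ == "__main__":
    # Smoke-run the illustrative demos defined above.
    for demo in (
        _demo_gradient_descent_1d,
        _demo_gradient_descent_2d,
        _demo_heavy_ball_1d,
        _demo_heavy_ball_2d,
    ):
        res = demo()
        print(
            f"{demo.__name__}: x* = {res.x_star}, f* = {res.f_star:.6g}, "
            f"converged = {res.converged} ({res.method})"
        )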