140 lines
3.2 KiB
Python
140 lines
3.2 KiB
Python
"""Line search methods for step size selection."""
|
|
|
|
import math
|
|
from typing import Callable, Tuple
|
|
import numpy as np
|
|
|
|
|
|
def golden_section_search(
    phi: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-5,
    max_iters: int = 100,
) -> float:
    """
    Minimize a 1D function on an interval via golden section search.

    Returns an approximation of argmin phi(alpha) over [a, b], shrinking
    the bracket by the golden ratio each iteration so that one previously
    evaluated probe is reused per step.

    Args:
        phi: Function to minimize (typically f(x - alpha * grad))
        a: Left bound of search interval
        b: Right bound of search interval
        tol: Stop once the bracket width drops below this value
        max_iters: Maximum number of shrink iterations

    Returns:
        Optimal step size alpha (midpoint of the final bracket)
    """
    # 1/phi ~ 0.618 and its complement ~ 0.382 place the two interior probes.
    inv_gold = (math.sqrt(5) - 1) / 2
    comp_gold = 1 - inv_gold

    lo, hi = a, b
    left = lo + comp_gold * (hi - lo)
    right = lo + inv_gold * (hi - lo)
    f_left = phi(left)
    f_right = phi(right)

    for _ in range(max_iters):
        if hi - lo < tol:
            break

        if f_left <= f_right:
            # Minimum lies in [lo, right]; old left probe becomes new right probe.
            hi, right, f_right = right, left, f_left
            left = lo + comp_gold * (hi - lo)
            f_left = phi(left)
        else:
            # Minimum lies in [left, hi]; old right probe becomes new left probe.
            lo, left, f_left = left, right, f_right
            right = lo + inv_gold * (hi - lo)
            f_right = phi(right)

    return (lo + hi) / 2
|
|
|
|
|
|
def armijo_step(
    f: Callable[[np.ndarray], float],
    x: np.ndarray,
    grad: np.ndarray,
    d_init: float = 1.0,
    epsilon: float = 0.1,
    theta: float = 0.5,
    max_iters: int = 100,
) -> float:
    """
    Backtracking (Armijo) line search along the steepest-descent direction.

    Starting from d_init, shrinks the step by factor theta until the
    sufficient-decrease condition holds:

        f(x - d * grad) <= f(x) - epsilon * d * ||grad||^2

    With descent direction s = -grad, the inner product <grad, s> equals
    -||grad||^2, which gives the right-hand side above.

    Args:
        f: Function to minimize
        x: Current point
        grad: Gradient at x
        d_init: Initial trial step size
        epsilon: Armijo parameter (0 < epsilon < 1)
        theta: Step reduction factor (0 < theta < 1)
        max_iters: Maximum number of reductions

    Returns:
        Step size satisfying the Armijo condition, or the last (smallest)
        trial step if the iteration budget is exhausted.
    """
    f0 = f(x)
    g_sq = np.dot(grad, grad)  # ||grad||^2, hoisted out of the loop
    step = d_init

    for _ in range(max_iters):
        threshold = f0 - epsilon * step * g_sq
        if f(x - step * grad) <= threshold:
            return step
        step = theta * step

    # Budget exhausted without satisfying the condition.
    return step
|
|
|
|
|
|
def armijo_step_1d(
    f: Callable[[float], float],
    x: float,
    grad: float,
    d_init: float = 1.0,
    epsilon: float = 0.1,
    theta: float = 0.5,
    max_iters: int = 100,
) -> float:
    """
    Backtracking (Armijo) line search for a scalar function.

    Shrinks the step from d_init by factor theta until

        f(x - d * grad) <= f(x) - epsilon * d * grad^2

    holds, mirroring the vector version for the 1D case.

    Args:
        f: Function to minimize
        x: Current point
        grad: Gradient (derivative) at x
        d_init: Initial trial step size
        epsilon: Armijo parameter (0 < epsilon < 1)
        theta: Step reduction factor (0 < theta < 1)
        max_iters: Maximum number of reductions

    Returns:
        Step size satisfying the Armijo condition, or the last (smallest)
        trial step if the iteration budget is exhausted.
    """
    f0 = f(x)
    slope_sq = grad ** 2  # grad^2, hoisted out of the loop
    step = d_init

    for _ in range(max_iters):
        if f(x - step * grad) <= f0 - epsilon * step * slope_sq:
            return step
        step = theta * step

    # Budget exhausted without satisfying the condition.
    return step
|
|
|