task2
139 task2/common/line_search.py Normal file
@@ -0,0 +1,139 @@
"""Line search methods for step size selection."""

import math
from typing import Callable

import numpy as np


def golden_section_search(
    phi: Callable[[float], float],
    a: float,
    b: float,
    tol: float = 1e-5,
    max_iters: int = 100,
) -> float:
    """
    Golden section search for 1D minimization.

    Finds argmin phi(alpha) on [a, b], assuming phi is unimodal there.

    Args:
        phi: Function to minimize (typically alpha -> f(x - alpha * grad))
        a: Left bound of search interval
        b: Right bound of search interval
        tol: Stop once the interval length b - a falls below this value
        max_iters: Maximum number of iterations

    Returns:
        Approximate minimizer alpha (midpoint of the final interval)
    """
    # Golden ratio constants
    gr = (1 + math.sqrt(5)) / 2
    r = 1 / gr  # ~0.618
    c = 1 - r   # ~0.382

    # Two interior probe points dividing [a, b] in golden proportion
    y = a + c * (b - a)
    z = a + r * (b - a)
    fy = phi(y)
    fz = phi(z)

    for _ in range(max_iters):
        if b - a < tol:
            break

        if fy <= fz:
            # Minimum lies in [a, z]; reuse y as the new right probe
            b = z
            z = y
            fz = fy
            y = a + c * (b - a)
            fy = phi(y)
        else:
            # Minimum lies in [y, b]; reuse z as the new left probe
            a = y
            y = z
            fy = fz
            z = a + r * (b - a)
            fz = phi(z)

    return (a + b) / 2
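

# Usage sketch (illustrative only; the shifted quadratic below is a made-up
# test function, not part of the task): for a unimodal phi, the search
# should recover the minimizer to within tol.
def _demo_golden_section() -> None:
    alpha = golden_section_search(lambda t: (t - 0.3) ** 2, 0.0, 1.0)
    assert abs(alpha - 0.3) < 1e-4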


def armijo_step(
    f: Callable[[np.ndarray], float],
    x: np.ndarray,
    grad: np.ndarray,
    d_init: float = 1.0,
    epsilon: float = 0.1,
    theta: float = 0.5,
    max_iters: int = 100,
) -> float:
    """
    Armijo rule for step size selection.

    Finds a step d such that:
        f(x - d * grad) <= f(x) - epsilon * d * ||grad||^2

    Note: the descent direction is s = -grad, so the inner product
    <grad, s> = -||grad||^2, which gives the right-hand side above.

    Args:
        f: Function to minimize
        x: Current point
        grad: Gradient at x
        d_init: Initial step size
        epsilon: Armijo parameter (0 < epsilon < 1)
        theta: Step reduction factor (0 < theta < 1)
        max_iters: Maximum number of reductions

    Returns:
        Step size satisfying the Armijo condition (or the last step tried
        if max_iters reductions did not produce one)
    """
    d = d_init
    fx = f(x)
    grad_norm_sq = np.dot(grad, grad)

    for _ in range(max_iters):
        # Armijo condition: f(x - d*grad) <= f(x) - epsilon * d * ||grad||^2
        x_new = x - d * grad
        if f(x_new) <= fx - epsilon * d * grad_norm_sq:
            return d
        # Condition failed: shrink the step and try again
        d *= theta

    return d
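

# Usage sketch (illustrative only; the quadratic f and the point x are made
# up): one gradient-descent step on f(v) = ||v||^2 with an Armijo step.
def _demo_armijo_step() -> None:
    def f(v: np.ndarray) -> float:
        return float(np.dot(v, v))

    x = np.array([1.0, -2.0])
    grad = 2 * x  # exact gradient of ||v||^2 at x
    d = armijo_step(f, x, grad)
    assert f(x - d * grad) <= f(x) - 0.1 * d * float(np.dot(grad, grad))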


def armijo_step_1d(
    f: Callable[[float], float],
    x: float,
    grad: float,
    d_init: float = 1.0,
    epsilon: float = 0.1,
    theta: float = 0.5,
    max_iters: int = 100,
) -> float:
    """
    Armijo rule for step size selection (1D version).

    Finds a step d such that:
        f(x - d * grad) <= f(x) - epsilon * d * grad^2

    Args:
        f: Function to minimize
        x: Current point
        grad: Gradient (derivative) at x
        d_init: Initial step size
        epsilon: Armijo parameter (0 < epsilon < 1)
        theta: Step reduction factor (0 < theta < 1)
        max_iters: Maximum number of reductions

    Returns:
        Step size satisfying the Armijo condition (or the last step tried
        if max_iters reductions did not produce one)
    """
    d = d_init
    fx = f(x)
    grad_sq = grad * grad

    for _ in range(max_iters):
        # Armijo condition: f(x - d*grad) <= f(x) - epsilon * d * grad^2
        x_new = x - d * grad
        if f(x_new) <= fx - epsilon * d * grad_sq:
            return d
        # Condition failed: shrink the step and try again
        d *= theta

    return d
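

# Usage sketch (illustrative only; the scalar function below is made up):
# the same Armijo rule applied to f(t) = (t - 1)^2 starting from x = 3.
def _demo_armijo_step_1d() -> None:
    def f(t: float) -> float:
        return (t - 1.0) ** 2

    x, grad = 3.0, 2 * (3.0 - 1.0)  # derivative of f at x = 3 is 4
    d = armijo_step_1d(f, x, grad)
    assert f(x - d * grad) <= f(x) - 0.1 * d * grad * grad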