"""Gradient descent implementations."""
|
||
|
||
from dataclasses import dataclass, field
|
||
from typing import List, Literal, Optional
|
||
import numpy as np
|
||
|
||
from .functions import Function1D, Function2D
|
||
from .line_search import golden_section_search, armijo_step, armijo_step_1d
|
||
|
||
|
||
StepMethod = Literal["constant", "golden_section", "armijo"]
|
||
|
||
|
||
@dataclass
|
||
class IterationInfo1D:
|
||
"""Information about a single iteration of 1D gradient descent."""
|
||
iteration: int
|
||
x: float
|
||
f_x: float
|
||
grad: float
|
||
step_size: float
|
||
|
||
|
||
@dataclass
|
||
class GradientDescentResult1D:
|
||
"""Result of 1D gradient descent."""
|
||
x_star: float
|
||
f_star: float
|
||
iterations: List[IterationInfo1D]
|
||
converged: bool
|
||
method: str
|
||
|
||
@property
|
||
def trajectory(self) -> List[float]:
|
||
return [it.x for it in self.iterations]
|
||
|
||
|
||
@dataclass
|
||
class IterationInfo2D:
|
||
"""Information about a single iteration of 2D gradient descent."""
|
||
iteration: int
|
||
x: np.ndarray
|
||
f_x: float
|
||
grad: np.ndarray
|
||
step_size: float
|
||
|
||
|
||
@dataclass
|
||
class GradientDescentResult2D:
|
||
"""Result of 2D gradient descent."""
|
||
x_star: np.ndarray
|
||
f_star: float
|
||
iterations: List[IterationInfo2D]
|
||
converged: bool
|
||
method: str
|
||
|
||
@property
|
||
def trajectory(self) -> List[np.ndarray]:
|
||
return [it.x for it in self.iterations]
|
||
|
||
|
||
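# Visualization sketch (assumes matplotlib is available; `result` and `func`
# are placeholders for a solver result and the minimized function):
#
#     import matplotlib.pyplot as plt
#     xs = result.trajectory                      # 1D case: List[float]
#     plt.plot(xs, [func(x) for x in xs], "o-")   # descent path on f
#     pts = np.array(result.trajectory)           # 2D case: stack points row-wise
#     plt.plot(pts[:, 0], pts[:, 1], "o-")        # path in the (x1, x2) plane
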
def gradient_descent_1d(
    func: Function1D,
    x0: float,
    step_method: StepMethod = "constant",
    step_size: float = 0.1,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult1D:
    """
    Gradient descent for a 1D function.

    Args:
        func: Function to minimize
        x0: Starting point
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule (d_init, epsilon, theta)
        golden_section_bounds: Search bounds (a, b) for golden section

    Returns:
        GradientDescentResult1D with the trajectory and final result
    """
    x = x0
    iterations: List[IterationInfo1D] = []
    converged = False

    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            # Minimize phi(alpha) = f(x - alpha * grad) over [a, b]
            bounds = golden_section_bounds or (0.0, 2.0)

            def phi(a: float) -> float:
                return func(x - a * grad)

            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step_1d(
                func, x, grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Take the descent step
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Record the final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )

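# Usage sketch for gradient_descent_1d. The solver only needs the function to
# be callable and to expose .gradient(x), so a duck-typed stand-in works;
# _Quad1D below is hypothetical, not part of .functions:
#
#     class _Quad1D:
#         def __call__(self, x: float) -> float:
#             return (x - 3.0) ** 2
#
#         def gradient(self, x: float) -> float:
#             return 2.0 * (x - 3.0)
#
#     res = gradient_descent_1d(_Quad1D(), x0=0.0, step_method="constant", step_size=0.1)
#     print(res.x_star, res.converged)  # x_star approaches the minimizer 3.0
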
def gradient_descent_2d(
    func: Function2D,
    x0: np.ndarray,
    step_method: StepMethod = "constant",
    step_size: float = 0.01,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
    armijo_params: Optional[dict] = None,
    golden_section_bounds: Optional[tuple] = None,
) -> GradientDescentResult2D:
    """
    Gradient descent for a 2D function.

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        step_method: Step selection method ("constant", "golden_section", "armijo")
        step_size: Step size for the constant method
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations
        armijo_params: Parameters for the Armijo rule (d_init, epsilon, theta)
        golden_section_bounds: Search bounds (a, b) for golden section

    Returns:
        GradientDescentResult2D with the trajectory and final result
    """
    x = np.array(x0, dtype=float)
    iterations: List[IterationInfo2D] = []
    converged = False

    armijo_params = armijo_params or {"d_init": 1.0, "epsilon": 0.1, "theta": 0.5}

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Stop early if the gradient has essentially vanished; the terminal
        # point is recorded once by the post-loop append below, avoiding a
        # duplicate trajectory entry.
        if grad_norm < 1e-10:
            converged = True
            break

        # Select step size
        if step_method == "constant":
            alpha = step_size
        elif step_method == "golden_section":
            # Minimize phi(alpha) = f(x - alpha * grad) over [a, b]
            bounds = golden_section_bounds or (0.0, 1.0)

            def phi(a: float) -> float:
                return func(x - a * grad)

            alpha = golden_section_search(phi, bounds[0], bounds[1])
        elif step_method == "armijo":
            alpha = armijo_step(
                func, x, grad,
                d_init=armijo_params.get("d_init", 1.0),
                epsilon=armijo_params.get("epsilon", 0.1),
                theta=armijo_params.get("theta", 0.5),
            )
        else:
            raise ValueError(f"Unknown step method: {step_method}")

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Take the descent step
        x_new = x - alpha * grad
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x = x_new
            converged = True
            break

        x = x_new

    # Record the final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    method_names = {
        "constant": "Constant step",
        "golden_section": "Golden section",
        "armijo": "Armijo rule",
    }

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=method_names.get(step_method, step_method),
    )

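# Usage sketch for gradient_descent_2d on an ill-conditioned quadratic
# (hypothetical _Quad2D stand-in; golden-section step shown):
#
#     class _Quad2D:
#         def __call__(self, x: np.ndarray) -> float:
#             return float(x[0] ** 2 + 10.0 * x[1] ** 2)
#
#         def gradient(self, x: np.ndarray) -> np.ndarray:
#             return np.array([2.0 * x[0], 20.0 * x[1]])
#
#     res = gradient_descent_2d(_Quad2D(), np.array([4.0, 1.0]), step_method="golden_section")
#     print(res.x_star, res.converged)  # x_star approaches [0, 0]
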
def heavy_ball_1d(
    func: Function1D,
    x0: float,
    alpha: float = 0.1,
    beta: float = 0.9,
    eps_x: float = 0.05,
    eps_f: float = 0.001,
    max_iters: int = 100,
) -> GradientDescentResult1D:
    """
    Heavy ball method for a 1D function.

    x_{k+1} = x_k - α f'(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult1D with the trajectory and final result
    """
    x = x0
    x_prev = x0  # no momentum on the first iteration
    iterations: List[IterationInfo1D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else 0.0

        iterations.append(IterationInfo1D(
            iteration=k + 1,
            x=x,
            f_x=f_x,
            grad=grad,
            step_size=alpha,
        ))

        # Take the momentum-augmented step
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if abs(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x
            x = x_new
            converged = True
            break

        x_prev = x
        x = x_new

    # Record the final point
    iterations.append(IterationInfo1D(
        iteration=len(iterations) + 1,
        x=x,
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult1D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )

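# Parameter note (Polyak's classical analysis, stated here as guidance, not as
# part of this module's API): for a strongly convex quadratic whose second
# derivative lies in [m, L], the optimal heavy-ball parameters are
#
#     alpha = 4 / (sqrt(L) + sqrt(m)) ** 2
#     beta  = ((sqrt(L) - sqrt(m)) / (sqrt(L) + sqrt(m))) ** 2
#
# A helper sketch (hypothetical name):
#
#     import math
#
#     def polyak_params(m: float, L: float) -> tuple:
#         s_m, s_L = math.sqrt(m), math.sqrt(L)
#         return 4.0 / (s_L + s_m) ** 2, ((s_L - s_m) / (s_L + s_m)) ** 2
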
def heavy_ball_2d(
    func: Function2D,
    x0: np.ndarray,
    alpha: float = 0.01,
    beta: float = 0.9,
    eps_x: float = 1e-5,
    eps_f: float = 1e-6,
    max_iters: int = 1000,
) -> GradientDescentResult2D:
    """
    Heavy ball method for a 2D function.

    x_{k+1} = x_k - α ∇f(x_k) + β (x_k - x_{k-1})

    Args:
        func: Function to minimize
        x0: Starting point [x1, x2]
        alpha: Step size (learning rate)
        beta: Momentum parameter (0 <= beta < 1)
        eps_x: Tolerance for convergence in x
        eps_f: Tolerance for convergence in f
        max_iters: Maximum number of iterations

    Returns:
        GradientDescentResult2D with the trajectory and final result
    """
    x = np.array(x0, dtype=float)
    x_prev = x.copy()  # no momentum on the first iteration
    iterations: List[IterationInfo2D] = []
    converged = False

    for k in range(max_iters):
        f_x = func(x)
        grad = func.gradient(x)
        grad_norm = np.linalg.norm(grad)

        # Stop early if the gradient has essentially vanished; the terminal
        # point is recorded once by the post-loop append below, avoiding a
        # duplicate trajectory entry.
        if grad_norm < 1e-10:
            converged = True
            break

        # Heavy ball update: x_{k+1} = x_k - α∇f(x_k) + β(x_k - x_{k-1})
        momentum = beta * (x - x_prev) if k > 0 else np.zeros_like(x)

        iterations.append(IterationInfo2D(
            iteration=k + 1,
            x=x.copy(),
            f_x=f_x,
            grad=grad.copy(),
            step_size=alpha,
        ))

        # Take the momentum-augmented step
        x_new = x - alpha * grad + momentum
        f_new = func(x_new)

        # Check convergence: both the step and the decrease in f must be small
        if np.linalg.norm(x_new - x) < eps_x and abs(f_new - f_x) < eps_f:
            x_prev = x.copy()
            x = x_new
            converged = True
            break

        x_prev = x.copy()
        x = x_new

    # Record the final point
    iterations.append(IterationInfo2D(
        iteration=len(iterations) + 1,
        x=x.copy(),
        f_x=func(x),
        grad=func.gradient(x),
        step_size=0.0,
    ))

    return GradientDescentResult2D(
        x_star=x,
        f_star=func(x),
        iterations=iterations,
        converged=converged,
        method=f"Heavy ball (α={alpha}, β={beta})",
    )
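if __name__ == "__main__":
    # Smoke-test sketch: run as a module so the relative imports resolve
    # (e.g. `python -m <package>.gradient_descent`; package name assumed).
    # Duck-typed stand-ins avoid assuming the Function1D/Function2D constructors.

    class _Quad1D:
        def __call__(self, x: float) -> float:
            return (x - 3.0) ** 2

        def gradient(self, x: float) -> float:
            return 2.0 * (x - 3.0)

    class _Quad2D:
        def __call__(self, x: np.ndarray) -> float:
            return float(x[0] ** 2 + 10.0 * x[1] ** 2)

        def gradient(self, x: np.ndarray) -> np.ndarray:
            return np.array([2.0 * x[0], 20.0 * x[1]])

    r1 = heavy_ball_1d(_Quad1D(), x0=0.0, alpha=0.1, beta=0.5)
    print(r1.method, r1.x_star, r1.converged)

    r2 = heavy_ball_2d(_Quad2D(), np.array([4.0, 1.0]), alpha=0.05, beta=0.8)
    print(r2.method, r2.x_star, r2.converged)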