From 033de60cad123bfceacb261cd55802cba40d657f Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 15:33:51 +0200 Subject: [PATCH 01/17] 390 convert StepDecay to Numpower --- docs/neural-network/optimizers/step-decay.md | 4 +- .../Optimizers/StepDecay/StepDecay.php | 115 ++++++++++++++++++ .../Optimizers/StepDecay/StepDecayTest.php | 100 +++++++++++++++ 3 files changed, 217 insertions(+), 2 deletions(-) create mode 100644 src/NeuralNet/Optimizers/StepDecay/StepDecay.php create mode 100644 tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php diff --git a/docs/neural-network/optimizers/step-decay.md b/docs/neural-network/optimizers/step-decay.md index 1a21f0804..0ec9395cc 100644 --- a/docs/neural-network/optimizers/step-decay.md +++ b/docs/neural-network/optimizers/step-decay.md @@ -12,7 +12,7 @@ A learning rate decay optimizer that reduces the global learning rate by a facto ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\StepDecay; +use Rubix\ML\NeuralNet\Optimizers\StepDecay\StepDecay; $optimizer = new StepDecay(0.1, 50, 1e-3); -``` \ No newline at end of file +``` diff --git a/src/NeuralNet/Optimizers/StepDecay/StepDecay.php b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php new file mode 100644 index 000000000..246adc6c7 --- /dev/null +++ b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php @@ -0,0 +1,115 @@ + + */ +class StepDecay implements Optimizer +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The size of every floor in steps. i.e. the number of steps to take before applying another factor of decay. + * + * @var int + */ + protected int $losses; + + /** + * The factor to decrease the learning rate by over a period of k steps. + * + * @var float + */ + protected float $decay; + + /** + * The number of steps taken so far. + * + * @var int + */ + protected int $steps = 0; + + /** + * @param float $rate + * @param int $losses + * @param float $decay + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.01, int $losses = 100, float $decay = 1e-3) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($losses < 1) { + throw new InvalidArgumentException( + "The number of steps per floor must be greater than 0, $losses given." + ); + } + + if ($decay < 0.0) { + throw new InvalidArgumentException( + "Decay rate must be positive, $decay given." + ); + } + + $this->rate = $rate; + $this->losses = $losses; + $this->decay = $decay; + } + + /** + * Take a step of gradient descent for a given parameter. + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $floor = floor($this->steps / $this->losses); + + $rate = $this->rate * (1.0 / (1.0 + $floor * $this->decay)); + + ++$this->steps; + + return NumPower::multiply($gradient, $rate); + } + + /** + * Return the string representation of the object. 
+ * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Step Decay (rate: {$this->rate}, steps: {$this->losses}, decay: {$this->decay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php new file mode 100644 index 000000000..f3535552b --- /dev/null +++ b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php @@ -0,0 +1,100 @@ +optimizer = new StepDecay(rate: 0.001); + } + + #[Test] + #[TestDox('Throws exception when constructed with invalid learning rate')] + public function testConstructorWithInvalidRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new StepDecay(rate: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with invalid losses')] + public function testConstructorWithInvalidLosses() : void + { + $this->expectException(InvalidArgumentException::class); + + new StepDecay(rate: 0.01, losses: 0); + } + + #[Test] + #[TestDox('Throws exception when constructed with invalid decay')] + public function testConstructorWithInvalidDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new StepDecay(rate: 0.01, losses: 100, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('Step Decay (rate: 0.001, steps: 100, decay: 0.001)', (string) $this->optimizer); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} + From a02c4a08dc490ff158174a4890e8facb91140d23 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 17:36:38 +0200 Subject: [PATCH 02/17] 390 convert RMSProp to Numpower --- docs/neural-network/optimizers/rms-prop.md | 4 +- src/NeuralNet/Optimizers/Base/Adaptive.php | 25 +++ src/NeuralNet/Optimizers/RMSProp/RMSProp.php | 158 ++++++++++++++++++ .../Optimizers/RMSProp/RMSPropTest.php | 127 ++++++++++++++ 4 files changed, 312 insertions(+), 2 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Base/Adaptive.php create mode 100644 src/NeuralNet/Optimizers/RMSProp/RMSProp.php create mode 100644 tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php diff --git a/docs/neural-network/optimizers/rms-prop.md b/docs/neural-network/optimizers/rms-prop.md index fdca6fd05..ae6b847bc 100644 --- a/docs/neural-network/optimizers/rms-prop.md +++ b/docs/neural-network/optimizers/rms-prop.md @@ -11,10 +11,10 @@ An adaptive gradient technique that divides the current gradient over a rolling ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\RMSProp; +use Rubix\ML\NeuralNet\Optimizers\RMSProp\RMSProp; $optimizer = new RMSProp(0.01, 0.1); ``` ## References -[^1]: T. Tieleman et al. (2012). Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude. \ No newline at end of file +[^1]: T. Tieleman et al. (2012). Lecture 6e rmsprop: Divide the gradient by a running average of its recent magnitude. 
diff --git a/src/NeuralNet/Optimizers/Base/Adaptive.php b/src/NeuralNet/Optimizers/Base/Adaptive.php new file mode 100644 index 000000000..35ee5323b --- /dev/null +++ b/src/NeuralNet/Optimizers/Base/Adaptive.php @@ -0,0 +1,25 @@ + + */ +interface Adaptive extends Optimizer +{ + /** + * Warm the parameter cache. + * + * @param Parameter $param + */ + public function warm(Parameter $param) : void; +} diff --git a/src/NeuralNet/Optimizers/RMSProp/RMSProp.php b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php new file mode 100644 index 000000000..531f3ad80 --- /dev/null +++ b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php @@ -0,0 +1,158 @@ + + */ +class RMSProp implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The rms decay rate. + * + * @var float + */ + protected float $decay; + + /** + * The opposite of the rms decay rate. + * + * @var float + */ + protected float $rho; + + /** + * The cache of running squared gradients. + * + * @var array + */ + protected array $cache = [ + // + ]; + + /** + * @param float $rate + * @param float $decay + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.001, float $decay = 0.1) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($decay <= 0.0 or $decay >= 1.0) { + throw new InvalidArgumentException( + "Decay must be between 0 and 1, $decay given." + ); + } + + $this->rate = $rate; + $this->decay = $decay; + $this->rho = 1.0 - $decay; + } + + /** + * Warm the parameter cache. + * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $this->cache[$param->id()] = NumPower::zeros($param->param()->shape()); + } + + /** + * Take a step of gradient descent for a given parameter. + * + * RMSProp update (element-wise): + * v_t = ρ · v_{t-1} + (1 − ρ) · g_t^2 + * Δθ_t = η · g_t / max(sqrt(v_t), ε) + * + * where: + * - g_t is the current gradient, + * - v_t is the running average of squared gradients, + * - ρ = 1 − decay, η is the learning rate, + * - ε is a small constant to avoid division by zero (implemented by clipping √v_t to [ε, +∞)). + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $norm = $this->cache[$param->id()]; + + $norm = NumPower::add( + NumPower::multiply($norm, $this->rho), + NumPower::multiply(NumPower::square($gradient), $this->decay) + ); + + $this->cache[$param->id()] = $norm; + + $denominator = NumPower::sqrt($norm); + $denominator = NumPower::clip($denominator, EPSILON, PHP_FLOAT_MAX); + + return NumPower::divide( + NumPower::multiply($gradient, $this->rate), + $denominator + ); + } + + /** + * Return the string representation of the object. 
+ * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "RMS Prop (rate: {$this->rate}, decay: {$this->decay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php new file mode 100644 index 000000000..456bd54c0 --- /dev/null +++ b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php @@ -0,0 +1,127 @@ +optimizer = new RMSProp(rate: 0.001, decay: 0.1); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero rate')] + public function testConstructorWithZeroRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative rate')] + public function testConstructorWithNegativeRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: -0.001); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero decay')] + public function testConstructorWithZeroDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay equal to 1')] + public function testConstructorWithDecayEqualToOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: 1.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay greater than 1')] + public function testConstructorWithDecayGreaterThanOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: 1.5); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative decay')] + public function testConstructorWithNegativeDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new RMSProp(rate: 0.001, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From cccfa79140a0769604b5d92e36dd300a89405dff Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 17:45:33 +0200 Subject: [PATCH 03/17] 390 added math explanation for step() methods --- src/NeuralNet/Optimizers/StepDecay/StepDecay.php | 12 ++++++++++++ src/NeuralNet/Optimizers/Stochastic/Stochastic.php | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/src/NeuralNet/Optimizers/StepDecay/StepDecay.php b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php index 246adc6c7..abfeb6f7e 100644 --- a/src/NeuralNet/Optimizers/StepDecay/StepDecay.php +++ b/src/NeuralNet/Optimizers/StepDecay/StepDecay.php @@ -84,6 +84,18 @@ public function __construct(float $rate = 0.01, int $losses = 100, float $decay /** * Take a step of gradient descent for a given parameter. 
* + * Step Decay update (element-wise): + * floor = ⌊t / k⌋ + * η_t = η₀ / (1 + floor · λ) + * Δθ_t = η_t · g_t + * + * where: + * - t is the current step number, + * - k is the number of steps per floor, + * - η₀ is the initial learning rate, + * - λ is the decay factor, + * - g_t is the current gradient. + * * @internal * * @param Parameter $param diff --git a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php index ffd9daf30..004489a78 100644 --- a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php +++ b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php @@ -44,6 +44,13 @@ public function __construct(float $rate = 0.01) /** * Take a step of gradient descent for a given parameter. * + * SGD update (element-wise): + * Δθ_t = η · g_t + * + * where: + * - g_t is the current gradient, + * - η is the learning rate. + * * @internal * * @param Parameter $param From f1c55e67537832e5e49a299a639fd3cfe163d5d2 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Fri, 7 Nov 2025 23:02:43 +0200 Subject: [PATCH 04/17] 390 convert Momentum to Numpower --- docs/neural-network/optimizers/momentum.md | 2 +- .../Optimizers/Momentum/Momentum.php | 164 ++++++++++++++++++ src/NeuralNet/Optimizers/RMSProp/RMSProp.php | 2 +- .../Optimizers/Momentum/MomentumTest.php | 154 ++++++++++++++++ .../Optimizers/RMSProp/RMSPropTest.php | 27 +++ 5 files changed, 347 insertions(+), 2 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Momentum/Momentum.php create mode 100644 tests/NeuralNet/Optimizers/Momentum/MomentumTest.php diff --git a/docs/neural-network/optimizers/momentum.md b/docs/neural-network/optimizers/momentum.md index 7556ca008..017cf0efa 100644 --- a/docs/neural-network/optimizers/momentum.md +++ b/docs/neural-network/optimizers/momentum.md @@ -12,7 +12,7 @@ Momentum accelerates each update step by accumulating velocity from past updates ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\Momentum; +use Rubix\ML\NeuralNet\Optimizers\Momentum\Momentum; $optimizer = new Momentum(0.01, 0.1, true); ``` diff --git a/src/NeuralNet/Optimizers/Momentum/Momentum.php b/src/NeuralNet/Optimizers/Momentum/Momentum.php new file mode 100644 index 000000000..05e62fa0b --- /dev/null +++ b/src/NeuralNet/Optimizers/Momentum/Momentum.php @@ -0,0 +1,164 @@ + + */ +class Momentum implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The rate at which the momentum force decays. + * + * @var float + */ + protected float $decay; + + /** + * Should we employ Nesterov's lookahead (NAG) when updating the parameters? + * + * @var bool + */ + protected bool $lookahead; + + /** + * The parameter cache of velocity NDArrays. + * + * @var NDArray[] + */ + protected array $cache = [ + // + ]; + + /** + * @param float $rate + * @param float $decay + * @param bool $lookahead + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.001, float $decay = 0.1, bool $lookahead = false) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($decay <= 0.0 or $decay >= 1.0) { + throw new InvalidArgumentException( + "Decay must be between 0 and 1, $decay given." + ); + } + + $this->rate = $rate; + $this->decay = $decay; + $this->lookahead = $lookahead; + } + + /** + * Warm the cache. 
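+ * + * Allocates a zero-filled velocity tensor with the parameter's shape and stores it in the cache under the parameter's id, so the first step() starts from v_0 = 0.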
+ * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $this->cache[$param->id()] = NumPower::zeros($param->param()->shape()); + } + + /** + * Take a step of gradient descent for a given parameter. + * + * Mathematical formulation (per-parameter element): + * - Velocity update: v_t = β · v_{t-1} + η · g_t + * where β = 1 − decay and η = rate, and g_t is the current gradient. + * - Returned step (the amount added to the parameter by the trainer): Δθ_t = v_t + * + * Nesterov lookahead (when lookahead = true): + * - We apply the same velocity update a second time to approximate NAG: + * v_t ← β · v_t + η · g_t + * + * Notes: + * - This method updates and caches the velocity tensor per Parameter id. + * - The actual parameter update is performed by the training loop using the returned velocity. + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $velocity = $this->cache[$param->id()]; + + // velocity = gradient * rate + velocity * (1 - decay) + $velocity = NumPower::add( + NumPower::multiply($gradient, $this->rate), + NumPower::multiply($velocity, 1.0 - $this->decay) + ); + + $this->cache[$param->id()] = $velocity; + + if ($this->lookahead) { + // Apply lookahead: velocity = gradient * rate + velocity * (1 - decay) + $velocity = NumPower::add( + NumPower::multiply($gradient, $this->rate), + NumPower::multiply($velocity, 1.0 - $this->decay) + ); + } + + return $velocity; + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Momentum (rate: {$this->rate}, decay: {$this->decay}," + . ' lookahead: ' . Params::toString($this->lookahead) . ')'; + } +} diff --git a/src/NeuralNet/Optimizers/RMSProp/RMSProp.php b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php index 531f3ad80..7c08aebb2 100644 --- a/src/NeuralNet/Optimizers/RMSProp/RMSProp.php +++ b/src/NeuralNet/Optimizers/RMSProp/RMSProp.php @@ -56,7 +56,7 @@ class RMSProp implements Optimizer, Adaptive /** * The cache of running squared gradients. 
* - * @var array + * @var NDArray[] */ protected array $cache = [ // diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php new file mode 100644 index 000000000..1b2a90378 --- /dev/null +++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php @@ -0,0 +1,154 @@ +optimizer = new Momentum(rate: 0.001, decay: 0.1, lookahead: false); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero rate')] + public function testConstructorWithZeroRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative rate')] + public function testConstructorWithNegativeRate() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: -0.001); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero decay')] + public function testConstructorWithZeroDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay equal to 1')] + public function testConstructorWithDecayEqualToOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: 1.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay greater than 1')] + public function testConstructorWithDecayGreaterThanOne() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: 1.5); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative decay')] + public function testConstructorWithNegativeDecay() : void + { + $this->expectException(InvalidArgumentException::class); + + new Momentum(rate: 0.001, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('Momentum (rate: 0.001, decay: 0.1, lookahead: false)', (string) $this->optimizer); + } + + #[Test] + #[TestDox('Warm initializes a zeroed velocity cache with the parameter\'s shape')] + public function testWarmInitializesZeroedCache() : void + { + $param = new Parameter(NumPower::array([ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ])); + + // Warm the optimizer for this parameter + $this->optimizer->warm($param); + + // Use reflection to read the protected cache + $ref = new \ReflectionClass($this->optimizer); + $prop = $ref->getProperty('cache'); + $prop->setAccessible(true); + $cache = $prop->getValue($this->optimizer); + + self::assertArrayHasKey($param->id(), $cache); + + $velocity = $cache[$param->id()]; + + // Verify the velocity is an all-zeros tensor of the correct shape + $zeros = NumPower::zeros($param->param()->shape()); + self::assertEqualsWithDelta($zeros->toArray(), $velocity->toArray(), 0.0); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} diff --git a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php index 456bd54c0..09d43ac24 100644 --- a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php +++ 
b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php @@ -110,6 +110,33 @@ public function testToString() : void self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer); } + #[Test] + #[TestDox('Warm initializes a zeroed velocity cache with the parameter\'s shape')] + public function testWarmInitializesZeroedCache() : void + { + $param = new Parameter(NumPower::array([ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ])); + + // Warm the optimizer for this parameter + $this->optimizer->warm($param); + + // Use reflection to read the protected cache + $ref = new \ReflectionClass($this->optimizer); + $prop = $ref->getProperty('cache'); + $prop->setAccessible(true); + $cache = $prop->getValue($this->optimizer); + + self::assertArrayHasKey($param->id(), $cache); + + $velocity = $cache[$param->id()]; + + // Verify the velocity is an all-zeros tensor of the correct shape + $zeros = NumPower::zeros($param->param()->shape()); + self::assertEqualsWithDelta($zeros->toArray(), $velocity->toArray(), 0.0); + } + /** * @param Parameter $param * @param NDArray $gradient From 919ce3629cf005ec20fb9f8a4070e2dc2d1835be Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 8 Nov 2025 18:54:42 +0200 Subject: [PATCH 05/17] 390 convert Cyclical to NumPower --- docs/neural-network/optimizers/cyclical.md | 26 ++- .../Optimizers/Cyclical/Cyclical.php | 166 ++++++++++++++++++ .../Optimizers/Cyclical/CyclicalTest.php | 143 +++++++++++++++ 3 files changed, 332 insertions(+), 3 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Cyclical/Cyclical.php create mode 100644 tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php diff --git a/docs/neural-network/optimizers/cyclical.md b/docs/neural-network/optimizers/cyclical.md index 9773004da..f6d3940db 100644 --- a/docs/neural-network/optimizers/cyclical.md +++ b/docs/neural-network/optimizers/cyclical.md @@ -1,8 +1,28 @@ -[source] +[source] # Cyclical The Cyclical optimizer uses a global learning rate that cycles between the lower and upper bound over a designated period while also decaying the upper bound by a factor at each step. Cyclical learning rates have been shown to help escape bad local minima and saddle points of the gradient. +## Mathematical formulation +Per step (element-wise), the cyclical learning rate and update are computed as: + +$$ +\begin{aligned} +\text{cycle} &= \left\lfloor 1 + \frac{t}{2\,\text{steps}} \right\rfloor \\ +x &= \left| \frac{t}{\text{steps}} - 2\,\text{cycle} + 1 \right| \\ +\text{scale} &= \text{decay}^{\,t} \\ +\eta_t &= \text{lower} + (\text{upper} - \text{lower})\,\max\bigl(0,\,1 - x\bigr)\,\text{scale} \\ +\Delta\theta_t &= \eta_t\,g_t +\end{aligned} +$$ + +where: +- `t` is the current step counter, +- `steps` is the number of steps in every half cycle, +- `lower` and `upper` are the learning rate bounds, +- `decay` is the multiplicative decay applied each step, +- `g_t` is the current gradient. + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| @@ -13,10 +33,10 @@ The Cyclical optimizer uses a global learning rate that cycles between the lower ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\Cyclical; +use Rubix\ML\NeuralNet\Optimizers\Cyclical\Cyclical; $optimizer = new Cyclical(0.001, 0.005, 1000); ``` ## References -[^1]: L. N. Smith. (2017). Cyclical Learning Rates for Training Neural Networks. \ No newline at end of file +[^1]: L. N. Smith. (2017). Cyclical Learning Rates for Training Neural Networks.
diff --git a/src/NeuralNet/Optimizers/Cyclical/Cyclical.php b/src/NeuralNet/Optimizers/Cyclical/Cyclical.php new file mode 100644 index 000000000..ac22d9d52 --- /dev/null +++ b/src/NeuralNet/Optimizers/Cyclical/Cyclical.php @@ -0,0 +1,166 @@ + + */ +class Cyclical implements Optimizer +{ + /** + * The lower bound on the learning rate. + * + * @var float + */ + protected float $lower; + + /** + * The upper bound on the learning rate. + * + * @var float + */ + protected float $upper; + + /** + * The range of the learning rate. + * + * @var float + */ + protected float $range; + + /** + * The number of steps in every half cycle. + * + * @var int + */ + protected int $losses; + + /** + * The exponential scaling factor applied to each step as decay. + * + * @var float + */ + protected float $decay; + + /** + * The number of steps taken so far. + * + * @var int + */ + protected int $t = 0; + + /** + * @param float $lower + * @param float $upper + * @param int $losses + * @param float $decay + * @throws InvalidArgumentException + */ + public function __construct( + float $lower = 0.001, + float $upper = 0.006, + int $losses = 2000, + float $decay = 0.99994 + ) { + if ($lower <= 0.0) { + throw new InvalidArgumentException( + "Lower bound must be greater than 0, $lower given." + ); + } + + if ($lower > $upper) { + throw new InvalidArgumentException( + 'Lower bound cannot be greater than the upper bound.' + ); + } + + if ($losses < 1) { + throw new InvalidArgumentException( + "The number of steps per half cycle must be greater than 0, $losses given." + ); + } + + if ($decay <= 0.0 or $decay >= 1.0) { + throw new InvalidArgumentException( + "Decay must be between 0 and 1, $decay given." + ); + } + + $this->lower = $lower; + $this->upper = $upper; + $this->range = $upper - $lower; + $this->losses = $losses; + $this->decay = $decay; + } + + /** + * Take a step of gradient descent for a given parameter. + * + * Cyclical learning rate schedule (per-step, element-wise update): + * - Cycle index: cycle = floor(1 + t / (2 · losses)) + * - Triangular position: x = | t / losses − 2 · cycle + 1 | + * - Exponential decay: scale = decay^t + * - Learning rate at t: η_t = lower + (upper − lower) · max(0, 1 − x) · scale + * - Returned step: Δθ_t = η_t · g_t + * + * where: + * - t is the current step counter (incremented after computing η_t), + * - losses is the number of steps per half cycle, + * - lower and upper are the learning rate bounds, + * - decay is the multiplicative decay applied each step, + * - g_t is the current gradient. + * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + $cycle = floor(1 + $this->t / (2 * $this->losses)); + + $x = abs($this->t / $this->losses - 2 * $cycle + 1); + + $scale = $this->decay ** $this->t; + + $rate = $this->lower + $this->range * max(0, 1 - $x) * $scale; + + ++$this->t; + + return NumPower::multiply($gradient, $rate); + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Cyclical (lower: {$this->lower}, upper: {$this->upper}," + .
" steps: {$this->losses}, decay: {$this->decay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php new file mode 100644 index 000000000..7d1691fe8 --- /dev/null +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -0,0 +1,143 @@ +optimizer = new Cyclical(lower: 0.001, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero lower bound')] + public function testConstructorWithZeroLower() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.0, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative lower bound')] + public function testConstructorWithNegativeLower() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: -0.001, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when lower bound is greater than upper bound')] + public function testConstructorWithLowerGreaterThanUpper() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.01, upper: 0.006, losses: 2000); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero steps per cycle')] + public function testConstructorWithZeroSteps() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 0); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative steps per cycle')] + public function testConstructorWithNegativeSteps() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: -5); + } + + #[Test] + #[TestDox('Throws exception when constructed with zero decay')] + public function testConstructorWithZeroDecay() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 0.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay equal to 1')] + public function testConstructorWithDecayEqualToOne() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.0); + } + + #[Test] + #[TestDox('Throws exception when constructed with decay greater than 1')] + public function testConstructorWithDecayGreaterThanOne() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.5); + } + + #[Test] + #[TestDox('Throws exception when constructed with negative decay')] + public function testConstructorWithNegativeDecay() : void + { + $this->expectException(InvalidArgumentException::class); + new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: -0.1); + } + + #[Test] + #[TestDox('Can be cast to a string')] + public function testToString() : void + { + self::assertEquals('Cyclical (lower: 0.001, upper: 0.006, steps: 2000, decay: 0.99994)', (string) $this->optimizer); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From d806494ef7db5ca520d1c121bda49f1d63724fb4 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan 
Date: Sat, 8 Nov 2025 18:59:59 +0200 Subject: [PATCH 06/17] 390 added math formulas to momentum.md --- docs/neural-network/optimizers/momentum.md | 27 +++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/neural-network/optimizers/momentum.md b/docs/neural-network/optimizers/momentum.md index 017cf0efa..f949a4115 100644 --- a/docs/neural-network/optimizers/momentum.md +++ b/docs/neural-network/optimizers/momentum.md @@ -1,8 +1,33 @@ -[source] +[source] # Momentum Momentum accelerates each update step by accumulating velocity from past updates and adding a factor of the previous velocity to the current step. Momentum can help speed up training and escape bad local minima when compared with [Stochastic](stochastic.md) Gradient Descent. +## Mathematical formulation +Per step (element-wise), Momentum updates the velocity and applies it as the parameter step: + +$$ +\begin{aligned} +\beta &= 1 - \text{decay}, \quad \eta = \text{rate} \\ +\text{Velocity update:}\quad v_t &= \beta\,v_{t-1} + \eta\,g_t \\ +\text{Returned step:}\quad \Delta\theta_t &= v_t +\end{aligned} +$$ + +Nesterov lookahead (when `lookahead = true`) is approximated by applying the velocity update a second time: + +$$ +\begin{aligned} +v_t &\leftarrow \beta\,v_t + \eta\,g_t +\end{aligned} +$$ + +where: +- `g_t` is the current gradient, +- `v_t` is the velocity (accumulated update), +- `\beta` is the momentum coefficient (`1 − decay`), +- `\eta` is the learning rate (`rate`). + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| From 3fa08ec89e9132a43c5ab4e34319404ce8f140d9 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 8 Nov 2025 19:10:18 +0200 Subject: [PATCH 07/17] 390 added math formulas to rms-prop.md --- docs/neural-network/optimizers/cyclical.md | 10 +++++----- docs/neural-network/optimizers/momentum.md | 8 ++++---- docs/neural-network/optimizers/rms-prop.md | 22 ++++++++++++++++++++-- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/docs/neural-network/optimizers/cyclical.md b/docs/neural-network/optimizers/cyclical.md index f6d3940db..eed8b2779 100644 --- a/docs/neural-network/optimizers/cyclical.md +++ b/docs/neural-network/optimizers/cyclical.md @@ -17,11 +17,11 @@ x &= \left| \frac{t}{\text{steps}} - 2\,\text{cycle} + 1 \right| \\ $$ where: -- `t` is the current step counter, -- `steps` is the number of steps in every half cycle, -- `lower` and `upper` are the learning rate bounds, -- `decay` is the multiplicative decay applied each step, -- `g_t` is the current gradient. +- $t$ is the current step counter, +- $steps$ is the number of steps in every half cycle, +- $lower$ and $upper$ are the learning rate bounds, +- $decay$ is the multiplicative decay applied each step, +- $g_t$ is the current gradient. ## Parameters | # | Name | Default | Type | Description | diff --git a/docs/neural-network/optimizers/momentum.md b/docs/neural-network/optimizers/momentum.md index f949a4115..e9c787a2f 100644 --- a/docs/neural-network/optimizers/momentum.md +++ b/docs/neural-network/optimizers/momentum.md @@ -23,10 +23,10 @@ v_t &\leftarrow \beta\,v_t + \eta\,g_t $$ where: -- `g_t` is the current gradient, -- `v_t` is the velocity (accumulated update), -- `\beta` is the momentum coefficient (`1 − decay`), -- `\eta` is the learning rate (`rate`). +- $g_t$ is the current gradient, +- $v_t$ is the velocity (accumulated update), +- $\beta$ is the momentum coefficient ($1 − decay$), +- $\eta$ is the learning rate ($rate$). 
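+ +As a quick numerical check of the formulas above: with $\eta = 0.001$ and $\text{decay} = 0.1$ (so $\beta = 0.9$), a first gradient $g_1 = 0.01$ applied to $v_0 = 0$ gives $v_1 = 0.9 \cdot 0 + 0.001 \cdot 0.01 = 10^{-5}$, which is also the returned step $\Delta\theta_1$.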
## Parameters | # | Name | Default | Type | Description | diff --git a/docs/neural-network/optimizers/rms-prop.md b/docs/neural-network/optimizers/rms-prop.md index ae6b847bc..c531a863e 100644 --- a/docs/neural-network/optimizers/rms-prop.md +++ b/docs/neural-network/optimizers/rms-prop.md @@ -1,7 +1,25 @@ -[source] +[source] # RMS Prop -An adaptive gradient technique that divides the current gradient over a rolling window of the magnitudes of recent gradients. Unlike [AdaGrad](adagrad.md), RMS Prop does not suffer from an infinitely decaying step size. +An adaptive gradient technique that divides the current gradient over a rolling window of magnitudes of recent gradients. Unlike [AdaGrad](adagrad.md), RMS Prop does not suffer from an infinitely decaying step size. + +## Mathematical formulation +Per step (element-wise), RMSProp maintains a running average of squared gradients and scales the step by the root-mean-square: + +$$ +\begin{aligned} +\rho &= 1 - \text{decay}, \quad \eta = \text{rate} \\ +\text{Running average:}\quad v_t &= \rho\,v_{t-1} + (1 - \rho)\,g_t^{\,2} \\ +\text{Returned step:}\quad \Delta\theta_t &= \frac{\eta\,g_t}{\max\bigl(\sqrt{v_t},\,\varepsilon\bigr)} +\end{aligned} +$$ + +where: +- $g_t$ is the current gradient, +- $v_t$ is the running average of squared gradients, +- $\rho$ is the averaging coefficient ($1 − decay$), +- $\eta$ is the learning rate ($rate$), +- $\varepsilon$ is a small constant to avoid division by zero (implemented by clipping $\sqrt{v_t}$ to $[ε, +∞)$). ## Parameters | # | Name | Default | Type | Description | From 537b586d807fae161b3c6f7760d3a495e78cce52 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Sat, 8 Nov 2025 19:19:08 +0200 Subject: [PATCH 08/17] 390 added math formulas to stochastic.md --- docs/neural-network/optimizers/step-decay.md | 20 ++++++++++++++++++- docs/neural-network/optimizers/stochastic.md | 14 +++++++++++++ .../Optimizers/Stochastic/Stochastic.php | 4 +++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/neural-network/optimizers/step-decay.md b/docs/neural-network/optimizers/step-decay.md index 0ec9395cc..f5da99c8b 100644 --- a/docs/neural-network/optimizers/step-decay.md +++ b/docs/neural-network/optimizers/step-decay.md @@ -1,8 +1,26 @@ -[source] +[source] # Step Decay A learning rate decay optimizer that reduces the global learning rate by a factor whenever it reaches a new *floor*. The number of steps needed to reach a new floor is defined by the *steps* hyper-parameter. +## Mathematical formulation +Per step (element-wise), the Step Decay learning rate and update are: + +$$ +\begin{aligned} +\text{floor} &= \left\lfloor \frac{t}{k} \right\rfloor \\ +\eta_t &= \frac{\eta_0}{1 + \text{floor}\cdot \lambda} \\ +\Delta\theta_t &= \eta_t\,g_t +\end{aligned} +$$ + +where: +- $t$ is the current step number, +- $k$ is the number of steps per floor, +- $\eta_0$ is the initial learning rate ($rate$), +- $\lambda$ is the decay factor ($decay$), +- $g_t$ is the current gradient. + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| diff --git a/docs/neural-network/optimizers/stochastic.md b/docs/neural-network/optimizers/stochastic.md index 4422e0ddc..bb0096b87 100644 --- a/docs/neural-network/optimizers/stochastic.md +++ b/docs/neural-network/optimizers/stochastic.md @@ -3,6 +3,20 @@ # Stochastic A constant learning rate optimizer based on vanilla Stochastic Gradient Descent (SGD).
+## Mathematical formulation +Per step (element-wise), the SGD update scales the gradient by a constant learning rate: + +$$ +\begin{aligned} +\eta &= \text{rate} \\ +\Delta\theta_t &= \eta\,g_t +\end{aligned} +$$ + +where: +- $g_t$ is the current gradient, +- $\eta$ is the learning rate ($rate$). + ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| diff --git a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php index 004489a78..b2cd6ebac 100644 --- a/src/NeuralNet/Optimizers/Stochastic/Stochastic.php +++ b/src/NeuralNet/Optimizers/Stochastic/Stochastic.php @@ -35,7 +35,9 @@ class Stochastic implements Optimizer public function __construct(float $rate = 0.01) { if ($rate <= 0.0) { - throw new InvalidArgumentException("Learning rate must be greater than 0, $rate given."); + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); } $this->rate = $rate; From 331fb3639329c9c021e1a783ea20d8df59815f43 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 16:59:27 +0200 Subject: [PATCH 09/17] 390 convert Adam to NumPower --- docs/neural-network/optimizers/adam.md | 27 ++- src/NeuralNet/Optimizers/Adam/Adam.php | 181 +++++++++++++++++++ tests/NeuralNet/Optimizers/Adam/AdamTest.php | 101 +++++++++++ 3 files changed, 306 insertions(+), 3 deletions(-) create mode 100644 src/NeuralNet/Optimizers/Adam/Adam.php create mode 100644 tests/NeuralNet/Optimizers/Adam/AdamTest.php diff --git a/docs/neural-network/optimizers/adam.md b/docs/neural-network/optimizers/adam.md index 3b9898649..0470a9d4a 100644 --- a/docs/neural-network/optimizers/adam.md +++ b/docs/neural-network/optimizers/adam.md @@ -1,8 +1,29 @@ -[source] +[source] # Adam Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentum and RMS properties. In addition to storing an exponentially decaying average of past squared gradients like [RMSprop](rms-prop.md), Adam also keeps an exponentially decaying average of past gradients, similar to [Momentum](momentum.md). Whereas Momentum can be seen as a ball running down a slope, Adam behaves like a heavy ball with friction. +## Mathematical formulation +Per step (element-wise), Adam maintains exponentially decaying moving averages of the gradient and its element-wise square and uses them to scale the update: + +$$ +\begin{aligned} +\mathbf{v}_t &= (1 - \beta_1)\,\mathbf{v}_{t-1} + \beta_1\,\mathbf{g}_t \\ +\mathbf{n}_t &= (1 - \beta_2)\,\mathbf{n}_{t-1} + \beta_2\,\mathbf{g}_t^{2} \\ +\Delta{\theta}_t &= \alpha\, \frac{\mathbf{v}_t}{\sqrt{\mathbf{n}_t} + \varepsilon} +\end{aligned} +$$ + +where: +- $t$ is the current step, +- $\alpha$ is the learning rate (`rate`), +- $\beta_1$ is the momentum decay (`momentumDecay`), +- $\beta_2$ is the norm decay (`normDecay`), +- $\mathbf{g}_t$ is the current gradient, and $\mathbf{g}_t^{2}$ denotes element-wise square, +- $\varepsilon$ is a small constant for numerical stability (in the implementation, the denominator is clipped from below by `EPSILON`). + +Note: This formulation follows the implementation in Rubix ML and does not include bias-correction terms. 
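+ +As a quick numerical check of the formulas above: with $\alpha = 0.001$, $\beta_1 = 0.1$, $\beta_2 = 0.001$ and zero-initialized averages, a first gradient $g_1 = 0.01$ gives $\mathbf{v}_1 = 10^{-3}$, $\mathbf{n}_1 = 10^{-7}$, and $\Delta\theta_1 = 10^{-3} \cdot 10^{-3} / \sqrt{10^{-7}} \approx 3.16 \times 10^{-3}$.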
+ ## Parameters | # | Name | Default | Type | Description | |---|---|---|---|---| @@ -12,10 +33,10 @@ Short for *Adaptive Moment Estimation*, the Adam Optimizer combines both Momentu ## Example ```php -use Rubix\ML\NeuralNet\Optimizers\Adam; +use Rubix\ML\NeuralNet\Optimizers\Adam\Adam; $optimizer = new Adam(0.0001, 0.1, 0.001); ``` ## References -[^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. \ No newline at end of file +[^1]: D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization. diff --git a/src/NeuralNet/Optimizers/Adam/Adam.php b/src/NeuralNet/Optimizers/Adam/Adam.php new file mode 100644 index 000000000..fad8ac1bf --- /dev/null +++ b/src/NeuralNet/Optimizers/Adam/Adam.php @@ -0,0 +1,181 @@ + + */ +class Adam implements Optimizer, Adaptive +{ + /** + * The learning rate that controls the global step size. + * + * @var float + */ + protected float $rate; + + /** + * The momentum decay rate. + * + * @var float + */ + protected float $momentumDecay; + + /** + * The decay rate of the previous norms. + * + * @var float + */ + protected float $normDecay; + + /** + * The parameter cache of running velocity and squared gradients. + * + * @var array{0: NDArray, 1: NDArray}[] + */ + protected array $cache = [ + // id => [velocity, norm] + ]; + + /** + * @param float $rate + * @param float $momentumDecay + * @param float $normDecay + * @throws InvalidArgumentException + */ + public function __construct(float $rate = 0.001, float $momentumDecay = 0.1, float $normDecay = 0.001) + { + if ($rate <= 0.0) { + throw new InvalidArgumentException( + "Learning rate must be greater than 0, $rate given." + ); + } + + if ($momentumDecay <= 0.0 or $momentumDecay >= 1.0) { + throw new InvalidArgumentException( + "Momentum decay must be between 0 and 1, $momentumDecay given." + ); + } + + if ($normDecay <= 0.0 or $normDecay >= 1.0) { + throw new InvalidArgumentException( + "Norm decay must be between 0 and 1, $normDecay given." + ); + } + + $this->rate = $rate; + $this->momentumDecay = $momentumDecay; + $this->normDecay = $normDecay; + } + + /** + * Warm the parameter cache. + * + * @internal + * + * @param Parameter $param + * @throws RuntimeException + */ + public function warm(Parameter $param) : void + { + $class = get_class($param->param()); + + if (!$class) { + throw new RuntimeException('Could not locate parameter class.'); + } + + $zeros = NumPower::zeros($param->param()->shape()); + + $this->cache[$param->id()] = [clone $zeros, $zeros]; + } + + /** + * Take a step of gradient descent for a given parameter. + * + * Adam update (element-wise): + * v_t = v_{t-1} + β1 · (g_t − v_{t-1}) // exponential moving average of gradients + * n_t = n_{t-1} + β2 · (g_t^2 − n_{t-1}) // exponential moving average of squared gradients + * Δθ_t = η · v_t / max(√n_t, ε) + * + * where: + * - g_t is the current gradient, + * - v_t is the running average of gradients ("velocity"), β1 = momentumDecay, + * - n_t is the running average of squared gradients ("norm"), β2 = normDecay, + * - η is the learning rate (rate), ε is a small constant to avoid division by zero (implemented by clipping √n_t to [ε, +∞)). 
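+ * + * Note: from a zero-initialized cache the first update reduces to + * Δθ_1 = η · β1 / √β2 · sign(g_1) ≈ 0.00316 · sign(g_1) with the defaults, + * independent of the gradient's magnitude.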
+ * + * @internal + * + * @param Parameter $param + * @param NDArray $gradient + * @return NDArray + */ + public function step(Parameter $param, NDArray $gradient) : NDArray + { + [$velocity, $norm] = $this->cache[$param->id()]; + + $vHat = NumPower::multiply( + NumPower::subtract($gradient, $velocity), + $this->momentumDecay + ); + + $velocity = NumPower::add($velocity, $vHat); + + $nHat = NumPower::multiply( + NumPower::subtract(NumPower::square($gradient), $norm), + $this->normDecay + ); + + $norm = NumPower::add($norm, $nHat); + + $this->cache[$param->id()] = [$velocity, $norm]; + + $denominator = NumPower::sqrt($norm); + $denominator = NumPower::clip($denominator, EPSILON, PHP_FLOAT_MAX); + + return NumPower::divide( + NumPower::multiply($velocity, $this->rate), + $denominator + ); + } + + /** + * Return the string representation of the object. + * + * @internal + * + * @return string + */ + public function __toString() : string + { + return "Adam (rate: {$this->rate}, momentum decay: {$this->momentumDecay}," + . " norm decay: {$this->normDecay})"; + } +} diff --git a/tests/NeuralNet/Optimizers/Adam/AdamTest.php b/tests/NeuralNet/Optimizers/Adam/AdamTest.php new file mode 100644 index 000000000..dbd6cedd6 --- /dev/null +++ b/tests/NeuralNet/Optimizers/Adam/AdamTest.php @@ -0,0 +1,101 @@ += 1) + yield [0.001, 0.0, 0.001]; + yield [0.001, -0.1, 0.001]; + yield [0.001, 1.0, 0.001]; + yield [0.001, 1.1, 0.001]; + + // Invalid normDecay (<= 0 or >= 1) + yield [0.001, 0.1, 0.0]; + yield [0.001, 0.1, -0.1]; + yield [0.001, 0.1, 1.0]; + yield [0.001, 0.1, 1.1]; + } + + public static function stepProvider() : Generator + { + yield [ + new Parameter(NumPower::array([ + [0.1, 0.6, -0.4], + [0.5, 0.6, -0.4], + [0.1, 0.1, -0.7], + ])), + NumPower::array([ + [0.01, 0.05, -0.02], + [-0.01, 0.02, 0.03], + [0.04, -0.01, -0.5], + ]), + [ + [0.0031622, 0.0031622, -0.0031622], + [-0.0031622, 0.0031622, 0.0031622], + [0.0031622, -0.0031622, -0.0031622], + ], + ]; + } + + protected function setUp() : void + { + $this->optimizer = new Adam( + rate: 0.001, + momentumDecay: 0.1, + normDecay: 0.001 + ); + } + + public function testToString() : void + { + $expected = 'Adam (rate: 0.001, momentum decay: 0.1, norm decay: 0.001)'; + self::assertSame($expected, (string) $this->optimizer); + } + + #[DataProvider('invalidConstructorProvider')] + public function testInvalidConstructorParams(float $rate, float $momentumDecay, float $normDecay) : void + { + $this->expectException(InvalidArgumentException::class); + new Adam(rate: $rate, momentumDecay: $momentumDecay, normDecay: $normDecay); + } + + /** + * @param Parameter $param + * @param NDArray $gradient + * @param list> $expected + */ + #[DataProvider('stepProvider')] + public function testStep(Parameter $param, NDArray $gradient, array $expected) : void + { + $this->optimizer->warm($param); + + $step = $this->optimizer->step(param: $param, gradient: $gradient); + + self::assertEqualsWithDelta($expected, $step->toArray(), 1e-7); + } +} From 47ad66599d0a102df19a1dd8eda120a80d8e18c4 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:02:17 +0200 Subject: [PATCH 10/17] 390 refactoring CyclicalTest - added dataprovider for constructor tests --- .../Optimizers/Cyclical/CyclicalTest.php | 100 ++++++------------ 1 file changed, 31 insertions(+), 69 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php index 7d1691fe8..5bab9a6c1 100644 --- 
a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -23,6 +23,19 @@ class CyclicalTest extends TestCase { protected Cyclical $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero lower' => [0.0, 0.006, 2000, null]; + yield 'negative lower' => [-0.001, 0.006, 2000, null]; + yield 'lower > upper' => [0.01, 0.006, 2000, null]; + yield 'zero steps' => [0.001, 0.006, 0, null]; + yield 'negative steps' => [0.001, 0.006, -5, null]; + yield 'zero decay' => [0.001, 0.006, 2000, 0.0]; + yield 'decay == 1' => [0.001, 0.006, 2000, 1.0]; + yield 'decay > 1' => [0.001, 0.006, 2000, 1.5]; + yield 'negative decay' => [0.001, 0.006, 2000, -0.1]; + } + public static function stepProvider() : Generator { yield [ @@ -50,82 +63,31 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with zero lower bound')] - public function testConstructorWithZeroLower() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.0, upper: 0.006, losses: 2000); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative lower bound')] - public function testConstructorWithNegativeLower() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: -0.001, upper: 0.006, losses: 2000); - } - - #[Test] - #[TestDox('Throws exception when lower bound is greater than upper bound')] - public function testConstructorWithLowerGreaterThanUpper() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.01, upper: 0.006, losses: 2000); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero steps per cycle')] - public function testConstructorWithZeroSteps() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 0); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative steps per cycle')] - public function testConstructorWithNegativeSteps() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: -5); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero decay')] - public function testConstructorWithZeroDecay() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay equal to 1')] - public function testConstructorWithDecayEqualToOne() : void - { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay greater than 1')] - public function testConstructorWithDecayGreaterThanOne() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: 1.5); + self::assertEquals('Cyclical (lower: 0.001, upper: 0.006, steps: 2000, decay: 0.99994)', (string) $this->optimizer); } + /** + * @param float $lower + * @param float $upper + * @param int $losses + * @param float|null $decay + * @return void + */ #[Test] - #[TestDox('Throws exception when constructed with negative decay')] - public function testConstructorWithNegativeDecay() : void + 
#[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] + public function testConstructorInvalidArgs(float $lower, float $upper, int $losses, ?float $decay) : void { $this->expectException(InvalidArgumentException::class); - new Cyclical(lower: 0.001, upper: 0.006, losses: 2000, decay: -0.1); - } - #[Test] - #[TestDox('Can be cast to a string')] - public function testToString() : void - { - self::assertEquals('Cyclical (lower: 0.001, upper: 0.006, steps: 2000, decay: 0.99994)', (string) $this->optimizer); + if ($decay === null) { + new Cyclical(lower: $lower, upper: $upper, losses: $losses); + } else { + new Cyclical(lower: $lower, upper: $upper, losses: $losses, decay: $decay); + } } /** From 3575565b042faa80eae294374479cecf9315e652 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:05:06 +0200 Subject: [PATCH 11/17] 390 refactoring CyclicalTest - added dataprovider for constructor tests --- tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php index 5bab9a6c1..aa7102f0f 100644 --- a/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php +++ b/tests/NeuralNet/Optimizers/Cyclical/CyclicalTest.php @@ -95,7 +95,9 @@ public function testConstructorInvalidArgs(float $lower, float $upper, int $loss * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $step = $this->optimizer->step(param: $param, gradient: $gradient); From 8677c7670eaefb9add6b8d4d8c90bdce92239c4f Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:05:15 +0200 Subject: [PATCH 12/17] 390 refactoring AdamTest - added dataprovider for constructor tests --- tests/NeuralNet/Optimizers/Adam/AdamTest.php | 34 ++++++++++++++------ 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Adam/AdamTest.php b/tests/NeuralNet/Optimizers/Adam/AdamTest.php index dbd6cedd6..04444001d 100644 --- a/tests/NeuralNet/Optimizers/Adam/AdamTest.php +++ b/tests/NeuralNet/Optimizers/Adam/AdamTest.php @@ -9,6 +9,8 @@ use PHPUnit\Framework\Attributes\Group; use NDArray; use NumPower; +use PHPUnit\Framework\Attributes\Test; +use PHPUnit\Framework\Attributes\TestDox; use Rubix\ML\Exceptions\InvalidArgumentException; use Rubix\ML\NeuralNet\Parameters\Parameter; use Rubix\ML\NeuralNet\Optimizers\Adam\Adam; @@ -25,20 +27,20 @@ class AdamTest extends TestCase public static function invalidConstructorProvider() : Generator { // Invalid rates (<= 0) - yield [0.0, 0.1, 0.001]; - yield [-0.5, 0.1, 0.001]; + yield 'zero rate' => [0.0, 0.1, 0.001]; + yield 'negative rate' => [-0.5, 0.1, 0.001]; // Invalid momentumDecay (<= 0 or >= 1) - yield [0.001, 0.0, 0.001]; - yield [0.001, -0.1, 0.001]; - yield [0.001, 1.0, 0.001]; - yield [0.001, 1.1, 0.001]; + yield 'zero momentumDecay' => [0.001, 0.0, 0.001]; + yield 'negative momentumDecay' => [0.001, -0.1, 0.001]; + yield 'momentumDecay == 1' => [0.001, 1.0, 0.001]; + yield 'momentumDecay > 1' => [0.001, 1.1, 0.001]; // Invalid normDecay (<= 0 or >= 1) - yield [0.001, 0.1, 0.0]; - yield [0.001, 0.1, -0.1]; - yield [0.001, 0.1, 1.0]; - yield [0.001, 0.1, 1.1]; + yield 'zero normDecay' => [0.001, 0.1, 0.0]; + yield 'negative normDecay' => [0.001, 0.1, -0.1]; + yield 
'normDecay == 1' => [0.001, 0.1, 1.0]; + yield 'normDecay > 1' => [0.001, 0.1, 1.1]; } public static function stepProvider() : Generator @@ -71,13 +73,23 @@ protected function setUp() : void ); } + #[Test] + #[TestDox('Can be cast to a string')] public function testToString() : void { $expected = 'Adam (rate: 0.001, momentum decay: 0.1, norm decay: 0.001)'; self::assertSame($expected, (string) $this->optimizer); } + /** + * @param float $rate + * @param float $momentumDecay + * @param float $normDecay + * @return void + */ + #[Test] #[DataProvider('invalidConstructorProvider')] + #[TestDox('Throws exception when constructed with invalid arguments')] public function testInvalidConstructorParams(float $rate, float $momentumDecay, float $normDecay) : void { $this->expectException(InvalidArgumentException::class); @@ -89,7 +101,9 @@ public function testInvalidConstructorParams(float $rate, float $momentumDecay, * @param NDArray $gradient * @param list> $expected */ + #[Test] #[DataProvider('stepProvider')] + #[TestDox('Can compute the step')] public function testStep(Parameter $param, NDArray $gradient, array $expected) : void { $this->optimizer->warm($param); From 269405beeb16678e1a676213bc848f437aaec180 Mon Sep 17 00:00:00 2001 From: Samuel Akopyan Date: Tue, 11 Nov 2025 17:07:48 +0200 Subject: [PATCH 13/17] 390 refactoring MomentumTest - added dataprovider for constructor tests --- .../Optimizers/Momentum/MomentumTest.php | 74 ++++++------------- 1 file changed, 23 insertions(+), 51 deletions(-) diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php index 1b2a90378..fb84d6d5c 100644 --- a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php +++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php @@ -23,6 +23,16 @@ class MomentumTest extends TestCase { protected Momentum $optimizer; + public static function invalidConstructorProvider() : Generator + { + yield 'zero rate' => [0.0, 0.1]; + yield 'negative rate' => [-0.001, 0.1]; + yield 'zero decay' => [0.001, 0.0]; + yield 'decay == 1' => [0.001, 1.0]; + yield 'decay > 1' => [0.001, 1.5]; + yield 'negative decay' => [0.001, -0.1]; + } + public static function stepProvider() : Generator { yield [ @@ -50,64 +60,25 @@ protected function setUp() : void } #[Test] - #[TestDox('Throws exception when constructed with zero rate')] - public function testConstructorWithZeroRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with negative rate')] - public function testConstructorWithNegativeRate() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: -0.001); - } - - #[Test] - #[TestDox('Throws exception when constructed with zero decay')] - public function testConstructorWithZeroDecay() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.001, decay: 0.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay equal to 1')] - public function testConstructorWithDecayEqualToOne() : void - { - $this->expectException(InvalidArgumentException::class); - - new Momentum(rate: 0.001, decay: 1.0); - } - - #[Test] - #[TestDox('Throws exception when constructed with decay greater than 1')] - public function testConstructorWithDecayGreaterThanOne() : void + #[TestDox('Can be cast to a string')] + public function testToString() : void { - 
-
-        new Momentum(rate: 0.001, decay: 1.5);
+        self::assertEquals('Momentum (rate: 0.001, decay: 0.1, lookahead: false)', (string) $this->optimizer);
     }

+    /**
+     * @param float $rate
+     * @param float $decay
+     * @return void
+     */
     #[Test]
-    #[TestDox('Throws exception when constructed with negative decay')]
-    public function testConstructorWithNegativeDecay() : void
+    #[DataProvider('invalidConstructorProvider')]
+    #[TestDox('Throws exception when constructed with invalid arguments')]
+    public function testInvalidConstructorParams(float $rate, float $decay) : void
     {
         $this->expectException(InvalidArgumentException::class);

-        new Momentum(rate: 0.001, decay: -0.1);
-    }
-
-    #[Test]
-    #[TestDox('Can be cast to a string')]
-    public function testToString() : void
-    {
-        self::assertEquals('Momentum (rate: 0.001, decay: 0.1, lookahead: false)', (string) $this->optimizer);
+        new Momentum(rate: $rate, decay: $decay);
     }

     #[Test]
@@ -143,6 +114,7 @@ public function testWarmInitializesZeroedCache() : void
      * @param list<list<float>> $expected
      */
     #[DataProvider('stepProvider')]
+    #[TestDox('Can compute the step')]
     public function testStep(Parameter $param, NDArray $gradient, array $expected) : void
     {
         $this->optimizer->warm($param);

From aca753eeaf5420b656ea8631061cb2e5f437cb56 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan
Date: Tue, 11 Nov 2025 17:09:57 +0200
Subject: [PATCH 14/17] 390 refactoring RMSPropTest - added dataprovider for constructor tests

---
 .../Optimizers/Momentum/MomentumTest.php      |  1 +
 .../Optimizers/RMSProp/RMSPropTest.php        | 70 +++++--------------
 2 files changed, 20 insertions(+), 51 deletions(-)

diff --git a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php
index fb84d6d5c..1b486efa5 100644
--- a/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php
+++ b/tests/NeuralNet/Optimizers/Momentum/MomentumTest.php
@@ -113,6 +113,7 @@ public function testWarmInitializesZeroedCache() : void
      * @param NDArray $gradient
      * @param list<list<float>> $expected
      */
+    #[Test]
     #[DataProvider('stepProvider')]
     #[TestDox('Can compute the step')]
     public function testStep(Parameter $param, NDArray $gradient, array $expected) : void
diff --git a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php
index 09d43ac24..f47e4f2b3 100644
--- a/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php
+++ b/tests/NeuralNet/Optimizers/RMSProp/RMSPropTest.php
@@ -23,6 +23,16 @@ class RMSPropTest extends TestCase
 {
     protected RMSProp $optimizer;

+    public static function invalidConstructorProvider() : Generator
+    {
+        yield 'zero rate' => [0.0, 0.1];
+        yield 'negative rate' => [-0.001, 0.1];
+        yield 'zero decay' => [0.001, 0.0];
+        yield 'decay == 1' => [0.001, 1.0];
+        yield 'decay > 1' => [0.001, 1.5];
+        yield 'negative decay' => [0.001, -0.1];
+    }
+
     public static function stepProvider() : Generator
     {
         yield [
@@ -50,64 +60,20 @@ protected function setUp() : void
     }

     #[Test]
-    #[TestDox('Throws exception when constructed with zero rate')]
-    public function testConstructorWithZeroRate() : void
-    {
-        $this->expectException(InvalidArgumentException::class);
-
-        new RMSProp(rate: 0.0);
-    }
-
-    #[Test]
-    #[TestDox('Throws exception when constructed with negative rate')]
-    public function testConstructorWithNegativeRate() : void
-    {
-        $this->expectException(InvalidArgumentException::class);
-
-        new RMSProp(rate: -0.001);
-    }
-
-    #[Test]
-    #[TestDox('Throws exception when constructed with zero decay')]
-    public function testConstructorWithZeroDecay() : void
-    {
-        $this->expectException(InvalidArgumentException::class);
-
-        new RMSProp(rate: 0.001, decay: 0.0);
-    }
-
-    #[Test]
-    #[TestDox('Throws exception when constructed with decay equal to 1')]
-    public function testConstructorWithDecayEqualToOne() : void
-    {
-        $this->expectException(InvalidArgumentException::class);
-
-        new RMSProp(rate: 0.001, decay: 1.0);
-    }
-
-    #[Test]
-    #[TestDox('Throws exception when constructed with decay greater than 1')]
-    public function testConstructorWithDecayGreaterThanOne() : void
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
     {
-        $this->expectException(InvalidArgumentException::class);
-
-        new RMSProp(rate: 0.001, decay: 1.5);
+        self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer);
     }

     #[Test]
-    #[TestDox('Throws exception when constructed with negative decay')]
-    public function testConstructorWithNegativeDecay() : void
+    #[DataProvider('invalidConstructorProvider')]
+    #[TestDox('Throws exception when constructed with invalid arguments')]
+    public function testInvalidConstructorParams(float $rate, float $decay) : void
     {
         $this->expectException(InvalidArgumentException::class);

-        new RMSProp(rate: 0.001, decay: -0.1);
-    }
-
-    #[Test]
-    #[TestDox('Can be cast to a string')]
-    public function testToString() : void
-    {
-        self::assertEquals('RMS Prop (rate: 0.001, decay: 0.1)', (string) $this->optimizer);
+        new RMSProp(rate: $rate, decay: $decay);
     }

     #[Test]
@@ -142,7 +108,9 @@ public function testWarmInitializesZeroedCache() : void
      * @param NDArray $gradient
      * @param list<list<float>> $expected
      */
+    #[Test]
     #[DataProvider('stepProvider')]
+    #[TestDox('Can compute the step')]
     public function testStep(Parameter $param, NDArray $gradient, array $expected) : void
     {
         $this->optimizer->warm($param);

From e9c48315a21f9cb66160ab1f3a8b3059db1e19cc Mon Sep 17 00:00:00 2001
From: Samuel Akopyan
Date: Tue, 11 Nov 2025 17:12:11 +0200
Subject: [PATCH 15/17] 390 refactoring StepDecayTest - added dataprovider for constructor tests

---
 .../Optimizers/StepDecay/StepDecayTest.php    | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php
index f3535552b..7d581e31b 100644
--- a/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php
+++ b/tests/NeuralNet/Optimizers/StepDecay/StepDecayTest.php
@@ -24,6 +24,15 @@ class StepDecayTest extends TestCase
 {
     protected StepDecay $optimizer;

+    public static function invalidConstructorProvider() : Generator
+    {
+        yield 'zero rate' => [0.0, 100, 0.001];
+        yield 'negative rate' => [-0.001, 100, 0.001];
+        yield 'zero losses' => [0.01, 0, 0.001];
+        yield 'negative losses' => [0.01, -5, 0.001];
+        yield 'negative decay' => [0.01, 100, -0.1];
+    }
+
     public static function stepProvider() : Generator
     {
         yield [
@@ -51,37 +60,26 @@ protected function setUp() : void
     }

     #[Test]
-    #[TestDox('Throws exception when constructed with invalid learning rate')]
-    public function testConstructorWithInvalidRate() : void
-    {
-        $this->expectException(InvalidArgumentException::class);
-
-        new StepDecay(rate: 0.0);
-    }
-
-    #[Test]
-    #[TestDox('Throws exception when constructed with invalid losses')]
-    public function testConstructorWithInvalidLosses() : void
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
     {
-        $this->expectException(InvalidArgumentException::class);
-
-        new StepDecay(rate: 0.01, losses: 0);
+        self::assertEquals('Step Decay (rate: 0.001, steps: 100, decay: 0.001)', (string) $this->optimizer);
     }

+    /**
+     * @param float $rate
+     * @param int $losses
+     * @param float $decay
+     * @return void
+     */
     #[Test]
-    #[TestDox('Throws exception when constructed with invalid decay')]
-    public function testConstructorWithInvalidDecay() : void
+    #[DataProvider('invalidConstructorProvider')]
+    #[TestDox('Throws exception when constructed with invalid arguments')]
+    public function testInvalidConstructorParams(float $rate, int $losses, float $decay) : void
     {
         $this->expectException(InvalidArgumentException::class);

-        new StepDecay(rate: 0.01, losses: 100, decay: -0.1);
-    }
-
-    #[Test]
-    #[TestDox('Can be cast to a string')]
-    public function testToString() : void
-    {
-        self::assertEquals('Step Decay (rate: 0.001, steps: 100, decay: 0.001)', (string) $this->optimizer);
+        new StepDecay(rate: $rate, losses: $losses, decay: $decay);
     }

     /**
@@ -89,7 +87,9 @@ public function testToString() : void
      * @param NDArray $gradient
      * @param list<list<float>> $expected
      */
+    #[Test]
     #[DataProvider('stepProvider')]
+    #[TestDox('Can compute the step')]
     public function testStep(Parameter $param, NDArray $gradient, array $expected) : void
     {
         $step = $this->optimizer->step(param: $param, gradient: $gradient);

From 8d3f76a4baf0d28894d66c6a651e73047839c16b Mon Sep 17 00:00:00 2001
From: Samuel Akopyan
Date: Tue, 11 Nov 2025 17:13:39 +0200
Subject: [PATCH 16/17] 390 refactoring StochasticTest - added dataprovider for constructor tests

---
 .../Optimizers/Stochastic/StochasticTest.php  | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php b/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php
index 57a50335f..2e16462d1 100644
--- a/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php
+++ b/tests/NeuralNet/Optimizers/Stochastic/StochasticTest.php
@@ -23,6 +23,12 @@ class StochasticTest extends TestCase
 {
     protected Stochastic $optimizer;

+    public static function invalidConstructorProvider() : Generator
+    {
+        yield 'zero rate' => [0.0];
+        yield 'negative rate' => [-0.001];
+    }
+
     public static function stepProvider() : Generator
     {
         yield [
@@ -50,19 +56,24 @@ protected function setUp() : void
     }

     #[Test]
-    #[TestDox('Throws exception when constructed with invalid learning rate')]
-    public function testConstructorWithInvalidRate() : void
+    #[TestDox('Can be cast to a string')]
+    public function testToString() : void
     {
-        $this->expectException(InvalidArgumentException::class);
-
-        new Stochastic(0.0);
+        self::assertEquals('Stochastic (rate: 0.001)', (string) $this->optimizer);
     }

+    /**
+     * @param float $rate
+     * @return void
+     */
     #[Test]
-    #[TestDox('Can be cast to a string')]
-    public function testToString() : void
+    #[DataProvider('invalidConstructorProvider')]
+    #[TestDox('Throws exception when constructed with invalid arguments')]
+    public function testInvalidConstructorParams(float $rate) : void
     {
-        self::assertEquals('Stochastic (rate: 0.001)', (string) $this->optimizer);
+        $this->expectException(InvalidArgumentException::class);
+
+        new Stochastic($rate);
     }

     /**
@@ -70,7 +81,9 @@ public function testToString() : void
      * @param NDArray $gradient
      * @param list<list<float>> $expected
      */
+    #[Test]
     #[DataProvider('stepProvider')]
+    #[TestDox('Can compute the step')]
     public function testStep(Parameter $param, NDArray $gradient, array $expected) : void
     {
         $step = $this->optimizer->step(param: $param, gradient: $gradient);
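[Editor's aside — not part of the patch series.] Patches 11–16 all apply the same consolidation: a family of near-duplicate `testConstructorWith*` methods collapses into one parameterized test fed by a named data provider. A minimal self-contained sketch of the pattern follows; the class, provider, and assertion are illustrative stand-ins rather than code from the patches. PHPUnit 10+ reports each yield key as the data set name, so a failure reads roughly like `testInvalidRate with data set "zero rate"`.

```php
<?php

declare(strict_types=1);

namespace Tests\Example;

use Generator;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Test;
use PHPUnit\Framework\TestCase;

final class ExampleOptimizerTest extends TestCase
{
    public static function invalidRateProvider() : Generator
    {
        // Each yield key becomes the human-readable data set name in test output.
        yield 'zero rate' => [0.0];
        yield 'negative rate' => [-0.001];
    }

    #[Test]
    #[DataProvider('invalidRateProvider')]
    public function testInvalidRate(float $rate) : void
    {
        // A single test body now covers every yielded case.
        self::assertLessThanOrEqual(0.0, $rate);
    }
}
```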
From 23397ef90a74606a16fe130b5331c2d38503a662 Mon Sep 17 00:00:00 2001
From: Samuel Akopyan
Date: Tue, 11 Nov 2025 23:44:16 +0200
Subject: [PATCH 17/17] 390 convert AdaMax to NumPower

---
 src/NeuralNet/Optimizers/AdaMax/AdaMax.php | 90 ++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 src/NeuralNet/Optimizers/AdaMax/AdaMax.php

diff --git a/src/NeuralNet/Optimizers/AdaMax/AdaMax.php b/src/NeuralNet/Optimizers/AdaMax/AdaMax.php
new file mode 100644
index 000000000..ae13d2249
--- /dev/null
+++ b/src/NeuralNet/Optimizers/AdaMax/AdaMax.php
@@ -0,0 +1,90 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Rubix\ML\NeuralNet\Optimizers\AdaMax;
+
+use NDArray;
+use NumPower;
+use Rubix\ML\NeuralNet\Optimizers\Adam\Adam;
+use Rubix\ML\NeuralNet\Parameters\Parameter;
+use const Rubix\ML\EPSILON;
+
+/**
+ * AdaMax
+ *
+ * A version of Adam that replaces the RMS property with the infinity norm of the gradients.
+ *
+ * References:
+ * [1] D. P. Kingma et al. (2014). Adam: A Method for Stochastic Optimization.
+ *
+ * @category    Machine Learning
+ * @package     Rubix/ML
+ * @author      Andrew DalPino
+ * @author      Samuel Akopyan
+ */
+class AdaMax extends Adam
+{
+    /**
+     * @param float $rate
+     * @param float $momentumDecay
+     * @param float $normDecay
+     */
+    public function __construct(float $rate = 0.001, float $momentumDecay = 0.1, float $normDecay = 0.001)
+    {
+        parent::__construct($rate, $momentumDecay, $normDecay);
+    }
+
+    /**
+     * Take a step of gradient descent for a given parameter.
+     *
+     * AdaMax update (element-wise), where β1 = momentumDecay and β2 = 1 − normDecay:
+     * v_t = v_{t-1} + β1 · (g_t − v_{t-1})
+     * u_t = max(β2 · u_{t-1}, |g_t|)
+     * Δθ_t = η · v_t / max(u_t, ε)
+     *
+     * @internal
+     *
+     * @param Parameter $param
+     * @param NDArray $gradient
+     * @return NDArray
+     */
+    public function step(Parameter $param, NDArray $gradient) : NDArray
+    {
+        [$velocity, $norm] = $this->cache[$param->id()];
+
+        $vHat = NumPower::multiply(
+            NumPower::subtract($gradient, $velocity),
+            $this->momentumDecay
+        );
+
+        $velocity = NumPower::add($velocity, $vHat);
+
+        // Infinity norm accumulator
+        $norm = NumPower::multiply($norm, 1.0 - $this->normDecay);
+        $absGrad = NumPower::abs($gradient);
+        $norm = NumPower::maximum($norm, $absGrad);
+
+        $this->cache[$param->id()] = [$velocity, $norm];
+
+        $norm = NumPower::clip($norm, EPSILON, PHP_FLOAT_MAX);
+
+        return NumPower::multiply(
+            NumPower::divide($velocity, $norm),
+            $this->rate
+        );
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return "AdaMax (rate: {$this->rate}, momentum decay: {$this->momentumDecay},"
+            . " norm decay: {$this->normDecay})";
+    }
+}
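[Editor's aside — not part of the patch series.] The series ends with the AdaMax port itself and does not add a docs page for it. Mirroring the step-decay and rms-prop docs examples updated earlier in the series, a usage sketch would presumably look like the following; the namespace and constructor defaults are taken from the patch above, while a docs path such as `docs/neural-network/optimizers/adamax.md` is an assumption, since no docs file is touched here.

```php
use Rubix\ML\NeuralNet\Optimizers\AdaMax\AdaMax;

// rate: 0.001, momentum decay: 0.1, norm decay: 0.001 — the defaults from the patch above
$optimizer = new AdaMax(0.001, 0.1, 0.001);
```

As a quick sanity check on the infinity-norm accumulator in `step()`: with normDecay = 0.001, a previous accumulator value u = 0.5, and a new gradient of magnitude 0.2, the update gives u′ = max(0.999 · 0.5, 0.2) = 0.4995, so the per-parameter step divides the velocity by 0.4995 rather than by Adam's RMS-based denominator.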