Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/install_neuronx_runtime/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ runs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.24.54.0 aws-neuronx-runtime-lib=2.26.42.0-2ff3b5c7d aws-neuronx-collectives=2.26.43.0-47cc904ea -y
sudo apt-get install aws-neuronx-tools=2.26.14.0 aws-neuronx-runtime-lib=2.28.23.0-dd5879008 aws-neuronx-collectives=2.28.27.0-bc30ece58 -y
export PATH=/opt/aws/neuron/bin:$PATH
12 changes: 6 additions & 6 deletions docker/vllm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEU
# Install neuronx packages
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
aws-neuronx-dkms=2.22.2.0 \
aws-neuronx-collectives=2.26.43.0-47cc904ea \
aws-neuronx-runtime-lib=2.26.42.0-2ff3b5c7d \
aws-neuronx-tools=2.24.54.0 \
aws-neuronx-dkms=2.24.7.0 \
aws-neuronx-collectives=2.28.27.0-bc30ece58 \
aws-neuronx-runtime-lib=2.28.23.0-dd5879008 \
aws-neuronx-tools=2.26.14.0 \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

Expand All @@ -40,8 +40,8 @@ RUN pip3 install \

# Install manually torch CPU version to avoid pulling CUDA
RUN pip3 install \
torch==2.7.1 \
torchvision==0.22.1 \
torch==2.8.0 \
torchvision==0.23.0 \
--index-url https://download.pytorch.org/whl/cpu

# Install optimum-neuron
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ami/hcl2-files/build.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ build {
provisioner "shell" {
inline = [
"echo 'export HF_HUB_ENABLE_HF_TRANSFER=1' | sudo tee -a /home/ubuntu/.bashrc",
"echo 'source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate' | sudo tee -a /home/ubuntu/.bashrc"
"echo 'source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate' | sudo tee -a /home/ubuntu/.bashrc"
]
}
provisioner "file" {
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ami/hcl2-files/variables.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ variable "instance_type" {
}

variable "source_ami" {
default = "ami-0ffd183ece0ca0475"
default = "ami-0ec4ab14b1c5a10f2"
description = "Base Image"
type = string
/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Activate the neuron virtual environment
source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate
source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate

echo "Step: install-hugging-face-libraries"

Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ami/scripts/validate-neuron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ echo "Step: validate-neuron-devices"
neuron-ls

# Activate the neuron virtual environment
source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate
source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate

python -c 'import torch'
python -c 'import torch_neuronx'
Expand Down
4 changes: 2 additions & 2 deletions optimum/neuron/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.3.1.dev6"
__version__ = "0.3.1.dev7"

__sdk_version__ = "2.24.0"
__sdk_version__ = "2.26.0"
7 changes: 5 additions & 2 deletions optimum/neuron/vllm/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@


class OptimumNeuronPlatform(Platform):
_enum = PlatformEnum.NEURON
_enum = PlatformEnum.UNSPECIFIED
device_name: str = "neuron"
device_type: str = "neuron"
# Device type is set to "cpu" to prevent vLLM from preemptively moving tensors
# to the XLA device and triggering spurious neuron runtime initializations.
# The CPU tensors will be moved to the XLA device by the neuron SDK when required.
device_type: str = "cpu"
ray_device_key: str = "neuron_cores"
device_control_env_var: str = "NEURON_RT_VISIBLE_CORES"

Expand Down
3 changes: 1 addition & 2 deletions optimum/neuron/vllm/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.sequence import ExecuteModelRequest
from vllm.worker.neuron_model_runner import NeuronModelRunner
from vllm.worker.worker_base import LocalOrDistributedWorkerBase, WorkerBase, WorkerInput

from .runner import OptimumNeuronModelRunner
Expand All @@ -33,7 +32,7 @@
class OptimumNeuronWorker(LocalOrDistributedWorkerBase):
"""A worker class that executes the model on a group of neuron cores."""

model_runner: NeuronModelRunner
model_runner: OptimumNeuronModelRunner

def __init__(
self,
Expand Down
16 changes: 8 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies = [
"accelerate == 1.8.1",
"optimum ~= 1.24.0",
"huggingface_hub >= 0.29.0",
"numpy>=1.22.2, <=1.25.2",
"numpy>=1.22.2, <=1.26.4",
"protobuf>=3.20.3, <4",
]

Expand Down Expand Up @@ -102,12 +102,12 @@ neuron = [
]
neuronx = [
"wheel",
"neuronx-cc==2.19.8089.0",
"torch-neuronx==2.7.0.2.8.6734+ac864f72",
"torch==2.7.1.*",
"torchvision==0.22.*",
"neuronx_distributed==0.13.14393",
"libneuronxla==2.2.4410.0",
"neuronx-cc==2.21.18209.0",
"torch-neuronx==2.8.0.2.10.13553",
"torch==2.8.0.*",
"torchvision==0.23.*",
"neuronx_distributed==0.15.22404",
"libneuronxla==2.2.12677.0",
]
diffusers = [
"diffusers==0.35.*",
Expand All @@ -117,7 +117,7 @@ sentence-transformers = [
"sentence-transformers >= 2.2.0",
]
vllm = [
"vllm == 0.10.0",
"vllm == 0.10.2",
]

[project.scripts]
Expand Down
Loading