Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/install_neuronx_runtime/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ runs:
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
sudo apt-get update -y
sudo apt-get install aws-neuronx-tools=2.24.54.0 aws-neuronx-runtime-lib=2.26.42.0-2ff3b5c7d aws-neuronx-collectives=2.26.43.0-47cc904ea -y
sudo apt-get install aws-neuronx-tools=2.26.14.0 aws-neuronx-runtime-lib=2.28.23.0-dd5879008 aws-neuronx-collectives=2.28.27.0-bc30ece58 -y
export PATH=/opt/aws/neuron/bin:$PATH
12 changes: 6 additions & 6 deletions docker/vllm/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEU
# Install neuronx packages
RUN apt-get update -y \
&& apt-get install -y --no-install-recommends \
aws-neuronx-dkms=2.22.2.0 \
aws-neuronx-collectives=2.26.43.0-47cc904ea \
aws-neuronx-runtime-lib=2.26.42.0-2ff3b5c7d \
aws-neuronx-tools=2.24.54.0 \
aws-neuronx-dkms=2.24.7.0 \
aws-neuronx-collectives=2.28.27.0-bc30ece58 \
aws-neuronx-runtime-lib=2.28.23.0-dd5879008 \
aws-neuronx-tools=2.26.14.0 \
&& rm -rf /var/lib/apt/lists/* \
&& apt-get clean

Expand All @@ -40,8 +40,8 @@ RUN pip3 install \

# Install manually torch CPU version to avoid pulling CUDA
RUN pip3 install \
torch==2.7.1 \
torchvision==0.22.1 \
torch==2.8.0 \
torchvision==0.23.0 \
--index-url https://download.pytorch.org/whl/cpu

# Install optimum-neuron
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ami/hcl2-files/build.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ build {
provisioner "shell" {
inline = [
"echo 'export HF_HUB_ENABLE_HF_TRANSFER=1' | sudo tee -a /home/ubuntu/.bashrc",
"echo 'source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate' | sudo tee -a /home/ubuntu/.bashrc"
"echo 'source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate' | sudo tee -a /home/ubuntu/.bashrc"
]
}
provisioner "file" {
Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ami/hcl2-files/variables.pkr.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ variable "instance_type" {
}

variable "source_ami" {
default = "ami-0ffd183ece0ca0475"
default = "ami-0ec4ab14b1c5a10f2"
description = "Base Image"
type = string
/*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# Activate the neuron virtual environment
source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate
source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate

echo "Step: install-hugging-face-libraries"

Expand Down
2 changes: 1 addition & 1 deletion infrastructure/ami/scripts/validate-neuron.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ echo "Step: validate-neuron-devices"
neuron-ls

# Activate the neuron virtual environment
source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate
source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate

python -c 'import torch'
python -c 'import torch_neuronx'
Expand Down
4 changes: 2 additions & 2 deletions optimum/neuron/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "0.3.1.dev6"
__version__ = "0.3.1.dev7"

__sdk_version__ = "2.24.0"
__sdk_version__ = "2.26.0"
7 changes: 5 additions & 2 deletions optimum/neuron/vllm/platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@


class OptimumNeuronPlatform(Platform):
_enum = PlatformEnum.NEURON
_enum = PlatformEnum.UNSPECIFIED
device_name: str = "neuron"
device_type: str = "neuron"
# Device type is set to "cpu" to prevent vLLM from preemptively moving tensors
# to the XLA device and triggering spurious neuron runtime initializations.
# The CPU tensors will be moved to the XLA device by the neuron SDK when required.
device_type: str = "cpu"
ray_device_key: str = "neuron_cores"
device_control_env_var: str = "NEURON_RT_VISIBLE_CORES"

Expand Down
3 changes: 1 addition & 2 deletions optimum/neuron/vllm/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from vllm.lora.request import LoRARequest
from vllm.model_executor import set_random_seed
from vllm.sequence import ExecuteModelRequest
from vllm.worker.neuron_model_runner import NeuronModelRunner
from vllm.worker.worker_base import LocalOrDistributedWorkerBase, WorkerBase, WorkerInput

from .runner import OptimumNeuronModelRunner
Expand All @@ -33,7 +32,7 @@
class OptimumNeuronWorker(LocalOrDistributedWorkerBase):
"""A worker class that executes the model on a group of neuron cores."""

model_runner: NeuronModelRunner
model_runner: OptimumNeuronModelRunner

def __init__(
self,
Expand Down
16 changes: 8 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies = [
"accelerate == 1.8.1",
"optimum ~= 1.24.0",
"huggingface_hub >= 0.29.0",
"numpy>=1.22.2, <=1.25.2",
"numpy>=1.22.2, <=1.26.4",
"protobuf>=3.20.3, <4",
]

Expand Down Expand Up @@ -102,12 +102,12 @@ neuron = [
]
neuronx = [
"wheel",
"neuronx-cc==2.19.8089.0",
"torch-neuronx==2.7.0.2.8.6734+ac864f72",
"torch==2.7.1.*",
"torchvision==0.22.*",
"neuronx_distributed==0.13.14393",
"libneuronxla==2.2.4410.0",
"neuronx-cc==2.21.18209.0",
"torch-neuronx==2.8.0.2.10.13553",
"torch==2.8.0.*",
"torchvision==0.23.*",
"neuronx_distributed==0.15.22404",
"libneuronxla==2.2.12677.0",
]
diffusers = [
"diffusers==0.35.*",
Expand All @@ -117,7 +117,7 @@ sentence-transformers = [
"sentence-transformers >= 2.2.0",
]
vllm = [
"vllm == 0.10.0",
"vllm == 0.10.2",
]

[project.scripts]
Expand Down
Loading