diff --git a/.github/actions/install_neuronx_runtime/action.yml b/.github/actions/install_neuronx_runtime/action.yml index 1dcdbdd14..b09df2f45 100644 --- a/.github/actions/install_neuronx_runtime/action.yml +++ b/.github/actions/install_neuronx_runtime/action.yml @@ -12,5 +12,10 @@ runs: EOF wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add - sudo apt-get update -y - sudo apt-get install aws-neuronx-tools=2.24.54.0 aws-neuronx-runtime-lib=2.26.42.0-2ff3b5c7d aws-neuronx-collectives=2.26.43.0-47cc904ea -y + sudo apt-get install aws-neuronx-tools=2.26.14.0 aws-neuronx-runtime-lib=2.28.23.0-dd5879008 aws-neuronx-collectives=2.28.27.0-bc30ece58 -y export PATH=/opt/aws/neuron/bin:$PATH + dpkg -l | grep neuron + - name: Display driver version + shell: bash + run: | + apt show aws-neuronx-dkms diff --git a/.github/actions/prepare_venv/action.yml b/.github/actions/prepare_venv/action.yml index 58d86b419..c18578c8f 100644 --- a/.github/actions/prepare_venv/action.yml +++ b/.github/actions/prepare_venv/action.yml @@ -1,5 +1,18 @@ name: Prepare virtual environment -description: prepare virtual environment to install pyhton packages +description: prepare virtual environment to install python packages +inputs: + torch_version: + description: 'The pytorch version to be installed' + required: true + default: '2.8.0' + torchvision_version: + description: 'The torchvision version to be installed' + required: true + default: '0.23.0' + use_cuda: + description: 'requires pytorch cuda to be installed' + required: true + default: 'false' runs: using: "composite" steps: @@ -13,6 +26,19 @@ runs: python -m pip install -U pip python -m pip install --upgrade setuptools==69.5.1 python -m pip install hf_transfer + - name: Install torch and torchvision (CUDA) + if: ${{ inputs.use_cuda == 'true' }} + shell: bash + run: | + source aws_neuron_venv_pytorch/bin/activate + # Install torch and torchvision for CUDA: required by diffusers even if neuron doesn't use it + python -m pip install torch==${{ inputs.torch_version }} torchvision~=${{ inputs.torchvision_version }} + - name: Install torch and torchvision (CPU) + if: ${{ inputs.use_cuda == 'false' }} + shell: bash + run: | + source aws_neuron_venv_pytorch/bin/activate + # Install torch and torchvision for CUDA: this avoids having to install CUDA related dependencies, that # Install torch and torchvision for CPU: this avoids having to install CUDA related dependencies, that use a lot # of disk space. Note dependencies should be updated when we bump the PyTorch version. - python -m pip install torch==2.7.1 torchvision~=0.22 --index-url https://download.pytorch.org/whl/cpu + python -m pip install torch==${{ inputs.torch_version }} torchvision~=${{ inputs.torchvision_version }} --index-url https://download.pytorch.org/whl/cpu diff --git a/.github/workflows/doc-build.yml b/.github/workflows/doc-build.yml index caac101da..b3d040039 100644 --- a/.github/workflows/doc-build.yml +++ b/.github/workflows/doc-build.yml @@ -27,7 +27,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-node@v3 with: - node-version: '18' + node-version: '20' cache-dependency-path: "kit/package-lock.json" - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/doc-pr-build.yml b/.github/workflows/doc-pr-build.yml index aba91c037..8680dcee7 100644 --- a/.github/workflows/doc-pr-build.yml +++ b/.github/workflows/doc-pr-build.yml @@ -24,9 +24,9 @@ jobs: steps: - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 with: - node-version: '18' + node-version: '20' cache-dependency-path: "kit/package-lock.json" - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/test_inf2_transformers.yml b/.github/workflows/test_inf2_transformers.yml index 3f59c8579..8734bdd89 100644 --- a/.github/workflows/test_inf2_transformers.yml +++ b/.github/workflows/test_inf2_transformers.yml @@ -56,6 +56,11 @@ jobs: uses: ./.github/actions/prepare_venv - name: Install optimum-neuron uses: ./.github/actions/install_optimum_neuron + - name: Install datasets dependencies + run: | + sudo apt-get install ffmpeg -y + source aws_neuron_venv_pytorch/bin/activate + pip install datasets[audio] - name: Run transformers export CLI tests run: | source aws_neuron_venv_pytorch/bin/activate diff --git a/.github/workflows/test_inf2_vllm.yml b/.github/workflows/test_inf2_vllm.yml index 0b08c6138..1182304de 100644 --- a/.github/workflows/test_inf2_vllm.yml +++ b/.github/workflows/test_inf2_vllm.yml @@ -72,10 +72,10 @@ jobs: uses: ./.github/actions/prepare_venv - name: Install optimum-neuron uses: ./.github/actions/install_optimum_neuron - - name: Install vLLM + - name: Install vLLM and test prerequisites run: | source aws_neuron_venv_pytorch/bin/activate - pip install .[vllm] + pip install .[vllm,vllm-tests] - name: Export test models run: | source aws_neuron_venv_pytorch/bin/activate diff --git a/docker/vllm/Dockerfile b/docker/vllm/Dockerfile index d54dd3264..d15ccf8f6 100644 --- a/docker/vllm/Dockerfile +++ b/docker/vllm/Dockerfile @@ -25,10 +25,10 @@ RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEU # Install neuronx packages RUN apt-get update -y \ && apt-get install -y --no-install-recommends \ - aws-neuronx-dkms=2.22.2.0 \ - aws-neuronx-collectives=2.26.43.0-47cc904ea \ - aws-neuronx-runtime-lib=2.26.42.0-2ff3b5c7d \ - aws-neuronx-tools=2.24.54.0 \ + aws-neuronx-dkms=2.24.7.0 \ + aws-neuronx-collectives=2.28.27.0-bc30ece58 \ + aws-neuronx-runtime-lib=2.28.23.0-dd5879008 \ + aws-neuronx-tools=2.26.14.0 \ && rm -rf /var/lib/apt/lists/* \ && apt-get clean @@ -40,8 +40,8 @@ RUN pip3 install \ # Install manually torch CPU version to avoid pulling CUDA RUN pip3 install \ - torch==2.7.1 \ - torchvision==0.22.1 \ + torch==2.8.0 \ + torchvision==0.23.0 \ --index-url https://download.pytorch.org/whl/cpu # Install optimum-neuron diff --git a/infrastructure/ami/hcl2-files/build.pkr.hcl b/infrastructure/ami/hcl2-files/build.pkr.hcl index da72dfe5f..60e2532b0 100644 --- a/infrastructure/ami/hcl2-files/build.pkr.hcl +++ b/infrastructure/ami/hcl2-files/build.pkr.hcl @@ -16,7 +16,7 @@ build { provisioner "shell" { inline = [ "echo 'export HF_HUB_ENABLE_HF_TRANSFER=1' | sudo tee -a /home/ubuntu/.bashrc", - "echo 'source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate' | sudo tee -a /home/ubuntu/.bashrc" + "echo 'source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate' | sudo tee -a /home/ubuntu/.bashrc" ] } provisioner "file" { diff --git a/infrastructure/ami/hcl2-files/variables.pkr.hcl b/infrastructure/ami/hcl2-files/variables.pkr.hcl index ef5c0e850..f557a0c97 100644 --- a/infrastructure/ami/hcl2-files/variables.pkr.hcl +++ b/infrastructure/ami/hcl2-files/variables.pkr.hcl @@ -10,7 +10,7 @@ variable "instance_type" { } variable "source_ami" { - default = "ami-0ffd183ece0ca0475" + default = "ami-0ec4ab14b1c5a10f2" description = "Base Image" type = string /* diff --git a/infrastructure/ami/scripts/install-huggingface-libraries.sh b/infrastructure/ami/scripts/install-huggingface-libraries.sh index b697b4509..51a525d8f 100644 --- a/infrastructure/ami/scripts/install-huggingface-libraries.sh +++ b/infrastructure/ami/scripts/install-huggingface-libraries.sh @@ -1,7 +1,7 @@ #!/bin/bash # Activate the neuron virtual environment -source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate +source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate echo "Step: install-hugging-face-libraries" diff --git a/infrastructure/ami/scripts/validate-neuron.sh b/infrastructure/ami/scripts/validate-neuron.sh index aa8fc7545..c7a362421 100644 --- a/infrastructure/ami/scripts/validate-neuron.sh +++ b/infrastructure/ami/scripts/validate-neuron.sh @@ -3,7 +3,7 @@ echo "Step: validate-neuron-devices" neuron-ls # Activate the neuron virtual environment -source /opt/aws_neuronx_venv_pytorch_2_7/bin/activate +source /opt/aws_neuronx_venv_pytorch_2_8/bin/activate python -c 'import torch' python -c 'import torch_neuronx' diff --git a/optimum/neuron/utils/import_utils.py b/optimum/neuron/utils/import_utils.py index 3c74ceb37..dccd340c0 100644 --- a/optimum/neuron/utils/import_utils.py +++ b/optimum/neuron/utils/import_utils.py @@ -35,58 +35,46 @@ def _get_package_version(package_name: str) -> str | None: return None +def is_package_available(package_name: str, min_version: str | None = None) -> bool: + package_version = _get_package_version(package_name) + if package_version is None: + return False + if min_version is None: + return True + return version.parse(package_version) >= version.parse(min_version) + + def is_neuron_available() -> bool: - return importlib.util.find_spec("torch_neuron") is not None + return is_package_available("torch_neuron") def is_neuronx_available() -> bool: - return importlib.util.find_spec("torch_neuronx") is not None + return is_package_available("torch_neuronx") def is_accelerate_available(min_version: str | None = MIN_ACCELERATE_VERSION) -> bool: - _accelerate_available = importlib.util.find_spec("accelerate") is not None - if min_version is not None: - if _accelerate_available: - import accelerate - - _accelerate_version = accelerate.__version__ - return version.parse(_accelerate_version) >= version.parse(min_version) - else: - return False - return _accelerate_available + return is_package_available("accelerate", min_version=min_version) def is_torch_neuronx_available() -> bool: - return importlib.util.find_spec("torch_neuronx") is not None + return is_package_available("torch_neuronx") def is_trl_available(required_version: str | None = None) -> bool: - trl_available = importlib.util.find_spec("trl") is not None - if trl_available: - import trl - - if required_version is None: - required_version = trl.__version__ - - if version.parse(trl.__version__) == version.parse(required_version): + trl_version = _get_package_version("trl") + if trl_version is None: + return False + if required_version is not None: + if version.parse(trl_version) == version.parse(required_version): return True - raise RuntimeError(f"Only `trl=={required_version}` is supported, but {trl.__version__} is installed.") - return False + raise RuntimeError(f"Only `trl=={required_version}` is supported, but {trl_version} is installed.") + return True def is_peft_available(min_version: str | None = MIN_PEFT_VERSION) -> bool: - _peft_available = importlib.util.find_spec("peft") is not None - if min_version is not None: - if _peft_available: - import peft - - _peft_version = peft.__version__ - return version.parse(_peft_version) >= version.parse(min_version) - else: - return False - return _peft_available + return is_package_available("peft", min_version=min_version) def is_vllm_available() -> bool: - return _get_package_version("vllm") is not None + return is_package_available("vllm") diff --git a/optimum/neuron/version.py b/optimum/neuron/version.py index e71c81100..6507c9f25 100644 --- a/optimum/neuron/version.py +++ b/optimum/neuron/version.py @@ -12,6 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.3.1.dev6" +__version__ = "0.3.1.dev7" -__sdk_version__ = "2.24.0" +__sdk_version__ = "2.26.0" diff --git a/optimum/neuron/vllm/platform.py b/optimum/neuron/vllm/platform.py index 9f32bd5ac..9db47c7a9 100644 --- a/optimum/neuron/vllm/platform.py +++ b/optimum/neuron/vllm/platform.py @@ -21,9 +21,12 @@ class OptimumNeuronPlatform(Platform): - _enum = PlatformEnum.NEURON + _enum = PlatformEnum.UNSPECIFIED device_name: str = "neuron" - device_type: str = "neuron" + # Device type is set to "cpu" to prevent vLLM from preemptively moving tensors + # to the XLA device and trigger spurious neuron runtime intializations. + # The CPU tensors will be moved when required to the XLA device by the neuron SDK. + device_type: str = "cpu" ray_device_key: str = "neuron_cores" device_control_env_var: str = "NEURON_RT_VISIBLE_CORES" diff --git a/optimum/neuron/vllm/worker.py b/optimum/neuron/vllm/worker.py index c695cd00a..beaeb8eb4 100644 --- a/optimum/neuron/vllm/worker.py +++ b/optimum/neuron/vllm/worker.py @@ -21,7 +21,6 @@ from vllm.lora.request import LoRARequest from vllm.model_executor import set_random_seed from vllm.sequence import ExecuteModelRequest -from vllm.worker.neuron_model_runner import NeuronModelRunner from vllm.worker.worker_base import LocalOrDistributedWorkerBase, WorkerBase, WorkerInput from .runner import OptimumNeuronModelRunner @@ -33,7 +32,7 @@ class OptimumNeuronWorker(LocalOrDistributedWorkerBase): """A worker class that executes the model on a group of neuron cores.""" - model_runner: NeuronModelRunner + model_runner: OptimumNeuronModelRunner def __init__( self, diff --git a/pyproject.toml b/pyproject.toml index 0b242f92e..0db101f36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "accelerate == 1.8.1", "optimum ~= 1.24.0", "huggingface_hub >= 0.29.0", - "numpy>=1.22.2, <=1.25.2", + "numpy>=1.22.2, <=1.26.4", "protobuf>=3.20.3, <4", ] @@ -68,10 +68,6 @@ tests = [ "librosa", "controlnet-aux", "hf_transfer", - "torchcodec < 0.6.0", - "docker", - "pytest-asyncio", - "openai", ] quality = [ "pre-commit", @@ -82,7 +78,6 @@ training = [ "trl == 0.11.4", "peft == 0.17.0", "evaluate == 0.4.3", - "neuronx_distributed==0.13.14393", ] neuron = [ "wheel", @@ -95,25 +90,30 @@ neuron = [ ] neuronx = [ "wheel", - "neuronx-cc==2.19.8089.0", - "torch-neuronx==2.7.0.2.8.6734+ac864f72", - "torch==2.7.1.*", - "torchvision==0.22.*", - "neuronx_distributed==0.13.14393", - "libneuronxla==2.2.4410.0", + "neuronx-cc==2.21.18209.0", + "torch-neuronx==2.8.0.2.10.13553", + "torch==2.8.0.*", + "torchvision==0.23.*", + "neuronx_distributed==0.15.22404", + "libneuronxla==2.2.12677.0", ] diffusers = [ "diffusers==0.35.*", "peft==0.17.0", ] diffusers-tests = [ - "compel", + "compel==2.1.1", ] sentence-transformers = [ "sentence-transformers >= 2.2.0", ] vllm = [ - "vllm == 0.10.0", + "vllm == 0.10.2", +] +vllm-tests = [ + "docker", + "pytest-asyncio", + "openai", ] [project.scripts] diff --git a/tests/fixtures/llm/export_models.py b/tests/fixtures/llm/export_models.py index 40bdcd5dd..7ef0cfd75 100644 --- a/tests/fixtures/llm/export_models.py +++ b/tests/fixtures/llm/export_models.py @@ -7,7 +7,12 @@ import huggingface_hub import pytest -from transformers import AutoConfig, AutoTokenizer + +from optimum.neuron.utils.import_utils import is_package_available + + +if is_package_available("transformers"): + from transformers import AutoConfig, AutoTokenizer from optimum.neuron import NeuronModelForCausalLM from optimum.neuron.cache import synchronize_hub_cache diff --git a/tests/fixtures/llm/vllm_docker_service.py b/tests/fixtures/llm/vllm_docker_service.py index 131d3c26d..d727c3af5 100644 --- a/tests/fixtures/llm/vllm_docker_service.py +++ b/tests/fixtures/llm/vllm_docker_service.py @@ -10,9 +10,14 @@ import huggingface_hub import pytest import torch -from docker.errors import NotFound -import docker +from optimum.neuron.utils.import_utils import is_package_available + + +if is_package_available("docker"): + from docker.errors import NotFound + + import docker from .vllm_service import LauncherHandle diff --git a/tests/fixtures/llm/vllm_service.py b/tests/fixtures/llm/vllm_service.py index 22ded00d8..efdca7478 100644 --- a/tests/fixtures/llm/vllm_service.py +++ b/tests/fixtures/llm/vllm_service.py @@ -11,7 +11,16 @@ import huggingface_hub import pytest import torch -from openai import APIConnectionError, AsyncOpenAI + +from optimum.neuron.utils.import_utils import is_package_available + + +if is_package_available("openai"): + from openai import APIConnectionError, AsyncOpenAI +else: + + class AsyncOpenAI: + pass OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache" diff --git a/tests/inference/diffusers/test_export_cli.py b/tests/inference/diffusers/test_export_cli.py index c2e3992d5..8d245fb3d 100644 --- a/tests/inference/diffusers/test_export_cli.py +++ b/tests/inference/diffusers/test_export_cli.py @@ -16,10 +16,12 @@ import tempfile import unittest +import pytest from optimum.utils import logging from optimum.exporters.neuron.model_configs import * # noqa: F403 from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx +from optimum.neuron.version import __sdk_version__ as sdk_version logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -87,6 +89,7 @@ def test_pixart(self): check=True, ) + @pytest.mark.skipif(sdk_version == "2.26.0", reason="This test hangs with SDK 2.26.0") @requires_neuronx def test_flux_tp2(self): model_ids = ["hf-internal-testing/tiny-flux-pipe-gated-silu"] diff --git a/tests/inference/diffusers/test_flux.py b/tests/inference/diffusers/test_flux.py index 82347c923..100a657cd 100644 --- a/tests/inference/diffusers/test_flux.py +++ b/tests/inference/diffusers/test_flux.py @@ -14,6 +14,7 @@ # limitations under the License. import PIL +import pytest import torch from diffusers.utils import load_image from optimum.utils.testing_utils import require_diffusers @@ -26,8 +27,10 @@ NeuronModelVaeEncoder, ) from optimum.neuron.utils.testing_utils import is_inferentia_test, requires_neuronx +from optimum.neuron.version import __sdk_version__ as sdk_version +@pytest.mark.skipif(sdk_version == "2.26.0", reason="This test hangs with SDK 2.26.0") @is_inferentia_test @requires_neuronx @require_diffusers @@ -47,6 +50,7 @@ def test_flux_txt2img(neuron_flux_tp2_path): assert isinstance(image, PIL.Image.Image) +@pytest.mark.skipif(sdk_version == "2.26.0", reason="This test hangs with SDK 2.26.0") @is_inferentia_test @requires_neuronx @require_diffusers @@ -68,6 +72,7 @@ def test_flux_inpaint(neuron_flux_tp2_path): assert isinstance(image, PIL.Image.Image) +@pytest.mark.skipif(sdk_version == "2.26.0", reason="This test hangs with SDK 2.26.0") @is_inferentia_test @requires_neuronx @require_diffusers diff --git a/tests/vllm/docker/test_vllm_docker_service_generate.py b/tests/vllm/docker/test_vllm_docker_service_generate.py index dcf04044b..e0e909929 100644 --- a/tests/vllm/docker/test_vllm_docker_service_generate.py +++ b/tests/vllm/docker/test_vllm_docker_service_generate.py @@ -1,5 +1,10 @@ import pytest + +# Do not collect tests from this file if docker or vllm are not installed +pytest.importorskip("docker") +pytest.importorskip("vllm") + from optimum.neuron.utils import DTYPE_MAPPER diff --git a/tests/vllm/service/test_vllm_service_generate.py b/tests/vllm/service/test_vllm_service_generate.py index 28546047a..c9f47cba8 100644 --- a/tests/vllm/service/test_vllm_service_generate.py +++ b/tests/vllm/service/test_vllm_service_generate.py @@ -1,5 +1,9 @@ import pytest + +# Do not collect tests from this file if vllm is not installed +pytest.importorskip("vllm") + from optimum.neuron.utils import DTYPE_MAPPER