53 changes: 53 additions & 0 deletions .github/workflows/integration_test_8gpu_compiler_toolkit.yaml
@@ -0,0 +1,53 @@
name: Compiler Toolkit 8 GPU Integration Tests

on:
  push:
    branches: [ main ]
    paths:
      - 'torchtitan/experiments/compiler_toolkit/**'
      - '.github/workflows/integration_test_8gpu_compiler_toolkit.yaml'
  pull_request:
    paths:
      - 'torchtitan/experiments/compiler_toolkit/**'
      - '.github/workflows/integration_test_8gpu_compiler_toolkit.yaml'
  schedule:
    # Runs every 12 hours
    - cron: '0 */12 * * *'

concurrency:
  group: unit-test${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

defaults:
  run:
    shell: bash -l -eo pipefail {0}

jobs:
  build-test:
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    with:
      runner: linux.g5.48xlarge.nvidia.gpu
      gpu-arch-type: cuda
      gpu-arch-version: "12.6"
      # This image is faster to clone than the default, but it lacks CC needed by triton
      # (1m25s vs 2m37s).
      docker-image: torchtitan-ubuntu-20.04-clang12
      repository: pytorch/torchtitan
      upload-artifact: outputs
      script: |
        set -eux

        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        # Log CUDA driver version for debugging.
        DRIVER_VERSION=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n 1 || true)
        echo "CUDA driver version: ${DRIVER_VERSION}"

        pip config --user set global.progress_bar off

        python -m pip install --force-reinstall --pre torch --index-url https://download.pytorch.org/whl/nightly/cu126

        mkdir artifacts-to-be-uploaded
        python -m torchtitan.experiments.compiler_toolkit.tests.integration_tests artifacts-to-be-uploaded --ngpu 4
2 changes: 1 addition & 1 deletion torchtitan/experiments/README.md
@@ -30,4 +30,4 @@ We provide this `experiments/` folder to host experiments that add significant v
| [torchcomms](./torchcomms/) | TBA | [@d4l3k](https://github.com/d4l3k) [@fduwjj](https://github.com/fduwjj) [@mori360](https://github.com/mori360) |
| [moe_symm_mem_kernels](./moe_symm_mem_kernels/) | TBA | [@kwen2501](https://github.com/kwen2501) |
| [gpt_oss](./gpt_oss/) | TBA | [@jianiw](https://github.com/jianiw) |
| [compiler_toolkit](./compiler_tookit/) | TBA | [@SherlockNoMad](https://github.com/SherlockNoMad) [@yiming0416](https://github.com/yiming0416) |
| [compiler_toolkit](./compiler_tookit/) | [![Compiler Toolkit 8 GPU Integration Tests](https://github.com/pytorch/torchtitan/actions/workflows/integration_test_8gpu_compiler_toolkit.yaml/badge.svg?branch=main)](https://github.com/pytorch/torchtitan/actions/workflows/integration_test_8gpu_compiler_toolkit.yaml?query=branch%3Amain) | [@SherlockNoMad](https://github.com/SherlockNoMad) [@yiming0416](https://github.com/yiming0416) |
@@ -10,6 +10,7 @@
import torch

from torch.fx.traceback import annotate_fn

from torchtitan.config import JobConfig
from torchtitan.distributed import ParallelDims
from torchtitan.experiments.compiler_toolkit.common_utils import (
2 changes: 1 addition & 1 deletion torchtitan/experiments/compiler_toolkit/graph_utils.py
@@ -155,7 +155,7 @@ def joint_graph_builder(
        model_kwargs: Dict of model input keyword arguments
        fw_compiler: Optional custom forward compiler function
        bw_compiler: Optional custom backward compiler function
        validation_fn: Optional function to validate the joint graph
        joint_custom_pass: Optional custom pass to run on the joint graph
    """
    assert isinstance(model_args, tuple)
    for arg in model_args:
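The docstring change above documents the `joint_custom_pass` argument of `joint_graph_builder`, replacing the stale `validation_fn` entry. As a heavily hedged sketch (the hook name below and the exact object it receives are assumptions; the real contract is whatever `graph_utils.py` passes to the hook), such a pass can be as simple as logging the captured joint graph and handing it back:

# Hedged sketch: assumes the custom pass is a callable that receives the captured
# joint-graph object produced by joint_graph_builder and returns it, possibly transformed.
def log_joint_graph(joint_graph):
    # Print the object's type for debugging, then return it unchanged.
    print(type(joint_graph))
    return joint_graph

# Hypothetical wiring; the real call sites are the compiler_toolkit model entry points:
# joint_graph_builder(model, model_args, model_kwargs, joint_custom_pass=log_joint_graph)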
5 changes: 5 additions & 0 deletions torchtitan/experiments/compiler_toolkit/tests/__init__.py
@@ -0,0 +1,5 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
115 changes: 115 additions & 0 deletions torchtitan/experiments/compiler_toolkit/tests/integration_tests.py
@@ -0,0 +1,115 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import os

from tests.integration_tests import OverrideDefinitions
from tests.integration_tests.run_tests import run_tests


def build_compiler_toolkit_test_list() -> list[OverrideDefinitions]:
"""
returns a list of OverrideDefinitions that is used to generate
variations of integration tests based on the same root config file.
"""
integration_tests_flavors = [
# llama3 tests
OverrideDefinitions(
[
[
"--model.name compiler_toolkit.llama3",
"--parallelism.data_parallel_shard_degree 2",
"--parallelism.tensor_parallel_degree 2",
"--activation_checkpoint.mode none",
],
],
"llama3 FSDP+TP",
"llama3_fsdp_tp",
ngpu=4,
),
OverrideDefinitions(
[
[
"--model.name compiler_toolkit.llama3",
"--parallelism.data_parallel_shard_degree 2",
"--parallelism.tensor_parallel_degree 2",
"--model.flavor debugmodel_flex_attn",
"--activation_checkpoint.mode none",
],
],
"llama3 FSDP+TP+FlexAttn",
"llama3_fsdp_tp_flexattn",
ngpu=4,
),
# deepseek_v3 tests
OverrideDefinitions(
[
[
"--model.name compiler_toolkit.deepseek_v3",
"--parallelism.data_parallel_shard_degree 2",
"--parallelism.tensor_parallel_degree 2",
"--parallelism.expert_parallel_degree 4",
"--parallelism.expert_tensor_parallel_degree 1",
"--activation_checkpoint.mode none",
],
],
"deepseek_v3 FSDP+TP+EP",
"deepseekv3_fsdp_tp_ep",
ngpu=4,
),
OverrideDefinitions(
[
[
"--model.name compiler_toolkit.deepseek_v3",
"--parallelism.data_parallel_shard_degree 2",
"--parallelism.tensor_parallel_degree 2",
"--parallelism.expert_parallel_degree 4",
"--parallelism.expert_tensor_parallel_degree 1",
"--activation_checkpoint.mode none",
"--model.flavor debugmodel_flex_attn",
],
],
"deepseek_v3 FSDP+TP+EP+FlexAttention",
"deepseekv3_fsdp_tp_ep_flexattention",
ngpu=4,
),
]
return integration_tests_flavors


_TEST_SUITES_FUNCTION = {
"compiler_toolkit": build_compiler_toolkit_test_list,
}


def main():
parser = argparse.ArgumentParser()
parser.add_argument("output_dir")
parser.add_argument(
"--config_path",
default="./tests/integration_tests/base_config.toml",
help="Base config path for integration tests. This is the config that will be used as a base for all tests.",
)
parser.add_argument(
"--test_name",
default="all",
help="test to run, acceptable values: `test_name` in `build_test_list` (default: all)",
)
parser.add_argument("--ngpu", default=8, type=int)
args = parser.parse_args()

if not os.path.exists(args.output_dir):
os.makedirs(args.output_dir)
if os.listdir(args.output_dir):
raise RuntimeError("Please provide an empty output directory.")

test_list = _TEST_SUITES_FUNCTION["compiler_toolkit"]()
run_tests(args, test_list)


if __name__ == "__main__":
main()
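Since each entry in `build_compiler_toolkit_test_list` is an `OverrideDefinitions` record (a list of override-flag lists, a description, a test name used for the output sub-directory, and an `ngpu` count, judging from the entries above), adding another variation only means appending one more entry. The flavor below is a hypothetical illustration that mirrors the existing entries; it is not part of this PR:

        # Hypothetical extra flavor, following the pattern of the entries above.
        OverrideDefinitions(
            [
                [
                    "--model.name compiler_toolkit.llama3",
                    "--parallelism.data_parallel_shard_degree 4",
                    "--activation_checkpoint.mode none",
                ],
            ],
            "llama3 FSDP only",  # test description
            "llama3_fsdp",  # output sub-directory name
            ngpu=4,
        ),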