15 changes: 13 additions & 2 deletions .github/workflows/testing.yml
@@ -36,7 +36,7 @@ jobs:
name: python-coverage-comment-action
path: python-coverage-comment-action.txt

integration-tests:
integration-tests-modal:
runs-on: ubuntu-latest
timeout-minutes: 30
env:
@@ -47,4 +47,15 @@
- uses: astral-sh/setup-uv@v4
- run: uv sync --extra dev
- run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET}
- run: uv run pytest -m integration tests -v
- run: uv run pytest -m integration tests/test_modal.py -v

integration-tests-github:
runs-on: ubuntu-latest
timeout-minutes: 45
env:
GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v4
- run: uv sync --extra dev
- run: uv run pytest -m integration tests/test_github.py -v
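
Both jobs select tests with the integration marker (pytest -m integration). A minimal conftest.py sketch of how that marker could be registered; this is an assumption for illustration, since the repository may already declare the marker in pyproject.toml:

# conftest.py (sketch): register the "integration" marker used by both CI jobs.
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "integration: tests that dispatch real Modal or GitHub Actions runs"
    )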
7 changes: 5 additions & 2 deletions src/libkernelbot/launchers/github.py
@@ -32,7 +32,7 @@
RunResult,
SystemInfo,
)
from libkernelbot.utils import setup_logging
from libkernelbot.utils import KernelBotError, setup_logging

from .launcher import Launcher

@@ -174,7 +174,10 @@ class GitHubArtifact:
class GitHubRun:
def __init__(self, repo: str, token: str, branch: str, workflow_file: str):
gh = Github(token)
self.repo = gh.get_repo(repo)
try:
self.repo = gh.get_repo(repo)
except UnknownObjectException as e:
raise KernelBotError(f"Could not find GitHub repository {repo}: 404") from e
self.token = token
self.branch = branch
self.workflow_file = workflow_file
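
The new except clause catches UnknownObjectException, whose import is not visible in this hunk and is presumably already present near the top of launchers/github.py. A sketch of the assumed import block, for context only:

# Assumed imports (not shown in the hunk). UnknownObjectException is PyGithub's
# exception for 404 responses such as a missing repository.
from github import Github
from github.GithubException import UnknownObjectException

from libkernelbot.utils import KernelBotError, setup_logging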
263 changes: 263 additions & 0 deletions tests/test_github.py
@@ -0,0 +1,263 @@
import os
import subprocess
from collections import namedtuple
from pathlib import Path

import pytest
from dotenv import load_dotenv

from libkernelbot.consts import GitHubGPU, SubmissionMode
from libkernelbot.launchers import GitHubLauncher
from libkernelbot.report import RunProgressReporter
from libkernelbot.task import build_task_config, make_task_definition
from libkernelbot.utils import get_github_branch_name

# Named tuple bundling the GitHub test configuration (token, repo, branch)
GitHubConfig = namedtuple("GitHubConfig", ["token", "repo", "branch"])


class MockProgressReporter(RunProgressReporter):
"""Test progress reporter that captures messages."""

def __init__(self, title: str = "Test GitHub Run"):
super().__init__(title)
self.messages = []
self.updates = []

async def push(self, message: str):
self.messages.append(message)

async def update(self, message: str):
self.updates.append(message)


def get_github_repo():
"""Get GitHub repository from git remote."""
try:
result = subprocess.run(
["git", "remote", "get-url", "origin"],
capture_output=True,
text=True,
check=True,
)
remote_url = result.stdout.strip()

# Parse GitHub repo from remote URL
# Handle both SSH and HTTPS formats
        if remote_url.startswith("git@github.com:"):
            repo = remote_url.replace("git@github.com:", "").replace(".git", "")
elif remote_url.startswith("https://github.com/"):
repo = remote_url.replace("https://github.com/", "").replace(".git", "")
else:
return None

return repo
except subprocess.CalledProcessError:
return None


@pytest.fixture(scope="session")
def github_config():
"""
Get GitHub test configuration from environment or git.
Skips tests if required configuration is missing.
"""
# Load .env file if it exists
load_dotenv()

token = os.getenv("GITHUB_TOKEN")
repo = os.getenv("GITHUB_REPO") or get_github_repo()
branch = os.getenv("GITHUB_BRANCH") or get_github_branch_name()

if not token:
pytest.skip("GitHub integration tests require GITHUB_TOKEN environment variable")

if not repo:
pytest.skip(
"GitHub integration tests require GITHUB_REPO environment variable "
"or a valid git remote origin"
)

return GitHubConfig(token=token, repo=repo, branch=branch)
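
For local runs the fixture reads the same variables the CI job exports. A small sanity-check sketch (variable names match the fixture above; the pytest invocation mirrors the CI step) to run before uv run pytest -m integration tests/test_github.py -v:

# Quick local check that the fixture's configuration is available.
import os

from dotenv import load_dotenv

load_dotenv()
for var in ("GITHUB_TOKEN", "GITHUB_REPO", "GITHUB_BRANCH"):
    print(var, "set" if os.getenv(var) else "missing (fixture will fall back or skip)")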


@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("gpu_type", [GitHubGPU.NVIDIA, GitHubGPU.MI300])
async def test_github_launcher_python_script(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
"""
Test GitHubLauncher with a real Python script using real GitHub Actions.
    Parametrized over both runner GPU types to verify each runner is working.
"""
launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
reporter = MockProgressReporter("GitHub Integration Test")

# Load the real identity_py task
task_path = project_root / "examples" / "identity_py"
if not task_path.exists():
pytest.skip("examples/identity_py not found - skipping GitHub integration test")

task_definition = make_task_definition(task_path)
submission_content = (task_path / "submission.py").read_text()

config = build_task_config(
task=task_definition.task,
submission_content=submission_content,
arch=0, # Not used for GitHub launcher
mode=SubmissionMode.TEST,
)

result = await launcher.run_submission(config, gpu_type, reporter)

# Basic structure and success
assert result.success, f"Expected successful run, got: {result.error}"
assert result.error == ""
assert isinstance(result.runs, dict)

# System info - test actual expected values based on GPU type
if gpu_type == GitHubGPU.NVIDIA:
assert "NVIDIA" in result.system.gpu or "GeForce" in result.system.gpu or "RTX" in result.system.gpu
else: # AMD GPUs
assert "MI" in result.system.gpu or "AMD" in result.system.gpu

assert "Linux" in result.system.platform

# Test run structure
assert "test" in result.runs
test_run = result.runs["test"]

# For Python runs, compilation is None
assert test_run.compilation is None

# Run needs to succeed
assert test_run.run.success is True
assert test_run.run.passed is True
assert test_run.run.exit_code == 0
assert test_run.run.duration > 0

# Test results need to succeed
assert test_run.run.result["check"] == "pass"
test_count = int(test_run.run.result["test-count"])
assert test_count == 5
for i in range(test_count):
assert test_run.run.result[f"test.{i}.status"] == "pass"
assert "size:" in test_run.run.result[f"test.{i}.spec"]
assert "seed:" in test_run.run.result[f"test.{i}.spec"]

# Sanity check for timings
assert test_run.start < test_run.end

# Check reporter messages
assert any("Waiting for workflow" in msg for msg in reporter.messages)
assert any("artifacts" in msg.lower() for msg in reporter.messages)
assert any("completed" in update for update in reporter.updates)


@pytest.mark.integration
@pytest.mark.asyncio
async def test_github_launcher_failing_script(project_root: Path, github_config: GitHubConfig):
"""
Test GitHubLauncher with a script designed to fail.
Simple test to ensure we don't pass wrong submissions.
"""
launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
reporter = MockProgressReporter("GitHub Failing Test")
gpu_type = GitHubGPU.NVIDIA # Use NVIDIA for simplicity

# Load the real identity_py task
task_path = project_root / "examples" / "identity_py"
if not task_path.exists():
pytest.skip("examples/identity_py not found - skipping GitHub integration test")

task_definition = make_task_definition(task_path)
# Use one of the cheating scripts
submission_content = (task_path / "cheat-rng.py").read_text()

# Set a specific seed for reproducible results
task_definition.task.seed = 653212
config = build_task_config(
task=task_definition.task,
submission_content=submission_content,
arch=0,
mode=SubmissionMode.LEADERBOARD,
)

result = await launcher.run_submission(config, gpu_type, reporter)

# Basic structure should still be successful (the workflow ran)
assert result.success, f"Expected successful workflow run, got: {result.error}"
assert result.error == ""

# But the actual test or benchmark should fail
    test_passed = result.runs["test"].run.passed if "test" in result.runs else True
    benchmark_passed = result.runs["benchmark"].run.passed if "benchmark" in result.runs else True

assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"


@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8])
async def test_github_launcher_multi_gpu(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
"""
    Test GitHubLauncher with a multi-GPU task using real GitHub Actions.
    Runs on the 8x MI300 runner and verifies the reported device count.
"""
launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
reporter = MockProgressReporter("GitHub Integration Test")

    # Load the real gather task (multi-GPU example)
task_path = project_root / "examples" / "gather"
if not task_path.exists():
pytest.skip("examples/gather not found - skipping GitHub integration test")

task_definition = make_task_definition(task_path)
submission_content = (task_path / "submission.py").read_text()

config = build_task_config(
task=task_definition.task,
submission_content=submission_content,
arch=0, # Not used for GitHub launcher
mode=SubmissionMode.TEST,
)

result = await launcher.run_submission(config, gpu_type, reporter)

# Basic structure and success
assert result.success, f"Expected successful run, got: {result.error}"
assert result.error == ""
assert isinstance(result.runs, dict)

assert result.system.device_count == 8

# Test run structure
assert "test" in result.runs
test_run = result.runs["test"]

# For Python runs, compilation is None
assert test_run.compilation is None

# Run needs to succeed
assert test_run.run.success is True
assert test_run.run.passed is True
assert test_run.run.exit_code == 0
assert test_run.run.duration > 0

# Test results need to succeed
assert test_run.run.result["check"] == "pass"
test_count = int(test_run.run.result["test-count"])
assert test_count == 1
for i in range(test_count):
assert test_run.run.result[f"test.{i}.status"] == "pass"
assert "size:" in test_run.run.result[f"test.{i}.spec"]
assert "seed:" in test_run.run.result[f"test.{i}.spec"]

# Sanity check for timings
assert test_run.start < test_run.end

# Check reporter messages
assert any("Waiting for workflow" in msg for msg in reporter.messages)
assert any("artifacts" in msg.lower() for msg in reporter.messages)
assert any("completed" in update for update in reporter.updates)