From 020bc628160a47a5b34702e6bfb7b706758b9ccc Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Fri, 22 Aug 2025 22:16:22 +0200 Subject: [PATCH 1/4] first attempt at github testing --- .github/workflows/testing.yml | 15 ++- tests/test_github.py | 195 ++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 tests/test_github.py diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 02c4a91c..8d167ba7 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -36,7 +36,7 @@ jobs: name: python-coverage-comment-action path: python-coverage-comment-action.txt - integration-tests: + integration-tests-modal: runs-on: ubuntu-latest timeout-minutes: 30 env: @@ -47,4 +47,15 @@ jobs: - uses: astral-sh/setup-uv@v4 - run: uv sync --extra dev - run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET} - - run: uv run pytest -m integration tests -v + - run: uv run pytest -m integration tests/test_modal.py -v + + integration-tests-github: + runs-on: ubuntu-latest + timeout-minutes: 45 + env: + GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v4 + - run: uv sync --extra dev + - run: uv run pytest -m integration tests/test_github.py -v diff --git a/tests/test_github.py b/tests/test_github.py new file mode 100644 index 00000000..90482690 --- /dev/null +++ b/tests/test_github.py @@ -0,0 +1,195 @@ +import os +import subprocess +from collections import namedtuple +from pathlib import Path + +import pytest +from dotenv import load_dotenv + +from libkernelbot.consts import GitHubGPU, SubmissionMode +from libkernelbot.launchers import GitHubLauncher +from libkernelbot.report import RunProgressReporter +from libkernelbot.task import build_task_config, make_task_definition +from libkernelbot.utils import get_github_branch_name + +# Named tuple for better readability 
+GitHubConfig = namedtuple('GitHubConfig', ['token', 'repo', 'branch']) + + +class MockProgressReporter(RunProgressReporter): + """Test progress reporter that captures messages.""" + + def __init__(self, title: str = "Test GitHub Run"): + super().__init__(title) + self.messages = [] + self.updates = [] + + async def push(self, message: str): + self.messages.append(message) + + async def update(self, message: str): + self.updates.append(message) + + +def get_github_repo(): + """Get GitHub repository from git remote.""" + try: + result = subprocess.run( + ["git", "remote", "get-url", "origin"], + capture_output=True, + text=True, + check=True, + ) + remote_url = result.stdout.strip() + + # Parse GitHub repo from remote URL + # Handle both SSH and HTTPS formats + if remote_url.startswith("git@github.com:"): + repo = remote_url.replace("git@github.com:", "").replace(".git", "") + elif remote_url.startswith("https://github.com/"): + repo = remote_url.replace("https://github.com/", "").replace(".git", "") + else: + return None + + return repo + except subprocess.CalledProcessError: + return None + + +@pytest.fixture(scope="session") +def github_config(): + """ + Get GitHub test configuration from environment or git. + Skips tests if required configuration is missing. 
+ """ + # Load .env file if it exists + load_dotenv() + + token = os.getenv("GITHUB_TOKEN") + repo = os.getenv("GITHUB_REPO") or get_github_repo() + branch = os.getenv("GITHUB_BRANCH") or get_github_branch_name() + + if not token: + pytest.skip("GitHub integration tests require GITHUB_TOKEN environment variable") + + if not repo: + pytest.skip( + "GitHub integration tests require GITHUB_REPO environment variable " + "or a valid git remote origin" + ) + + return GitHubConfig(token=token, repo=repo, branch=branch) + + +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize("gpu_type", [GitHubGPU.NVIDIA, GitHubGPU.MI300, GitHubGPU.MI250]) +async def test_github_launcher_python_script(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU): + """ + Test GitHubLauncher with a real Python script using real GitHub Actions. + Tests all GPU types to verify runners are working. + """ + launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch) + reporter = MockProgressReporter("GitHub Integration Test") + + # Load the real identity_py task + task_path = project_root / "examples" / "identity_py" + if not task_path.exists(): + pytest.skip("examples/identity_py not found - skipping GitHub integration test") + + task_definition = make_task_definition(task_path) + submission_content = (task_path / "submission.py").read_text() + + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=0, # Not used for GitHub launcher + mode=SubmissionMode.TEST, + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure and success + assert result.success, f"Expected successful run, got: {result.error}" + assert result.error == "" + assert isinstance(result.runs, dict) + + # System info - test actual expected values based on GPU type + if gpu_type == GitHubGPU.NVIDIA: + assert "NVIDIA" in result.system.gpu or "GeForce" in 
result.system.gpu or "RTX" in result.system.gpu + else: # AMD GPUs + assert "MI" in result.system.gpu or "AMD" in result.system.gpu + + assert "Linux" in result.system.platform + + # Test run structure + assert "test" in result.runs + test_run = result.runs["test"] + + # For Python runs, compilation is None + assert test_run.compilation is None + + # Run needs to succeed + assert test_run.run.success is True + assert test_run.run.passed is True + assert test_run.run.exit_code == 0 + assert test_run.run.duration > 0 + + # Test results need to succeed + assert test_run.run.result["check"] == "pass" + test_count = int(test_run.run.result["test-count"]) + assert test_count == 5 + for i in range(test_count): + assert test_run.run.result[f"test.{i}.status"] == "pass" + assert "size:" in test_run.run.result[f"test.{i}.spec"] + assert "seed:" in test_run.run.result[f"test.{i}.spec"] + + # Sanity check for timings + assert test_run.start < test_run.end + + # Check reporter messages + assert any("Waiting for workflow" in msg for msg in reporter.messages) + assert any("artifacts" in msg.lower() for msg in reporter.messages) + assert any("completed" in update for update in reporter.updates) + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_github_launcher_failing_script(project_root: Path, github_config: GitHubConfig): + """ + Test GitHubLauncher with a script designed to fail. + Simple test to ensure we don't pass wrong submissions. 
+ """ + launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch) + reporter = MockProgressReporter("GitHub Failing Test") + gpu_type = GitHubGPU.NVIDIA # Use NVIDIA for simplicity + + # Load the real identity_py task + task_path = project_root / "examples" / "identity_py" + if not task_path.exists(): + pytest.skip("examples/identity_py not found - skipping GitHub integration test") + + task_definition = make_task_definition(task_path) + # Use one of the cheating scripts + submission_content = (task_path / "cheat-rng.py").read_text() + + # Set a specific seed for reproducible results + task_definition.task.seed = 653212 + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=0, + mode=SubmissionMode.LEADERBOARD, + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure should still be successful (the workflow ran) + assert result.success, f"Expected successful workflow run, got: {result.error}" + assert result.error == "" + + # But the actual test or benchmark should fail + test_passed = result.runs.get("test", {}).run.passed if "test" in result.runs else True + benchmark_passed = result.runs.get("benchmark", {}).run.passed if "benchmark" in result.runs else True + + assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script" \ No newline at end of file From 8dceec8e979bbb678e176ef59b08dc6106b344af Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Tue, 26 Aug 2025 16:43:14 +0200 Subject: [PATCH 2/4] multi-gpu test --- tests/test_github.py | 70 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/tests/test_github.py b/tests/test_github.py index 90482690..1770641a 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -192,4 +192,72 @@ async def 
test_github_launcher_failing_script(project_root: Path, github_config: test_passed = result.runs.get("test", {}).run.passed if "test" in result.runs else True benchmark_passed = result.runs.get("benchmark", {}).run.passed if "benchmark" in result.runs else True - assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script" \ No newline at end of file + assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script" + + + + +@pytest.mark.integration +@pytest.mark.asyncio +@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8]) +async def test_github_launcher_multi_gpu(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU): + """ + Test GitHubLauncher with a real multi-GPU Python task using real GitHub Actions. + Runs the gather example on the 8x MI300 runner to verify multi-GPU execution works. + """ + launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch) + reporter = MockProgressReporter("GitHub Integration Test") + + # Load the real gather task + task_path = project_root / "examples" / "gather" + if not task_path.exists(): + pytest.skip("examples/gather not found - skipping GitHub integration test") + + task_definition = make_task_definition(task_path) + submission_content = (task_path / "submission.py").read_text() + + config = build_task_config( + task=task_definition.task, + submission_content=submission_content, + arch=0, # Not used for GitHub launcher + mode=SubmissionMode.TEST, + ) + + result = await launcher.run_submission(config, gpu_type, reporter) + + # Basic structure and success + assert result.success, f"Expected successful run, got: {result.error}" + assert result.error == "" + assert isinstance(result.runs, dict) + + assert result.system.device_count == 8 + + # Test run structure + assert "test" in result.runs + test_run = result.runs["test"] + + # For Python runs, compilation is None + assert test_run.compilation is None + + # Run 
needs to succeed + assert test_run.run.success is True + assert test_run.run.passed is True + assert test_run.run.exit_code == 0 + assert test_run.run.duration > 0 + + # Test results need to succeed + assert test_run.run.result["check"] == "pass" + test_count = int(test_run.run.result["test-count"]) + assert test_count == 1 + for i in range(test_count): + assert test_run.run.result[f"test.{i}.status"] == "pass" + assert "size:" in test_run.run.result[f"test.{i}.spec"] + assert "seed:" in test_run.run.result[f"test.{i}.spec"] + + # Sanity check for timings + assert test_run.start < test_run.end + + # Check reporter messages + assert any("Waiting for workflow" in msg for msg in reporter.messages) + assert any("artifacts" in msg.lower() for msg in reporter.messages) + assert any("completed" in update for update in reporter.updates) From 819f3fd8ac43be40bcd775c2b55d6fc15d54b0bf Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Thu, 28 Aug 2025 10:47:11 +0200 Subject: [PATCH 3/4] validate repo lookup --- src/libkernelbot/launchers/github.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libkernelbot/launchers/github.py b/src/libkernelbot/launchers/github.py index c748e0fe..1c58f0d0 100644 --- a/src/libkernelbot/launchers/github.py +++ b/src/libkernelbot/launchers/github.py @@ -32,7 +32,7 @@ RunResult, SystemInfo, ) -from libkernelbot.utils import setup_logging +from libkernelbot.utils import KernelBotError, setup_logging from .launcher import Launcher @@ -174,7 +174,10 @@ class GitHubArtifact: class GitHubRun: def __init__(self, repo: str, token: str, branch: str, workflow_file: str): gh = Github(token) - self.repo = gh.get_repo(repo) + try: + self.repo = gh.get_repo(repo) + except UnknownObjectException as e: + raise KernelBotError(f"Could not find GitHub repository {repo}: 404") from e self.token = token self.branch = branch self.workflow_file = workflow_file From 2438be15476bd83204bcdf20d413474750453988 
Mon Sep 17 00:00:00 2001 From: ngc92 <7938269+ngc92@users.noreply.github.com> Date: Tue, 16 Sep 2025 11:27:31 +0200 Subject: [PATCH 4/4] removed MI250 from tests --- tests/test_github.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_github.py b/tests/test_github.py index 1770641a..327f75a3 100644 --- a/tests/test_github.py +++ b/tests/test_github.py @@ -83,7 +83,7 @@ def github_config(): @pytest.mark.integration @pytest.mark.asyncio -@pytest.mark.parametrize("gpu_type", [GitHubGPU.NVIDIA, GitHubGPU.MI300, GitHubGPU.MI250]) +@pytest.mark.parametrize("gpu_type", [GitHubGPU.NVIDIA, GitHubGPU.MI300]) async def test_github_launcher_python_script(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU): """ Test GitHubLauncher with a real Python script using real GitHub Actions.