
Commit dccd1fc

multi-gpu test
1 parent 5403246 commit dccd1fc

File tree

1 file changed: +69 -1 lines changed


tests/test_github.py

Lines changed: 69 additions & 1 deletion
@@ -192,4 +192,72 @@ async def test_github_launcher_failing_script(project_root: Path, github_config:
     test_passed = result.runs.get("test", {}).run.passed if "test" in result.runs else True
     benchmark_passed = result.runs.get("benchmark", {}).run.passed if "benchmark" in result.runs else True
 
-    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
+    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8])
+async def test_github_launcher_multi_gpu(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
+    """
+    Test GitHubLauncher with a real multi-GPU Python task using real GitHub Actions.
+    Verifies that the MI300x8 runner exposes all eight devices.
+    """
+    launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
+    reporter = MockProgressReporter("GitHub Integration Test")
+
+    # Load the real gather task
+    task_path = project_root / "examples" / "gather"
+    if not task_path.exists():
+        pytest.skip("examples/gather not found - skipping GitHub integration test")
+
+    task_definition = make_task_definition(task_path)
+    submission_content = (task_path / "submission.py").read_text()
+
+    config = build_task_config(
+        task=task_definition.task,
+        submission_content=submission_content,
+        arch=0,  # Not used for GitHub launcher
+        mode=SubmissionMode.TEST,
+    )
+
+    result = await launcher.run_submission(config, gpu_type, reporter)
+
+    # Basic structure and success
+    assert result.success, f"Expected successful run, got: {result.error}"
+    assert result.error == ""
+    assert isinstance(result.runs, dict)
+
+    # An MI300x8 runner should expose all eight GPUs
+    assert result.system.device_count == 8
+
+    # Test run structure
+    assert "test" in result.runs
+    test_run = result.runs["test"]
+
+    # For Python runs, compilation is None
+    assert test_run.compilation is None
+
+    # Run needs to succeed
+    assert test_run.run.success is True
+    assert test_run.run.passed is True
+    assert test_run.run.exit_code == 0
+    assert test_run.run.duration > 0
+
+    # Test results need to succeed
+    assert test_run.run.result["check"] == "pass"
+    test_count = int(test_run.run.result["test-count"])
+    assert test_count == 5
+    for i in range(test_count):
+        assert test_run.run.result[f"test.{i}.status"] == "pass"
+        assert "size:" in test_run.run.result[f"test.{i}.spec"]
+        assert "seed:" in test_run.run.result[f"test.{i}.spec"]
+
+    # Sanity check for timings
+    assert test_run.start < test_run.end
+
+    # Check reporter messages
+    assert any("Waiting for workflow" in msg for msg in reporter.messages)
+    assert any("artifacts" in msg.lower() for msg in reporter.messages)
+    assert any("completed" in update for update in reporter.updates)
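The assertions above pin down the shape of the per-run result mapping that gets parsed out of the workflow artifacts. As a reading aid, here is a mapping that would satisfy them; the concrete values, and especially the spec string format, are illustrative assumptions rather than output captured from a real run.

# Illustrative only: a result mapping consistent with the assertions in
# test_github_launcher_multi_gpu. Values and the exact spec format are
# assumptions; the real mapping is produced by the runner's artifacts.
example_result = {"check": "pass", "test-count": "5"}
for i in range(int(example_result["test-count"])):
    example_result[f"test.{i}.status"] = "pass"
    example_result[f"test.{i}.spec"] = f"size: 1024; seed: {i}"  # format assumed

assert example_result["check"] == "pass"
assert all(example_result[f"test.{i}.status"] == "pass" for i in range(5))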

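The final three assertions also fix the minimal surface of MockProgressReporter: it is constructed with a title and accumulates free-form progress messages plus status updates. Below is a hypothetical stand-in consistent with that usage; the method names are assumptions, since only the constructor argument and the .messages / .updates attributes appear in the test.

from dataclasses import dataclass, field

@dataclass
class MockProgressReporter:
    # Hypothetical stand-in, not the suite's actual class: only the title
    # argument and the .messages / .updates attributes are visible in the test.
    title: str
    messages: list[str] = field(default_factory=list)
    updates: list[str] = field(default_factory=list)

    async def push(self, message: str) -> None:  # method name assumed
        self.messages.append(message)

    async def update(self, status: str) -> None:  # method name assumed
        self.updates.append(status)

Locally, the test can be selected with pytest's marker and keyword filters, e.g. pytest tests/test_github.py -m integration -k multi_gpu (the exact invocation depends on how the suite registers its integration marker).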