@@ -192,4 +192,72 @@ async def test_github_launcher_failing_script(project_root: Path, github_config:
     test_passed = result.runs.get("test", {}).run.passed if "test" in result.runs else True
     benchmark_passed = result.runs.get("benchmark", {}).run.passed if "benchmark" in result.runs else True

-    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
+    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
+
+
+
+
+@pytest.mark.integration
+@pytest.mark.asyncio
+@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8])
+async def test_github_launcher_multi_gpu(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
+ """
205
+ Test GitHubLauncher with a real Python script using real GitHub Actions.
206
+ Tests all GPU types to verify runners are working.
207
+ """
+    launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
+    reporter = MockProgressReporter("GitHub Integration Test")
+
+    # Load the real gather task
+    task_path = project_root / "examples" / "gather"
+    if not task_path.exists():
+        pytest.skip("examples/gather not found - skipping GitHub integration test")
+
+    task_definition = make_task_definition(task_path)
+    submission_content = (task_path / "submission.py").read_text()
+
+    config = build_task_config(
+        task=task_definition.task,
+        submission_content=submission_content,
+        arch=0,  # Not used for GitHub launcher
+        mode=SubmissionMode.TEST,
+    )
+
+    result = await launcher.run_submission(config, gpu_type, reporter)
+
+    # Basic structure and success
+    assert result.success, f"Expected successful run, got: {result.error}"
+    assert result.error == ""
+    assert isinstance(result.runs, dict)
+
+    assert result.system.device_count == 8
+
+    # Test run structure
+    assert "test" in result.runs
+    test_run = result.runs["test"]
+
+    # For Python runs, compilation is None
+    assert test_run.compilation is None
+
+    # Run needs to succeed
+    assert test_run.run.success is True
+    assert test_run.run.passed is True
+    assert test_run.run.exit_code == 0
+    assert test_run.run.duration > 0
+
+    # Test results need to succeed
+    assert test_run.run.result["check"] == "pass"
+    test_count = int(test_run.run.result["test-count"])
+    assert test_count == 5
+    for i in range(test_count):
+        assert test_run.run.result[f"test.{i}.status"] == "pass"
+        assert "size:" in test_run.run.result[f"test.{i}.spec"]
+        assert "seed:" in test_run.run.result[f"test.{i}.spec"]
+
+    # Sanity check for timings
+    assert test_run.start < test_run.end
+
+    # Check reporter messages
+    assert any("Waiting for workflow" in msg for msg in reporter.messages)
+    assert any("artifacts" in msg.lower() for msg in reporter.messages)
+    assert any("completed" in update for update in reporter.updates)