Skip to content

Commit cf5b048

Browse files
committed
initial work towards having a standalone runner
1 parent 5caf5c1 commit cf5b048

File tree

8 files changed

+134
-5
lines changed

8 files changed

+134
-5
lines changed

examples/identity_py/submission.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!POPCORN leaderboard identity_py
1+
#!POPCORN leaderboard identity_py-dev
22

33
from task import input_t, output_t
44

src/discord-cluster-manager/bot.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
POSTGRES_USER,
2727
init_environment,
2828
)
29-
from launchers import GitHubLauncher, ModalLauncher
29+
from launchers import GitHubLauncher, ModalLauncher, GenericLauncher
3030
from leaderboard_db import LeaderboardDB
3131
from utils import setup_logging
3232

@@ -80,6 +80,7 @@ async def setup_hook(self):
8080
submit_cog = SubmitCog(self)
8181
submit_cog.register_launcher(ModalLauncher(consts.MODAL_CUDA_INCLUDE_DIRS))
8282
submit_cog.register_launcher(GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN))
83+
submit_cog.register_launcher(GenericLauncher("http://65.108.32.167:8000/run", token='TOKEN'))
8384
await self.add_cog(submit_cog)
8485
await self.add_cog(BotManagerCog(self))
8586
await self.add_cog(LeaderboardCog(self))

src/discord-cluster-manager/cogs/admin_cog.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import discord
1111
import env
1212
import yaml
13-
from consts import GitHubGPU, ModalGPU
13+
from consts import GitHubGPU, ModalGPU, OtherGPU
1414
from discord import app_commands
1515
from discord.ext import commands, tasks
1616
from leaderboard_db import leaderboard_name_autocomplete
@@ -153,6 +153,7 @@ async def is_creator_check(
153153
@app_commands.choices(
154154
gpu=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU]
155155
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in ModalGPU]
156+
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in OtherGPU]
156157
)
157158
@with_error_handling
158159
async def leaderboard_create_local(

src/discord-cluster-manager/consts.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ class ModalGPU(Enum):
3131
B200 = "B200"
3232

3333

34+
class OtherGPU(Enum):
    """GPU types that are neither GitHub nor Modal GPUs.

    NOTE(review): presumably these are the GPUs served by the generic HTTP
    runner -- confirm against the launcher that registers this enum.
    """

    A6000 = "A6000"
36+
37+
3438
@dataclasses.dataclass
3539
class GPU:
3640
name: str
@@ -48,7 +52,7 @@ def _make_gpu_lookup(runner_map: dict[str, Type[Enum]]):
4852
return lookup
4953

5054

51-
_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU})
55+
_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU, "Generic": OtherGPU})
5256

5357

5458
def get_gpu_by_name(name: str) -> GPU:
@@ -114,6 +118,7 @@ class RankCriterion(Enum):
114118
"T4": "75",
115119
"L4": "80",
116120
"A100": "80",
121+
"A6000": "86",
117122
"H100": "90a",
118123
"B200": "100",
119124
"NVIDIA": None,
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .github import GitHubLauncher
22
from .launcher import Launcher
33
from .modal import ModalLauncher
4-
4+
from .generic import GenericLauncher
55
# __all__ must contain the exported names as strings; listing the class objects
# themselves makes `from <package> import *` fail with a TypeError.
__all__ = ["Launcher", "GitHubLauncher", "ModalLauncher", "GenericLauncher"]
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Generic launcher POSTs to a specific URL
2+
import asyncio
3+
import datetime
4+
import json
5+
6+
import requests
7+
8+
from consts import GPU, OtherGPU
9+
from report import RunProgressReporter
10+
from run_eval import FullResult, CompileResult, RunResult, EvalResult, SystemInfo
11+
from utils import setup_logging, KernelBotError
12+
13+
from .launcher import Launcher
14+
15+
logger = setup_logging(__name__)
16+
17+
18+
class GenericLauncher(Launcher):
    """Launcher that runs a submission by POSTing it to an HTTP endpoint.

    The request body is the JSON object ``{"config": ..., "token": ...}``.
    A successful (HTTP 200) response is parsed as a JSON object with a
    ``"runs"`` mapping and an optional ``"system"`` object.
    """

    def __init__(self, url: str, token: str):
        """Create a launcher for one endpoint.

        Args:
            url: URL the submission is POSTed to.
            token: Value sent as ``"token"`` in the request body.
        """
        super().__init__("Generic", gpus=OtherGPU)
        self.url = url
        self.token = token

    async def run_submission(
        self, config: dict, gpu_type: GPU, status: RunProgressReporter
    ) -> FullResult:
        """POST ``config`` to the endpoint and convert the reply to a FullResult.

        Args:
            config: Submission configuration, forwarded verbatim as JSON.
            gpu_type: Not used by this launcher.
            status: Progress reporter that receives the waiting/done messages.

        Returns:
            A ``FullResult`` with ``success=True`` holding the parsed runs and
            system info.

        Raises:
            KernelBotError: If the endpoint cannot be reached, answers with a
                status code other than 200, or returns a body that cannot be
                converted into result structures.
        """
        # Inside a coroutine the running loop is the one we want;
        # get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        logger.info("Calling %s", self.url)

        await status.push("⏳ Waiting for run to finish...")
        payload = {"config": config, "token": self.token}
        try:
            # requests is blocking, so the call is pushed to the default executor.
            result = await loop.run_in_executor(
                None, lambda: requests.post(self.url, json=payload)
            )
        except requests.RequestException as err:
            logger.error("Error contacting runner at %s: %s", self.url, err)
            raise KernelBotError(
                "Error running submission. Could not reach the runner"
            ) from err

        # Response bodies go to the log instead of stdout.
        logger.debug("Runner response: %s", result.text)

        await status.update("✅ Waiting for run to finish... Done")
        if result.status_code != 200:
            logger.error(
                "Error running submission. Status code %d, Message: %s",
                result.status_code,
                result.text,
            )
            raise KernelBotError(
                f"Error running submission. Status code {result.status_code}"
            )

        # TODO: this code is duplicated :(
        try:
            data = result.json()
            runs = {
                name: self._parse_eval_result(raw)
                for name, raw in data["runs"].items()
            }
            system = SystemInfo(**data.get("system", {}))
        except (ValueError, KeyError, TypeError, AttributeError) as err:
            # The body comes from a remote service; report a broken payload as
            # a bot error rather than leaking a bare KeyError/TypeError.
            logger.error("Malformed response from %s: %s", self.url, err)
            raise KernelBotError(
                "Error running submission. Malformed response from runner"
            ) from err

        return FullResult(success=True, error="", runs=runs, system=system)

    @staticmethod
    def _parse_eval_result(raw: dict) -> EvalResult:
        """Convert one JSON run entry back into an EvalResult.

        Datetimes arrive as ISO strings and the nested dataclasses arrive as
        plain dicts, so both need explicit reconstruction.
        """
        compilation = raw.get("compilation")
        if compilation is not None:
            compilation = CompileResult(**compilation)
        return EvalResult(
            start=datetime.datetime.fromisoformat(raw["start"]),
            end=datetime.datetime.fromisoformat(raw["end"]),
            compilation=compilation,
            run=RunResult(**raw["run"]),
        )
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Bootstrap a Python environment.
# NOTE(review): `source` is a bash-ism, and the activated venv only persists
# for the caller if this file is sourced -- confirm how it is meant to be run.

# System pip, used only to bootstrap uv.
apt install python3-pip
pip install uv --break-system-packages

# Create and activate the virtual environment (.venv).
uv venv
source .venv/bin/activate

# Install the listed requirements, then torch and numpy.
uv pip install -r requirements.txt
uv pip install torch numpy
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import asyncio
2+
import os
3+
from dataclasses import asdict
4+
from pydantic import BaseModel
5+
6+
import uvicorn
7+
8+
from run_eval import run_config
9+
10+
from fastapi import FastAPI, HTTPException
11+
12+
app = FastAPI()


# Held by the `/run` handler for the duration of a run, so at most one
# submission is evaluated at a time.
_serial_run = asyncio.Semaphore(1)
# Shared secret that callers must send as `token`. It is read from the
# RUNNER_TOKEN environment variable; a hard-coded None can never equal a
# request's string token, which would make every request fail with 401.
# When the variable is unset this stays None and all requests are rejected.
_runner_token = os.environ.get("RUNNER_TOKEN")
17+
18+
19+
class RunRequest(BaseModel):
    # Request body of the POST /run endpoint.

    # Run configuration; handed to run_config unchanged by the handler.
    config: dict
    # Token the handler compares against the module-level runner token.
    token: str
22+
23+
24+
@app.post("/run")
async def run(request: RunRequest) -> dict:
    """Check the caller's token, run the submitted config, return the result.

    Args:
        request: Parsed request body carrying ``config`` and ``token``.

    Returns:
        The value returned by ``run_config``, converted to a plain dict with
        ``dataclasses.asdict``.

    Raises:
        HTTPException: 401 if no runner token is configured or the supplied
            token does not match it.
    """
    import secrets

    expected = _runner_token
    # Constant-time comparison avoids leaking how much of the token matched.
    # Comparing bytes keeps compare_digest happy with non-ASCII tokens.
    if expected is None or not secrets.compare_digest(
        request.token.encode("utf-8"), expected.encode("utf-8")
    ):
        raise HTTPException(status_code=401, detail="Invalid token")

    # only one submission can run at any given time
    async with _serial_run:
        # run_config is a synchronous call; running it in a worker thread
        # keeps the event loop free to answer other connections meanwhile.
        result = await asyncio.to_thread(run_config, request.config)
    return asdict(result)
31+
32+
33+
async def run_server(port):
    """Serve ``app`` with uvicorn on ``0.0.0.0:port`` until the server stops.

    Args:
        port: TCP port to listen on.
    """
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host="0.0.0.0",
            port=port,
            log_level="info",
            limit_concurrency=2,
        )
    )

    # serve() is awaited on the caller's event loop instead of letting uvicorn
    # create its own.
    # NOTE(review): the original comment gave "discord and fastapi both run on
    # the same event loop" as the reason -- confirm that still applies here.
    await server.serve()
45+
46+
47+
def main():
    """Run the server on the port named by ``PORT`` (8000 if unset or empty)."""
    port = int(os.environ.get("PORT") or 8000)
    with asyncio.Runner() as runner:
        runner.run(run_server(port=port))
50+
51+
52+
if __name__ == "__main__":
53+
main()

0 commit comments

Comments
 (0)