diff --git a/.github/workflows/nvidia_workflow.yml b/.github/workflows/nvidia_workflow.yml
index b1f46949..2e52374c 100644
--- a/.github/workflows/nvidia_workflow.yml
+++ b/.github/workflows/nvidia_workflow.yml
@@ -28,6 +28,8 @@ jobs:
       - name: Create input files
         shell: bash
         run: |
+          # install jq
+          apt-get update && apt-get install -y jq
+
           # Extract the payload content without printing it
-          apt-get update && apt-get install -y jq
           PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)
diff --git a/examples/eval.py b/examples/eval.py
index e414a580..7bec7f87 100644
--- a/examples/eval.py
+++ b/examples/eval.py
@@ -345,7 +345,7 @@ def main():
     if mode == "benchmark":
         return run_benchmarking(logger, pool, tests)
 
-    if mode == "leaderboard":
+    if mode == "leaderboard" or mode == "milestone":
         # warmup
         run_single_benchmark(pool, tests[0], False, 100, 1e7)
         logger.log("benchmark-count", len(tests))
diff --git a/examples/matmul_py/pytorch_ref.py b/examples/matmul_py/pytorch_ref.py
new file mode 100644
index 00000000..3d92d2a8
--- /dev/null
+++ b/examples/matmul_py/pytorch_ref.py
@@ -0,0 +1,8 @@
+#!POPCORN leaderboard matmul_py
+
+from task import input_t, output_t
+
+
+def custom_kernel(data: input_t) -> output_t:
+    a, b = data
+    return a @ b
diff --git a/examples/matmul_py/reference.py b/examples/matmul_py/reference.py
index 76da5c6a..569ac27e 100644
--- a/examples/matmul_py/reference.py
+++ b/examples/matmul_py/reference.py
@@ -20,7 +20,7 @@ def check_implementation(data: input_t, output: output_t) -> str:
     reasons = verbose_allclose(output, expected)
     if len(reasons) > 0:
         # TODO better processing of reasons
-        return "mismatch found! custom implementation doesn't match reference.: " + reasons[0]
+        return False, "mismatch found! custom implementation doesn't match reference.: " + reasons[0]
 
-    return ''
+    return True, ''
diff --git a/examples/matmul_py/task.yml b/examples/matmul_py/task.yml
index 6bb74d69..46a65f2c 100644
--- a/examples/matmul_py/task.yml
+++ b/examples/matmul_py/task.yml
@@ -6,6 +6,20 @@ files:
   - {"name": "utils.py", "source": "../utils.py"}
   - {"name": "reference.py", "source": "reference.py"}
   - {"name": "eval.py", "source": "../eval.py"}
+  - {"name": "pytorch_ref.py", "source": "pytorch_ref.py"}
+  - {"name": "torch_mm_ref.py", "source": "torch_mm_ref.py"}
+
+milestones:
+  - {
+      "milestone_name": "pytorch reference",
+      "filename": "pytorch_ref.py",
+      "description": "PyTorch reference implementation as a performance baseline for matmul"
+    }
+  - {
+      "milestone_name": "torch.mm reference",
+      "filename": "torch_mm_ref.py",
+      "description": "torch.mm reference implementation as a performance baseline for matmul"
+    }
 
 lang: "py"
diff --git a/examples/matmul_py/torch_mm_ref.py b/examples/matmul_py/torch_mm_ref.py
new file mode 100644
index 00000000..0761b4e4
--- /dev/null
+++ b/examples/matmul_py/torch_mm_ref.py
@@ -0,0 +1,9 @@
+#!POPCORN leaderboard matmul_py
+
+from task import input_t, output_t
+import torch
+
+
+def custom_kernel(data: input_t) -> output_t:
+    a, b = data
+    return torch.mm(a, b)
diff --git a/src/discord-cluster-manager/api/main.py b/src/discord-cluster-manager/api/main.py
index b0b2d34f..9315279f 100644
--- a/src/discord-cluster-manager/api/main.py
+++ b/src/discord-cluster-manager/api/main.py
@@ -356,7 +356,11 @@ async def run_submission(  # noqa: C901
         all_leaderboards = [lb["name"] for lb in db.get_leaderboards()]
         if leaderboard_name not in all_leaderboards:
             raise HTTPException(
-                status_code=404, detail=f"Leaderboard '{leaderboard_name}' not found."
+                status_code=404,
+                detail=(
+                    f"Leaderboard '{leaderboard_name}' not found "
+                    "when trying to run submission."
+                )
             )
     else:
         raise HTTPException(
diff --git a/src/discord-cluster-manager/api/utils.py b/src/discord-cluster-manager/api/utils.py
index c40965a0..569430a6 100644
--- a/src/discord-cluster-manager/api/utils.py
+++ b/src/discord-cluster-manager/api/utils.py
@@ -155,7 +155,7 @@ async def _run_submission(
     submission: SubmissionRequest, user_info: dict, mode: SubmissionMode, bot
 ):
     try:
-        req = prepare_submission(submission, bot.leaderboard_db)
+        req = prepare_submission(submission, bot.leaderboard_db, mode)
     except Exception as e:
         raise HTTPException(status_code=400, detail=str(e)) from e
 
diff --git a/src/discord-cluster-manager/cogs/admin_cog.py b/src/discord-cluster-manager/cogs/admin_cog.py
index 8e39ee2f..753d7d54 100644
--- a/src/discord-cluster-manager/cogs/admin_cog.py
+++ b/src/discord-cluster-manager/cogs/admin_cog.py
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import subprocess
 import tempfile
@@ -10,16 +11,18 @@
 import discord
 import env
 import yaml
-from consts import GitHubGPU, ModalGPU
+from consts import GitHubGPU, ModalGPU, SubmissionMode, get_system_user_name
 from discord import app_commands
 from discord.ext import commands, tasks
 from leaderboard_db import leaderboard_name_autocomplete
+from submission import lookup_leaderboard
 from task import LeaderboardTask, make_task
 from ui.misc import ConfirmationView, DeleteConfirmationModal, GPUSelectionView
 from utils import (
     KernelBotError,
     LeaderboardItem,
     SubmissionItem,
+    format_time,
     send_discord_message,
     setup_logging,
     with_error_handling,
@@ -120,6 +123,18 @@ def __init__(self, bot: "ClusterBot"):
             name="set-forum-ids", description="Sets forum IDs"
         )(self.set_forum_ids)
 
+        self.submit_milestones = bot.admin_group.command(
+            name="submit-milestones", description="Start a milestone run to get milestone results"
+        )(self.submit_milestones)
+
+        self.list_milestones = bot.admin_group.command(
+            name="list-milestones", description="List all milestones for a leaderboard"
+        )(self.list_milestones)
+
+        self.milestone_results = bot.admin_group.command(
+            name="milestone-results", description="Show results for a milestone"
+        )(self.milestone_results)
+
         self._scheduled_cleanup_temp_users.start()
 
     # --------------------------------------------------------------------------
@@ -366,7 +381,149 @@ async def create_leaderboard_in_db(
                 ephemeral=True,
             )
             return False
-        return True
+
+        # Check if the task has milestones and automatically submit them
+        if hasattr(task, 'milestones') and task.milestones:
+            try:
+                await send_discord_message(
+                    interaction,
+                    f"🚀 Leaderboard `{leaderboard_name}` created successfully! "
" + f"Auto-submitting {len(task.milestones)} milestone(s)...", + ephemeral=True, + ) + + # Call the underlying milestone submission logic directly + await self._submit_milestones_directly(leaderboard_name, task, selected_gpus) + + await send_discord_message( + interaction, + f"✅ Milestone submissions completed for `{leaderboard_name}`!", + ephemeral=True, + ) + except Exception as e: + logger.exception("Error auto-submitting milestones for new leaderboard", exc_info=e) + await send_discord_message( + interaction, + f"⚠️ Leaderboard `{leaderboard_name}` created but milestone " + f"auto-submission failed: {str(e)}", + ephemeral=True, + ) + + return True + + async def _submit_milestones_directly( + self, + leaderboard_name: str, + task: LeaderboardTask, + selected_gpus: list[str] + ): + """Directly submit milestones without going through Discord command layer""" + from consts import SYSTEM_USER_ID, SubmissionMode, get_gpu_by_name + from report import RunProgressReporterAPI + from submission import SubmissionRequest, prepare_submission + + # Ensure system user exists in database + with self.bot.leaderboard_db as db: + db.cursor.execute( + "SELECT 1 FROM leaderboard.user_info WHERE id = %s", + (str(SYSTEM_USER_ID),), + ) + if not db.cursor.fetchone(): + user_name, user_id = get_system_user_name() + db.cursor.execute( + "INSERT INTO leaderboard.user_info (id, user_name) VALUES (%s, %s)", + (str(user_id), user_name), + ) + db.connection.commit() + + # Prepare submission request for milestones + req = SubmissionRequest( + code="", # Not used for milestones + file_name="performance milestone", + user_id=SYSTEM_USER_ID, + gpus=selected_gpus, + leaderboard=leaderboard_name, + ) + + # Prepare the submission (validates leaderboard, deadline, etc.) + processed_req = prepare_submission(req, self.bot.leaderboard_db, SubmissionMode.MILESTONE) + + # Convert GPU strings to GPU objects + gpu_objects = [get_gpu_by_name(gpu) for gpu in selected_gpus] + + # Sync milestones to database + leaderboard_item = lookup_leaderboard(leaderboard_name, self.bot.leaderboard_db) + with self.bot.leaderboard_db as db: + existing_milestones = db.get_leaderboard_milestones(leaderboard_item["id"]) + existing_names = {m["milestone_name"] for m in existing_milestones} + + # Create any new milestones in the database + for milestone in task.milestones: + if milestone["milestone_name"] not in existing_names: + db.create_milestone( + leaderboard_item["id"], + milestone["milestone_name"], + milestone["filename"], + description=milestone.get( + "description", + f"Milestone for {milestone['filename']}" + ) + ) + + # Get submit cog for the submission runner + submit_cog = self.bot.get_cog("SubmitCog") + if not submit_cog: + raise Exception("SubmitCog not available") + + # Create separate submission for each milestone + submission_ids = [] + tasks = [] + + for milestone in task.milestones: + milestone_filename = milestone["filename"] + milestone_code = task.files[milestone_filename] + milestone_name = milestone["milestone_name"] + + # Create separate submission entry for each milestone + with self.bot.leaderboard_db as db: + user_name, user_id = get_system_user_name(milestone_name) + sub_id = db.create_submission( + leaderboard=leaderboard_name, + file_name=milestone_filename, + code=milestone_code, + user_id=user_id, + time=datetime.now(), + user_name=user_name, + ) + submission_ids.append(sub_id) + + # Create tasks for this milestone on all selected GPUs + for gpu in gpu_objects: + # Create a background reporter for this submission + 
+                reporter = RunProgressReporterAPI(
+                    f"Milestone {milestone['milestone_name']} on {gpu.name}"
+                )
+
+                tasks.append(
+                    submit_cog.submit_leaderboard(
+                        sub_id,
+                        milestone_code,
+                        milestone_filename,
+                        gpu,
+                        reporter,
+                        processed_req.task,
+                        SubmissionMode.MILESTONE,
+                        None,
+                    )
+                )
+
+        # Execute all milestone submissions
+        await asyncio.gather(*tasks)
+
+        # Mark all submissions as done
+        with self.bot.leaderboard_db as db:
+            for sub_id in submission_ids:
+                db.mark_submission_done(sub_id)
 
     @discord.app_commands.describe(leaderboard_name="Name of the leaderboard")
     @discord.app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
@@ -1025,3 +1182,187 @@ async def set_forum_ids(self, interaction: discord.Interaction):
             error_message = f"Error updating forum ids: {str(e)}"
             logger.error(error_message, exc_info=True)
             await send_discord_message(interaction, error_message, ephemeral=True)
+
+    @app_commands.describe(
+        leaderboard_name="Name of Leaderboard",
+        gpu="Select GPU. Leave empty for interactive or automatic selection.",
+    )
+    @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
+    @with_error_handling
+    async def submit_milestones(
+        self,
+        interaction: discord.Interaction,
+        leaderboard_name: Optional[str],
+        gpu: Optional[str],
+    ):
+        if not await self.admin_check(interaction):
+            await send_discord_message(
+                interaction,
+                "You do not have permission to submit milestones.",
+                ephemeral=True
+            )
+            return
+
+        # Get the submit cog to access the submission logic
+        submit_cog = self.bot.get_cog("SubmitCog")
+        if not submit_cog:
+            await send_discord_message(
+                interaction,
+                "Submission system is not available.",
+                ephemeral=True
+            )
+            return
+
+        # Get the submit group from the leaderboard cog
+        submit_group = None
+        for command in self.bot.leaderboard_group.commands:
+            if hasattr(command, 'name') and command.name == "submit":
+                submit_group = command
+                break
+
+        if not submit_group:
+            await send_discord_message(
+                interaction,
+                "Submission system is not available.",
+                ephemeral=True
+            )
+            return
+
+        return await submit_group.submit(
+            interaction, leaderboard_name, None, mode=SubmissionMode.MILESTONE, gpu=gpu
+        )
+
+    @app_commands.describe(leaderboard_name="Name of the leaderboard")
+    @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
+    @with_error_handling
+    async def list_milestones(
+        self,
+        interaction: discord.Interaction,
+        leaderboard_name: str,
+    ):
+        if not await self.admin_check(interaction):
+            await send_discord_message(
+                interaction,
+                "You need to have Admin permissions to run this command",
+                ephemeral=True,
+            )
+            return
+
+        leaderboard = lookup_leaderboard(leaderboard_name, self.bot.leaderboard_db)
+        with self.bot.leaderboard_db as db:
+            milestones = db.get_leaderboard_milestones(leaderboard["id"])
+
+        if not milestones:
+            await interaction.response.send_message(f"No milestones found for {leaderboard_name}")
+            return
+
+        message = f"**Milestones for {leaderboard_name}:**\n"
+        for milestone in milestones:
+            message += (
+                f"• {milestone['milestone_name']} "
+                f"({milestone['filename']}) - "
+                f"{milestone['description']}\n"
+            )
+
+        await interaction.response.send_message(message)
+
+    async def _format_milestone_runs(self, runs: list[dict], max_runs: int = 5) -> str:
+        """Format milestone runs into a string message."""
+        message = ""
+        for i, run in enumerate(runs[:max_runs], 1):
+            score = format_time(float(run['score']) * 1e9) if run['score'] else "N/A"
+            status = '✅' if run['passed'] else '❌'
+            message += (
+                f"  {i}. {run['user_name']} - {score} {status} "
+                f"(#{run['submission_id']})\n"
+            )
+
+        if len(runs) > max_runs:
+            message += f"  _... and {len(runs) - max_runs} more runs_\n"
+
+        return message
+
+    def _format_milestone_section(
+        self, milestone: dict, runs: list[dict]
+    ) -> str:
+        """Format a single milestone section with its runs."""
+        section = (
+            f"📍 **{milestone['milestone_name']}** "
+            f"({milestone['filename']}) | {milestone['description']}\n"
+        )
+
+        if not runs:
+            section += "  _No runs found_\n\n"
+            return section
+
+        section += self._format_milestone_runs(runs)
+        section += "\n"
+        return section
+
+    def _create_milestone_messages(
+        self, leaderboard_name: str, milestones: list[dict]
+    ) -> list[str]:
+        """Create a list of messages for all milestones, splitting if needed."""
+        messages = []
+        current_message = f"**All Milestone Results for {leaderboard_name}:**\n\n"
+
+        for milestone in milestones:
+            with self.bot.leaderboard_db as db:
+                runs = db.get_milestone_runs(milestone["id"])
+                runs.sort(key=lambda x: x['submission_time'], reverse=True)
+
+            milestone_section = self._format_milestone_section(milestone, runs)
+
+            if len(current_message) + len(milestone_section) > 1900:
+                messages.append(current_message)
+                current_message = milestone_section
+            else:
+                current_message += milestone_section
+
+        if current_message.strip():
+            messages.append(current_message)
+
+        return messages
+
+    @app_commands.describe(leaderboard_name="Name of the leaderboard")
+    @app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
+    @with_error_handling
+    async def milestone_results(
+        self,
+        interaction: discord.Interaction,
+        leaderboard_name: str,
+    ):
+        if not await self.admin_check(interaction):
+            await send_discord_message(
+                interaction,
+                "You need to have Admin permissions to run this command",
+                ephemeral=True,
+            )
+            return
+
+        leaderboard = lookup_leaderboard(leaderboard_name, self.bot.leaderboard_db)
+        with self.bot.leaderboard_db as db:
+            milestones = db.get_leaderboard_milestones(leaderboard["id"])
+
+        if not milestones:
+            await interaction.response.send_message(
+                f"No milestones found for {leaderboard_name}"
+            )
+            return
+
+        # Create a single message if it fits within Discord's limit
+        message = f"**All Milestone Results for {leaderboard_name}:**\n\n"
+        for milestone in milestones:
+            with self.bot.leaderboard_db as db:
+                runs = db.get_milestone_runs(milestone["id"])
+            message += self._format_milestone_section(milestone, runs)
+
+        if len(message) <= 2000:
+            await interaction.response.send_message(message)
+            return
+
+        # If message is too long, split it into multiple messages
+        messages = self._create_milestone_messages(leaderboard_name, milestones)
+        await interaction.response.send_message(messages[0])
+        for msg in messages[1:]:
+            await interaction.followup.send(msg)
diff --git a/src/discord-cluster-manager/cogs/leaderboard_cog.py b/src/discord-cluster-manager/cogs/leaderboard_cog.py
index 3598b0c6..22aab3c8 100644
--- a/src/discord-cluster-manager/cogs/leaderboard_cog.py
+++ b/src/discord-cluster-manager/cogs/leaderboard_cog.py
@@ -5,14 +5,16 @@
 import discord
 from consts import (
+    SYSTEM_USER_ID,
     SubmissionMode,
     get_gpu_by_name,
+    get_system_user_name,
 )
 from discord import app_commands
 from discord.ext import commands
 from leaderboard_db import leaderboard_name_autocomplete
 from report import MultiProgressReporter
-from submission import SubmissionRequest, prepare_submission
+from submission import SubmissionRequest, lookup_leaderboard, prepare_submission
 from ui.misc import GPUSelectionView
 from ui.table import create_table
 from utils import (
@@ -38,6 +40,7 @@ def __init__(self, bot: "ClusterBot"):
         super().__init__(name="submit", description="Submit to leaderboard")
         self.bot = bot
 
+
     async def select_gpu_view(
         self,
         interaction: discord.Interaction,
@@ -63,32 +66,39 @@ async def on_submit_hook(  # noqa: C901
         self,
         interaction: discord.Interaction,
         leaderboard_name: Optional[str],
-        script: discord.Attachment,
+        script: Optional[discord.Attachment],
         mode: SubmissionMode,
         cmd_gpus: Optional[List[str]],
     ) -> int:
         """
         Called as the main body of a submission to route to the correct runner.
         """
-        # Read the template file
-        submission_content = await script.read()
-
-        try:
-            submission_content = submission_content.decode()
-        except UnicodeError:
-            await send_discord_message(
-                interaction, "Could not decode your file. Is it UTF-8?", ephemeral=True
-            )
-            return -1
+        # Read the template file
+        submission_content = ""
+        if mode != SubmissionMode.MILESTONE:
+            # for milestones we don't have a submission file and instead use the ones in the task
+            submission_content = await script.read()
+            try:
+                submission_content = submission_content.decode()
+            except UnicodeError:
+                await send_discord_message(
+                    interaction, "Could not decode your file. Is it UTF-8?", ephemeral=True
+                )
+                return -1
+        filename = (
+            script.filename
+            if mode != SubmissionMode.MILESTONE
+            else "performance milestone"
+        )
 
         req = SubmissionRequest(
             code=submission_content,
-            file_name=script.filename,
+            file_name=filename,
             user_id=interaction.user.id,
             gpus=cmd_gpus,
             leaderboard=leaderboard_name,
         )
 
-        req = prepare_submission(req, self.bot.leaderboard_db)
+        req = prepare_submission(req, self.bot.leaderboard_db, mode)
 
         # if there is more than one candidate GPU, display UI to let user select,
         # otherwise just run on that GPU
@@ -105,58 +115,192 @@ async def on_submit_hook(  # noqa: C901
 
         command = self.bot.get_cog("SubmitCog").submit_leaderboard
 
-        user_name = interaction.user.global_name or interaction.user.name
+        # For milestone submissions, use consistent system user
+        if mode == SubmissionMode.MILESTONE:
+            # Get the milestone name from the task
+            leaderboard = lookup_leaderboard(leaderboard_name, self.bot.leaderboard_db)
+            milestone_name = None
+            if leaderboard["task"].milestones:
+                milestone_name = leaderboard["task"].milestones[0]["milestone_name"]
+            user_name, user_id = get_system_user_name(milestone_name)
+        else:
+            user_id = interaction.user.id
+            user_name = interaction.user.global_name or interaction.user.name
+
+        run_msg = (
+            f"Milestone submissions for `{req.leaderboard}`"
+            if mode == SubmissionMode.MILESTONE
+            else f"Submission: `{filename}` for `{req.leaderboard}`"
+        )
+        reporter = MultiProgressReporter(interaction, run_msg)
+
+        try:
+            if mode == SubmissionMode.MILESTONE:
+                submission_ids = await self._handle_milestone_submissions(
+                    req, user_id, user_name, selected_gpus, reporter, command
+                )
+                return submission_ids
+            else:
+                sub_id = await self._handle_regular_submission(
+                    req, submission_content, filename, user_id, user_name,
+                    selected_gpus, reporter, command, mode
+                )
+
+                if mode == SubmissionMode.LEADERBOARD:
+                    await self.post_submit_hook(interaction, sub_id)
+                return [sub_id]
+        finally:
+            # Mark all submissions as done
+            if mode == SubmissionMode.MILESTONE:
+                # submission_ids is a list for milestones
+                if 'submission_ids' in locals():
+                    with self.bot.leaderboard_db as db:
+                        for sub_id in submission_ids:
+                            db.mark_submission_done(sub_id)
+            else:
+                # sub_id is a single ID for regular submissions
+                if 'sub_id' in locals():
+                    with self.bot.leaderboard_db as db:
+                        db.mark_submission_done(sub_id)
+
+    async def _handle_milestone_submissions(
+        self, req, user_id, user_name, selected_gpus, reporter, command
+    ):
+        """Handle milestone submissions with separate submission IDs for each milestone"""
+        milestones = req.task.milestones
+        files = req.task.files
+
+        # Ensure system user exists in database for milestone submissions
+        with self.bot.leaderboard_db as db:
+            # Check if system user exists
+            db.cursor.execute(
+                """
+                SELECT 1 FROM leaderboard.user_info WHERE id = %s
+                """,
+                (str(SYSTEM_USER_ID),),
+            )
+            if not db.cursor.fetchone():
+                # Create system user (name only; the id is SYSTEM_USER_ID)
+                db.cursor.execute(
+                    """
+                    INSERT INTO leaderboard.user_info (id, user_name)
+                    VALUES (%s, %s)
+                    """,
+                    (str(SYSTEM_USER_ID), get_system_user_name(None)[0]),
+                )
+                db.connection.commit()
+
+        # Sync milestones to database
+        leaderboard_item = lookup_leaderboard(req.leaderboard, self.bot.leaderboard_db)
+        with self.bot.leaderboard_db as db:
+            existing_milestones = db.get_leaderboard_milestones(leaderboard_item["id"])
+            existing_names = {m["milestone_name"] for m in existing_milestones}
+
+            # Create any new milestones in the database
+            for milestone in milestones:
+                if milestone["milestone_name"] not in existing_names:
+                    db.create_milestone(
+                        leaderboard_item["id"],
+                        milestone["milestone_name"],
+                        milestone["filename"],
+                        description=milestone.get(
+                            "description",
+                            f"Milestone for {milestone['filename']}"
+                        )
+                    )
+
+        # Create separate submission for each milestone
+        submission_ids = []
+        tasks = []
+
+        for milestone in milestones:
+            milestone_filename = milestone["filename"]
+            milestone_code = files[milestone_filename]
+            milestone_name = milestone["milestone_name"]
+
+            # Create separate submission entry for each milestone
+            with self.bot.leaderboard_db as db:
+                user_name, user_id = get_system_user_name(milestone_name)
+                sub_id = db.create_submission(
+                    leaderboard=req.leaderboard,
+                    file_name=milestone_filename,
+                    code=milestone_code,
+                    user_id=user_id,
+                    time=datetime.now(),
+                    user_name=user_name,
+                )
+                submission_ids.append(sub_id)
+
+            # Create tasks for this milestone on all selected GPUs
+            for gpu in selected_gpus:
+                tasks.append(
+                    command(
+                        sub_id,
+                        milestone_code,
+                        milestone_filename,
+                        gpu,
+                        reporter.add_run(
+                            f"{gpu.name} on {gpu.runner} for milestone "
+                            f"{milestone_name} (#{sub_id})"
+                        ),
+                        req.task,
+                        SubmissionMode.MILESTONE,
+                        None,
+                    )
+                )
+
+        await reporter.show()
+        await asyncio.gather(*tasks)
+        return submission_ids
+
+    async def _handle_regular_submission(
+        self, req, submission_content, filename, user_id, user_name,
+        selected_gpus, reporter, command, mode
+    ):
+        """Handle regular submissions with a single submission ID"""
 
         # Create a submission entry in the database
         with self.bot.leaderboard_db as db:
             sub_id = db.create_submission(
                 leaderboard=req.leaderboard,
-                file_name=script.filename,
+                file_name=filename,
                 code=submission_content,
-                user_id=interaction.user.id,
+                user_id=user_id,
                 time=datetime.now(),
                 user_name=user_name,
             )
 
-        run_msg = f"Submission **{sub_id}**: `{script.filename}` for `{req.leaderboard}`"
-        reporter = MultiProgressReporter(interaction, run_msg)
-        try:
-            tasks = [
+        tasks = [
+            command(
+                sub_id,
+                submission_content,
+                filename,
+                gpu,
+                reporter.add_run(f"{gpu.name} on {gpu.runner}"),
+                req.task,
+                mode,
+                None,
+            )
+            for gpu in selected_gpus
+        ]
+
+        # Add secret run for leaderboard submissions
+        if mode == SubmissionMode.LEADERBOARD:
+            tasks += [
                 command(
                     sub_id,
                     submission_content,
-                    script.filename,
+                    filename,
                     gpu,
-                    reporter.add_run(f"{gpu.name} on {gpu.runner}"),
+                    reporter.add_run(f"{gpu.name} on {gpu.runner} (secret)"),
                     req.task,
-                    mode,
-                    None,
+                    SubmissionMode.PRIVATE,
+                    req.secret_seed,
                 )
                 for gpu in selected_gpus
             ]
-            # also schedule secret run
-            if mode == SubmissionMode.LEADERBOARD:
-                tasks += [
-                    command(
-                        sub_id,
-                        submission_content,
-                        script.filename,
-                        gpu,
-                        reporter.add_run(f"{gpu.name} on {gpu.runner} (secret)"),
-                        req.task,
-                        SubmissionMode.PRIVATE,
-                        req.secret_seed,
-                    )
-                    for gpu in selected_gpus
-                ]
-            await reporter.show()
-            await asyncio.gather(*tasks)
-        finally:
-            with self.bot.leaderboard_db as db:
-                db.mark_submission_done(sub_id)
-
-        if mode == SubmissionMode.LEADERBOARD:
-            await self.post_submit_hook(interaction, sub_id)
+        await reporter.show()
+        await asyncio.gather(*tasks)
 
         return sub_id
 
     def generate_run_verdict(self, run: RunItem, sub_data: SubmissionItem):
@@ -224,10 +368,18 @@ async def submit(
         self,
         interaction: discord.Interaction,
         leaderboard_name: Optional[str],
-        script: discord.Attachment,
+        script: Optional[discord.Attachment],
         mode: SubmissionMode,
         gpu: Optional[str],
     ):
+
+        if mode != SubmissionMode.MILESTONE and script is None:
+            await interaction.response.send_message(
+                "Script is required for non-milestone submissions.",
+                ephemeral=True,
+            )
+            return
+
         if not self.bot.accepts_jobs:
             await send_discord_message(
                 interaction,
@@ -236,8 +388,10 @@ async def submit(
             )
             return
 
-        if gpu is not None:
+        if gpu is not None and gpu.strip():
             gpu = [gpu.strip() for gpu in gpu.split(",")]
+        else:
+            gpu = None
 
         return await self.on_submit_hook(interaction, leaderboard_name, script, mode, gpu)
 
diff --git a/src/discord-cluster-manager/cogs/submit_cog.py b/src/discord-cluster-manager/cogs/submit_cog.py
index 0657641f..484c4e76 100644
--- a/src/discord-cluster-manager/cogs/submit_cog.py
+++ b/src/discord-cluster-manager/cogs/submit_cog.py
@@ -103,13 +103,24 @@ async def submit_leaderboard(  # noqa: C901
 
         if result.success:
             score = None
+            # Calculate score for both leaderboard and milestone runs
+            score_run_key = None
             if (
                 "leaderboard" in result.runs
                 and result.runs["leaderboard"].run.success
                 and result.runs["leaderboard"].run.passed
             ):
+                score_run_key = "leaderboard"
+            elif (
+                "milestone" in result.runs
+                and result.runs["milestone"].run.success
+                and result.runs["milestone"].run.passed
+            ):
+                score_run_key = "milestone"
+
+            if score_run_key:
                 score = 0.0
-                num_benchmarks = int(result.runs["leaderboard"].run.result["benchmark-count"])
+                num_benchmarks = int(result.runs[score_run_key].run.result["benchmark-count"])
                 if task.ranking_by == RankCriterion.LAST:
                     if num_benchmarks != 1:
                         logger.error(
@@ -122,12 +133,12 @@
                             f"Expected submission to have exactly one benchmark,"
                             f"got {num_benchmarks}."
                         )
-                    score = float(result.runs["leaderboard"].run.result["benchmark.0.mean"]) / 1e9
+                    score = float(result.runs[score_run_key].run.result["benchmark.0.mean"]) / 1e9
                 else:
                     scores = []
                     for i in range(num_benchmarks):
                         scores.append(
-                            float(result.runs["leaderboard"].run.result[f"benchmark.{i}.mean"])
+                            float(result.runs[score_run_key].run.result[f"benchmark.{i}.mean"])
                             / 1e9
                         )
                     if task.ranking_by == RankCriterion.MEAN:
@@ -139,19 +150,49 @@
 
         if submission_id != -1:
             with self.bot.leaderboard_db as db:
                 for key, value in result.runs.items():
-                    db.create_submission_run(
+                    # Assign score for leaderboard and milestone runs
+                    run_score = None
+                    if (
+                        key == "leaderboard"
+                        or (key == "milestone" and mode == SubmissionMode.MILESTONE)
+                    ):
+                        run_score = score
+
+                    run_id = db.create_submission_run(
                         submission_id,
                         value.start,
                         value.end,
                         mode=key,
                         runner=gpu_type.name,
-                        score=None if key != "leaderboard" else score,
+                        score=run_score,
                         secret=mode == SubmissionMode.PRIVATE,
                         compilation=value.compilation,
                         result=value.run,
                         system=result.system,
                     )
+                    # If this is a milestone submission, record the milestone run
+                    if mode == SubmissionMode.MILESTONE and run_id:
+                        # Get submission data to find the leaderboard
+                        submission_data = db.get_submission_by_id(submission_id)
+                        if submission_data:
+                            leaderboard = db.get_leaderboard(
+                                submission_data["leaderboard_name"]
+                            )
+                            if leaderboard:
+                                # Find the milestone ID based on the filename
+                                milestones = db.get_leaderboard_milestones(leaderboard["id"])
+                                milestone = next(
+                                    (m for m in milestones if m["filename"] == name),
+                                    None
+                                )
+                                if milestone:
+                                    db.record_milestone_run(
+                                        milestone["id"],
+                                        submission_id,
+                                        run_id
+                                    )
+
         return result
 
     @with_error_handling
diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py
index efce3934..ef8d33d4 100644
--- a/src/discord-cluster-manager/consts.py
+++ b/src/discord-cluster-manager/consts.py
@@ -1,4 +1,5 @@
 import dataclasses
+import hashlib
 from enum import Enum, IntEnum
 from typing import Type
 
@@ -97,6 +98,7 @@ class SubmissionMode(Enum):
     LEADERBOARD = "leaderboard"
     PRIVATE = "private"
     SCRIPT = "script"
+    MILESTONE = "milestone"
 
 
 class Language(Enum):
@@ -152,6 +154,23 @@ class RankCriterion(Enum):
 --index-url https://download.pytorch.org/whl/rocm6.2.4
 torch
 """
+
+SYSTEM_USER_ID = -123
+
+
+def get_milestone_user_id(milestone_name: str | None = None) -> int:
+    if not milestone_name:
+        return SYSTEM_USER_ID
+    # Generate a consistent negative ID between -999 and -100 based on the milestone name
+    hash_value = int(hashlib.md5(milestone_name.encode()).hexdigest(), 16)
+    return -100 - (hash_value % 900)
+
+
+def get_system_user_name(milestone_name: str | None = None) -> tuple[str, int]:
+    if milestone_name:
+        return f"KernelBot - {milestone_name}", get_milestone_user_id(milestone_name)
+    return "KernelBot", SYSTEM_USER_ID
 
 # A buffer for timeouts to account for github setup time
 TIMEOUT_BUFFER_MINUTES = 2
diff --git a/src/discord-cluster-manager/leaderboard_db.py b/src/discord-cluster-manager/leaderboard_db.py
index 173fd5b8..27b7609a 100644
--- a/src/discord-cluster-manager/leaderboard_db.py
+++ b/src/discord-cluster-manager/leaderboard_db.py
@@ -173,6 +173,35 @@ def update_leaderboard(self, name, deadline, task):
 
     def delete_leaderboard(self, leaderboard_name: str, force: bool = False):
         try:
+            # Get leaderboard ID first
+            self.cursor.execute(
+                "SELECT id FROM leaderboard.leaderboard WHERE name = %s",
+                (leaderboard_name,),
+            )
+            result = self.cursor.fetchone()
+            if not result:
+                # if there is no leaderboard, there is nothing to do
+                return
+            leaderboard_id = result[0]
+
+            # Delete milestone runs first (they reference milestones)
+            self.cursor.execute(
+                """
+                DELETE FROM leaderboard.milestone_runs
+                WHERE milestone_id IN (
+                    SELECT id FROM leaderboard.milestones
+                    WHERE leaderboard_id = %s
+                )
+                """,
+                (leaderboard_id,),
+            )
+
+            # Delete milestones (they reference the leaderboard)
+            self.cursor.execute(
+                "DELETE FROM leaderboard.milestones WHERE leaderboard_id = %s",
+                (leaderboard_id,),
+            )
+
             if force:
                 self.cursor.execute(
                     """
@@ -211,6 +240,114 @@ def delete_leaderboard(self, leaderboard_name: str, force: bool = False):
             logger.exception("Could not delete leaderboard %s.", leaderboard_name, exc_info=e)
             raise KernelBotError(f"Could not delete leaderboard {leaderboard_name}.") from e
 
+    def create_milestone(
+        self,
+        leaderboard_id: int,
+        milestone_name: str,
+        filename: str,
+        description: str | None = None,
+    ) -> int:
+        """Create a new milestone for a leaderboard"""
+        try:
+            self.cursor.execute(
+                """
+                INSERT INTO leaderboard.milestones (
+                    leaderboard_id, milestone_name, filename, description
+                )
+                VALUES (%s, %s, %s, %s)
+                RETURNING id
+                """,
+                (leaderboard_id, milestone_name, filename, description),
+            )
+            milestone_id = self.cursor.fetchone()[0]
+            self.connection.commit()
+            return milestone_id
+        except psycopg2.Error as e:
+            self.connection.rollback()
+            logger.exception("Error creating milestone", exc_info=e)
+            raise KernelBotError("Error creating milestone") from e
+
+    def get_leaderboard_milestones(self, leaderboard_id: int) -> list[dict]:
+        """Get all milestones for a leaderboard"""
+        self.cursor.execute(
+            """
+            SELECT id, milestone_name, filename, description, created_at
+            FROM leaderboard.milestones
+            WHERE leaderboard_id = %s
+            ORDER BY created_at
+            """,
+            (leaderboard_id,),
+        )
+        return [
+            {
+                "id": row[0],
+                "milestone_name": row[1],
+                "filename": row[2],
+                "description": row[3],
+                "created_at": row[4],
+            }
+            for row in self.cursor.fetchall()
+        ]
+
+    def record_milestone_run(
+        self,
+        milestone_id: int,
+        submission_id: int,
+        run_id: int,
+    ) -> None:
+        """Record that a milestone was run as part of a submission"""
+        try:
+            self.cursor.execute(
+                """
+                INSERT INTO leaderboard.milestone_runs (milestone_id, submission_id, run_id)
+                VALUES (%s, %s, %s)
+                """,
+                (milestone_id, submission_id, run_id),
+            )
+            self.connection.commit()
+        except psycopg2.Error as e:
+            self.connection.rollback()
+            logger.exception("Error recording milestone run", exc_info=e)
+            raise KernelBotError("Error recording milestone run") from e
+
+    def get_milestone_runs(self, milestone_id: int) -> list[dict]:
+        """Get all runs for a specific milestone"""
+        self.cursor.execute(
+            """
+            SELECT
+                mr.id,
+                mr.submission_id,
+                mr.run_id,
+                s.user_id,
+                s.submission_time,
+                r.score,
+                r.passed,
+                r.runner,
+                ui.user_name
+            FROM leaderboard.milestone_runs mr
+            JOIN leaderboard.submission s ON mr.submission_id = s.id
+            JOIN leaderboard.runs r ON mr.run_id = r.id
+            JOIN leaderboard.user_info ui ON s.user_id = ui.id
+            WHERE mr.milestone_id = %s
+            ORDER BY r.score ASC NULLS LAST, s.submission_time DESC
+            """,
+            (milestone_id,),
+        )
+        return [
+            {
+                "id": row[0],
+                "submission_id": row[1],
+                "run_id": row[2],
+                "user_id": row[3],
+                "user_name": row[8],
+                "submission_time": row[4],
+                "score": row[5],
+                "passed": row[6],
+                "runner": row[7],
+            }
+            for row in self.cursor.fetchall()
+        ]
+
     def create_submission(
         self,
         leaderboard: str,
@@ -340,6 +477,7 @@ def create_submission_run(
                     secret, runner, score, passed, compilation, meta, result, system_info
                 )
                 VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                RETURNING id
                 """,
                 (
                     submission,
@@ -356,7 +494,9 @@
                     json.dumps(dataclasses.asdict(system)),
                 ),
             )
+            run_id = self.cursor.fetchone()[0]
             self.connection.commit()
+            return run_id
         except psycopg2.Error as e:
             logger.exception(
                 "Error during adding %s run on %s for submission '%s'",
diff --git a/src/discord-cluster-manager/migrations/20250605_01_hwite-add-milestone-table.py b/src/discord-cluster-manager/migrations/20250605_01_hwite-add-milestone-table.py
new file mode 100644
index 00000000..1368bd50
--- /dev/null
+++ b/src/discord-cluster-manager/migrations/20250605_01_hwite-add-milestone-table.py
@@ -0,0 +1,33 @@
+"""
+Add milestone table for better milestone tracking
+"""
+
+from yoyo import step
+
+__depends__ = {"20250506_01_38PkG-add-index-on-runs-runner-score"}
+
+steps = [
+    step("""
+        CREATE TABLE IF NOT EXISTS leaderboard.milestones (
+            id SERIAL PRIMARY KEY,
+            leaderboard_id INTEGER NOT NULL REFERENCES leaderboard.leaderboard(id),
+            milestone_name TEXT NOT NULL,
+            filename TEXT NOT NULL,
+            description TEXT,
+            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
+            UNIQUE(leaderboard_id, milestone_name)
+        )
+    """),
+    step("CREATE INDEX ON leaderboard.milestones (leaderboard_id)"),
+    step("""
+        CREATE TABLE IF NOT EXISTS leaderboard.milestone_runs (
+            id SERIAL PRIMARY KEY,
+            milestone_id INTEGER NOT NULL REFERENCES leaderboard.milestones(id),
+            submission_id INTEGER NOT NULL REFERENCES leaderboard.submission(id),
+            run_id INTEGER NOT NULL REFERENCES leaderboard.runs(id),
+            created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
+        )
+    """),
+    step("CREATE INDEX ON leaderboard.milestone_runs (milestone_id)"),
+    step("CREATE INDEX ON leaderboard.milestone_runs (submission_id)"),
+]
diff --git a/src/discord-cluster-manager/report.py b/src/discord-cluster-manager/report.py
index 4e09c2c5..f626aa08 100644
--- a/src/discord-cluster-manager/report.py
+++ b/src/discord-cluster-manager/report.py
@@ -218,6 +218,16 @@ def make_short_report(runs: dict[str, EvalResult], full=True) -> list[str]:  # noqa: C901
             result.append("✅ Leaderboard run successful")
     elif full:
         result.append("❌ Leaderboard missing")
+
+    if "milestone" in runs:
+        ms_run = runs["milestone"].run
+        if not ms_run.success:
+            result.append("❌ Running milestone failed" + _short_fail_reason(ms_run))
+        elif not ms_run.passed:
+            result.append("❌ Milestone run failed")
+        else:
+            result.append("✅ Milestone run successful")
+
     return result
diff --git a/src/discord-cluster-manager/run_eval.py b/src/discord-cluster-manager/run_eval.py
index 5e7ab046..4a5d8d41 100644
--- a/src/discord-cluster-manager/run_eval.py
+++ b/src/discord-cluster-manager/run_eval.py
@@ -296,7 +296,7 @@ def run_single_evaluation(
             tests_file.write(tests)
             tests_file.flush()
             return run_program(call + [mode, tests_file.name], seed=seed, timeout=test_timeout)
-    elif mode in ["benchmark", "profile", "leaderboard"]:
+    elif mode in ["benchmark", "profile", "leaderboard", "milestone"]:
         timeout = ranked_timeout if mode == "leaderboard" else benchmark_timeout
         with tempfile.NamedTemporaryFile("w") as bench_file:
             if ranking_by == "last":
@@ -511,7 +511,7 @@ def run_evaluation(
     require multiple runner calls.
""" results: dict[str, EvalResult] = {} - if mode in ["test", "benchmark", "profile", "script"]: + if mode in ["test", "benchmark", "profile", "script", "milestone"]: results[mode] = call(mode=mode) elif mode in ["private", "leaderboard"]: # first, run the tests @@ -528,7 +528,7 @@ def run_evaluation( # if they pass, run the leaderboard validation results["leaderboard"] = call(mode="leaderboard") else: - raise AssertionError("Invalid mode") + raise AssertionError(f"Invalid mode: {mode}") return results diff --git a/src/discord-cluster-manager/submission.py b/src/discord-cluster-manager/submission.py index 2777b15f..6845c6dc 100644 --- a/src/discord-cluster-manager/submission.py +++ b/src/discord-cluster-manager/submission.py @@ -6,7 +6,7 @@ from better_profanity import profanity from leaderboard_db import LeaderboardDB from task import LeaderboardTask -from utils import KernelBotError, LeaderboardItem +from utils import KernelBotError, LeaderboardItem, SubmissionMode @dataclasses.dataclass @@ -26,15 +26,20 @@ class ProcessedSubmissionRequest(SubmissionRequest): task_gpus: list -def prepare_submission(req: SubmissionRequest, lb_db: LeaderboardDB) -> ProcessedSubmissionRequest: +def prepare_submission( + req: SubmissionRequest, + lb_db: LeaderboardDB, + mode: SubmissionMode +) -> ProcessedSubmissionRequest: if profanity.contains_profanity(req.file_name): raise KernelBotError("Please provide a non rude filename") - - # check file extension - if not req.file_name.endswith((".py", ".cu", ".cuh", ".cpp")): - raise KernelBotError( - "Please provide a Python (.py) or CUDA (.cu / .cuh / .cpp) file", - ) + if mode != SubmissionMode.MILESTONE: + # for milestones we don't have a submission file + # check file extension + if not req.file_name.endswith((".py", ".cu", ".cuh", ".cpp")): + raise KernelBotError( + "Please provide a Python (.py) or CUDA (.cu / .cuh / .cpp) file", + ) # process file directives req = handle_popcorn_directives(req) @@ -69,7 +74,7 @@ def lookup_leaderboard(leaderboard: str, lb_db: LeaderboardDB) -> LeaderboardIte with lb_db as db: leaderboard_item = db.get_leaderboard(leaderboard) if not leaderboard_item: - raise KernelBotError(f"Leaderboard {leaderboard} not found.") + raise KernelBotError(f"Tried to lookup leaderboard {leaderboard} but it was not found.") return leaderboard_item diff --git a/src/discord-cluster-manager/task.py b/src/discord-cluster-manager/task.py index 3a14bc51..d95f86d7 100644 --- a/src/discord-cluster-manager/task.py +++ b/src/discord-cluster-manager/task.py @@ -64,6 +64,7 @@ class LeaderboardTask: ranking_by: RankCriterion = RankCriterion.LAST templates: dict[str, str] = dataclasses.field(default_factory=dict) seed: Optional[int] = None + milestones: list[dict[str, str]] = dataclasses.field(default_factory=list) @staticmethod def from_dict(data: dict): diff --git a/src/discord-cluster-manager/utils.py b/src/discord-cluster-manager/utils.py index c39192f7..6af160c0 100644 --- a/src/discord-cluster-manager/utils.py +++ b/src/discord-cluster-manager/utils.py @@ -271,6 +271,7 @@ def build_task_config( "ranked_timeout": task.ranked_timeout, "ranking_by": task.ranking_by.value, "seed": task.seed, + "milestones": task.milestones, } if task.lang == Language.Python: