Closed
Changes from all commits (42 commits)
77d04e1  push to test workflow (PaliC, May 28, 2025)
f42f885  test push (PaliC, May 28, 2025)
1c5d122  test push (PaliC, May 28, 2025)
f9b0ea3  test push (PaliC, May 28, 2025)
729045e  test push (PaliC, May 28, 2025)
72ac686  test push (PaliC, May 28, 2025)
54a2418  test push (PaliC, May 28, 2025)
089445b  test push (PaliC, May 28, 2025)
b1c1152  test push (PaliC, May 28, 2025)
e3b49dd  test push (PaliC, May 28, 2025)
110aea6  test push (PaliC, May 28, 2025)
371cad8  test push (PaliC, May 28, 2025)
cb81911  test push (PaliC, May 28, 2025)
dc17b30  test push (PaliC, May 29, 2025)
c2a5367  test push (PaliC, May 29, 2025)
19647d2  test push (PaliC, May 29, 2025)
a240485  test push (PaliC, May 29, 2025)
7f1e04a  test push (PaliC, May 29, 2025)
104d1c9  test push (PaliC, May 29, 2025)
1f54c96  test push (PaliC, May 29, 2025)
7f8cf64  test push (PaliC, May 29, 2025)
79672f0  test push (PaliC, May 29, 2025)
419fb22  test push (PaliC, May 29, 2025)
ea8812c  test push (PaliC, May 29, 2025)
13b2aef  test push (PaliC, May 29, 2025)
3ae64d5  test push (PaliC, May 29, 2025)
f265a37  test push (PaliC, May 29, 2025)
5b8c698  test push (PaliC, May 29, 2025)
fdb1ed6  test push (PaliC, May 29, 2025)
2e48944  test push (PaliC, May 29, 2025)
d78119c  test push (PaliC, May 29, 2025)
474d9c4  test push (PaliC, May 29, 2025)
6fda930  it works now cleanup (PaliC, May 29, 2025)
8967956  cleanup (PaliC, May 29, 2025)
4bc6e8d  test push (PaliC, May 30, 2025)
f1eb9a7  test push (PaliC, May 30, 2025)
748a537  fix lint errors and cleanup (PaliC, May 30, 2025)
8224cb0  fix lint errors and cleanup (PaliC, May 30, 2025)
9ea4e77  final cleanup (PaliC, May 30, 2025)
d9b2d02  get full error to fix bug in ci (PaliC, May 30, 2025)
ad9e758  fix ci bug (PaliC, May 30, 2025)
6a365ff  fix ci bug (PaliC, May 30, 2025)
31 changes: 19 additions & 12 deletions .github/workflows/nvidia_workflow.yml
@@ -18,24 +18,31 @@ jobs:
     container:
       image: nvidia/cuda:12.4.0-devel-ubuntu22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.10'
 
       - name: Create input files
-        shell: bash
-        run: |
-          # Extract the payload content without printing it
-          PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)
-
-          # Apply mask to the extracted content
-          echo "::add-mask::$PAYLOAD"
-
-          # Now write to file (won't be logged since it's masked)
-          echo "$PAYLOAD" > payload.json
+        uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 2
+          max_attempts: 5
+          shell: bash
+          command: |
+            # install jq
+            apt update
+            apt install -y jq
+            # Extract the payload content without printing it
+            PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)
+
+            # Apply mask to the extracted content
+            echo "::add-mask::$PAYLOAD"
+
+            # Now write to file (won't be logged since it's masked)
+            echo "$PAYLOAD" > payload.json
 
       - name: Install uv
         uses: astral-sh/setup-uv@v3
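Note: wrapping the step in nick-fields/retry@v3 makes the payload extraction resilient to transient network failures when installing jq inside the fresh CUDA container. The same pattern fits any flaky shell step; a minimal sketch, assuming the step produces no outputs that later steps consume:

```yaml
# Minimal sketch of the retry pattern used above (parameter names from the diff)
- name: Flaky network step with retries
  uses: nick-fields/retry@v3
  with:
    timeout_minutes: 2   # per-attempt timeout
    max_attempts: 5      # give up after the fifth failed attempt
    shell: bash
    command: |
      apt update && apt install -y jq   # package mirrors are the usual flake source
```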
38 changes: 22 additions & 16 deletions examples/eval.py
@@ -17,7 +17,7 @@
 except ImportError:
     TestSpec = dict
 
-from reference import check_implementation, generate_input
+from reference import check_implementation, generate_input, ref_kernel
 
 
 class PopcornOutput:
@@ -198,18 +198,21 @@ def run_testing(logger: PopcornOutput, pool: multiprocessing.Pool, tests: list[T
     return 112
 
 
-def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float) -> Stats | Any:
+def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float, is_baseline_run: bool) -> Stats | Any:
     """
     Runs one benchmark. Do not call directly.
     """
-    from submission import custom_kernel
+    if not is_baseline_run:
+        # submission does not exist for a baseline run
+        from submission import custom_kernel
 
     durations = []
     # generate input data once
     data = generate_input(**test.args)
     check_copy = _clone_data(data)
+    active_kernel = ref_kernel if is_baseline_run else custom_kernel
     # first, one obligatory correctness check
-    output = custom_kernel(data)
+    output = active_kernel(data)
     good, message = wrap_check_implementation(check_copy, output)
     if not good:
         return message
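The key change: baseline runs time the reference kernel itself, so there is no submission module to import. A minimal sketch of the selection logic in isolation (pick_kernel is a hypothetical helper; the diff inlines this in _run_single_benchmark):

```python
def pick_kernel(is_baseline_run: bool):
    # Baseline runs benchmark the reference implementation itself.
    from reference import ref_kernel
    if is_baseline_run:
        return ref_kernel
    # Only non-baseline runs have a submission to import.
    from submission import custom_kernel
    return custom_kernel
```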
@@ -229,12 +232,12 @@ def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_t
         check_copy = _clone_data(data)
         torch.cuda.synchronize()
         start = time.perf_counter_ns()
-        output = custom_kernel(data)
+        output = active_kernel(data)
         torch.cuda.synchronize()
         end = time.perf_counter_ns()
 
         if recheck:
-            good, message = check_implementation(check_copy, output)
+            good, message = wrap_check_implementation(check_copy, output)
             if not good:
                 return message
@@ -249,7 +252,7 @@ def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_t
     return calculate_stats(durations)
 
 
-def run_single_benchmark(pool: multiprocessing.Pool, test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float):
+def run_single_benchmark(pool: multiprocessing.Pool, test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float, is_baseline_run: bool = False):
     """
     For a particular test case, check correctness (if applicable) and grab runtime results.
 
@@ -260,7 +263,7 @@ def run_single_benchmark(pool: multiprocessing.Pool, test: TestCase, recheck: bo
     @param max_time_ns: Timeout time in nanoseconds.
     @return: A Stats object for this particular benchmark case or an error if the test fails.
     """
-    return pool.apply(_run_single_benchmark, (test, recheck, max_repeats, max_time_ns))
+    return pool.apply(_run_single_benchmark, (test, recheck, max_repeats, max_time_ns, is_baseline_run))
 
 
 def run_benchmarking(logger: PopcornOutput, pool: multiprocessing.Pool, tests: list[TestCase]):
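Because the benchmark runs via pool.apply, the submission import happens inside the worker process rather than the parent interpreter. A hedged usage sketch (argument values mirror the leaderboard path in main(); the call site itself is hypothetical):

```python
import multiprocessing

# Hypothetical call site: time the reference kernel to establish a baseline.
with multiprocessing.Pool(1) as pool:
    result = run_single_benchmark(
        pool, tests[0], recheck=True, max_repeats=100, max_time_ns=30e9,
        is_baseline_run=True,
    )
```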
@@ -300,13 +303,13 @@ def run_single_profile(test: TestCase) -> str:
     """
     Runs a single test case. Do not call directly
     """
-    from submission import custom_kernel
     from torch.profiler import profile, record_function, ProfilerActivity
+    from submission import custom_kernel
     data = generate_input(**test.args)
     torch.cuda.synchronize()
 
     with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
-        submission_output = custom_kernel(_clone_data(data))
+        submission_output = active_kernel(_clone_data(data))
         torch.cuda.synchronize()
     return prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=20)
 
@@ -327,9 +330,9 @@ def main():
         return 111
 
     if len(sys.argv) < 3:
-        return 2
+        return 222
 
-    mode = sys.argv[1]
+    mode = sys.argv[1].strip()
     seed = os.getenv("POPCORN_SEED")
     os.unsetenv("POPCORN_SEED")
     seed = int(seed) if seed else None
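The added .strip() guards against stray whitespace when the mode string is injected by the CI workflow; a quick illustration of the failure it prevents:

```python
mode = "leaderboard\n"                 # e.g. written by an echo in the workflow
assert mode != "leaderboard"           # exact match fails on the trailing newline
assert mode.strip() == "leaderboard"   # stripped comparison succeeds
```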
@@ -345,13 +348,14 @@ def main():
         if mode == "benchmark":
             return run_benchmarking(logger, pool, tests)
 
-        if mode == "leaderboard":
+        if (mode == "leaderboard") or (mode == "baseline"):
+            is_baseline_run = mode == "baseline"
             # warmup
-            run_single_benchmark(pool, tests[0], False, 100, 1e7)
+            run_single_benchmark(pool, tests[0], False, 100, 1e7, is_baseline_run)
             logger.log("benchmark-count", len(tests))
             passed = True
             for i in range(len(tests)):
-                result = run_single_benchmark(pool, tests[i], True, 100, 30e9)
+                result = run_single_benchmark(pool, tests[i], True, 100, 30e9, is_baseline_run)
                 logger.log(f"benchmark.{i}.spec", tests[i].spec)
                 if isinstance(result, Stats):
                     for field in dataclasses.fields(Stats):
@@ -367,7 +371,9 @@ def main():
             run_profiling(logger, tests)
         else:
             # TODO: Implement script mode
-            return 2
+            logger.log(mode, "not implemented")
+            print(f"mode {mode} not implemented")
+            return 333
 
 
 if __name__ == "__main__":
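With these changes each failure path in eval.py exits with a distinct code, which makes CI logs easier to triage. A summary sketch (values taken from this diff; the constant names are illustrative, not from the source):

```python
# Exit codes visible in eval.py after this PR:
EXIT_EARLY_FAILURE = 111   # returned early in main(); exact trigger not in this diff
EXIT_TESTS_FAILED = 112    # run_testing reports a failed test
EXIT_BAD_ARGS = 222        # fewer than three CLI arguments
EXIT_UNKNOWN_MODE = 333    # unimplemented mode (script mode is still a TODO)
```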
62 changes: 61 additions & 1 deletion src/discord-cluster-manager/cogs/admin_cog.py
@@ -10,7 +10,8 @@
 import discord
 import env
 import yaml
-from consts import GitHubGPU, ModalGPU
+from cogs.leaderboard_cog import LeaderboardSubmitCog
+from consts import GitHubGPU, ModalGPU, SubmissionMode
 from discord import app_commands
 from discord.ext import commands, tasks
 from leaderboard_db import leaderboard_name_autocomplete
@@ -120,6 +121,10 @@ def __init__(self, bot: "ClusterBot"):
             name="set-forum-ids", description="Sets forum IDs"
         )(self.set_forum_ids)
 
+        self.baseline_run = bot.admin_group.command(
+            name="baseline-run", description="Create a baseline run for a leaderboard"
+        )(self.baseline_run)
+
         self._scheduled_cleanup_temp_users.start()
 
     # --------------------------------------------------------------------------
@@ -1025,3 +1030,58 @@ async def set_forum_ids(self, interaction: discord.Interaction):
             error_message = f"Error updating forum ids: {str(e)}"
             logger.error(error_message, exc_info=True)
             await send_discord_message(interaction, error_message, ephemeral=True)
+
+    # ----------------------------------------------------------------------
+    # Baseline run submission (admin only)
+    # ----------------------------------------------------------------------
+    @discord.app_commands.describe(
+        leaderboard_name="Name of the leaderboard to create a baseline run for",
+        gpu="GPU(s) to use; leave empty for interactive selection",
+        force="Create another baseline run even if one already exists.",
+    )
+    @discord.app_commands.autocomplete(
+        leaderboard_name=leaderboard_name_autocomplete,
+    )
+    @with_error_handling
+    async def baseline_run(
+        self,
+        interaction: discord.Interaction,
+        leaderboard_name: str,
+        gpu: Optional[str] = None,
+        force: bool = False,
+    ):
+        """Admin command to create (or force-create) a baseline run."""
+
+        # Ensure caller is admin
+        is_admin = await self.admin_check(interaction)
+        if not is_admin:
+            await send_discord_message(
+                interaction,
+                "You need Admin permissions to run this command.",
+                ephemeral=True,
+            )
+            return
+
+        # Check for existing baseline run unless forcing
+        if not force:
+            with self.bot.leaderboard_db as db:
+                if db.has_baseline_run(leaderboard_name):
+                    await send_discord_message(
+                        interaction,
+                        (
+                            "A baseline run already exists for this leaderboard. "
+                            "Use the 'force' flag to create another."
+                        ),
+                        ephemeral=True,
+                    )
+                    return
+
+        lb_cog = LeaderboardSubmitCog(self.bot)
+
+        await lb_cog.submit(
+            interaction=interaction,
+            leaderboard_name=leaderboard_name,
+            script=None,
+            mode=SubmissionMode.BASELINE,
+            gpu=gpu,
+        )
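The force check above relies on a has_baseline_run helper on the leaderboard DB handle that is not part of this diff. A hypothetical sketch of its shape, assuming a runs table keyed by leaderboard name and a stored run mode (schema and column names are guesses):

```python
class LeaderboardDB:
    def has_baseline_run(self, leaderboard_name: str) -> bool:
        # Hypothetical query; the real schema is not shown in this PR.
        self.cursor.execute(
            """
            SELECT 1
            FROM runs r
            JOIN leaderboards l ON r.leaderboard_id = l.id
            WHERE l.name = %s AND r.mode = 'baseline'
            LIMIT 1
            """,
            (leaderboard_name,),
        )
        return self.cursor.fetchone() is not None
```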