From 5c5f1c33b4e4c4b8fa90542799c539e6849d6493 Mon Sep 17 00:00:00 2001
From: PaliC
Date: Fri, 29 Aug 2025 05:41:09 +0000
Subject: [PATCH 1/2] Rename torchbench, datasets, and HF repo

---
 .gitignore                                    |  2 +-
 BackendBench/data_loaders.py                  |  8 ++---
 .../scripts/generate_operator_coverage_csv.py | 26 +++++++-------
 BackendBench/scripts/get_tests_stat.py        |  6 ++--
 BackendBench/scripts/main.py                  | 35 +++++++++++++------
 .../scripts/parquet_trace_converter.py        | 22 ++++++------
 ...taset_filters.py => test_suite_filters.py} |  0
 BackendBench/suite/__init__.py                |  6 ++--
 .../suite/{torchbench.py => modeltraces.py}   | 20 +++++------
 test/test_adverse_cases.py                    |  6 ++--
 ...nch_suite.py => test_modeltraces_suite.py} |  6 ++--
 11 files changed, 75 insertions(+), 62 deletions(-)
 rename BackendBench/scripts/{dataset_filters.py => test_suite_filters.py} (100%)
 rename BackendBench/suite/{torchbench.py => modeltraces.py} (85%)
 rename test/{test_torchbench_suite.py => test_modeltraces_suite.py} (91%)

diff --git a/.gitignore b/.gitignore
index 136956f2..6f07afed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,7 @@ backendbench.egg-info/
 CLAUDE.md
 venv/
 ops/
-datasets/
+local_test_suites/
 uv.lock
 .pre-commit-cache/
 logs/
diff --git a/BackendBench/data_loaders.py b/BackendBench/data_loaders.py
index 94a6a2d6..23894605 100644
--- a/BackendBench/data_loaders.py
+++ b/BackendBench/data_loaders.py
@@ -27,8 +27,8 @@
 # you can explore the dataset here
 # https://huggingface.co/datasets/GPUMODE/backendbench_tests
 HUGGINGFACE_REPO = "GPUMODE/backendbench_tests"
-TORCHBENCH_SUITE_HF_COMMIT = "25a7c56b0a4029b192b61e32fd403e19258487e1"
-TORCHBENCH_SUITE_FILE = "backend_bench_problems.parquet"
+MODELTRACES_SUITE_HF_COMMIT = "25a7c56b0a4029b192b61e32fd403e19258487e1"
+MODELTRACES_SUITE_FILE = "backend_bench_problems.parquet"
 
 
 def _args_size(args):
@@ -231,8 +231,8 @@ def _load_from_parquet(
         # read parquet file from huggingface
         table = load_dataset(
             HUGGINGFACE_REPO,
-            data_files=TORCHBENCH_SUITE_FILE,
-            revision=TORCHBENCH_SUITE_HF_COMMIT,
+            data_files=MODELTRACES_SUITE_FILE,
+            revision=MODELTRACES_SUITE_HF_COMMIT,
         )["train"]
     else:
         # read parquet file directly
diff --git a/BackendBench/scripts/generate_operator_coverage_csv.py b/BackendBench/scripts/generate_operator_coverage_csv.py
index 3318484f..04c5dc78 100644
--- a/BackendBench/scripts/generate_operator_coverage_csv.py
+++ b/BackendBench/scripts/generate_operator_coverage_csv.py
@@ -17,12 +17,12 @@
     extract_aten_ops,
     extract_operator_name,
 )
-from BackendBench.suite import OpInfoTestSuite, TorchBenchTestSuite
+from BackendBench.suite import OpInfoTestSuite, ModelTracesTestSuite
 
 
-def get_torchbench_ops():
-    """Get operations from TorchBench suite"""
-    suite = TorchBenchTestSuite("torchbench", None)
+def get_modeltraces_ops():
+    """Get operations from ModelTraces suite"""
+    suite = ModelTracesTestSuite("modeltraces", None)
     ops = set()
     for optest in suite:
         op_str = str(optest.op)
@@ -48,27 +48,27 @@ def generate_coverage_csv():
     print(f"  Unique successful ops: {len(set(opinfo_successful_ops))}")
 
     opinfo_ops = set(extract_aten_ops(opinfo_successful_ops))
-    torchbench_ops = get_torchbench_ops()
+    modeltraces_ops = get_modeltraces_ops()
 
     print("\nOperator counts:")
     print(f"- Total native functions: {len(all_native_ops)}")
     print(f"- Core operators: {len(core_ops)}")
     print(f"- OpInfo: {len(opinfo_ops)}")
-    print(f"- TorchBench: {len(torchbench_ops)}")
+    print(f"- ModelTraces: {len(modeltraces_ops)}")
 
     # Create comprehensive operator list
-    all_operators = set(all_native_ops) | set(core_ops) | opinfo_ops | torchbench_ops
+    all_operators = set(all_native_ops) | set(core_ops) | opinfo_ops | modeltraces_ops
     core_ops_set = set(core_ops)
 
     # Generate CSV
-    csv_data = [["op_name", "is_core", "is_in_opinfo", "is_in_torchbench"]]
+    csv_data = [["op_name", "is_core", "is_in_opinfo", "is_in_modeltraces"]]
 
     for op in sorted(all_operators):
         row = [
             op,
             True if op in core_ops_set else False,
             True if op in opinfo_ops else False,
-            True if op in torchbench_ops else False,
+            True if op in modeltraces_ops else False,
         ]
         csv_data.append(row)
 
@@ -81,15 +81,15 @@ def generate_coverage_csv():
 
     # Analysis
     core_in_opinfo = core_ops_set & opinfo_ops
-    core_in_torchbench = core_ops_set & torchbench_ops
-    core_in_either = core_ops_set & (opinfo_ops | torchbench_ops)
-    core_missing_both = core_ops_set - (opinfo_ops | torchbench_ops)
+    core_in_modeltraces = core_ops_set & modeltraces_ops
+    core_in_either = core_ops_set & (opinfo_ops | modeltraces_ops)
+    core_missing_both = core_ops_set - (opinfo_ops | modeltraces_ops)
 
     print(
         f"\nCore in OpInfo: {len(core_in_opinfo)}/{len(core_ops)} ({len(core_in_opinfo) / len(core_ops) * 100:.1f}%)"
     )
     print(
-        f"Core in TorchBench: {len(core_in_torchbench)}/{len(core_ops)} ({len(core_in_torchbench) / len(core_ops) * 100:.1f}%)"
+        f"Core in ModelTraces: {len(core_in_modeltraces)}/{len(core_ops)} ({len(core_in_modeltraces) / len(core_ops) * 100:.1f}%)"
     )
     print(
         f"Combined coverage: {len(core_in_either)}/{len(core_ops)} ({len(core_in_either) / len(core_ops) * 100:.1f}%)"
diff --git a/BackendBench/scripts/get_tests_stat.py b/BackendBench/scripts/get_tests_stat.py
index 76704b9f..59c1b4ea 100644
--- a/BackendBench/scripts/get_tests_stat.py
+++ b/BackendBench/scripts/get_tests_stat.py
@@ -11,7 +11,7 @@
 import statistics
 
 import torch
-from BackendBench.suite import OpInfoTestSuite, TorchBenchTestSuite, FactoTestSuite
+from BackendBench.suite import OpInfoTestSuite, ModelTracesTestSuite, FactoTestSuite
 from BackendBench.scripts.pytorch_operators import extract_operator_name
 
 
@@ -133,8 +133,8 @@ def main():
         "cuda",
         torch.bfloat16,
     ),
-    "torchbench": TorchBenchTestSuite(
-        "torchbench",
+    "modeltraces": ModelTracesTestSuite(
+        "modeltraces",
     ),
     "facto": FactoTestSuite(
         "facto_cuda_bfloat16",
diff --git a/BackendBench/scripts/main.py b/BackendBench/scripts/main.py
index 5e2e8a14..1b2f1cb8 100644
--- a/BackendBench/scripts/main.py
+++ b/BackendBench/scripts/main.py
@@ -20,8 +20,9 @@
     FactoTestSuite,
     OpInfoTestSuite,
     SmokeTestSuite,
-    TorchBenchTestSuite,
+    ModelTracesTestSuite,
 )
+import warnings
 
 logger = logging.getLogger(__name__)
 
@@ -49,7 +50,7 @@ def setup_logging(log_level):
 @click.option(
     "--suite",
     default="smoke",
-    type=click.Choice(["smoke", "opinfo", "torchbench", "facto"]),
+    type=click.Choice(["smoke", "opinfo", "torchbench", "modeltraces", "facto"]),
     help="Which suite to run",
 )
 @click.option(
@@ -96,7 +97,7 @@ def setup_logging(log_level):
     help="Maximum refinement rounds per worker for KernelAgent backend",
 )
 @click.option(
-    "--alternative-torchbench-data-path",
+    "--alternative-modeltraces-data-path",
     default=None,
     type=str,
     help="Internal testing flag for BackendBench development. Users should not use this.",
@@ -151,7 +152,7 @@ def cli(
     llm_relay_model,
     kernel_agent_workers,
     kernel_agent_max_rounds,
-    alternative_torchbench_data_path,
+    alternative_modeltraces_data_path,
     ops_directory,
     log_dir,
     disable_output_logs,
@@ -159,13 +160,20 @@ def cli(
     check_overhead_dominated_ops,
     p,
 ):
-    if suite != "torchbench":
+    setup_logging(log_level)
+
+    # Handle the deprecated "torchbench" suite name
+    give_torchbench_warning = False
+    if suite == "torchbench":
+        give_torchbench_warning = True
+        suite = "modeltraces"
+
+    if suite != "modeltraces":
         if topn_inputs is not None:
-            raise ValueError("topn-inputs is only supported for torchbench suite")
+            raise ValueError("topn-inputs is only supported for modeltraces suite")
         if check_overhead_dominated_ops:
-            raise ValueError("check-overhead-dominated-ops is only supported for torchbench suite")
+            raise ValueError("check-overhead-dominated-ops is only supported for modeltraces suite")
 
-    setup_logging(log_level)
 
     if ops:
         ops = ops.split(",")
@@ -189,9 +197,9 @@ def cli(
             torch.bfloat16,
             filter=ops,
         ),
-        "torchbench": lambda: TorchBenchTestSuite(
-            "torchbench",
-            alternative_torchbench_data_path,
+        "modeltraces": lambda: ModelTracesTestSuite(
+            "modeltraces",
+            alternative_modeltraces_data_path,
             filter=ops,
             topn=topn_inputs,
             check_overhead_dominated_ops=check_overhead_dominated_ops,
@@ -340,6 +348,11 @@ def cli(
     )
     print(f"Results saved to: {log_dir}")
 
+    if give_torchbench_warning:
+        warnings.warn(
+            "The 'torchbench' suite has been renamed to 'modeltraces'. Please use `--suite modeltraces` instead; `--suite torchbench` may be removed in a future version of BackendBench.",
+        )
+
 
 def setup_llm_backend(llm_backend, llm_client, suite, max_attempts=5):
     """Setup LLM backend by generating kernels for all operations in the suite."""
diff --git a/BackendBench/scripts/parquet_trace_converter.py b/BackendBench/scripts/parquet_trace_converter.py
index 6160424d..aee7e7b2 100644
--- a/BackendBench/scripts/parquet_trace_converter.py
+++ b/BackendBench/scripts/parquet_trace_converter.py
@@ -17,7 +17,7 @@
 import pyarrow as pa
 import pyarrow.parquet as pq
 from BackendBench.data_loaders import _load_from_trace
-from BackendBench.scripts.dataset_filters import (
+from BackendBench.scripts.test_suite_filters import (
     apply_runtime_filter,
     apply_skip_ops_filter,
 )
@@ -28,7 +28,7 @@
 
 
 """
-Columns for the parquet dataset:
+Columns for the parquet test suite:
     - uuid (int) (hash of op + args)
    - op_name (string)
    - args (string)
@@ -46,13 +46,13 @@
 
 
 def _upload_to_hf(file_path: str) -> None:
-    """Upload file to GPUMODE/huggingface_op_trace."""
+    """Upload file to GPUMODE/backendbench_tests."""
     try:
         api = HfApi()
         api.upload_file(
             path_or_fileobj=file_path,
             path_in_repo=Path(file_path).name,
-            repo_id="GPUMODE/huggingface_op_trace",
+            repo_id="GPUMODE/backendbench_tests",
             repo_type="dataset",
         )
         logger.info(f"Uploaded {Path(file_path).name} to Hugging Face")
@@ -182,9 +182,9 @@ def _validate_parquet_name(parquet_name: str) -> str:
 
     if not parquet_name.endswith(".parquet"):
         raise click.BadParameter("Parquet file must end with .parquet suffix")
 
-    # Ensure local files are in datasets directory
-    if not parquet_name.startswith("datasets/"):
-        parquet_name = os.path.join("datasets", parquet_name)
+    # Ensure local files are in local_test_suites directory
+    if not parquet_name.startswith("local_test_suites/"):
+        parquet_name = os.path.join("local_test_suites", parquet_name)
 
     return parquet_name
@@ -231,13 +231,13 @@ def _validate_trace_file(trace_file: str, is_input: bool = True) -> str:
     "--parquet-name",
     default="backend_bench_problems.parquet",
     type=str,
-    help="Parquet filename: URL allowed as input in parquet-to-trace mode, local files in datasets/.",
+    help="Parquet filename: URL allowed as input in parquet-to-trace mode, local files in local_test_suites/.",
 )
 @click.option(
     "--upload-to-hf",
     is_flag=True,
     default=False,
-    help="Upload generated parquet files to Hugging Face (GPUMODE/huggingface_op_trace) in trace-to-parquet mode",
+    help="Upload generated parquet files to Hugging Face (GPUMODE/backendbench_tests) in trace-to-parquet mode",
 )
 @click.option(
     "--limit",
@@ -249,8 +249,8 @@ def main(log_level, mode, trace_file, parquet_name, upload_to_hf, limit):
     """Convert trace files to parquet format or vice versa."""
     setup_logging(log_level)
 
-    # Create datasets directory
-    os.makedirs("datasets", exist_ok=True)
+    # Create local_test_suites directory
+    os.makedirs("local_test_suites", exist_ok=True)
 
     if mode == "trace-to-parquet":
         # Validate inputs/outputs
diff --git a/BackendBench/scripts/dataset_filters.py b/BackendBench/scripts/test_suite_filters.py
similarity index 100%
rename from BackendBench/scripts/dataset_filters.py
rename to BackendBench/scripts/test_suite_filters.py
diff --git a/BackendBench/suite/__init__.py b/BackendBench/suite/__init__.py
index 410a5d6e..5b1bf4e5 100644
--- a/BackendBench/suite/__init__.py
+++ b/BackendBench/suite/__init__.py
@@ -17,7 +17,7 @@
 from .facto import FactoTestSuite
 from .opinfo import OpInfoTestSuite
 from .smoke import randn, SmokeTestSuite
-from .torchbench import TorchBenchOpTest, TorchBenchTestSuite
+from .modeltraces import ModelTracesOpTest, ModelTracesTestSuite
 
 __all__ = [
     "Test",
@@ -27,6 +27,6 @@
     "OpInfoTestSuite",
     "SmokeTestSuite",
     "randn",
-    "TorchBenchOpTest",
-    "TorchBenchTestSuite",
+    "ModelTracesOpTest",
+    "ModelTracesTestSuite",
 ]
diff --git a/BackendBench/suite/torchbench.py b/BackendBench/suite/modeltraces.py
similarity index 85%
rename from BackendBench/suite/torchbench.py
rename to BackendBench/suite/modeltraces.py
index f105f731..22d9aa2e 100644
--- a/BackendBench/suite/torchbench.py
+++ b/BackendBench/suite/modeltraces.py
@@ -11,15 +11,15 @@
 - Dataset: https://huggingface.co/datasets/GPUMODE/backendbench_tests
 - Configuration: Set in data_loaders.py:
   - HUGGINGFACE_REPO: HF repository name
-  - TORCHBENCH_SUITE_FILE: Specific file name in the repo
-  - TORCHBENCH_SUITE_HF_COMMIT: Git commit hash for reproducibility
+  - MODELTRACES_SUITE_FILE: Specific file name in the repo
+  - MODELTRACES_SUITE_HF_COMMIT: Git commit hash for reproducibility
 
 Updating the Test Set:
 1. Choose a test file from https://huggingface.co/datasets/GPUMODE/backendbench_tests (it will likely be the same)
-2. Update TORCHBENCH_SUITE_FILE in data_loaders.py with the file name (it will likely be the same)
+2. Update MODELTRACES_SUITE_FILE in data_loaders.py with the file name (it will likely be the same)
 3. Get the current commit hash:
    python -c "from huggingface_hub import HfApi; print(HfApi().dataset_info('GPUMODE/backendbench_tests', revision='main').sha)"
-4. Update TORCHBENCH_SUITE_HF_COMMIT in data_loaders.py with the hash
+4. Update MODELTRACES_SUITE_HF_COMMIT in data_loaders.py with the hash
 
 Creating New Test Sets:
 Use scripts/parquet_to_trace.py to generate and upload new datasets to HuggingFace.
@@ -35,13 +35,13 @@
 from BackendBench.utils import deserialize_args
 
 
-class TorchBenchTest:
+class ModelTracesTest:
     def __init__(self, *args, **kwargs):
         self.args = args
         self.kwargs = kwargs
 
 
-class TorchBenchOpTest:
+class ModelTracesOpTest:
     def __init__(self, op, inputs, topn):
         self.op = eval(f"torch.ops.{op}")
         self.inputs = inputs
@@ -60,16 +60,16 @@ def tests(self):
     def correctness_tests(self):
         for inp in self.tests():
             args, kwargs = deserialize_args(inp)
-            yield TorchBenchTest(*args, **kwargs)
+            yield ModelTracesTest(*args, **kwargs)
 
     @property
     def performance_tests(self):
         for inp in self.tests():
             args, kwargs = deserialize_args(inp)
-            yield TorchBenchTest(*args, **kwargs)
+            yield ModelTracesTest(*args, **kwargs)
 
 
-class TorchBenchTestSuite:
+class ModelTracesTestSuite:
     def __init__(
         self,
         name,
@@ -101,4 +101,4 @@ def __iter__(self):
         for op, inputs in self.optests.items():
             if any(s in op for s in UNSUPPORTED_OPERATORS):
                 continue
-            yield TorchBenchOpTest(op, inputs, self.topn)
+            yield ModelTracesOpTest(op, inputs, self.topn)
diff --git a/test/test_adverse_cases.py b/test/test_adverse_cases.py
index 25bc4cd8..4c78b3be 100644
--- a/test/test_adverse_cases.py
+++ b/test/test_adverse_cases.py
@@ -5,7 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import pytest
-from BackendBench.suite import TorchBenchOpTest
+from BackendBench.suite import ModelTracesOpTest
 import BackendBench.multiprocessing_eval as multiprocessing_eval
 import BackendBench.backends as backends
 import torch
@@ -16,13 +16,13 @@ class TestAdaptiveAvgPool2dBackward:
     @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires GPU")
     def test_adaptive_avg_pool2d_backward_gpu(self):
         """Test on GPU with eval_one_op."""
-        op_test_should_error = TorchBenchOpTest(
+        op_test_should_error = ModelTracesOpTest(
             "aten._adaptive_avg_pool2d_backward.default",
             ["((T([512, 4096, 56, 56], f16), T([512, 4096, 56, 56], f16)), {})"],
             None,
         )
 
-        op_test_should_succeed = TorchBenchOpTest(
+        op_test_should_succeed = ModelTracesOpTest(
             "aten.addmm.default",
             ["((T([14, 14], f32), T([14, 14], f32), T([14, 14], f32)), {})"],
             None,
diff --git a/test/test_torchbench_suite.py b/test/test_modeltraces_suite.py
similarity index 91%
rename from test/test_torchbench_suite.py
rename to test/test_modeltraces_suite.py
index 9f4d759f..e94abacb 100644
--- a/test/test_torchbench_suite.py
+++ b/test/test_modeltraces_suite.py
@@ -5,12 +5,12 @@
 # LICENSE file in the root directory of this source tree.
 
 import torch
-from BackendBench.suite import TorchBenchOpTest
+from BackendBench.suite import ModelTracesOpTest
 
 
 class TestOpTest:
     def test_op_test(self):
-        op_test = TorchBenchOpTest(
+        op_test = ModelTracesOpTest(
             "aten.relu.default", ["((T([32, 128, 512], f16, None, 'cpu'),), {})"], None
         )
         for test in op_test.correctness_tests:
@@ -24,7 +24,7 @@ def test_op_test(self):
             torch.testing.assert_close(torch.relu(arg), op_test.op(arg))
 
     def test_topn(self):
-        op_test = TorchBenchOpTest(
+        op_test = ModelTracesOpTest(
             "aten.relu.default",
             [
                 "((T([32, 128, 512], f16, None, 'cpu'),), {})",

From 24b22d049bcf475f0a9cc55433ad15e79b77d58e Mon Sep 17 00:00:00 2001
From: PaliC
Date: Fri, 29 Aug 2025 05:56:33 +0000
Subject: [PATCH 2/2] more renames

---
 BackendBench/scripts/debug_operator_mapping.py | 10 +++++-----
 .../scripts/setup_operator_directories.py      | 18 +++++++++---------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/BackendBench/scripts/debug_operator_mapping.py b/BackendBench/scripts/debug_operator_mapping.py
index 936940a4..aa9f9f3a 100644
--- a/BackendBench/scripts/debug_operator_mapping.py
+++ b/BackendBench/scripts/debug_operator_mapping.py
@@ -8,14 +8,14 @@
 
 
 """
-Debug script to show how TorchBench operator names map to DirectoryBackend folder names.
+Debug script to show how ModelTraces operator names map to DirectoryBackend folder names.
 
 Creates a CSV file showing the mapping for debugging purposes.
 
 Usage:
     python -m BackendBench.scripts.debug_operator_mapping
 
 Output:
-    torchbench_operator_folder_mapping.csv - CSV file with operator mappings
+    modeltraces_operator_folder_mapping.csv - CSV file with operator mappings
 """
 
 import csv
@@ -24,7 +24,7 @@
 
 
 def get_operator_mapping():
-    """Get the mapping from TorchBench operators to folder names."""
+    """Get the mapping from ModelTraces operators to folder names."""
     mappings = []
 
     # Create a DirectoryBackend to see what operators it loads
@@ -87,7 +87,7 @@ def create_mapping_csv():
     """Create a CSV file with the operator mapping."""
     mappings = get_operator_mapping()
 
-    csv_file = "torchbench_operator_folder_mapping.csv"
+    csv_file = "modeltraces_operator_folder_mapping.csv"
 
     with open(csv_file, "w", newline="") as f:
         if mappings:
@@ -113,7 +113,7 @@ def create_mapping_csv():
 
 
 if __name__ == "__main__":
-    print("Creating TorchBench operator to folder mapping...")
+    print("Creating ModelTraces operator to folder mapping...")
     csv_file = create_mapping_csv()
     print(f"\nDebug CSV created: {csv_file}")
     print("This file shows how PyTorch operators map to generated_kernels/ folder names")
diff --git a/BackendBench/scripts/setup_operator_directories.py b/BackendBench/scripts/setup_operator_directories.py
index a9ec61c5..5cddbf7d 100755
--- a/BackendBench/scripts/setup_operator_directories.py
+++ b/BackendBench/scripts/setup_operator_directories.py
@@ -57,7 +57,7 @@ def clean_op_name_for_directory(op_name: str) -> str:
 
 
 def create_readme_for_op(
-    op_dir: Path, op_name: str, is_core: bool, is_opinfo: bool, is_torchbench: bool
+    op_dir: Path, op_name: str, is_core: bool, is_opinfo: bool, is_modeltraces: bool
 ):
     """Create a README.md file for each operator directory."""
     readme_path = op_dir / "README.md"
@@ -67,8 +67,8 @@ def create_readme_for_op(
         status.append("Core PyTorch operator")
     if is_opinfo:
         status.append("Has OpInfo tests")
-    if is_torchbench:
-        status.append("Used in TorchBench")
+    if is_modeltraces:
+        status.append("Used in ModelTraces")
 
     content = f"""# {op_name}
@@ -119,15 +119,15 @@ def setup_operator_directories(base_dir: str = "generated_kernels", include_all:
                 "name": row["op_name"],
                 "is_core": row["is_core"] == "True",
                 "is_opinfo": row["is_in_opinfo"] == "True",
-                "is_torchbench": row["is_in_torchbench"] == "True",
+                "is_modeltraces": row["is_in_modeltraces"] == "True",
             }
         )
 
     # Filter operators based on criteria
     if not include_all:
-        # By default, only include operators that are in TorchBench
-        operators = [op for op in operators if op["is_torchbench"]]
-        print(f"Setting up directories for {len(operators)} TorchBench operators")
+        # By default, only include operators that are in ModelTraces
+        operators = [op for op in operators if op["is_modeltraces"]]
+        print(f"Setting up directories for {len(operators)} ModelTraces operators")
     else:
         print(f"Setting up directories for all {len(operators)} operators")
@@ -151,7 +151,7 @@ def setup_operator_directories(base_dir: str = "generated_kernels", include_all:
             continue
 
         op_dir.mkdir(exist_ok=True)
-        create_readme_for_op(op_dir, op_name, op["is_core"], op["is_opinfo"], op["is_torchbench"])
+        create_readme_for_op(op_dir, op_name, op["is_core"], op["is_opinfo"], op["is_modeltraces"])
         created_count += 1
 
     print("\nDirectory setup complete:")
@@ -201,7 +201,7 @@ def main():
     parser.add_argument(
         "--include-all",
         action="store_true",
-        help="Include all operators, not just TorchBench operators",
+        help="Include all operators, not just ModelTraces operators",
     )
     parser.add_argument(
         "--regenerate-csv",