Add load-and-run tests for checkpoints that must stay backward compatible (BC)

jerryzh168 · jerryzh168 · commit ce3e167f994a · 2025-08-18T17:19:43.000-07:00
Summary: Added load-and-run tests to make sure previously saved checkpoints continue to load and run. Includes FP8, INT4, and INT4 + preshuffled checkpoints, since these might reach a larger audience. Test Plan: python test/integration/test_load_and_run_checkpoint.py Reviewers: Subscribers: Tasks: Tags: stack-info: PR: #2792, branch: jerryzh168/stack/28
diff --git a/test/integration/test_load_and_run_checkpoint.py b/test/integration/test_load_and_run_checkpoint.py
@@ -16,40 +16,53 @@
 
 from torchao.utils import is_fbcode, is_sm_at_least_89
 
-_MODEL_NAME_AND_VERSIONS = [
-    ("torchao-testing/opt-125m-float8dq-row-v1-0.13-dev", 1),
+_DEPRECATED_MODEL_INFO = [
+    (
+        "torchao-testing/opt-125m-float8dq-row-v1-0.13-dev",
+        1,
+        "Float8DynamicActivationFloat8WeightConfig",
+    ),
 ]
 
+_MODEL_NAMES = [
+    "torchao-testing/single-linear-FP8-v2-0.13-dev",
+    "torchao-testing/single-linear-INT4-preshuffled-v2-0.13-dev",
+    "torchao-testing/single-linear-INT4-v2-0.13-dev",
+]
+
+_MODEL = torch.nn.Sequential(
+    torch.nn.Linear(32, 256, dtype=torch.bfloat16)  # CPU: import must not need CUDA
+)
+
 
 @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
 @unittest.skipIf(not is_sm_at_least_89(), "Nedd sm89+")
 @unittest.skipIf(
     is_fbcode(),
     "Skipping the test in fbcode for now, not sure how to download from transformers",
 )
-class TestLoadingDeprecatedCheckpoint(TestCase):
-    @common_utils.parametrize("model_name_and_version", _MODEL_NAME_AND_VERSIONS)
-    def test_load_model_and_run(self, model_name_and_version):
+class TestLoadAndRunCheckpoint(TestCase):
+    @common_utils.parametrize("model_info", _DEPRECATED_MODEL_INFO)
+    def test_load_and_run_deprecated_checkpoints(self, model_info):
         """Test that we print correct warning message when loading a deprecated checkpoint
         and making sure the deprecated checkpoints can still be loaded
         """
         # Load and quantize model
-        model_name, version = model_name_and_version
+        model_name, version, config_name = model_info
         with warnings.catch_warnings(record=True) as caught_warnings:
             quantized_model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype="bfloat16",
-                device_map="cuda",
+                device_map="cuda:0",
             )
             assert any(
                 "Stored version is not the same as current default version of the config"
                 in str(w.message)
                 for w in caught_warnings
             ), "Didn't get expected warning message for version mismatch"
 
-            # TODO: generalize when we test more checkpoints
             assert any(
-                "Models quantized with version 1 of Float8DynamicActivationFloat8WeightConfig is deprecated"
+                f"Models quantized with version 1 of {config_name} is deprecated"
                 in str(w.message)
                 for w in caught_warnings
             ), "Didn't get expected warning message for deprecation"
@@ -70,8 +83,36 @@ def test_load_model_and_run(self, model_name_and_version):
             generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
         )
 
+    @common_utils.parametrize("model_name", _MODEL_NAMES)
+    def test_load_and_run_checkpoints(self, model_name):
+        """Test that a previously saved checkpoint still loads and that running it
+        on the saved example inputs reproduces the saved reference output
+        """
+        from huggingface_hub import hf_hub_download
+
+        downloaded_model = hf_hub_download(model_name, filename="model.bin")
+        # Build a meta-device skeleton; real tensors come from the checkpoint
+        with torch.device("meta"):
+            model = torch.nn.Sequential(
+                torch.nn.Linear(32, 256, dtype=torch.bfloat16)
+            )
+        with open(downloaded_model, "rb") as f:
+            model.load_state_dict(torch.load(f), assign=True)
+
+        downloaded_example_inputs = hf_hub_download(
+            model_name, filename="model_inputs.pt"
+        )
+        with open(downloaded_example_inputs, "rb") as f:
+            example_inputs = torch.load(f)
+        downloaded_output = hf_hub_download(model_name, filename="model_output.pt")
+        with open(downloaded_output, "rb") as f:
+            ref_output = torch.load(f)
+
+        output = model(*example_inputs)
+        self.assertTrue(torch.allclose(output, ref_output))
+
 
-common_utils.instantiate_parametrized_tests(TestLoadingDeprecatedCheckpoint)
+common_utils.instantiate_parametrized_tests(TestLoadAndRunCheckpoint)
 
 if __name__ == "__main__":
     run_tests()