Commit 49e1d90

quantization done, need calibration
Signed-off-by: Kyle Sayers <[email protected]>
1 parent a7700c5 commit 49e1d90

File tree

3 files changed: +47 −8 lines changed

  examples/transform/spinquant_example.py
  src/llmcompressor/modifiers/transform/spinquant/base.py
  src/llmcompressor/modifiers/transform/spinquant/mappings.py

examples/transform/spinquant_example.py
Lines changed: 2 additions & 2 deletions

@@ -18,7 +18,7 @@
 # * apply spinquant transforms to model to reduce quantization loss
 # * quantize the weights to 4 bit with group size 128
 recipe = [
-    SpinQuantModifier(rotations=["R1", "R2"], transform_type="hadamard"),
+    SpinQuantModifier(rotations=["R3"], transform_type="hadamard"),
     QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
 ]

@@ -35,6 +35,6 @@
 print("==========================================\n\n")

 # Save to disk compressed.
-SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR1R2-w4a16"
+SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR3-w4a16"
 model.save_pretrained(SAVE_DIR, save_compressed=True)
 tokenizer.save_pretrained(SAVE_DIR)
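
As context for the two changed lines, here is a minimal sketch of the full flow this example script typically follows. The MODEL_ID value and the exact import paths for oneshot and the modifiers are assumptions based on the usual llm-compressor example layout, not part of this commit:

# Sketch of the surrounding example flow (assumed, not shown in this diff).
from transformers import AutoModelForCausalLM, AutoTokenizer

from llmcompressor import oneshot  # assumed import path
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.modifiers.transform import SpinQuantModifier  # assumed path

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"  # hypothetical model choice
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Apply the R3 rotation, then quantize Linear weights to 4 bits (W4A16).
recipe = [
    SpinQuantModifier(rotations=["R3"], transform_type="hadamard"),
    QuantizationModifier(targets="Linear", scheme="W4A16", ignore=["lm_head"]),
]
oneshot(model=model, recipe=recipe)

SAVE_DIR = MODEL_ID.split("/")[1] + "-spinquantR3-w4a16"
model.save_pretrained(SAVE_DIR, save_compressed=True)
tokenizer.save_pretrained(SAVE_DIR)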

src/llmcompressor/modifiers/transform/spinquant/base.py
Lines changed: 43 additions & 6 deletions

@@ -128,7 +128,7 @@ def on_initialize(self, state: State, **kwargs) -> bool:
             config_groups["R2"] = self._create_r2_scheme(state.model)

         if SpinquantRotation.R3 in self.rotations:
-            config_groups["R3"] = self._create_r3_scheme()
+            config_groups["R3"] = self._create_r3_scheme(state.model)

         if SpinquantRotation.R4 in self.rotations:
             config_groups["R4"] = self._create_r4_scheme()

@@ -235,12 +235,49 @@ def _create_r2_scheme(self, model: PreTrainedModel) -> TransformScheme:
             ],
         )

-    def _create_r3_scheme(self) -> TransformScheme:
-        raise NotImplementedError(
-            "SpinQuant R3 and R4 rotations will be added in a future release"
+    def _create_r3_scheme(self, model: PreTrainedModel) -> TransformScheme:
+        config = model.config
+
+        if hasattr(config, "head_dim"):
+            head_dim = config.head_dim
+        elif hasattr(config, "hidden_size") and hasattr(config, "num_attention_heads"):
+            head_dim = config.hidden_size // config.num_attention_heads
+        else:
+            raise NotImplementedError()
+
+        return TransformScheme(
+            type=self.transform_type,
+            randomize=self.randomize,
+            requires_grad=self.learnable,
+            precision=self.precision,
+            head_dim=head_dim,
+            apply=[
+                TransformArgs(
+                    targets=[self.mappings.attn],
+                    location="q_attn",
+                ),
+                TransformArgs(
+                    targets=[self.mappings.attn],
+                    location="k_cache",
+                ),
+            ],
         )

     def _create_r4_scheme(self) -> TransformScheme:
-        raise NotImplementedError(
-            "SpinQuant R3 and R4 rotations will be added in a future release"
+        return TransformScheme(
+            type=self.transform_type,
+            randomize=self.randomize,
+            requires_grad=self.learnable,
+            precision=self.precision,
+            apply=[
+                TransformArgs(
+                    targets=[*self.mappings.mlp_out],
+                    location="input",
+                ),
+                TransformArgs(
+                    targets=[*self.mappings.mlp_out],
+                    location="weight_input",
+                    inverse=True,
+                ),
+            ],
         )
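
The R3 rotation is applied per attention head, so _create_r3_scheme needs the head dimension from the model config. A standalone restatement of the fallback logic above, where the resolve_head_dim name and the LlamaConfig example values are illustrative only:

# Sketch of the head_dim derivation used by _create_r3_scheme.
from transformers import LlamaConfig

def resolve_head_dim(config) -> int:
    # Newer configs expose head_dim directly.
    if hasattr(config, "head_dim"):
        return config.head_dim
    # Otherwise derive it from the hidden size and the number of heads.
    if hasattr(config, "hidden_size") and hasattr(config, "num_attention_heads"):
        return config.hidden_size // config.num_attention_heads
    raise NotImplementedError("cannot infer head_dim for this architecture")

config = LlamaConfig(hidden_size=4096, num_attention_heads=32)
print(resolve_head_dim(config))  # 128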
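
The R4 scheme pairs a rotation at the mlp_out inputs with its inverse folded into the weight's input side, so the pair cancels exactly in full precision and only reshapes the value distribution that the quantizer sees. A toy torch sketch of that identity, my own illustration with a random orthogonal matrix standing in for the Hadamard transform:

import torch

torch.manual_seed(0)
d = 8  # toy hidden dimension

# Random orthogonal matrix standing in for a Hadamard rotation.
R, _ = torch.linalg.qr(torch.randn(d, d))

W = torch.randn(4, d)  # toy down_proj weight (out_features x in_features)
x = torch.randn(d)     # toy activation entering down_proj

# R4: rotate the activation at the layer input ("input"), and fold the
# inverse rotation into the weight's input side ("weight_input", inverse=True).
y_rotated = (W @ R.T) @ (R @ x)  # W R^{-1} R x, with R^{-1} = R^T (orthogonal)
y_plain = W @ x

print(torch.allclose(y_rotated, y_plain, atol=1e-6))  # True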

src/llmcompressor/modifiers/transform/spinquant/mappings.py
Lines changed: 2 additions & 0 deletions

@@ -29,6 +29,7 @@ class SpinQuantMapping(BaseModel):

     embedding: str

+    attn: str
     attn_q: str
     attn_k: str
     attn_v: str

@@ -50,6 +51,7 @@ def cast_to_list(cls, value):

 _default_mappings = SpinQuantMapping(
     embedding="re:.*embed_tokens$",
+    attn="re:.*self_attn$",
     attn_q="re:.*q_proj$",
     attn_k="re:.*k_proj$",
     attn_v="re:.*v_proj$",
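
The new attn mapping, like the others, is a "re:"-prefixed regular expression matched against module names. A quick standalone sketch of how the pattern would resolve on Llama-style module names; the matching code here is illustrative, not the library's actual resolution logic:

import re

# The "re:" prefix marks the value as a regex over module names.
attn_pattern = "re:.*self_attn$"

module_names = [
    "model.layers.0.self_attn",         # matched: R3 targets the attention module
    "model.layers.0.self_attn.q_proj",  # not matched: ends in q_proj
    "model.layers.0.mlp.down_proj",     # not matched
]

regex = re.compile(attn_pattern.removeprefix("re:"))
for name in module_names:
    print(name, bool(regex.match(name)))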
