shellphish · DeviRule · Oct 4, 2025 · Sep 23, 2025 · Sep 23, 2025 · Oct 4, 2025
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ __pycache__/
 
 # C extensions
 *.so
+*.bin
 
 # Distribution / packaging
 .Python
@@ -25,6 +26,7 @@ share/python-wheels/
 .installed.cfg
 *.egg
 MANIFEST
+.venv/
 
 # PyInstaller
 #  Usually these files are written by a python script from a template
@@ -182,9 +184,9 @@ cython_debug/
 .abstra/
 
 # Visual Studio Code
-#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
 #  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
-#  and can be added to the global gitignore or merged into this file. However, if you prefer, 
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
 #  you could uncomment the following to ignore the entire vscode folder
 # .vscode/
 

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+exclude: ^tests/generic_tests/targets/  # global filter: no hook runs on files under this path
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-added-large-files  # block accidental commits of large files
+      - id: check-merge-conflict  # catch leftover merge-conflict markers
+      - id: check-yaml  # verify that YAML files parse
+      - id: end-of-file-fixer  # make files end with exactly one newline
+      - id: mixed-line-ending  # detect files that mix line-ending styles
+        args: ["--fix=no"]  # report only; do not rewrite line endings
+      - id: trailing-whitespace  # strip trailing whitespace
+  - repo: https://github.com/python-jsonschema/check-jsonschema
+    rev: 0.27.0
+    hooks:
+      - id: check-dependabot  # validate the Dependabot config against its schema
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.8
+    hooks:
+      - id: ruff-format  # formatter only; the ruff lint hook is not enabled here
diff --git a/patchery/__init__.py b/patchery/__init__.py
@@ -1,17 +1,15 @@
 __version__ = "0.0.0"
 
 import logging
+
 logging.getLogger("patchery").addHandler(logging.NullHandler())
 from .logger import Loggers
 
 loggers = Loggers()
 del Loggers
 
 import os
+
 # stop LiteLLM from querying at all to the remote server
 # https://github.com/BerriAI/litellm/blob/4d29c1fb6941e49191280c4fd63961dec1a1e7c5/litellm/__init__.py#L286C20-L286C48
 os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
-from .data import Patch
-from .generator import LLMPatchGenerator
-#from .verifier import PatchVerifier
-#from .patcher import Patcher
diff --git a/patchery/aicc_patcher.py b/patchery/aicc_patcher.py
@@ -7,25 +7,37 @@
 
 import yaml
 
-from shellphish_crs_utils.models.crs_reports import RepresentativeFullPoVReport, POIReport
-from shellphish_crs_utils.models.patch import PatchMetaData
-from shellphish_crs_utils.oss_fuzz.project import OSSFuzzProject
+from patchery.data.models.crs_reports import RepresentativeFullPoVReport, POIReport
+from patchery.data.models.patch import PatchMetaData
+# from shellphish_crs_utils.oss_fuzz.project import OSSFuzzProject
 
 import patchery
 from patchery import Patcher, LLMPatchGenerator
-from patchery.utils import absolute_path_finder, read_src_from_file, find_src_root_from_commit, llm_model_name
+from patchery.utils import (
+    absolute_path_finder,
+    read_src_from_file,
+    find_src_root_from_commit,
+    llm_model_name,
+)
 
 from patchery.kumushi.root_cause_analyzer import RootCauseAnalyzer
 from patchery.kumushi.rca_mode import RCAMode
 from patchery.kumushi.aixcc import AICCProgram
-from patchery.data import ProgramInput, ProgramInputType, PoI, PoICluster, PoISource, Program
+from patchery.data import (
+    ProgramInput,
+    ProgramInputType,
+    PoI,
+    PoICluster,
+    PoISource,
+    Program,
+)
 from patchery.kumushi.util import load_clusters_from_yaml
 
 _l = logging.getLogger(__name__)
 
 
 class AICCPatcher(Patcher):
-    DEFAULT_LLM_MODEL = 'claude-3.7-sonnet'
+    DEFAULT_LLM_MODEL = "claude-3.7-sonnet"
 
     def __init__(
         self,
@@ -35,9 +47,8 @@ def __init__(
         patch_metadata_output_dir=None,
         local_run: bool = False,
         kumushi_clusters: list | None = None,
-        **kwargs
+        **kwargs,
     ):
-
         # private api
         self._patch_output_dir = patch_output_dir
         self._patch_metadata_output_dir = patch_metadata_output_dir
@@ -48,7 +59,9 @@ def __init__(
         self.is_local_run = local_run
         self.pois = []
 
-        super().__init__(program, llm_model_name(model=self.DEFAULT_LLM_MODEL), **kwargs)
+        super().__init__(
+            program, llm_model_name(model=self.DEFAULT_LLM_MODEL), **kwargs
+        )
 
         # generate pois for patching
         self.pois = self.poi_clusters_from_kumushi()
@@ -60,13 +73,15 @@ def __init__(
 
     def poi_clusters_from_kumushi(self, kumushi_report=None):
         if not self._kumushi_clusters:
-            _l.info("No KumuShi report provided, generating PoIs from local KumuShi run...")
-            rca = RootCauseAnalyzer(
-                self.program_info,
-                rca_mode=RCAMode.WEIGHTLESS
+            _l.info(
+                "No KumuShi report provided, generating PoIs from local KumuShi run..."
             )
+            rca = RootCauseAnalyzer(self.program_info, rca_mode=RCAMode.WEIGHTLESS)
             poi_clusters = rca.weightless_pois
-            _l.info(f"Since we are using KumuShi in weightless, we will limit attempts to only %d.", self._weightless_limited_attempts)
+            _l.info(  # plain template: logging fills in %d lazily, so no f-prefix
+                "Since we are using KumuShi in weightless, we will limit attempts to only %d.",
+                self._weightless_limited_attempts,
+            )
             self.max_attempts = self._weightless_limited_attempts
             self.program_info.code.reinit_or_get_function_resolver()
         else:
@@ -79,8 +94,14 @@ def poi_clusters_from_kumushi(self, kumushi_report=None):
     def _update_patch_output_locations(self) -> tuple[Path, Path]:
         # patch output location
         patch_name = hashlib.md5(os.urandom(16)).hexdigest()
-        patch_output_dir = Path(self._patch_output_dir) if self._patch_output_dir else None
-        patch_metadata_output_dir = Path(self._patch_metadata_output_dir) if self._patch_metadata_output_dir else None
+        # Both directories are required: with either one unset, the asserts
+        # below would fail with an opaque AttributeError on None.
+        if not self._patch_output_dir or not self._patch_metadata_output_dir:
+            raise ValueError(
+                "patch_output_dir and patch_metadata_output_dir must both be set"
+            )
+        patch_output_dir = Path(self._patch_output_dir)
+        patch_metadata_output_dir = Path(self._patch_metadata_output_dir)
         assert patch_output_dir.exists()
         assert patch_metadata_output_dir.exists()
         return patch_output_dir / patch_name, patch_metadata_output_dir / patch_name
@@ -93,62 +114,76 @@ def generate_verified_patches(self, *args, **kwargs):
         verified_patches = super().generate_verified_patches(self.pois, **kwargs)
         if verified_patches:
             for patch_group in verified_patches:
-                for patch in patch_group['patches']:
+                for patch in patch_group["patches"]:
                     patch_diff = self.program_info.git_diff(patch)
-                    patch_output_file, patch_metadata_output_file = self._update_patch_output_locations()
-                    build_request = patch.metadata.get('build_request_id', None)
-                    summary = patch.metadata.get('summary', None)
+                    patch_output_file, patch_metadata_output_file = (
+                        self._update_patch_output_locations()
+                    )
+                    build_request = patch.metadata.get("build_request_id", None)
+                    summary = patch.metadata.get("summary", None)
                     if build_request is None:
-                        _l.critical("No build request ID found in patch metadata, using crash report ID instead.")
+                        _l.critical(  # NOTE(review): message promises a crash-report-ID fallback, but build_request is still None when passed to PatchMetaData below — confirm
+                            "No build request ID found in patch metadata, using crash report ID instead."
+                        )
 
                     with open(patch_metadata_output_file, "w") as f:
                         patch_metadata: PatchMetaData = PatchMetaData(
                             patcher_name=patcher_name,
-                            total_cost=patch_group['cost'],
+                            total_cost=patch_group["cost"],
                             poi_report_id=self.program_info.poi_report.crash_report_id,
                             pdt_project_id=self.program_info.poi_report.project_id,
                             pdt_project_name=self.program_info.poi_report.project_name,
                             pdt_harness_info_id=self.program_info.poi_report.harness_info_id,
                             build_request_id=build_request,
                         )
-                        yaml.safe_dump(patch_metadata.model_dump(), f, default_flow_style=False, sort_keys=False)
+                        yaml.safe_dump(
+                            patch_metadata.model_dump(),
+                            f,
+                            default_flow_style=False,
+                            sort_keys=False,
+                        )
                     with open(patch_output_file, "w") as f:
                         f.write(patch_diff)
 
-                    _l.info(f'Patch data saved! Patch: %s | Metadata: %s', patch_output_file, patch_metadata_output_file)
+                    _l.info(  # plain template: logging fills in %s lazily, so no f-prefix
+                        "Patch data saved! Patch: %s | Metadata: %s",
+                        patch_output_file,
+                        patch_metadata_output_file,
+                    )
             _l.info(f"💸 The total cost of this patch was {self.total_cost} dollars.")
         else:
-            _l.info(f"💸 We could not make a patch. The total cost was {self.total_cost} dollars.")
+            _l.info(
+                f"💸 We could not make a patch. The total cost was {self.total_cost} dollars."
+            )
             _l.error("Failed to generate any verified patches.")
         return verified_patches
 
     @classmethod
     def from_files(
-            cls,
-            *args,
-            target_root: Path = None,
-            source_root: Path = None,
-            report_yaml_path: Path = None,
-            project_metadata_path=None,
-            raw_report_path=None,
-            function_json_dir=None,
-            function_indices=None,
-            alerting_inputs_path=None,
-            patch_output_dir=None,
-            patch_metadata_output_dir=None,
-            crashing_commit=None,
-            indices_by_commit=None,
-            changed_func_by_commit=None,
-            patch_planning=None,
-            local_run=False,
-            kumushi_report_path=None,
-            delta_mode=False,
-            coverage_build_project_path: Path=None,
-            patch_request_meta: Path = None,
-            bypassing_inputs: str = None,
-            **kwargs
+        cls,
+        *args,
+        target_root: Path = None,
+        source_root: Path = None,
+        report_yaml_path: Path = None,
+        project_metadata_path=None,
+        raw_report_path=None,
+        function_json_dir=None,
+        function_indices=None,
+        alerting_inputs_path=None,
+        patch_output_dir=None,
+        patch_metadata_output_dir=None,
+        crashing_commit=None,
+        indices_by_commit=None,
+        changed_func_by_commit=None,
+        patch_planning=None,
+        local_run=False,
+        kumushi_report_path=None,
+        delta_mode=False,
+        coverage_build_project_path: Path = None,
+        patch_request_meta: Path = None,
+        bypassing_inputs: str = None,
+        **kwargs,
     ) -> "AICCPatcher":
-
         # validate outputs locations exists
         if patch_output_dir is not None:
             Path(patch_output_dir).mkdir(exist_ok=True)
@@ -178,17 +213,17 @@ def from_files(
                 with raw_report_path.open("r") as f:
                     rep = yaml.safe_load(f)
 
-                    #rep["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
-                    #rep["run_pov_result"]["pov"]["organizer_crash_eval"] = {}
-                    #rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
-                    #rep["run_pov_result"]["pov"]["organizer_crash_eval"]["code_label"] = ""
-                    #rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance"] = 0
-                    #rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance_message"] = ""
-                    #rep["run_pov_result"]["pov"]["organizer_crash_eval"]["crash_state"] = ""
-                    #rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["code_label"] = ""
-                    #rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance"] = ""
-                    #rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance_message"] = ""
-                    #rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["crash_state"] = ""
+                    # rep["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
+                    # rep["run_pov_result"]["pov"]["organizer_crash_eval"] = {}
+                    # rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
+                    # rep["run_pov_result"]["pov"]["organizer_crash_eval"]["code_label"] = ""
+                    # rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance"] = 0
+                    # rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance_message"] = ""
+                    # rep["run_pov_result"]["pov"]["organizer_crash_eval"]["crash_state"] = ""
+                    # rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["code_label"] = ""
+                    # rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance"] = ""
+                    # rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance_message"] = ""
+                    # rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["crash_state"] = ""
 
                     pov_report = RepresentativeFullPoVReport.model_validate(rep)
 
@@ -224,7 +259,9 @@ def from_files(
         if kumushi_report_path:
             kumushi_report_path = Path(kumushi_report_path)
             if kumushi_report_path.exists() and kumushi_report_path.is_file():
-                kumushi_clusters = load_clusters_from_yaml(kumushi_report_path, aicc_program)
+                kumushi_clusters = load_clusters_from_yaml(
+                    kumushi_report_path, aicc_program
+                )
 
         patcher = cls(
             aicc_program,

diff --git a/patchery/data/__init__.py b/patchery/data/__init__.py
@@ -4,5 +4,16 @@
 from .program_input import ProgramInput, ProgramInputType
 from .program_alert import ProgramAlert, ProgramExitType
 from .program import Program
+from .models import (
+    PatchRequestMeta,
+    POIReport,
+    RootCauseReport,
+    RepresentativeFullPoVReport,
+)
+from .function_resolver import (
+    FunctionResolver,
+    LocalFunctionResolver,
+    RemoteFunctionResolver,
+)
 
 JAZZER_CMD_INJECT_STR = "OS Command Injection"