Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ __pycache__/

# C extensions
*.so
*.bin

# Distribution / packaging
.Python
Expand All @@ -25,6 +26,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
.venv/

# PyInstaller
# Usually these files are written by a python script from a template
Expand Down Expand Up @@ -182,9 +184,9 @@ cython_debug/
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

Expand Down
20 changes: 20 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
exclude: ^tests/generic_tests/targets/
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-yaml
- id: end-of-file-fixer
- id: mixed-line-ending
args: ["--fix=no"]
- id: trailing-whitespace
- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.27.0
hooks:
- id: check-dependabot
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.8
hooks:
- id: ruff-format
6 changes: 2 additions & 4 deletions patchery/__init__.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
__version__ = "0.0.0"

import logging

logging.getLogger("patchery").addHandler(logging.NullHandler())
from .logger import Loggers

loggers = Loggers()
del Loggers

import os

# stop LiteLLM from querying at all to the remote server
# https://github.com/BerriAI/litellm/blob/4d29c1fb6941e49191280c4fd63961dec1a1e7c5/litellm/__init__.py#L286C20-L286C48
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
from .data import Patch
from .generator import LLMPatchGenerator
#from .verifier import PatchVerifier
#from .patcher import Patcher
159 changes: 98 additions & 61 deletions patchery/aicc_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,37 @@

import yaml

from shellphish_crs_utils.models.crs_reports import RepresentativeFullPoVReport, POIReport
from shellphish_crs_utils.models.patch import PatchMetaData
from shellphish_crs_utils.oss_fuzz.project import OSSFuzzProject
from patchery.data.models.crs_reports import RepresentativeFullPoVReport, POIReport
from patchery.data.models.patch import PatchMetaData
# from shellphish_crs_utils.oss_fuzz.project import OSSFuzzProject

import patchery
from patchery import Patcher, LLMPatchGenerator
from patchery.utils import absolute_path_finder, read_src_from_file, find_src_root_from_commit, llm_model_name
from patchery.utils import (
absolute_path_finder,
read_src_from_file,
find_src_root_from_commit,
llm_model_name,
)

from patchery.kumushi.root_cause_analyzer import RootCauseAnalyzer
from patchery.kumushi.rca_mode import RCAMode
from patchery.kumushi.aixcc import AICCProgram
from patchery.data import ProgramInput, ProgramInputType, PoI, PoICluster, PoISource, Program
from patchery.data import (
ProgramInput,
ProgramInputType,
PoI,
PoICluster,
PoISource,
Program,
)
from patchery.kumushi.util import load_clusters_from_yaml

_l = logging.getLogger(__name__)


class AICCPatcher(Patcher):
DEFAULT_LLM_MODEL = 'claude-3.7-sonnet'
DEFAULT_LLM_MODEL = "claude-3.7-sonnet"

def __init__(
self,
Expand All @@ -35,9 +47,8 @@ def __init__(
patch_metadata_output_dir=None,
local_run: bool = False,
kumushi_clusters: list | None = None,
**kwargs
**kwargs,
):

# private api
self._patch_output_dir = patch_output_dir
self._patch_metadata_output_dir = patch_metadata_output_dir
Expand All @@ -48,7 +59,9 @@ def __init__(
self.is_local_run = local_run
self.pois = []

super().__init__(program, llm_model_name(model=self.DEFAULT_LLM_MODEL), **kwargs)
super().__init__(
program, llm_model_name(model=self.DEFAULT_LLM_MODEL), **kwargs
)

# generate pois for patching
self.pois = self.poi_clusters_from_kumushi()
Expand All @@ -60,13 +73,15 @@ def __init__(

def poi_clusters_from_kumushi(self, kumushi_report=None):
if not self._kumushi_clusters:
_l.info("No KumuShi report provided, generating PoIs from local KumuShi run...")
rca = RootCauseAnalyzer(
self.program_info,
rca_mode=RCAMode.WEIGHTLESS
_l.info(
"No KumuShi report provided, generating PoIs from local KumuShi run..."
)
rca = RootCauseAnalyzer(self.program_info, rca_mode=RCAMode.WEIGHTLESS)
poi_clusters = rca.weightless_pois
_l.info(f"Since we are using KumuShi in weightless, we will limit attempts to only %d.", self._weightless_limited_attempts)
_l.info(
f"Since we are using KumuShi in weightless, we will limit attempts to only %d.",
self._weightless_limited_attempts,
)
self.max_attempts = self._weightless_limited_attempts
self.program_info.code.reinit_or_get_function_resolver()
else:
Expand All @@ -79,8 +94,14 @@ def poi_clusters_from_kumushi(self, kumushi_report=None):
def _update_patch_output_locations(self) -> tuple[Path, Path]:
# patch output location
patch_name = hashlib.md5(os.urandom(16)).hexdigest()
patch_output_dir = Path(self._patch_output_dir) if self._patch_output_dir else None
patch_metadata_output_dir = Path(self._patch_metadata_output_dir) if self._patch_metadata_output_dir else None
patch_output_dir = (
Path(self._patch_output_dir) if self._patch_output_dir else None
)
patch_metadata_output_dir = (
Path(self._patch_metadata_output_dir)
if self._patch_metadata_output_dir
else None
)
assert patch_output_dir.exists()
assert patch_metadata_output_dir.exists()
return patch_output_dir / patch_name, patch_metadata_output_dir / patch_name
Expand All @@ -93,62 +114,76 @@ def generate_verified_patches(self, *args, **kwargs):
verified_patches = super().generate_verified_patches(self.pois, **kwargs)
if verified_patches:
for patch_group in verified_patches:
for patch in patch_group['patches']:
for patch in patch_group["patches"]:
patch_diff = self.program_info.git_diff(patch)
patch_output_file, patch_metadata_output_file = self._update_patch_output_locations()
build_request = patch.metadata.get('build_request_id', None)
summary = patch.metadata.get('summary', None)
patch_output_file, patch_metadata_output_file = (
self._update_patch_output_locations()
)
build_request = patch.metadata.get("build_request_id", None)
summary = patch.metadata.get("summary", None)
if build_request is None:
_l.critical("No build request ID found in patch metadata, using crash report ID instead.")
_l.critical(
"No build request ID found in patch metadata, using crash report ID instead."
)

with open(patch_metadata_output_file, "w") as f:
patch_metadata: PatchMetaData = PatchMetaData(
patcher_name=patcher_name,
total_cost=patch_group['cost'],
total_cost=patch_group["cost"],
poi_report_id=self.program_info.poi_report.crash_report_id,
pdt_project_id=self.program_info.poi_report.project_id,
pdt_project_name=self.program_info.poi_report.project_name,
pdt_harness_info_id=self.program_info.poi_report.harness_info_id,
build_request_id=build_request,
)
yaml.safe_dump(patch_metadata.model_dump(), f, default_flow_style=False, sort_keys=False)
yaml.safe_dump(
patch_metadata.model_dump(),
f,
default_flow_style=False,
sort_keys=False,
)
with open(patch_output_file, "w") as f:
f.write(patch_diff)

_l.info(f'Patch data saved! Patch: %s | Metadata: %s', patch_output_file, patch_metadata_output_file)
_l.info(
f"Patch data saved! Patch: %s | Metadata: %s",
patch_output_file,
patch_metadata_output_file,
)
_l.info(f"💸 The total cost of this patch was {self.total_cost} dollars.")
else:
_l.info(f"💸 We could not make a patch. The total cost was {self.total_cost} dollars.")
_l.info(
f"💸 We could not make a patch. The total cost was {self.total_cost} dollars."
)
_l.error("Failed to generate any verified patches.")
return verified_patches

@classmethod
def from_files(
cls,
*args,
target_root: Path = None,
source_root: Path = None,
report_yaml_path: Path = None,
project_metadata_path=None,
raw_report_path=None,
function_json_dir=None,
function_indices=None,
alerting_inputs_path=None,
patch_output_dir=None,
patch_metadata_output_dir=None,
crashing_commit=None,
indices_by_commit=None,
changed_func_by_commit=None,
patch_planning=None,
local_run=False,
kumushi_report_path=None,
delta_mode=False,
coverage_build_project_path: Path=None,
patch_request_meta: Path = None,
bypassing_inputs: str = None,
**kwargs
cls,
*args,
target_root: Path = None,
source_root: Path = None,
report_yaml_path: Path = None,
project_metadata_path=None,
raw_report_path=None,
function_json_dir=None,
function_indices=None,
alerting_inputs_path=None,
patch_output_dir=None,
patch_metadata_output_dir=None,
crashing_commit=None,
indices_by_commit=None,
changed_func_by_commit=None,
patch_planning=None,
local_run=False,
kumushi_report_path=None,
delta_mode=False,
coverage_build_project_path: Path = None,
patch_request_meta: Path = None,
bypassing_inputs: str = None,
**kwargs,
) -> "AICCPatcher":

# validate outputs locations exists
if patch_output_dir is not None:
Path(patch_output_dir).mkdir(exist_ok=True)
Expand Down Expand Up @@ -178,17 +213,17 @@ def from_files(
with raw_report_path.open("r") as f:
rep = yaml.safe_load(f)

#rep["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
#rep["run_pov_result"]["pov"]["organizer_crash_eval"] = {}
#rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
#rep["run_pov_result"]["pov"]["organizer_crash_eval"]["code_label"] = ""
#rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance"] = 0
#rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance_message"] = ""
#rep["run_pov_result"]["pov"]["organizer_crash_eval"]["crash_state"] = ""
#rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["code_label"] = ""
#rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance"] = ""
#rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance_message"] = ""
#rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["crash_state"] = ""
# rep["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
# rep["run_pov_result"]["pov"]["organizer_crash_eval"] = {}
# rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"] = {}
# rep["run_pov_result"]["pov"]["organizer_crash_eval"]["code_label"] = ""
# rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance"] = 0
# rep["run_pov_result"]["pov"]["organizer_crash_eval"]["significance_message"] = ""
# rep["run_pov_result"]["pov"]["organizer_crash_eval"]["crash_state"] = ""
# rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["code_label"] = ""
# rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance"] = ""
# rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["significance_message"] = ""
# rep["run_pov_result"]["pov"]["dedup_crash_report"]["dedup_tokens_shellphish"]["crash_state"] = ""

pov_report = RepresentativeFullPoVReport.model_validate(rep)

Expand Down Expand Up @@ -224,7 +259,9 @@ def from_files(
if kumushi_report_path:
kumushi_report_path = Path(kumushi_report_path)
if kumushi_report_path.exists() and kumushi_report_path.is_file():
kumushi_clusters = load_clusters_from_yaml(kumushi_report_path, aicc_program)
kumushi_clusters = load_clusters_from_yaml(
kumushi_report_path, aicc_program
)

patcher = cls(
aicc_program,
Expand Down
11 changes: 11 additions & 0 deletions patchery/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,16 @@
from .program_input import ProgramInput, ProgramInputType
from .program_alert import ProgramAlert, ProgramExitType
from .program import Program
from .models import (
PatchRequestMeta,
POIReport,
RootCauseReport,
RepresentativeFullPoVReport,
)
from .function_resolver import (
FunctionResolver,
LocalFunctionResolver,
RemoteFunctionResolver,
)

JAZZER_CMD_INJECT_STR = "OS Command Injection"
Loading