Skip to content

Commit c191fa9

Browse files
committed
add github notification
ghstack-source-id: 22b1d9d Pull-Request: #7096
1 parent e12af11 commit c191fa9

File tree

5 files changed

+167
-47
lines changed

5 files changed

+167
-47
lines changed

aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import datetime as dt
21
from dataclasses import dataclass, field
3-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List
43

54
import requests
65

@@ -63,24 +62,3 @@ def from_request(
6362
except Exception as e:
6463
raise RuntimeError(f"Malformed API payload: {e}")
6564
return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
66-
67-
68-
def get_latest_meta_info(
    time_series: List["BenchmarkTimeSeriesItem"],
) -> Optional[dict[str, Any]]:
    """Return metadata for the most recent data point across all series.

    Every point of every series' ``data`` list is considered; the point with
    the greatest ``granularity_bucket`` timestamp wins.

    Args:
        time_series: Series whose ``data`` attribute is a list of dict-like
            points. Each point must carry an ISO-8601 ``granularity_bucket``.

    Returns:
        A dict with ``commit``, ``branch``, ``timestamp`` and ``workflow_id``
        (missing keys default to ``""``), or ``None`` when there is no data
        point at all -- either no series, or only series with empty ``data``.

    Raises:
        KeyError: If a point has no ``granularity_bucket`` key.
        ValueError: If a ``granularity_bucket`` is not a valid ISO timestamp.
    """
    points = [point for series in time_series for point in series.data]
    # Covers both "no series" and "series present but all empty"; calling
    # max() on an empty list would otherwise raise ValueError.
    if not points:
        return None

    latest = max(
        points,
        # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11
        # on, so it is normalized to an explicit UTC offset first.
        key=lambda p: dt.datetime.fromisoformat(
            p["granularity_bucket"].replace("Z", "+00:00")
        ),
    )
    return {
        "commit": latest.get("commit", ""),
        "branch": latest.get("branch", ""),
        "timestamp": latest.get("granularity_bucket", ""),
        "workflow_id": latest.get("workflow_id", ""),
    }

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# their own benchmark regression config, currently placed
1616
# here for lambda
1717

18-
1918
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
2019
name="Compiler Benchmark Regression",
2120
id="compiler_regression",
@@ -44,6 +43,9 @@
4443
}
4544
""",
4645
),
46+
hud_info={
47+
"url": "https://hud.pytorch.org/benchmark/compilers",
48+
},
4749
# set baseline from past 7 days using avg, and compare with the last 1 day
4850
policy=Policy(
4951
frequency=Frequency(value=1, unit="days"),
@@ -67,7 +69,7 @@
6769
"compression_ratio": RegressionPolicy(
6870
name="compression_ratio",
6971
condition="greater_equal",
70-
threshold=0.9,
72+
threshold=0.95,
7173
baseline_aggregation="max",
7274
),
7375
},

aws/lambda/benchmark_regression_summary_report/common/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ class BenchmarkConfig:
241241
id: str
242242
source: BenchmarkApiSource
243243
policy: Policy
244+
hud_info: Optional[dict[str, Any]] = None
244245

245246

246247
@dataclass

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 125 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,62 @@
1212
get_regression_status,
1313
PerGroupResult,
1414
)
15+
from jinja2 import Template
1516

1617

1718
logger = logging.getLogger()
19+
REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
20+
config_id: `{{ report_id }}`
21+
22+
We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
23+
(HUD benchmark regression page coming soon...)
24+
25+
> **Status:** {{ status }} · **Frequency:** {{ frequency }}
26+
27+
## Summary
28+
| Metric | Value |
29+
| :-- | --: |
30+
| Total | {{ summary.total_count | default(0) }} |
31+
| Regressions | {{ summary.regression_count | default(0) }} |
32+
| Suspicious | {{ summary.suspicious_count | default(0) }} |
33+
| No Regression | {{ summary.no_regression_count | default(0) }} |
34+
| Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
35+
36+
## Data Windows
37+
Baseline is a single reference value (e.g., mean, max, min, latest) aggregated from the previous few days,
38+
used to detect regressions by comparing against metric values in the target window.
39+
40+
### Baseline window (used to calculate baseline value)
41+
- **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
42+
- **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
43+
44+
### Target window (used to compare against baseline value)
45+
- **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
46+
- **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
47+
48+
{% if regression_items and regression_items|length > 0 %}
49+
## Regression Glance
50+
{% if url %}
51+
Use items below in [HUD]({{ url }}) to see regression.
52+
{% endif %}
53+
54+
{% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
55+
{% if regression_items|length > 10 %}
56+
… (showing first 10 only, total {{ regression_items|length }} regressions)
57+
{% endif %}
58+
{% for item in items %}
59+
{% set kv = item.group_info|dictsort %}
60+
{{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
61+
|{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
62+
|{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
63+
{% if item.baseline_point -%}
64+
- **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
65+
- **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
66+
{{ "\n" }}
67+
{%- endif %}
68+
{% endfor %}
69+
{% endif %}
70+
"""
1871

1972

2073
class ReportManager:
@@ -68,6 +121,57 @@ def run(
68121
except Exception as e:
69122
logger.error(f"failed to insert report to db, error: {e}")
70123
raise
124+
self.notify_github_comment(github_token)
125+
126+
def notify_github_comment(self, github_token: str) -> None:
    """Post the regression report as a GitHub comment when one is warranted.

    Nothing is sent unless the report status is ``"regression"`` and the
    config's policy provides a GitHub notification config. In dry-run mode
    the rendered Markdown is printed instead of being sent.

    Args:
        github_token: Token handed to the notification config when the
            comment is created.
    """
    if self.status != "regression":
        logger.info(
            "[%s] no regression found, skip notification",
            self.config_id,
        )
        return

    github_notification = self.config.policy.get_github_notification_config()
    if not github_notification:
        logger.info(
            "[%s] no github notification config found, skip notification",
            self.config_id,
        )
        return

    logger.info("[%s] prepareing gitub comment content", self.config_id)
    content = self._to_markdoown()

    if self.is_dry_run:
        logger.info(
            "[%s]dry run, skip sending comment to github, report(%s)",
            self.config_id,
            self.id,
        )
        logger.info("[dry run] printing comment content")
        # The content is already a Markdown string. Passing it through
        # json.dumps would print one quoted line with escaped newlines and
        # \uXXXX escapes, so print it verbatim for a readable preview.
        print(content)
        logger.info("[dry run] Done! Finish printing comment content")
        return

    logger.info("[%s] create comment to github issue", self.config_id)
    github_notification.create_github_comment(content, github_token)
    logger.info("[%s] done. comment is sent to github", self.config_id)
156+
157+
def _to_markdoown(self):
    """Render the regression report as a Markdown string.

    Collects the regression items (also stored on ``self.regression_items``),
    reads the optional HUD url from the config, and renders
    ``REPORT_MD_TEMPLATE`` with the report's fields.

    Returns:
        The rendered Markdown text.
    """
    # Stored on the instance as well as passed to the template.
    self.regression_items = self._collect_regression_items()

    hud_info = self.config.hud_info
    url = hud_info.get("url", "") if hud_info else ""

    template = Template(
        REPORT_MD_TEMPLATE,
        trim_blocks=True,
        lstrip_blocks=True,
    )
    context = {
        "id": self.id,
        "url": url,
        "status": self.status,
        "report_id": self.config_id,
        "summary": self.report["summary"],
        "baseline": self.baseline,
        "target": self.target,
        "frequency": self.config.policy.frequency.get_text(),
        "regression_items": self.regression_items,
    }
    return template.render(**context)
71175

72176
def _collect_regression_items(self) -> list[PerGroupResult]:
73177
items = []
@@ -120,11 +224,30 @@ def insert_to_db(
120224
"repo": self.repo,
121225
"report_json": report_json,
122226
}
227+
228+
if self.is_dry_run:
229+
logger.info(
230+
"[%s]dry run, skip inserting report to db, report(%s)",
231+
self.config_id,
232+
self.id,
233+
)
234+
logger.info("[dry run] printing db params data")
235+
if self.is_dry_run:
236+
print(json.dumps(params, indent=2, default=str))
237+
logger.info("[dry run] Done! Finish printing db params data")
238+
return
123239
logger.info(
124240
"[%s]inserting benchmark regression report(%s)", self.config_id, self.id
125241
)
126-
self._db_insert(cc, self.db_table_name, params)
127-
242+
try:
243+
self._db_insert(cc, self.db_table_name, params)
244+
except Exception:
245+
logger.exception(
246+
"[%s] failed to insert report to target table %s",
247+
self.config_id,
248+
self.db_table_name,
249+
)
250+
raise
128251
logger.info(
129252
"[%s] Done. inserted benchmark regression report(%s)",
130253
self.config_id,

0 commit comments

Comments
 (0)