Skip to content

Commit 386d14d

Browse files
committed
add github notification
ghstack-source-id: 8624785 Pull-Request: #7096
1 parent a8954d9 commit 386d14d

File tree

5 files changed

+230
-88
lines changed

5 files changed

+230
-88
lines changed

aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import datetime as dt
21
from dataclasses import dataclass, field
3-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List
43

54
import requests
65

@@ -63,24 +62,3 @@ def from_request(
6362
except Exception as e:
6463
raise RuntimeError(f"Malformed API payload: {e}")
6564
return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
66-
67-
68-
def get_latest_meta_info(
69-
time_series: List[BenchmarkTimeSeriesItem],
70-
) -> Optional[dict[str, Any]]:
71-
if not time_series:
72-
return None
73-
74-
pts = [p for s in time_series for p in s.data]
75-
latest = max(
76-
pts,
77-
key=lambda p: dt.datetime.fromisoformat(
78-
p["granularity_bucket"].replace("Z", "+00:00")
79-
),
80-
)
81-
return {
82-
"commit": latest.get("commit", ""),
83-
"branch": latest.get("branch", ""),
84-
"timestamp": latest.get("granularity_bucket", ""),
85-
"workflow_id": latest.get("workflow_id", ""),
86-
}

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# their own benchmark regression config, currently place
1616
# here for lambda
1717

18-
1918
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
2019
name="Compiler Benchmark Regression",
2120
id="compiler_regression",
@@ -44,6 +43,9 @@
4443
}
4544
""",
4645
),
46+
hud_info={
47+
"url": "https://hud.pytorch.org/benchmark/compilers",
48+
},
4749
# set baseline from past 7 days using avg, and compare with the last 1 day
4850
policy=Policy(
4951
frequency=Frequency(value=1, unit="days"),
@@ -67,7 +69,7 @@
6769
"compression_ratio": RegressionPolicy(
6870
name="compression_ratio",
6971
condition="greater_equal",
70-
threshold=0.9,
72+
threshold=0.95,
7173
baseline_aggregation="max",
7274
),
7375
},

aws/lambda/benchmark_regression_summary_report/common/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ class BenchmarkConfig:
241241
id: str
242242
source: BenchmarkApiSource
243243
policy: Policy
244+
hud_info: Optional[dict[str, Any]] = None
244245

245246

246247
@dataclass

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 188 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,63 @@
1212
get_regression_status,
1313
PerGroupResult,
1414
)
15+
from jinja2 import Template
1516

1617

1718
logger = logging.getLogger()
19+
REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
20+
config_id: `{{ report_id }}`
21+
22+
We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
23+
(HUD benchmark regression page coming soon...)
24+
25+
> **Status:** {{ status }} · **Frequency:** {{ frequency }}
26+
27+
## Summary
28+
| Metric | Value |
29+
| :-- | --: |
30+
| Total | {{ summary.total_count | default(0) }} |
31+
| Regressions | {{ summary.regression_count | default(0) }} |
32+
| Suspicious | {{ summary.suspicious_count | default(0) }} |
33+
| No Regression | {{ summary.no_regression_count | default(0) }} |
34+
| Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
35+
36+
## Data Windows
37+
Baseline is a single reference value (e.g., mean, max, min, latest) aggregated from the previous few days,
38+
used to detect regressions by comparing against metric values in the target window.
39+
40+
### Baseline window (used to calculate baseline value)
41+
- **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
42+
- **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
43+
44+
### Target window (used to compare against baseline value)
45+
- **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
46+
- **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
47+
48+
{% if regression_items and regression_items|length > 0 %}
49+
## Regression Glance
50+
{% if url %}
51+
Use items below in [HUD]({{ url }}) to see regression.
52+
{% endif %}
53+
54+
{% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
55+
{% if regression_items|length > 10 %}
56+
… (showing first 10 only, total {{ regression_items|length }} regressions)
57+
{% endif %}
58+
{% for item in items %}
59+
{% set kv = item.group_info|dictsort %}
60+
{{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
61+
|{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
62+
|{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
63+
{% if item.baseline_point -%}
64+
- **baseline**: {{ item.baseline_point.value}},
65+
- **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
66+
- **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
67+
{{ "\n" }}
68+
{%- endif %}
69+
{% endfor %}
70+
{% endif %}
71+
"""
1872

1973

2074
class ReportManager:
@@ -68,6 +122,57 @@ def run(
68122
except Exception as e:
69123
logger.error(f"failed to insert report to db, error: {e}")
70124
raise
125+
self.notify_github_comment(github_token)
126+
127+
def notify_github_comment(self, github_token: str):
128+
if self.status != "regression":
129+
logger.info(
130+
"[%s] no regression found, skip notification",
131+
self.config_id,
132+
)
133+
return
134+
135+
github_notification = self.config.policy.get_github_notification_config()
136+
if not github_notification:
137+
logger.info(
138+
"[%s] no github notification config found, skip notification",
139+
self.config_id,
140+
)
141+
return
142+
logger.info("[%s] preparing github comment content", self.config_id)
143+
content = self._to_markdoown()
144+
if self.is_dry_run:
145+
logger.info(
146+
"[%s]dry run, skip sending comment to github, report(%s)",
147+
self.config_id,
148+
self.id,
149+
)
150+
logger.info("[dry run] printing comment content")
151+
print(json.dumps(content, indent=2, default=str))
152+
logger.info("[dry run] Done! Finish printing comment content")
153+
return
154+
logger.info("[%s] create comment to github issue", self.config_id)
155+
github_notification.create_github_comment(content, github_token)
156+
logger.info("[%s] done. comment is sent to github", self.config_id)
157+
158+
def _to_markdoown(self):
159+
self.regression_items = self._collect_regression_items()
160+
url = ""
161+
if self.config.hud_info:
162+
url = self.config.hud_info.get("url", "")
163+
164+
md = Template(REPORT_MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True).render(
165+
id=self.id,
166+
url=url,
167+
status=self.status,
168+
report_id=self.config_id,
169+
summary=self.report["summary"],
170+
baseline=self.baseline,
171+
target=self.target,
172+
frequency=self.config.policy.frequency.get_text(),
173+
regression_items=self.regression_items,
174+
)
175+
return md
71176

72177
def _collect_regression_items(self) -> list[PerGroupResult]:
73178
items = []
@@ -120,11 +225,30 @@ def insert_to_db(
120225
"repo": self.repo,
121226
"report_json": report_json,
122227
}
228+
229+
if self.is_dry_run:
230+
logger.info(
231+
"[%s]dry run, skip inserting report to db, report(%s)",
232+
self.config_id,
233+
self.id,
234+
)
235+
logger.info("[dry run] printing db params data")
236+
if self.is_dry_run:
237+
print(json.dumps(params, indent=2, default=str))
238+
logger.info("[dry run] Done! Finish printing db params data")
239+
return
123240
logger.info(
124241
"[%s]inserting benchmark regression report(%s)", self.config_id, self.id
125242
)
126-
self._db_insert(cc, self.db_table_name, params)
127-
243+
try:
244+
self._db_insert(cc, self.db_table_name, params)
245+
except Exception:
246+
logger.exception(
247+
"[%s] failed to insert report to target table %s",
248+
self.config_id,
249+
self.db_table_name,
250+
)
251+
raise
128252
logger.info(
129253
"[%s] Done. inserted benchmark regression report(%s)",
130254
self.config_id,
@@ -136,14 +260,28 @@ def _db_insert(
136260
cc: clickhouse_connect.driver.Client,
137261
table: str,
138262
params: dict,
139-
) -> tuple[bool, int]:
263+
):
264+
"""
265+
Insert one row into ClickHouse using cc.insert().
266+
Returns (inserted, written_rows).
267+
"""
268+
if self._row_exists(
269+
cc,
270+
table,
271+
params["report_id"],
272+
params["type"],
273+
params["repo"],
274+
params["last_record_ts"],
275+
):
276+
return False, 0
277+
140278
sql = f"""
141279
INSERT INTO {table} (
142280
id,
143281
report_id,
144282
last_record_ts,
145283
last_record_commit,
146-
`type`,
284+
type,
147285
status,
148286
regression_count,
149287
insufficient_data_count,
@@ -152,49 +290,56 @@ def _db_insert(
152290
repo,
153291
report
154292
)
155-
SELECT
156-
{{id:UUID}},
157-
{{report_id:String}},
158-
{{last_record_ts:DateTime64(0)}},
159-
{{last_record_commit:String}},
160-
{{type:String}},
161-
{{status:String}},
162-
{{regression_count:UInt32}},
163-
{{insufficient_data_count:UInt32}},
164-
{{suspected_regression_count:UInt32}},
165-
{{total_count:UInt32}},
166-
{{repo:String}},
167-
{{report_json:String}}
168-
FROM system.one
169-
WHERE NOT EXISTS (
170-
SELECT 1
171-
FROM {table}
172-
WHERE report_id = {{report_id:String}}
173-
AND `type` = {{type:String}}
174-
AND repo = {{repo:String}}
175-
AND stamp = toDate({{last_record_ts:DateTime64(0)}})
293+
VALUES
294+
(
295+
%(id)s,
296+
%(report_id)s,
297+
%(last_record_ts)s,
298+
%(last_record_commit)s,
299+
%(type)s,
300+
%(status)s,
301+
%(regression_count)s,
302+
%(insufficient_data_count)s,
303+
%(suspected_regression_count)s,
304+
%(total_count)s,
305+
%(repo)s,
306+
%(report_json)s
176307
)
308+
"""
309+
cc.command(sql, parameters=params)
310+
311+
def _row_exists(
312+
self,
313+
cc: clickhouse_connect.driver.Client,
314+
table: str,
315+
report_id: str,
316+
type_str: str,
317+
repo: str,
318+
last_record_ts,
319+
) -> bool:
320+
"""
321+
Check if a row already exists with the same (report_id, type, repo, stamp).
322+
Returns True if found, False otherwise.
323+
"""
324+
sql = f"""
325+
SELECT 1
326+
FROM {table}
327+
WHERE report_id = %(report_id)s
328+
AND type = %(type)s
329+
AND repo = %(repo)s
330+
AND stamp = toDate(%(last_record_ts)s)
177331
LIMIT 1
178332
"""
179-
180-
res = cc.query(sql, parameters=params)
181-
summary = getattr(res, "summary", {}) or {}
182-
183-
written_any = (
184-
summary.get("written_rows")
185-
or summary.get("rows_written")
186-
or summary.get("written", 0)
187-
or 0
333+
res = cc.query(
334+
sql,
335+
parameters={
336+
"report_id": report_id,
337+
"type": type_str,
338+
"repo": repo,
339+
"last_record_ts": last_record_ts,
340+
},
188341
)
189-
190-
logger.info("wrting to db summmary %s", summary)
191-
try:
192-
written = int(written_any)
193-
except (TypeError, ValueError):
194-
written = 0
195-
196-
inserted = written > 0
197-
return inserted, written
342+
return bool(res.result_rows)
198343

199344
def _validate_latest_meta_info(
200345
self, latest_meta_info: Dict[str, Any]

0 commit comments

Comments
 (0)