Skip to content

Commit 49c8c22

Browse files
committed
add github notification
ghstack-source-id: ff6f673 Pull-Request: #7096
1 parent d28a8c3 commit 49c8c22

File tree

5 files changed

+152
-47
lines changed

5 files changed

+152
-47
lines changed

aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import datetime as dt
21
from dataclasses import dataclass, field
3-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List
43

54
import requests
65

@@ -63,24 +62,3 @@ def from_request(
6362
except Exception as e:
6463
raise RuntimeError(f"Malformed API payload: {e}")
6564
return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
66-
67-
68-
def get_latest_meta_info(
69-
time_series: List[BenchmarkTimeSeriesItem],
70-
) -> Optional[dict[str, Any]]:
71-
if not time_series:
72-
return None
73-
74-
pts = [p for s in time_series for p in s.data]
75-
latest = max(
76-
pts,
77-
key=lambda p: dt.datetime.fromisoformat(
78-
p["granularity_bucket"].replace("Z", "+00:00")
79-
),
80-
)
81-
return {
82-
"commit": latest.get("commit", ""),
83-
"branch": latest.get("branch", ""),
84-
"timestamp": latest.get("granularity_bucket", ""),
85-
"workflow_id": latest.get("workflow_id", ""),
86-
}

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# their own benchmark regression config, currently placed
1616
# here for lambda
1717

18-
1918
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
2019
name="Compiler Benchmark Regression",
2120
id="compiler_regression",
@@ -44,6 +43,9 @@
4443
}
4544
""",
4645
),
46+
hud_info={
47+
"url": "https://hud.pytorch.org/benchmark/compilers",
48+
},
4749
# set baseline from past 7 days using avg, and compare with the last 1 day
4850
policy=Policy(
4951
frequency=Frequency(value=1, unit="days"),
@@ -67,7 +69,7 @@
6769
"compression_ratio": RegressionPolicy(
6870
name="compression_ratio",
6971
condition="greater_equal",
70-
threshold=0.9,
72+
threshold=0.95,
7173
baseline_aggregation="max",
7274
),
7375
},

aws/lambda/benchmark_regression_summary_report/common/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ class BenchmarkConfig:
241241
id: str
242242
source: BenchmarkApiSource
243243
policy: Policy
244+
hud_info: Optional[dict[str, Any]] = None
244245

245246

246247
@dataclass

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 109 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,63 @@
1212
get_regression_status,
1313
PerGroupResult,
1414
)
15+
from jinja2 import Template
1516

1617

1718
logger = logging.getLogger()
19+
REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
20+
config_id: `{{ report_id }}`
21+
22+
We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
23+
(HUD benchmark regression page coming soon...)
24+
25+
> **Status:** {{ status }} · **Frequency:** {{ frequency }}
26+
27+
## Summary
28+
| Metric | Value |
29+
| :-- | --: |
30+
| Total | {{ summary.total_count | default(0) }} |
31+
| Regressions | {{ summary.regression_count | default(0) }} |
32+
| Suspicious | {{ summary.suspicious_count | default(0) }} |
33+
| No Regression | {{ summary.no_regression_count | default(0) }} |
34+
| Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
35+
36+
## Data Windows
37+
Baseline is a single reference value (e.g., mean, max, min, latest) aggregated from the previous few days,
38+
used to detect regressions by comparing against metric values in the target window.
39+
40+
### Baseline window (used to calculate baseline value)
41+
- **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
42+
- **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
43+
44+
### Target window (used to compare against baseline value)
45+
- **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
46+
- **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
47+
48+
{% if regression_items and regression_items|length > 0 %}
49+
## Regression Glance
50+
{% if url %}
51+
Use items below in [HUD]({{ url }}) to see regression.
52+
{% endif %}
53+
54+
{% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
55+
{% if regression_items|length > 10 %}
56+
… (showing first 10 only, total {{ regression_items|length }} regressions)
57+
{% endif %}
58+
{% for item in items %}
59+
{% set kv = item.group_info|dictsort %}
60+
{{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
61+
|{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
62+
|{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
63+
{% if item.baseline_point -%}
64+
- **baseline**: {{ item.baseline_point.value}},
65+
- **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
66+
- **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
67+
{{ "\n" }}
68+
{%- endif %}
69+
{% endfor %}
70+
{% endif %}
71+
"""
1872

1973

2074
class ReportManager:
@@ -64,10 +118,64 @@ def run(
64118
Main method: inserts the report into the db and creates a GitHub comment on the targeted issue.
65119
"""
66120
try:
67-
self.insert_to_db(cc)
121+
applied_insertion = self.insert_to_db(cc)
68122
except Exception as e:
69123
logger.error(f"failed to insert report to db, error: {e}")
70124
raise
125+
if not applied_insertion:
126+
logger.info("[%s] skip notification, already exists in db", self.config_id)
127+
return
128+
self.notify_github_comment(github_token)
129+
130+
def notify_github_comment(self, github_token: str):
131+
if self.status != "regression":
132+
logger.info(
133+
"[%s] no regression found, skip notification",
134+
self.config_id,
135+
)
136+
return
137+
138+
github_notification = self.config.policy.get_github_notification_config()
139+
if not github_notification:
140+
logger.info(
141+
"[%s] no github notification config found, skip notification",
142+
self.config_id,
143+
)
144+
return
145+
logger.info("[%s] prepareing gitub comment content", self.config_id)
146+
content = self._to_markdoown()
147+
if self.is_dry_run:
148+
logger.info(
149+
"[%s]dry run, skip sending comment to github, report(%s)",
150+
self.config_id,
151+
self.id,
152+
)
153+
logger.info("[dry run] printing comment content")
154+
print(json.dumps(content, indent=2, default=str))
155+
logger.info("[dry run] Done! Finish printing comment content")
156+
return
157+
logger.info("[%s] create comment to github issue", self.config_id)
158+
github_notification.create_github_comment(content, github_token)
159+
logger.info("[%s] done. comment is sent to github", self.config_id)
160+
161+
def _to_markdoown(self):
162+
self.regression_items = self._collect_regression_items()
163+
url = ""
164+
if self.config.hud_info:
165+
url = self.config.hud_info.get("url", "")
166+
167+
md = Template(REPORT_MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True).render(
168+
id=self.id,
169+
url=url,
170+
status=self.status,
171+
report_id=self.config_id,
172+
summary=self.report["summary"],
173+
baseline=self.baseline,
174+
target=self.target,
175+
frequency=self.config.policy.frequency.get_text(),
176+
regression_items=self.regression_items,
177+
)
178+
return md
71179

72180
def _collect_regression_items(self) -> list[PerGroupResult]:
73181
items = []

aws/lambda/benchmark_regression_summary_report/lambda_function.py

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
from dateutil.parser import isoparse
1919

2020

21+
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
2122
BENCHMARK_REGRESSION_REPORT_TABLE = "fortesting.benchmark_regression_report"
23+
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
2224

2325
logging.basicConfig(
2426
level=logging.INFO,
@@ -33,9 +35,6 @@
3335
"CLICKHOUSE_USERNAME": os.getenv("CLICKHOUSE_USERNAME", ""),
3436
}
3537

36-
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
37-
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
38-
3938

4039
def format_ts_with_t(ts: int) -> str:
4140
return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).strftime(
@@ -54,7 +53,6 @@ def get_clickhouse_client(
5453
return clickhouse_connect.get_client(
5554
host=host, user=user, password=password, secure=True, verify=False
5655
)
57-
5856
return clickhouse_connect.get_client(
5957
host=host, user=user, password=password, secure=True
6058
)
@@ -77,8 +75,10 @@ def __init__(
7775
config_id: str,
7876
end_time: int,
7977
is_dry_run: bool = False,
78+
is_pass_check: bool = False,
8079
) -> None:
8180
self.is_dry_run = is_dry_run
81+
self.is_pass_check = is_pass_check
8282
self.config_id = config_id
8383
self.end_time = end_time
8484

@@ -127,6 +127,7 @@ def process(
127127
should_generate = self._should_generate_report(
128128
cc, self.end_time, self.config_id, report_freq
129129
)
130+
130131
if not should_generate:
131132
self.log_info(
132133
"Skip generate report",
@@ -138,7 +139,6 @@ def process(
138139
f"with frequency {report_freq.get_text()}..."
139140
)
140141

141-
self.log_info("get target data")
142142
target, ls, le = self.get_target(config, self.end_time)
143143
if not target:
144144
self.log_info(
@@ -157,12 +157,12 @@ def process(
157157
regression_report = generator.generate()
158158
if self.is_dry_run:
159159
print(json.dumps(regression_report, indent=2, default=str))
160-
return
161160

162161
reportManager = ReportManager(
163162
config=config,
164163
regression_report=regression_report,
165164
db_table_name=BENCHMARK_REGRESSION_REPORT_TABLE,
165+
is_dry_run=self.is_dry_run,
166166
)
167167
reportManager.run(cc, ENVS["GITHUB_TOKEN"])
168168
return
@@ -172,7 +172,8 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
172172
target_s = end_time - data_range.comparison_timedelta_s()
173173
target_e = end_time
174174
self.log_info(
175-
f"get baseline data for time range [{format_ts_with_t(target_s)},{format_ts_with_t(target_e)}]"
175+
"getting target data for time range "
176+
f"[{format_ts_with_t(target_s)},{format_ts_with_t(target_e)}] ..."
176177
)
177178
target_data = self._fetch_from_benchmark_ts_api(
178179
config_id=config.id,
@@ -181,7 +182,7 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
181182
source=config.source,
182183
)
183184
self.log_info(
184-
f"found {len(target_data.time_series)} # of data, with time range {target_data.time_range}",
185+
f"done. found {len(target_data.time_series)} # of data groups, with time range {target_data.time_range}",
185186
)
186187
if not target_data.time_range or not target_data.time_range.end:
187188
return None, target_s, target_e
@@ -196,7 +197,8 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
196197
baseline_s = end_time - data_range.total_timedelta_s()
197198
baseline_e = end_time - data_range.comparison_timedelta_s()
198199
self.log_info(
199-
f"get baseline data for time range [{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}]"
200+
"getting baseline data for time range "
201+
f"[{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}] ..."
200202
)
201203
# fetch baseline from api
202204
raw_data = self._fetch_from_benchmark_ts_api(
@@ -207,11 +209,7 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
207209
)
208210

209211
self.log_info(
210-
f"get baseline data for time range [{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}]"
211-
)
212-
213-
self.log_info(
214-
f"found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
212+
f"Done. found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
215213
)
216214

217215
baseline_latest_ts = int(isoparse(raw_data.time_range.end).timestamp())
@@ -269,11 +267,8 @@ def _fetch_from_benchmark_ts_api(
269267
)
270268

271269
elapsed_ms = (time.perf_counter() - t0) * 1000.0
272-
logger.info(
273-
"[%s] call OK in %.1f ms (query_len=%d)",
274-
config_id,
275-
elapsed_ms,
276-
len(query),
270+
self.log_info(
271+
f"call OK in {elapsed_ms} ms (query_len={len(query)})",
277272
)
278273
return resp.data
279274
except requests.exceptions.HTTPError as e:
@@ -290,7 +285,7 @@ def _fetch_from_benchmark_ts_api(
290285
else str(e)
291286
)
292287
self.log_error(
293-
f"[{config_id}] call FAILED in {elapsed_ms} ms: {err_msg}",
288+
f"call FAILED in {elapsed_ms} ms: {err_msg}",
294289
)
295290
raise
296291

@@ -348,6 +343,12 @@ def _get_latest_record_ts(
348343
f"time_boundary({format_ts_with_t(time_boundary)})"
349344
f"based on latest_record_ts({format_ts_with_t(latest_record_ts)})",
350345
)
346+
# When both dry_run and is_pass_check are True, allow generating the report even if the time check is not met.
347+
if self.is_dry_run and self.is_pass_check:
348+
should_generate = True
349+
self.log_info(
350+
f"[{f.get_text()}] dry_run is True, is_pass_check is True, force generate report for print only",
351+
)
351352
return should_generate
352353

353354

@@ -357,7 +358,12 @@ def main(
357358
args: Optional[argparse.Namespace] = None,
358359
*,
359360
is_dry_run: bool = False,
361+
is_forced: bool = False,
360362
):
363+
if not is_dry_run and is_forced:
364+
is_forced = False
365+
logger.info("is_dry_run is False, force must be disabled, this is not allowed")
366+
361367
if not github_access_token:
362368
raise ValueError("Missing environment variable GITHUB_TOKEN")
363369

@@ -378,7 +384,10 @@ def main(
378384
# caution, raise exception may lead lambda to retry
379385
try:
380386
processor = BenchmarkSummaryProcessor(
381-
config_id=config_id, end_time=end_time_ts, is_dry_run=is_dry_run
387+
config_id=config_id,
388+
end_time=end_time_ts,
389+
is_dry_run=is_dry_run,
390+
is_pass_check=is_forced,
382391
)
383392
processor.process(args=args)
384393
except Exception as e:
@@ -419,6 +428,12 @@ def parse_args() -> argparse.Namespace:
419428
action="store_false",
420429
help="Disable dry-run mode",
421430
)
431+
parser.add_argument(
432+
"--force",
433+
dest="force",
434+
action="store_true",
435+
help="Enable force mode, this only allowed when dry-run is enabled",
436+
)
422437
parser.add_argument(
423438
"--config-id",
424439
type=str,
@@ -466,6 +481,7 @@ def local_run() -> None:
466481
github_access_token=args.github_access_token,
467482
args=args,
468483
is_dry_run=args.dry_run,
484+
is_forced=args.force,
469485
)
470486

471487

0 commit comments

Comments
 (0)