Skip to content

Commit 73a32b8

Browse files
authored
add github notification (#7096)
Stack from [ghstack](https://github.com/ezyang/ghstack) (oldest at bottom): * #7112 * __->__ #7096 * #7095 # Add github notification settings currently we only create github comment if and only a regression status is detected. later we can add if we want to add suspicious too, ## Prerequest currently user must: 1. create a github issue first, and put it in the Policy section to make this work 2. Each issue should be associated with a butterfly rule to link to internal workplace/ oncall emails ## later improvement as you see, currently if a regression does not resolved, it will send notification to github everyday. since we have those report in db, we can later do exponential notification based on previous report status
1 parent b02e16b commit 73a32b8

File tree

4 files changed

+154
-25
lines changed

4 files changed

+154
-25
lines changed

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# their own benchmark regression config, currenlty place
1616
# here for lambda
1717

18-
1918
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
2019
name="Compiler Benchmark Regression",
2120
id="compiler_regression",
@@ -44,6 +43,9 @@
4443
}
4544
""",
4645
),
46+
hud_info={
47+
"url": "https://hud.pytorch.org/benchmark/compilers",
48+
},
4749
# set baseline from past 7 days using avg, and compare with the last 1 day
4850
policy=Policy(
4951
frequency=Frequency(value=1, unit="days"),
@@ -67,7 +69,7 @@
6769
"compression_ratio": RegressionPolicy(
6870
name="compression_ratio",
6971
condition="greater_equal",
70-
threshold=0.9,
72+
threshold=0.95,
7173
baseline_aggregation="max",
7274
),
7375
},

aws/lambda/benchmark_regression_summary_report/common/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ class BenchmarkConfig:
241241
id: str
242242
source: BenchmarkApiSource
243243
policy: Policy
244+
hud_info: Optional[dict[str, Any]] = None
244245

245246

246247
@dataclass

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 112 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,63 @@
1212
get_regression_status,
1313
PerGroupResult,
1414
)
15+
from jinja2 import Template
16+
17+
18+
logger = logging.getLogger()
19+
REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
20+
config_id: `{{ report_id }}`
21+
22+
We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
23+
(HUD benchmark regression page coming soon...)
24+
25+
> **Status:** {{ status }} · **Frequency:** {{ frequency }}
26+
27+
## Summary
28+
| Metric | Value |
29+
| :-- | --: |
30+
| Total | {{ summary.total_count | default(0) }} |
31+
| Regressions | {{ summary.regression_count | default(0) }} |
32+
| Suspicious | {{ summary.suspicious_count | default(0) }} |
33+
| No Regression | {{ summary.no_regression_count | default(0) }} |
34+
| Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
35+
36+
## Data Windows
37+
Baseline is a single reference value (e.g., mean, max, min, latest) aggregated from the previous few days,
38+
used to detect regressions by comparing against metric values in the target window.
39+
40+
### Baseline window (used to calculate baseline value)
41+
- **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
42+
- **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
43+
44+
### Target window (used to compare against baseline value)
45+
- **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
46+
- **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
47+
48+
{% if regression_items and regression_items|length > 0 %}
49+
## Regression Glance
50+
{% if url %}
51+
Use items below in [HUD]({{ url }}) to see regression.
52+
{% endif %}
53+
54+
{% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
55+
{% if regression_items|length > 10 %}
56+
… (showing first 10 only, total {{ regression_items|length }} regressions)
57+
{% endif %}
58+
{% for item in items %}
59+
{% set kv = item.group_info|dictsort %}
60+
{{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
61+
|{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
62+
|{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
63+
{% if item.baseline_point -%}
64+
- **baseline**: {{ item.baseline_point.value}},
65+
- **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
66+
- **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
67+
{{ "\n" }}
68+
{%- endif %}
69+
{% endfor %}
70+
{% endif %}
71+
"""
1572

1673

1774
logger = logging.getLogger()
@@ -64,10 +121,64 @@ def run(
64121
main method used to insert the report to db and create github comment in targeted issue
65122
"""
66123
try:
67-
self.insert_to_db(cc)
124+
applied_insertion = self.insert_to_db(cc)
68125
except Exception as e:
69126
logger.error(f"failed to insert report to db, error: {e}")
70127
raise
128+
if not applied_insertion:
129+
logger.info("[%s] skip notification, already exists in db", self.config_id)
130+
return
131+
self.notify_github_comment(github_token)
132+
133+
def notify_github_comment(self, github_token: str):
134+
if self.status != "regression":
135+
logger.info(
136+
"[%s] no regression found, skip notification",
137+
self.config_id,
138+
)
139+
return
140+
141+
github_notification = self.config.policy.get_github_notification_config()
142+
if not github_notification:
143+
logger.info(
144+
"[%s] no github notification config found, skip notification",
145+
self.config_id,
146+
)
147+
return
148+
logger.info("[%s] prepareing gitub comment content", self.config_id)
149+
content = self._to_markdoown()
150+
if self.is_dry_run:
151+
logger.info(
152+
"[%s]dry run, skip sending comment to github, report(%s)",
153+
self.config_id,
154+
self.id,
155+
)
156+
logger.info("[dry run] printing comment content")
157+
print(json.dumps(content, indent=2, default=str))
158+
logger.info("[dry run] Done! Finish printing comment content")
159+
return
160+
logger.info("[%s] create comment to github issue", self.config_id)
161+
github_notification.create_github_comment(content, github_token)
162+
logger.info("[%s] done. comment is sent to github", self.config_id)
163+
164+
def _to_markdoown(self):
165+
self.regression_items = self._collect_regression_items()
166+
url = ""
167+
if self.config.hud_info:
168+
url = self.config.hud_info.get("url", "")
169+
170+
md = Template(REPORT_MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True).render(
171+
id=self.id,
172+
url=url,
173+
status=self.status,
174+
report_id=self.config_id,
175+
summary=self.report["summary"],
176+
baseline=self.baseline,
177+
target=self.target,
178+
frequency=self.config.policy.frequency.get_text(),
179+
regression_items=self.regression_items,
180+
)
181+
return md
71182

72183
def _collect_regression_items(self) -> list[PerGroupResult]:
73184
items = []

aws/lambda/benchmark_regression_summary_report/lambda_function.py

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
from dateutil.parser import isoparse
1919

2020

21+
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
2122
BENCHMARK_REGRESSION_REPORT_TABLE = "fortesting.benchmark_regression_report"
23+
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
2224

2325
logging.basicConfig(
2426
level=logging.INFO,
@@ -33,9 +35,6 @@
3335
"CLICKHOUSE_USERNAME": os.getenv("CLICKHOUSE_USERNAME", ""),
3436
}
3537

36-
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
37-
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
38-
3938

4039
def format_ts_with_t(ts: int) -> str:
4140
return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).strftime(
@@ -54,7 +53,6 @@ def get_clickhouse_client(
5453
return clickhouse_connect.get_client(
5554
host=host, user=user, password=password, secure=True, verify=False
5655
)
57-
5856
return clickhouse_connect.get_client(
5957
host=host, user=user, password=password, secure=True
6058
)
@@ -77,8 +75,10 @@ def __init__(
7775
config_id: str,
7876
end_time: int,
7977
is_dry_run: bool = False,
78+
is_pass_check: bool = False,
8079
) -> None:
8180
self.is_dry_run = is_dry_run
81+
self.is_pass_check = is_pass_check
8282
self.config_id = config_id
8383
self.end_time = end_time
8484

@@ -127,6 +127,7 @@ def process(
127127
should_generate = self._should_generate_report(
128128
cc, self.end_time, self.config_id, report_freq
129129
)
130+
130131
if not should_generate:
131132
self.log_info(
132133
"Skip generate report",
@@ -138,7 +139,6 @@ def process(
138139
f"with frequency {report_freq.get_text()}..."
139140
)
140141

141-
self.log_info("get target data")
142142
target, ls, le = self.get_target(config, self.end_time)
143143
if not target:
144144
self.log_info(
@@ -157,12 +157,11 @@ def process(
157157
regression_report = generator.generate()
158158
if self.is_dry_run:
159159
print(json.dumps(regression_report, indent=2, default=str))
160-
return
161-
162160
reportManager = ReportManager(
163161
config=config,
164162
regression_report=regression_report,
165163
db_table_name=BENCHMARK_REGRESSION_REPORT_TABLE,
164+
is_dry_run=self.is_dry_run,
166165
)
167166
reportManager.run(cc, ENVS["GITHUB_TOKEN"])
168167
return
@@ -172,7 +171,8 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
172171
target_s = end_time - data_range.comparison_timedelta_s()
173172
target_e = end_time
174173
self.log_info(
175-
f"get baseline data for time range [{format_ts_with_t(target_s)},{format_ts_with_t(target_e)}]"
174+
"getting target data for time range "
175+
f"[{format_ts_with_t(target_s)},{format_ts_with_t(target_e)}] ..."
176176
)
177177
target_data = self._fetch_from_benchmark_ts_api(
178178
config_id=config.id,
@@ -181,7 +181,7 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
181181
source=config.source,
182182
)
183183
self.log_info(
184-
f"found {len(target_data.time_series)} # of data, with time range {target_data.time_range}",
184+
f"done. found {len(target_data.time_series)} # of data groups, with time range {target_data.time_range}",
185185
)
186186
if not target_data.time_range or not target_data.time_range.end:
187187
return None, target_s, target_e
@@ -196,7 +196,8 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
196196
baseline_s = end_time - data_range.total_timedelta_s()
197197
baseline_e = end_time - data_range.comparison_timedelta_s()
198198
self.log_info(
199-
f"get baseline data for time range [{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}]"
199+
"getting baseline data for time range "
200+
f"[{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}] ..."
200201
)
201202
# fetch baseline from api
202203
raw_data = self._fetch_from_benchmark_ts_api(
@@ -207,11 +208,7 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
207208
)
208209

209210
self.log_info(
210-
f"get baseline data for time range [{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}]"
211-
)
212-
213-
self.log_info(
214-
f"found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
211+
f"Done. found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
215212
)
216213

217214
baseline_latest_ts = int(isoparse(raw_data.time_range.end).timestamp())
@@ -269,11 +266,8 @@ def _fetch_from_benchmark_ts_api(
269266
)
270267

271268
elapsed_ms = (time.perf_counter() - t0) * 1000.0
272-
logger.info(
273-
"[%s] call OK in %.1f ms (query_len=%d)",
274-
config_id,
275-
elapsed_ms,
276-
len(query),
269+
self.log_info(
270+
f"call OK in {elapsed_ms} ms (query_len={len(query)})",
277271
)
278272
return resp.data
279273
except requests.exceptions.HTTPError as e:
@@ -290,7 +284,7 @@ def _fetch_from_benchmark_ts_api(
290284
else str(e)
291285
)
292286
self.log_error(
293-
f"[{config_id}] call FAILED in {elapsed_ms} ms: {err_msg}",
287+
f"call FAILED in {elapsed_ms} ms: {err_msg}",
294288
)
295289
raise
296290

@@ -348,6 +342,12 @@ def _get_latest_record_ts(
348342
f"time_boundary({format_ts_with_t(time_boundary)})"
349343
f"based on latest_record_ts({format_ts_with_t(latest_record_ts)})",
350344
)
345+
# dry_run is True, is_pass_check is True, then we allow to generate report even the time check is not met
346+
if self.is_dry_run and self.is_pass_check:
347+
should_generate = True
348+
self.log_info(
349+
f"[{f.get_text()}] dry_run is True, is_pass_check is True, force generate report for print only",
350+
)
351351
return should_generate
352352

353353

@@ -357,7 +357,12 @@ def main(
357357
args: Optional[argparse.Namespace] = None,
358358
*,
359359
is_dry_run: bool = False,
360+
is_forced: bool = False,
360361
):
362+
if not is_dry_run and is_forced:
363+
is_forced = False
364+
logger.info("is_dry_run is False, force must be disabled, this is not allowed")
365+
361366
if not github_access_token:
362367
raise ValueError("Missing environment variable GITHUB_TOKEN")
363368

@@ -378,7 +383,10 @@ def main(
378383
# caution, raise exception may lead lambda to retry
379384
try:
380385
processor = BenchmarkSummaryProcessor(
381-
config_id=config_id, end_time=end_time_ts, is_dry_run=is_dry_run
386+
config_id=config_id,
387+
end_time=end_time_ts,
388+
is_dry_run=is_dry_run,
389+
is_pass_check=is_forced,
382390
)
383391
processor.process(args=args)
384392
except Exception as e:
@@ -419,6 +427,12 @@ def parse_args() -> argparse.Namespace:
419427
action="store_false",
420428
help="Disable dry-run mode",
421429
)
430+
parser.add_argument(
431+
"--force",
432+
dest="force",
433+
action="store_true",
434+
help="Enable force mode, this only allowed when dry-run is enabled",
435+
)
422436
parser.add_argument(
423437
"--config-id",
424438
type=str,
@@ -466,6 +480,7 @@ def local_run() -> None:
466480
github_access_token=args.github_access_token,
467481
args=args,
468482
is_dry_run=args.dry_run,
483+
is_forced=args.force,
469484
)
470485

471486

0 commit comments

Comments
 (0)