Skip to content

Commit 6aa3676

Browse files
committed
add github notification
ghstack-source-id: 87b959c Pull-Request: #7096
1 parent f5ae5ab commit 6aa3676

File tree

5 files changed

+111
-43
lines changed

5 files changed

+111
-43
lines changed

aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import datetime as dt
21
from dataclasses import dataclass, field
3-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List
43

54
import requests
65

@@ -63,24 +62,3 @@ def from_request(
6362
except Exception as e:
6463
raise RuntimeError(f"Malformed API payload: {e}")
6564
return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
66-
67-
68-
def get_latest_meta_info(
69-
time_series: List[BenchmarkTimeSeriesItem],
70-
) -> Optional[dict[str, Any]]:
71-
if not time_series:
72-
return None
73-
74-
pts = [p for s in time_series for p in s.data]
75-
latest = max(
76-
pts,
77-
key=lambda p: dt.datetime.fromisoformat(
78-
p["granularity_bucket"].replace("Z", "+00:00")
79-
),
80-
)
81-
return {
82-
"commit": latest.get("commit", ""),
83-
"branch": latest.get("branch", ""),
84-
"timestamp": latest.get("granularity_bucket", ""),
85-
"workflow_id": latest.get("workflow_id", ""),
86-
}

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# their own benchmark regression config, currently placed
1616
# here for lambda
1717

18-
1918
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
2019
name="Compiler Benchmark Regression",
2120
id="compiler_regression",
@@ -44,6 +43,9 @@
4443
}
4544
""",
4645
),
46+
hud_info={
47+
"url": "https://hud.pytorch.org/benchmark/compilers",
48+
},
4749
# set baseline from past 7 days using avg, and compare with the last 1 day
4850
policy=Policy(
4951
frequency=Frequency(value=1, unit="days"),
@@ -67,7 +69,7 @@
6769
"compression_ratio": RegressionPolicy(
6870
name="compression_ratio",
6971
condition="greater_equal",
70-
threshold=0.9,
72+
threshold=0.95,
7173
baseline_aggregation="max",
7274
),
7375
},

aws/lambda/benchmark_regression_summary_report/common/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ class BenchmarkConfig:
241241
id: str
242242
source: BenchmarkApiSource
243243
policy: Policy
244+
hud_info: Optional[dict[str, Any]] = None
244245

245246

246247
@dataclass

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,62 @@
1212
get_regression_status,
1313
PerGroupResult,
1414
)
15+
from jinja2 import Template
1516

1617

1718
logger = logging.getLogger()
19+
REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
20+
config_id: `{{ report_id }}`
21+
22+
We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
23+
Full report details will be available in HUD soon)
24+
25+
> **Status:** {{ status }} · **Frequency:** {{ frequency }}
26+
27+
## Summary
28+
| Metric | Value |
29+
| :-- | --: |
30+
| Total | {{ summary.total_count | default(0) }} |
31+
| Regressions | {{ summary.regression_count | default(0) }} |
32+
| Suspicious | {{ summary.suspicious_count | default(0) }} |
33+
| No Regression | {{ summary.no_regression_count | default(0) }} |
34+
| Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
35+
36+
## Data Windows
37+
baseline is the data we aggregated (max, min, earliest, latest) as measurement to decide whether metric values
38+
in target are considered regressions.
39+
40+
### Baseline window
41+
- **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
42+
- **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
43+
44+
### Target window
45+
- **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
46+
- **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
47+
48+
{% if regression_items and regression_items|length > 0 %}
49+
## Regression Glance
50+
{% if url %}
51+
Use items below in [HUD]({{ url }}) to see regression.
52+
{% endif %}
53+
54+
{% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
55+
{% if regression_items|length > 10 %}
56+
… (showing first 10 only, total {{ regression_items|length }} regressions)
57+
{% endif %}
58+
{% for item in items %}
59+
{% set kv = item.group_info|dictsort %}
60+
{{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
61+
|{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
62+
|{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
63+
{% if item.baseline_point -%}
64+
- **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
65+
- **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
66+
{{ "\n" }}
67+
{%- endif %}
68+
{% endfor %}
69+
{% endif %}
70+
"""
1871

1972

2073
class ReportManager:
@@ -68,6 +121,47 @@ def run(
68121
except Exception as e:
69122
logger.error(f"failed to insert report to db, error: {e}")
70123
raise
124+
self.notify_github_comment(github_token)
125+
126+
def notify_github_comment(self, github_token: str):
127+
if self.status != "regression":
128+
logger.info(
129+
"[%s] no regression found, skip notification",
130+
self.config_id,
131+
)
132+
return
133+
134+
github_notification = self.config.policy.get_github_notification_config()
135+
if not github_notification:
136+
logger.info(
137+
"[%s] no github notification config found, skip notification",
138+
self.config_id,
139+
)
140+
return
141+
logger.info("[%s] prepareing content", self.config_id)
142+
content = self._to_markdoown()
143+
logger.info("[%s] create comment to github issue", self.config_id)
144+
github_notification.create_github_comment(content, github_token)
145+
logger.info("[%s] done. comment is sent to github", self.config_id)
146+
147+
def _to_markdoown(self):
148+
self.regression_items = self._collect_regression_items()
149+
url = ""
150+
if self.config.hud_info:
151+
url = self.config.hud_info.get("url", "")
152+
153+
md = Template(REPORT_MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True).render(
154+
id=self.id,
155+
url=url,
156+
status=self.status,
157+
report_id=self.config_id,
158+
summary=self.report["summary"],
159+
baseline=self.baseline,
160+
target=self.target,
161+
frequency=self.config.policy.frequency.get_text(),
162+
regression_items=self.regression_items,
163+
)
164+
return md
71165

72166
def _collect_regression_items(self) -> list[PerGroupResult]:
73167
items = []

aws/lambda/benchmark_regression_summary_report/lambda_function.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
from dateutil.parser import isoparse
1919

2020

21+
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
2122
BENCHMARK_REGRESSION_REPORT_TABLE = "fortesting.benchmark_regression_report"
23+
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
2224

2325
logging.basicConfig(
2426
level=logging.INFO,
@@ -33,9 +35,6 @@
3335
"CLICKHOUSE_USERNAME": os.getenv("CLICKHOUSE_USERNAME", ""),
3436
}
3537

36-
# TODO(elainewy): change this to benchmark.benchmark_regression_report once the table is created
37-
BENCHMARK_REGRESSION_TRACKING_CONFIG_IDS = ["compiler_regression"]
38-
3938

4039
def format_ts_with_t(ts: int) -> str:
4140
return dt.datetime.fromtimestamp(ts, tz=dt.timezone.utc).strftime(
@@ -138,7 +137,6 @@ def process(
138137
f"with frequency {report_freq.get_text()}..."
139138
)
140139

141-
self.log_info("get target data")
142140
target, ls, le = self.get_target(config, self.end_time)
143141
if not target:
144142
self.log_info(
@@ -172,7 +170,8 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
172170
target_s = end_time - data_range.comparison_timedelta_s()
173171
target_e = end_time
174172
self.log_info(
175-
f"get baseline data for time range [{format_ts_with_t(target_s)},{format_ts_with_t(target_e)}]"
173+
"getting target data for time range "
174+
f"[{format_ts_with_t(target_s)},{format_ts_with_t(target_e)}] ..."
176175
)
177176
target_data = self._fetch_from_benchmark_ts_api(
178177
config_id=config.id,
@@ -181,7 +180,7 @@ def get_target(self, config: BenchmarkConfig, end_time: int):
181180
source=config.source,
182181
)
183182
self.log_info(
184-
f"found {len(target_data.time_series)} # of data, with time range {target_data.time_range}",
183+
f"done. found {len(target_data.time_series)} # of data groups, with time range {target_data.time_range}",
185184
)
186185
if not target_data.time_range or not target_data.time_range.end:
187186
return None, target_s, target_e
@@ -196,7 +195,8 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
196195
baseline_s = end_time - data_range.total_timedelta_s()
197196
baseline_e = end_time - data_range.comparison_timedelta_s()
198197
self.log_info(
199-
f"get baseline data for time range [{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}]"
198+
"getting baseline data for time range "
199+
f"[{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}] ..."
200200
)
201201
# fetch baseline from api
202202
raw_data = self._fetch_from_benchmark_ts_api(
@@ -207,11 +207,7 @@ def get_baseline(self, config: BenchmarkConfig, end_time: int):
207207
)
208208

209209
self.log_info(
210-
f"get baseline data for time range [{format_ts_with_t(baseline_s)},{format_ts_with_t(baseline_e)}]"
211-
)
212-
213-
self.log_info(
214-
f"found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
210+
f"Done. found {len(raw_data.time_series)} # of data, with time range {raw_data.time_range}",
215211
)
216212

217213
baseline_latest_ts = int(isoparse(raw_data.time_range.end).timestamp())
@@ -269,11 +265,8 @@ def _fetch_from_benchmark_ts_api(
269265
)
270266

271267
elapsed_ms = (time.perf_counter() - t0) * 1000.0
272-
logger.info(
273-
"[%s] call OK in %.1f ms (query_len=%d)",
274-
config_id,
275-
elapsed_ms,
276-
len(query),
268+
self.log_info(
269+
f"call OK in {elapsed_ms} ms (query_len={len(query)})",
277270
)
278271
return resp.data
279272
except requests.exceptions.HTTPError as e:
@@ -290,7 +283,7 @@ def _fetch_from_benchmark_ts_api(
290283
else str(e)
291284
)
292285
self.log_error(
293-
f"[{config_id}] call FAILED in {elapsed_ms} ms: {err_msg}",
286+
f"call FAILED in {elapsed_ms} ms: {err_msg}",
294287
)
295288
raise
296289

0 commit comments

Comments
 (0)