Skip to content

Commit 386d14d

Browse files
committed
add github notification
ghstack-source-id: 8624785 Pull-Request: #7096
1 parent a8954d9 commit 386d14d

File tree

5 files changed

+230
-88
lines changed

5 files changed

+230
-88
lines changed

aws/lambda/benchmark_regression_summary_report/common/benchmark_time_series_api_model.py

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
import datetime as dt
21
from dataclasses import dataclass, field
3-
from typing import Any, Dict, List, Optional
2+
from typing import Any, Dict, List
43

54
import requests
65

@@ -63,24 +62,3 @@ def from_request(
6362
except Exception as e:
6463
raise RuntimeError(f"Malformed API payload: {e}")
6564
return cls(data=BenchmarkTimeSeriesApiData(time_series=ts, time_range=tr))
66-
67-
68-
def get_latest_meta_info(
69-
time_series: List[BenchmarkTimeSeriesItem],
70-
) -> Optional[dict[str, Any]]:
71-
if not time_series:
72-
return None
73-
74-
pts = [p for s in time_series for p in s.data]
75-
latest = max(
76-
pts,
77-
key=lambda p: dt.datetime.fromisoformat(
78-
p["granularity_bucket"].replace("Z", "+00:00")
79-
),
80-
)
81-
return {
82-
"commit": latest.get("commit", ""),
83-
"branch": latest.get("branch", ""),
84-
"timestamp": latest.get("granularity_bucket", ""),
85-
"workflow_id": latest.get("workflow_id", ""),
86-
}

aws/lambda/benchmark_regression_summary_report/common/config.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
# their own benchmark regression config, currently place
1616
# here for lambda
1717

18-
1918
COMPILER_BENCHMARK_CONFIG = BenchmarkConfig(
2019
name="Compiler Benchmark Regression",
2120
id="compiler_regression",
@@ -44,6 +43,9 @@
4443
}
4544
""",
4645
),
46+
hud_info={
47+
"url": "https://hud.pytorch.org/benchmark/compilers",
48+
},
4749
# set baseline from past 7 days using avg, and compare with the last 1 day
4850
policy=Policy(
4951
frequency=Frequency(value=1, unit="days"),
@@ -67,7 +69,7 @@
6769
"compression_ratio": RegressionPolicy(
6870
name="compression_ratio",
6971
condition="greater_equal",
70-
threshold=0.9,
72+
threshold=0.95,
7173
baseline_aggregation="max",
7274
),
7375
},

aws/lambda/benchmark_regression_summary_report/common/config_model.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,7 @@ class BenchmarkConfig:
241241
id: str
242242
source: BenchmarkApiSource
243243
policy: Policy
244+
hud_info: Optional[dict[str, Any]] = None
244245

245246

246247
@dataclass

aws/lambda/benchmark_regression_summary_report/common/report_manager.py

Lines changed: 188 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,63 @@
1212
get_regression_status,
1313
PerGroupResult,
1414
)
15+
from jinja2 import Template
1516

1617

1718
logger = logging.getLogger()
19+
REPORT_MD_TEMPLATE = """# Benchmark Report {{ id }}
20+
config_id: `{{ report_id }}`
21+
22+
We have detected **{{ status }}** in benchmark results for `{{ report_id }}` (id: `{{ id }}`).
23+
(HUD benchmark regression page coming soon...)
24+
25+
> **Status:** {{ status }} · **Frequency:** {{ frequency }}
26+
27+
## Summary
28+
| Metric | Value |
29+
| :-- | --: |
30+
| Total | {{ summary.total_count | default(0) }} |
31+
| Regressions | {{ summary.regression_count | default(0) }} |
32+
| Suspicious | {{ summary.suspicious_count | default(0) }} |
33+
| No Regression | {{ summary.no_regression_count | default(0) }} |
34+
| Insufficient Data | {{ summary.insufficient_data_count | default(0) }} |
35+
36+
## Data Windows
37+
Baseline is a single reference value (e.g., mean, max, min, latest) aggregated from the previous few days,
38+
used to detect regressions by comparing against metric values in the target window.
39+
40+
### Baseline window (used to calculate baseline value)
41+
- **Start:** `{{ baseline.start.timestamp | default('') }}` (commit: `{{ baseline.start.commit | default('') }}`)
42+
- **End:** `{{ baseline.end.timestamp | default('') }}` (commit: `{{ baseline.end.commit | default('') }}`)
43+
44+
### Target window (used to compare against baseline value)
45+
- **Start:** `{{ target.start.timestamp | default('') }}` (commit: `{{ target.start.commit | default('') }}`)
46+
- **End:** `{{ target.end.timestamp | default('') }}` (commit: `{{ target.end.commit | default('') }}`)
47+
48+
{% if regression_items and regression_items|length > 0 %}
49+
## Regression Glance
50+
{% if url %}
51+
Use items below in [HUD]({{ url }}) to see regression.
52+
{% endif %}
53+
54+
{% set items = regression_items if regression_items|length <= 10 else regression_items[:10] %}
55+
{% if regression_items|length > 10 %}
56+
… (showing first 10 only, total {{ regression_items|length }} regressions)
57+
{% endif %}
58+
{% for item in items %}
59+
{% set kv = item.group_info|dictsort %}
60+
{{ "" }}|{% for k, _ in kv %}{{ k }} |{% endfor %}{{ "\n" -}}
61+
|{% for _k, _ in kv %}---|{% endfor %}{{ "\n" -}}
62+
|{% for _k, v in kv %}{{ v }} |{% endfor %}{{ "\n\n" -}}
63+
{% if item.baseline_point -%}
64+
- **baseline**: {{ item.baseline_point.value}},
65+
- **startTime**: {{ item.baseline_point.timestamp }}, **endTime**: {{ target.end.timestamp }}
66+
- **lcommit**: `{{ item.baseline_point.commit }}`, **rcommit**: `{{ target.end.commit }}`
67+
{{ "\n" }}
68+
{%- endif %}
69+
{% endfor %}
70+
{% endif %}
71+
"""
1872

1973

2074
class ReportManager:
@@ -68,6 +122,57 @@ def run(
68122
except Exception as e:
69123
logger.error(f"failed to insert report to db, error: {e}")
70124
raise
125+
self.notify_github_comment(github_token)
126+
127+
def notify_github_comment(self, github_token: str):
128+
if self.status != "regression":
129+
logger.info(
130+
"[%s] no regression found, skip notification",
131+
self.config_id,
132+
)
133+
return
134+
135+
github_notification = self.config.policy.get_github_notification_config()
136+
if not github_notification:
137+
logger.info(
138+
"[%s] no github notification config found, skip notification",
139+
self.config_id,
140+
)
141+
return
142+
logger.info("[%s] preparing github comment content", self.config_id)
143+
content = self._to_markdoown()
144+
if self.is_dry_run:
145+
logger.info(
146+
"[%s]dry run, skip sending comment to github, report(%s)",
147+
self.config_id,
148+
self.id,
149+
)
150+
logger.info("[dry run] printing comment content")
151+
print(json.dumps(content, indent=2, default=str))
152+
logger.info("[dry run] Done! Finish printing comment content")
153+
return
154+
logger.info("[%s] create comment to github issue", self.config_id)
155+
github_notification.create_github_comment(content, github_token)
156+
logger.info("[%s] done. comment is sent to github", self.config_id)
157+
158+
def _to_markdoown(self):
159+
self.regression_items = self._collect_regression_items()
160+
url = ""
161+
if self.config.hud_info:
162+
url = self.config.hud_info.get("url", "")
163+
164+
md = Template(REPORT_MD_TEMPLATE, trim_blocks=True, lstrip_blocks=True).render(
165+
id=self.id,
166+
url=url,
167+
status=self.status,
168+
report_id=self.config_id,
169+
summary=self.report["summary"],
170+
baseline=self.baseline,
171+
target=self.target,
172+
frequency=self.config.policy.frequency.get_text(),
173+
regression_items=self.regression_items,
174+
)
175+
return md
71176

72177
def _collect_regression_items(self) -> list[PerGroupResult]:
73178
items = []
@@ -120,11 +225,30 @@ def insert_to_db(
120225
"repo": self.repo,
121226
"report_json": report_json,
122227
}
228+
229+
if self.is_dry_run:
230+
logger.info(
231+
"[%s]dry run, skip inserting report to db, report(%s)",
232+
self.config_id,
233+
self.id,
234+
)
235+
logger.info("[dry run] printing db params data")
236+
if self.is_dry_run:
237+
print(json.dumps(params, indent=2, default=str))
238+
logger.info("[dry run] Done! Finish printing db params data")
239+
return
123240
logger.info(
124241
"[%s]inserting benchmark regression report(%s)", self.config_id, self.id
125242
)
126-
self._db_insert(cc, self.db_table_name, params)
127-
243+
try:
244+
self._db_insert(cc, self.db_table_name, params)
245+
except Exception:
246+
logger.exception(
247+
"[%s] failed to insert report to target table %s",
248+
self.config_id,
249+
self.db_table_name,
250+
)
251+
raise
128252
logger.info(
129253
"[%s] Done. inserted benchmark regression report(%s)",
130254
self.config_id,
@@ -136,14 +260,28 @@ def _db_insert(
136260
cc: clickhouse_connect.driver.Client,
137261
table: str,
138262
params: dict,
139-
) -> tuple[bool, int]:
263+
):
264+
"""
265+
Insert one row into ClickHouse using cc.insert().
266+
Returns (inserted, written_rows).
267+
"""
268+
if self._row_exists(
269+
cc,
270+
table,
271+
params["report_id"],
272+
params["type"],
273+
params["repo"],
274+
params["last_record_ts"],
275+
):
276+
return False, 0
277+
140278
sql = f"""
141279
INSERT INTO {table} (
142280
id,
143281
report_id,
144282
last_record_ts,
145283
last_record_commit,
146-
`type`,
284+
type,
147285
status,
148286
regression_count,
149287
insufficient_data_count,
@@ -152,49 +290,56 @@ def _db_insert(
152290
repo,
153291
report
154292
)
155-
SELECT
156-
{{id:UUID}},
157-
{{report_id:String}},
158-
{{last_record_ts:DateTime64(0)}},
159-
{{last_record_commit:String}},
160-
{{type:String}},
161-
{{status:String}},
162-
{{regression_count:UInt32}},
163-
{{insufficient_data_count:UInt32}},
164-
{{suspected_regression_count:UInt32}},
165-
{{total_count:UInt32}},
166-
{{repo:String}},
167-
{{report_json:String}}
168-
FROM system.one
169-
WHERE NOT EXISTS (
170-
SELECT 1
171-
FROM {table}
172-
WHERE report_id = {{report_id:String}}
173-
AND `type` = {{type:String}}
174-
AND repo = {{repo:String}}
175-
AND stamp = toDate({{last_record_ts:DateTime64(0)}})
293+
VALUES
294+
(
295+
%(id)s,
296+
%(report_id)s,
297+
%(last_record_ts)s,
298+
%(last_record_commit)s,
299+
%(type)s,
300+
%(status)s,
301+
%(regression_count)s,
302+
%(insufficient_data_count)s,
303+
%(suspected_regression_count)s,
304+
%(total_count)s,
305+
%(repo)s,
306+
%(report_json)s
176307
)
308+
"""
309+
cc.command(sql, parameters=params)
310+
311+
def _row_exists(
312+
self,
313+
cc: clickhouse_connect.driver.Client,
314+
table: str,
315+
report_id: str,
316+
type_str: str,
317+
repo: str,
318+
last_record_ts,
319+
) -> bool:
320+
"""
321+
Check if a row already exists with the same (report_id, type, repo, stamp).
322+
Returns True if found, False otherwise.
323+
"""
324+
sql = f"""
325+
SELECT 1
326+
FROM {table}
327+
WHERE report_id = %(report_id)s
328+
AND type = %(type)s
329+
AND repo = %(repo)s
330+
AND stamp = toDate(%(last_record_ts)s)
177331
LIMIT 1
178332
"""
179-
180-
res = cc.query(sql, parameters=params)
181-
summary = getattr(res, "summary", {}) or {}
182-
183-
written_any = (
184-
summary.get("written_rows")
185-
or summary.get("rows_written")
186-
or summary.get("written", 0)
187-
or 0
333+
res = cc.query(
334+
sql,
335+
parameters={
336+
"report_id": report_id,
337+
"type": type_str,
338+
"repo": repo,
339+
"last_record_ts": last_record_ts,
340+
},
188341
)
189-
190-
logger.info("wrting to db summmary %s", summary)
191-
try:
192-
written = int(written_any)
193-
except (TypeError, ValueError):
194-
written = 0
195-
196-
inserted = written > 0
197-
return inserted, written
342+
return bool(res.result_rows)
198343

199344
def _validate_latest_meta_info(
200345
self, latest_meta_info: Dict[str, Any]

0 commit comments

Comments
 (0)