itdojp
diff --git a/‎.github/workflows/ci-extended.yml‎
Lines changed: 168 additions & 0 deletions b/‎.github/workflows/ci-extended.yml‎
Lines changed: 168 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎docs/ci-policy.md‎
Lines changed: 6 additions & 0 deletions b/‎docs/ci-policy.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎docs/ci/heavy-test-album.md‎
Lines changed: 39 additions & 0 deletions b/‎docs/ci/heavy-test-album.md‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎docs/ci/heavy-test-alerts.md‎
Lines changed: 67 additions & 0 deletions b/‎docs/ci/heavy-test-alerts.md‎
Lines changed: 67 additions & 0 deletions
@@ -13,6 +13,11 @@ permissions: read-all
 
 jobs:
   extended:
+    # Job-scoped token permissions: read access to repository contents plus
+    # write access to issues, which the "Create heavy trend issue" step needs
+    # for its github.rest.issues.create call.
+    permissions:
+      contents: read
+      issues: write
+    # Expose the Slack webhook secret as a job-level env var so the Slack
+    # notification step can test `env.SLACK_WEBHOOK_URL != ''` in its `if:`.
+    env:
+      SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
     runs-on: ubuntu-latest
     if: ${{ github.event_name != 'pull_request' || (github.event_name == 'pull_request' && !github.event.pull_request.head.repo.fork) }}
     steps:
@@ -100,6 +105,44 @@ jobs:
         if: ${{ steps.flags.outputs.should_run == 'true' }}
         run: pnpm install --frozen-lockfile || pnpm install --no-frozen-lockfile
 
+      # Compute the cache key and restore keys for heavy test artifacts.
+      # GitHub Actions caches are immutable: an existing key can never be
+      # overwritten. Scheduled runs therefore use a unique key per run
+      # (suffix GITHUB_RUN_ID) so every nightly run can save a fresh baseline.
+      # The `${prefix}-schedule` restore key prefix-matches the most recently
+      # created scheduled cache, including caches saved under the bare key.
+      - name: Determine heavy test cache key
+        if: ${{ steps.flags.outputs.should_run == 'true' }}
+        id: heavy-cache-key
+        shell: bash
+        run: |
+          set -euo pipefail
+          prefix="ci-heavy-${{ runner.os }}"
+          if [ "${{ github.event_name }}" = "schedule" ]; then
+            key="${prefix}-schedule-${GITHUB_RUN_ID}"
+            restore_keys="${prefix}-schedule"$'\n'"${prefix}-"
+          else
+            # Non-scheduled runs key on the commit, so only a rerun of the same
+            # SHA produces an exact hit.
+            key="${prefix}-${GITHUB_SHA}"
+            restore_keys="${prefix}-"
+          fi
+          # restore_keys may span several lines, so it is written with the
+          # delimiter (heredoc-style) form of GITHUB_OUTPUT.
+          {
+            echo "key=${key}"
+            echo "restore_keys<<__RESTORE__"
+            printf '%s\n' "${restore_keys}"
+            echo "__RESTORE__"
+          } >> "$GITHUB_OUTPUT"
+
+      # Restore only; saving happens in a separate step at the end of the job.
+      - name: Restore heavy test cache
+        if: ${{ steps.flags.outputs.should_run == 'true' }}
+        id: restore-heavy
+        uses: actions/cache/restore@v4
+        with:
+          path: .cache/test-results
+          key: ${{ steps.heavy-cache-key.outputs.key }}
+          restore-keys: ${{ steps.heavy-cache-key.outputs.restore_keys }}
+
+      # Rehydrate on an exact key hit (rerun of the same commit or run), or when
+      # a scheduled run matched a previous scheduled cache through the restore
+      # keys. Commit-keyed caches end in a hex SHA and cannot contain
+      # "-schedule", so a fallback match on `${prefix}-` is still ignored.
+      - name: Rehydrate cached test artifacts
+        if: ${{ steps.flags.outputs.should_run == 'true' && (steps.restore-heavy.outputs.cache-hit == 'true' || (github.event_name == 'schedule' && contains(steps.restore-heavy.outputs.cache-matched-key, '-schedule'))) }}
+        run: node scripts/pipelines/sync-test-results.mjs --restore
+
+      # Runs whenever should_run is true; unlike the rehydrate step it is not
+      # gated on a cache hit.
+      # NOTE(review): presumably records the pre-test baseline used by the later
+      # trend comparison — confirm against sync-test-results.mjs.
+      - name: Snapshot heavy test baseline
+        if: ${{ steps.flags.outputs.should_run == 'true' }}
+        run: node scripts/pipelines/sync-test-results.mjs --snapshot
+
       - name: Run integration tests
         if: ${{ steps.flags.outputs.run_integration == 'true' }}
         run: pnpm run test:int
@@ -247,3 +290,128 @@ jobs:
             echo "- run_mbt: ${RUN_MBT}"
             echo "- run_mutation: ${RUN_MUTATION}"
           } >> "$GITHUB_STEP_SUMMARY"
+
+      # `always()` keeps this step running even when an earlier step failed, so a
+      # trend comparison is still attempted for failing runs.
+      - name: Compare heavy test trends
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' }}
+        run: node scripts/pipelines/compare-test-trends.mjs
+
+      # Scheduled runs only: copy the latest trend report into the history
+      # directory under a UTC-timestamped file name.
+      - name: Archive heavy test trend history
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' && github.event_name == 'schedule' }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          report="reports/heavy-test-trends.json"
+          history_dir="reports/heavy-test-trends-history"
+          if [ -f "${report}" ]; then
+            mkdir -p "${history_dir}"
+            # Colons are avoided in the timestamp so it is a safe file name.
+            snapshot_id="$(date -u +'%Y-%m-%dT%H-%M-%SZ')"
+            cp "${report}" "${history_dir}/${snapshot_id}.json"
+            printf 'Archived heavy test trend snapshot: %s\n' "${snapshot_id}"
+          else
+            # Nothing to archive is not an error; the step still succeeds.
+            echo "Trend report not generated; skipping archive."
+          fi
+
+      # Scheduled runs only: render the trend summary with warning/critical
+      # thresholds and publish the summary's `highestSeverity` field as the
+      # `severity` step output read by the Slack and issue steps.
+      - name: Render heavy test trend summary
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' && github.event_name == 'schedule' }}
+        id: heavy-summary
+        shell: bash
+        run: |
+          set -euo pipefail
+          summary_json="reports/heavy-test-trends-history/summary.json"
+          # Alert thresholds, passed through to the renderer unchanged.
+          thresholds=(
+            --warn-mutation-score 98
+            --critical-mutation-score 96
+            --warn-mutation-delta -1.0
+            --critical-mutation-delta -2.5
+            --warn-property-failed 1
+            --critical-property-failed 3
+            --warn-property-failure-rate 0.1
+            --warn-mbt-violations 1
+            --critical-mbt-violations 3
+          )
+          node scripts/pipelines/render-heavy-trend-summary.mjs \
+            --limit 5 \
+            --json-output "${summary_json}" \
+            "${thresholds[@]}"
+          highest="$(jq -r '.highestSeverity' "${summary_json}")"
+          echo "severity=${highest}" >> "$GITHUB_OUTPUT"
+
+      # Publish the latest trend report for every event type. A missing report is
+      # tolerated (`if-no-files-found: ignore`); the artifact is kept 14 days.
+      - name: Upload heavy test trend report
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: heavy-test-trends
+          path: reports/heavy-test-trends.json
+          if-no-files-found: ignore
+          retention-days: 14
+
+      # Scheduled runs only: publish the whole history directory (archived
+      # snapshots plus the rendered summary output) and keep it for 30 days.
+      - name: Upload heavy test trend history
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' && github.event_name == 'schedule' }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: heavy-test-trends-history
+          path: reports/heavy-test-trends-history
+          if-no-files-found: ignore
+          retention-days: 30
+
+      # Alert only when the summary step produced a non-ok severity. An empty
+      # output (summary step failed before writing it) would otherwise pass the
+      # `!= 'ok'` check and post an alert with a blank severity. The step is also
+      # skipped when no Slack webhook secret is configured.
+      - name: Notify Slack (heavy trend alert)
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' && github.event_name == 'schedule' && steps.heavy-summary.outputs.severity != '' && steps.heavy-summary.outputs.severity != 'ok' && env.SLACK_WEBHOOK_URL != '' }}
+        uses: rtCamp/action-slack-notify@v2
+        env:
+          SLACK_WEBHOOK: ${{ env.SLACK_WEBHOOK_URL }}
+          # Red for critical; amber for any other non-ok severity.
+          SLACK_COLOR: ${{ steps.heavy-summary.outputs.severity == 'critical' && '#E01E5A' || '#FFC107' }}
+          SLACK_MESSAGE: |
+            ${{ steps.heavy-summary.outputs.severity == 'critical' && ':rotating_light:' || ':warning:' }} Heavy test trend ${{ steps.heavy-summary.outputs.severity }} detected in ${{ github.workflow }}
+            • run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+            • summary: heavy-test-trends-history/summary.md
+
+      # Scheduled runs only: file a GitHub issue for the first critical snapshot
+      # found in the rendered summary. Creation is idempotent per snapshot: when
+      # an open issue with the same title already exists, nothing is created.
+      - name: Create heavy trend issue
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' && github.event_name == 'schedule' && steps.heavy-summary.outputs.severity == 'critical' }}
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const fs = require('fs');
+            const path = require('path');
+            const summaryPath = path.resolve('reports/heavy-test-trends-history/summary.json');
+            const summary = JSON.parse(fs.readFileSync(summaryPath, 'utf8'));
+            // Tolerate a summary without a snapshots array.
+            const snapshots = Array.isArray(summary.snapshots) ? summary.snapshots : [];
+            const criticalSnapshot = snapshots.find(s => (s.severity || '').toLowerCase() === 'critical');
+            if (!criticalSnapshot) {
+              core.info('No critical snapshot detected; skipping issue creation.');
+              return;
+            }
+            const criticalEntries = (criticalSnapshot.entries || []).filter(e => (e.severity || '').toLowerCase() === 'critical');
+            const detailLines = criticalEntries.map(entry => {
+              const reasons = (entry.reasons || []).join('; ') || 'threshold exceeded';
+              return `- **${entry.label}**: ${reasons}`;
+            }).join('\n');
+            const title = `[CI Extended] Heavy test critical alert - ${criticalSnapshot.label}`;
+            // Avoid duplicate alerts: the same snapshot label can be seen again
+            // on a later run or a rerun, which would otherwise file the same
+            // issue twice. Only open issues carrying the alert label are checked.
+            const openAlerts = await github.paginate(github.rest.issues.listForRepo, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              state: 'open',
+              labels: 'flaky-test',
+              per_page: 100,
+            });
+            const duplicate = openAlerts.find(issue => issue.title === title);
+            if (duplicate) {
+              core.info(`Open issue #${duplicate.number} already tracks snapshot ${criticalSnapshot.label}; skipping issue creation.`);
+              return;
+            }
+            const body = [
+              '## Alert',
+              `- Workflow: ${context.workflow} (run: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId})`,
+              '- Severity: critical',
+              `- Snapshot: ${criticalSnapshot.label}`,
+              '- Summary: reports/heavy-test-trends-history/summary.md',
+              '- JSON: reports/heavy-test-trends-history/summary.json',
+              '',
+              '### Details',
+              detailLines || '- (no critical entry details recorded)',
+              '',
+              '## Next Steps',
+              '- [ ] Download artifacts and inspect mutation/property/MBT outputs',
+              '- [ ] Update issue with root cause and resolution plan',
+            ].join('\n');
+            await github.rest.issues.create({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              title,
+              body,
+              labels: ['flaky-test', 'ci-stability', 'needs-investigation'],
+            });
+            core.info(`Issue created for snapshot ${criticalSnapshot.label}`);
+
+      # Runs even after failures (`always()`), ahead of the cache save step.
+      # NOTE(review): presumably copies the run's heavy test outputs into
+      # `.cache/test-results` — confirm against sync-test-results.mjs.
+      - name: Stage test results for caching
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' }}
+        run: node scripts/pipelines/sync-test-results.mjs --store
+
+      # Save under the primary key computed earlier. Skipped when the restore
+      # step reported an exact hit, i.e. a cache with this key already exists.
+      - name: Save heavy test cache
+        if: ${{ always() && steps.flags.outputs.should_run == 'true' && steps.restore-heavy.outputs.cache-hit != 'true' }}
+        uses: actions/cache/save@v4
+        with:
+          path: .cache/test-results
+          key: ${{ steps.heavy-cache-key.outputs.key }}
@@ -55,6 +55,8 @@ cegis-report-*.json
 *-report-*.json
 conformance-results.json
 reports/conformance/
+reports/heavy-test-trends.json
+reports/heavy-test-trends-history/
 sample-*.json
 invalid-sample-*.json
 clean-sample-*.json
 
@@ -29,6 +29,8 @@ This document defines CI policies to keep PR experience fast and stable while ma
 - `run-property`: execute property harness smoke within CI Extended
 - `run-mbt`: execute MBT smoke (`test:mbt:ci`) within CI Extended
 - `run-mutation`: execute mutation auto diff (extended pipeline)
+
+CI Extended restores cached heavy test artifacts (`.cache/test-results`) when rerunning; the cache is refreshed at the end of each run via `node scripts/pipelines/sync-test-results.mjs --store`. Check or warm the cache locally with `--status` / `--restore` before dispatching reruns. Nightly runs use cache keys beginning with `ci-heavy-${{ runner.os }}-schedule` so that the previous nightly baseline is rehydrated before execution. They then call `node scripts/pipelines/compare-test-trends.mjs` to produce a Markdown diff (posted to the Step Summary) and persist both `reports/heavy-test-trends.json` and `reports/heavy-test-trends-history/<timestamp>.json` as artifacts (`heavy-test-trends`, `heavy-test-trends-history`).
 - `qa --light`: run QA in light mode (vitest -> `test:fast`); used in `ae-ci`
 - `ae-benchmark run --ci --light --dry-run`: benchmark config validation only in PRs (fast & stable)
 - `run-qa`: run `ae-ci` workflow’s `qa-bench` on PRs (default off)
@@ -140,6 +142,8 @@ This document defines CI policies to keep PR experience fast and stable while ma
 - `run-property`: CI Extended の property harness のみを実行
 - `run-mbt`: CI Extended の `test:mbt:ci` のみを実行
 - `run-mutation`: CI Extended の mutation auto diff のみを実行
+
+CI Extended 実行後は heavy テスト成果物を `.cache/test-results` に保存し、再実行時に自動復元します。必要に応じて `node scripts/pipelines/sync-test-results.mjs --status` / `--restore` でキャッシュの状態を確認・展開してから再実行できます。差分の確認は `node scripts/pipelines/compare-test-trends.mjs` を実行すると Markdown と JSON で出力され、Step Summary にも自動追記されます。
 - `qa --light`: QA を軽量実行（vitest は `test:fast` 実行）。`ae-ci` の QA ステップに適用済み
 - `ae-benchmark run --ci --light --dry-run`: ベンチは PR では構成検証のみに留め、時間・安定性を優先
 - `run-qa`: `ae-ci` ワークフローの `qa-bench` を PR で実行（既定は非実行）
@@ -166,6 +170,8 @@ This document defines CI policies to keep PR experience fast and stable while ma
 ### test:ci（ライト / 拡張）
 - `test:ci:lite`: Verify Lite のローカル実行口。types:check / lint / build / conformance report をまとめて実行し、PR ブロッキングの最小セットを再現。
 - `test:ci:extended`: Integration（`test:int`）/ property harness / `test:mbt:ci` / `pipelines:pact` を連続実行し、最後に `pipelines:mutation:quick` で mutation quick を叩くローカル向け統合スイート。
+- Heavy test artifacts for the extended suite are cached under `.cache/test-results`; run `node scripts/pipelines/sync-test-results.mjs --restore` before reruns to reuse survivors, MBT summaries, and property harness outputs, then `--store` after local runs to refresh the cache.
+- 拡張スイートで生成される成果物は `.cache/test-results` にキャッシュされるため、再実行前に `node scripts/pipelines/sync-test-results.mjs --restore` を実行すると mutation survivors / MBT summary / property summary を再利用できます（ローカル実行後は `--store` で更新）。
 - `.github/workflows/ci-extended.yml`: `run-ci-extended` で上記一式を PR から opt-in。`run-integration` / `run-property` / `run-mbt` / `run-mutation` で部分実行を選択でき、main push / schedule では常時稼働。
 - Vitest ベースの安定プロファイルは従来通り `test:ci:stable`（Docker/Podman smoke イメージで利用）として提供。
 
 
@@ -0,0 +1,39 @@
+# Heavy Test Trend Visualization PoC
+
+heavy-test トレンドの履歴 (`reports/heavy-test-trends-history/*.json`) を整形して可視化するためのアイデアをまとめます。
+
+## CSV / Markdown 生成スクリプト
+```bash
+pnpm node scripts/pipelines/export-heavy-trend-history.mjs \
+  --history-dir reports/heavy-test-trends-history \
+  --csv-output reports/heavy-test-trends-history/history.csv \
+  --markdown-output reports/heavy-test-trends-history/history.md \
+  --markdown-limit 20
+```
+- `history.csv`: 全スナップショットの `snapshot,label,metric,baseline,current,delta` を含む。Observable や Excel での分析に利用。  
+- `history.md`: 直近 N 件を Markdown テーブルで出力し、PR やドキュメントに貼り付け可能。
+
+## Markdown プレビュー例
+| Snapshot | Label | Metric | Baseline | Current | Δ |
+| --- | --- | --- | --- | --- | --- |
+| ... | ... | ... | ... | ... | ... |
+
+## Observable Notebook での活用例
+1. `history.csv` を `FileAttachment` として Notebook にアップロード。  
+2. 以下のコードで CSV を読み込み、Mutation score のトレンドを描画。
+```js
+data = FileAttachment("history.csv").csv({typed: true})
+viewof metric = Inputs.select([...new Set(data.map(d => d.metric))], {value: "mutationScore"})
+filtered = data.filter(d => d.metric === metric && d.label === "Mutation quick")
+Plot.plot({
+  marginLeft: 80,
+  x: {label: "Snapshot", tickRotate: -45},
+  y: {label: "Current"},
+  marks: [
+    Plot.ruleY([98], {stroke: "orange", strokeDash: "4,2"}),
+    Plot.ruleY([96], {stroke: "red", strokeDash: "4,2"}),
+    Plot.line(filtered, {x: "snapshot", y: "current", stroke: "steelblue", marker: true})
+  ]
+})
+```
+3. `delta` をヒートマップ表示する場合は `Plot.rectY` を利用し、critical しきい値を別色で塗り分ける。
+
+## 今後のステップ
+- Slack 通知／Issue 起票で共有された snapshot ラベルから直接 Notebook の該当位置へリンクさせる。  
+- Grafana 等の BI ツールへ `history.csv` を取り込み、ダッシュボード化する。  
+- delta が連続で悪化した場合の自動コメントなど、可視化結果とアラートを結びつける。
@@ -0,0 +1,67 @@
+# Heavy Test Trend Alerting Plan
+
+## 対象メトリクス
+- **Mutation quick**
+  - `mutationScore` の絶対値と直近比較 (`Δ`).
+  - レポート件数（`survived`, `timedOut`, `ignored`）の急増を補足指標とする。
+- **Property harness**
+  - `failed` 件数、`runs` に対する失敗率。
+  - `traceId` 単位での連続失敗を検知対象にする。
+- **MBT harness**
+  - `violations` 件数、および `runs` / `depth` の極端な変化。
+
+`scripts/pipelines/render-heavy-trend-summary.mjs` で ``--warn-*`` / ``--critical-*`` オプションを指定し、`summary.md` / `summary.json` から自動判定できるようになりました。
+
+## 初期閾値案
+| メトリクス | Warning | Critical | 備考 |
+|------------|---------|----------|------|
+| Mutation score | `current < 98` または `Δ <= -1.0` | `current < 96` または `Δ <= -2.5` | Δ は baseline との差。Warning で Slack 通知、Critical で Issue 起票を検討。|
+| Property failed count | `failed >= 1` | `failed >= 3` | 失敗率が 10% を超えた場合も Warning。|
+| MBT violations | `violations >= 1` | `violations >= 3` | violations が 0 でない場合は詳細ログ確認を必須にする。|
+
+## 通知フロー案
+1. `render-heavy-trend-summary.mjs` に閾値判定オプションを追加し、Markdown 出力内に :warning:/:rotating_light: を埋め込む。
+2. Warning 以上の項目が存在する場合は Slack Webhook（`ci-extended.yml` スケジュール実行に追加済み）でメッセージ送信。
+3. Critical 判定時は GitHub Issue（`flaky-test` ラベル）を自動作成し、関連ログ／アーティファクトへのリンクを添付。
+4. PR 上で手動 rerun を行う際も同スクリプトを実行し、Step Summary に判定結果を表示する。
+
+### Critical 判定時の Issue 起票案
+- 作成先: `itdojp/ae-framework` / labels: `flaky-test`, `ci-stability`, `needs-investigation`
+- タイトル例: `[CI Extended] Heavy test critical alert - mutation score < 96`
+- 本文テンプレート:
+  ```md
+  ## Alert
+  - Workflow: ${{ github.workflow }} (run: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})
+  - Severity: critical
+  - Snapshot: <timestamp>
+  - Summary: heavy-test-trends-history/summary.md
+  - JSON: heavy-test-trends-history/summary.json
+
+  ## Next Steps
+  - [ ] Download artifacts and inspect mutation/property/MBT outputs
+  - [ ] Update issue with root cause and resolution plan
+  ```
+- 実装: `ci-extended.yml` のスケジュール実行で `severity == 'critical'` の場合に `actions/github-script` から Issue を作成する（ジョブに `permissions: issues: write` を付与済み。`gh issue create` を使う場合も同じ `GITHUB_TOKEN` 権限が必要）。
+
+1. `render-heavy-trend-summary.mjs` に閾値判定オプションを追加し、Markdown 出力内に :warning:/:rotating_light: を埋め込む。
+2. Warning 以上の項目が存在する場合は Slack Webhook（`nightly-monitoring` 既存通知を再利用）でメッセージ送信。
+3. Critical 判定時は GitHub Issue（`flaky-test` ラベル）を自動作成し、関連ログ／アーティファクトへのリンクを添付。
+4. PR 上で手動 rerun を行う際も同スクリプトを実行し、Step Summary に判定結果を表示する。
+
+## 実装ステップ
+1. `render-heavy-trend-summary.mjs` を拡張し、`--warn-mutation-score`, `--critical-mutation-score` 等の CLI オプションで閾値を受け取り、Markdown 内にバッジを表示する。
+2. CLI から JSON 形式の判定結果を吐き出す (`--json-output`)、Slack ワークフローで利用できるようにする。
+3. `ci-extended` のスケジュール実行後に判定スクリプトを実行し、Warning 以上の場合は Slack 通知ステップを追加する。
+4. Critical の場合は `gh issue create` を用いた自動起票か、既存 `nightly-monitoring` に統合する。
+
+## 運用上の注意
+- 閾値は初期案。実データに基づき 2〜3 週間運用した後に見直す。
+- false positive を避けるため、`Δ` 判定は 2 回連続で閾値を下回った場合にエスカレーションするモードも検討する。
+- Slack 通知は深夜帯（JST）に偏るため、通知チャンネルのサイレンス設定を確認する。
+- Issue 起票時には関連する `heavy-test-trends-history/<timestamp>.json` と `summary.md`、該当 run の URL を必ず添付する。
+
+## TODO
+- [x] `render-heavy-trend-summary.mjs` への閾値オプション追加
+- [x] Slack Webhook 通知ステップの実装（`ci-extended.yml` スケジュール実行に追加済み）
+- [x] 自動 Issue 起票フローの設計（Critical 判定時）
+- [ ] 閾値リファインのためのメトリクス実測データ収集