From 9783a3a022d842f107a9836cb921f1e8d9502fa1 Mon Sep 17 00:00:00 2001 From: Slyghtning Date: Fri, 22 Aug 2025 17:20:16 +0200 Subject: [PATCH 1/4] proxy: introduce per-path request rate limiter This commit introduces basic configurable rate limits per pathregex. --- README.md | 49 +++++++++++++++++++++++++++++++ proxy/proxy.go | 42 +++++++++++++++++++++++++-- proxy/ratelimiter.go | 69 ++++++++++++++++++++++++++++++++++++++++++++ proxy/service.go | 22 ++++++++++++++ 4 files changed, 180 insertions(+), 2 deletions(-) create mode 100644 proxy/ratelimiter.go diff --git a/README.md b/README.md index 8ed056a..d9b0a68 100644 --- a/README.md +++ b/README.md @@ -50,3 +50,52 @@ services and APIs. compare with `sample-conf.yaml`. * Start aperture without any command line parameters (`./aperture`), all configuration is done in the `~/.aperture/aperture.yaml` file. + +## Per-endpoint rate limiting + +Aperture supports per-endpoint rate limiting using a token bucket based on golang.org/x/time/rate. +Limits are configured per service using regular expressions that match request paths. + +Key properties: +- Scope: per service, per endpoint (path regex). +- Process local: state is in-memory per Aperture process. In clustered deployments, each instance enforces its own limits. +- Evaluation: all matching rules are enforced; if any matching rule denies a request, the request is rejected. +- Protocols: applies to both REST and gRPC requests. + +Behavior on limit exceed: +- HTTP/REST: returns 429 Too Many Requests and sets a Retry-After header (in seconds). Sub-second delays are rounded up to 1 second. +- gRPC: response uses HTTP/2 headers/trailers with Grpc-Status and Grpc-Message indicating the error (message: "rate limit exceeded"). +- CORS headers are included consistently. + +Configuration fields (under a service): +- pathregex: regular expression matched against the URL path (e.g., "/package.Service/Method"). +- requests: allowed number of requests per window. +- per: size of the time window (e.g., 1s, 1m). Default: 1s. +- burst: additional burst capacity. Default: equal to requests. + +Example (see sample-conf.yaml for a full example): + +```yaml +services: + - name: "service1" + hostregexp: '^service1.com$' + pathregexp: '^/.*$' + address: "127.0.0.1:10009" + protocol: https + + # Optional per-endpoint rate limits using a token bucket. + ratelimits: + - pathregex: '^/looprpc.SwapServer/LoopOutTerms.*$' + requests: 5 + per: 1s + burst: 5 + - pathregex: '^/looprpc.SwapServer/LoopOutQuote.*$' + requests: 2 + per: 1s + burst: 2 +``` + +Notes: +- If multiple ratelimits match a request path, all must allow the request; the strictest rule will effectively apply. +- If requests or burst are set to 0 or negative, safe defaults are used (requests defaults to 1; burst defaults to requests). +- If per is omitted or 0, it defaults to 1s. diff --git a/proxy/proxy.go b/proxy/proxy.go index 7dfb5d4..4a1699a 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -10,6 +10,7 @@ import ( "os" "strconv" "strings" + "time" "github.com/lightninglabs/aperture/auth" "github.com/lightninglabs/aperture/l402" @@ -167,10 +168,47 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } + // Apply per-endpoint rate limits, if configured. + for _, rl := range target.compiledRateLimits { + if !rl.re.MatchString(r.URL.Path) { + continue + } + + // Fast path: allow if a token is available now. + if rl.allow() { + continue + } + + // Otherwise, compute suggested retry delay without consuming + // tokens. 
+ res := rl.limiter.Reserve() + if res.OK() { + delay := res.Delay() + res.CancelAt(time.Now()) + if delay > 0 { + // As seconds; for sub-second delays we still + // send 1 second. + secs := int(delay.Seconds()) + if secs == 0 { + secs = 1 + } + w.Header().Set( + "Retry-After", strconv.Itoa(secs), + ) + } + } + addCorsHeaders(w.Header()) + sendDirectResponse( + w, r, http.StatusTooManyRequests, "rate limit exceeded", + ) + + return + } + resourceName := target.ResourceName(r.URL.Path) - // Determine auth level required to access service and dispatch request - // accordingly. + // Determine the auth level required to access service and dispatch the + // request accordingly. authLevel := target.AuthRequired(r) skipInvoiceCreation := target.SkipInvoiceCreation(r) switch { diff --git a/proxy/ratelimiter.go b/proxy/ratelimiter.go new file mode 100644 index 0000000..0f81d01 --- /dev/null +++ b/proxy/ratelimiter.go @@ -0,0 +1,69 @@ +package proxy + +import ( + "regexp" + "time" + + "golang.org/x/time/rate" +) + +// RateLimit defines a per-endpoint rate limit using a token bucket. +// Requests allowed per time window with optional burst. +// Example YAML: +// +// ratelimits: +// - pathregex: '^/looprpc.SwapServer/LoopOutQuote.*$' +// requests: 5 +// per: 1s +// burst: 5 +// +// If burst is 0, it defaults to requests. +// If per is 0, it defaults to 1s. +// Note: All limits are in-memory and per-process. +type RateLimit struct { + PathRegexp string `long:"pathregex" description:"Regular expression to match the path of the URL against for rate limiting" yaml:"pathregex"` + Requests int `long:"requests" description:"Number of requests allowed per time window" yaml:"requests"` + Per time.Duration `long:"per" description:"Size of the time window (e.g., 1s, 1m)" yaml:"per"` + Burst int `long:"burst" description:"Burst size allowed in addition to steady rate" yaml:"burst"` + + // compiled is internal state prepared at startup. + compiled *compiledRateLimit +} + +type compiledRateLimit struct { + re *regexp.Regexp + limiter *rate.Limiter +} + +// compile prepares the regular expression and the limiter. +func (r *RateLimit) compile() error { + per := r.Per + if per == 0 { + per = time.Second + } + requests := r.Requests + if requests <= 0 { + requests = 1 + } + burst := r.Burst + if burst <= 0 { + burst = requests + } + + re, err := regexp.Compile(r.PathRegexp) + if err != nil { + return err + } + + // rate.Every(per/requests) creates an average rate of requests + // per 'per'. + lim := rate.NewLimiter(rate.Every(per/time.Duration(requests)), burst) + r.compiled = &compiledRateLimit{re: re, limiter: lim} + + return nil +} + +// allow returns true if the rate limit permits an event now. +func (c *compiledRateLimit) allow() bool { + return c.limiter.Allow() +} diff --git a/proxy/service.go b/proxy/service.go index 8f58d03..989e571 100644 --- a/proxy/service.go +++ b/proxy/service.go @@ -123,6 +123,12 @@ type Service struct { // invoice creation paths. compiledAuthSkipInvoiceCreationPaths []*regexp.Regexp + // RateLimits configures per-endpoint rate limits for this service. + RateLimits []RateLimit `long:"ratelimits" description:"Per-endpoint rate limits" yaml:"ratelimits"` + + // compiledRateLimits holds compiled regexes and limiter instances. + compiledRateLimits []*compiledRateLimit + freebieDB freebie.DB pricer pricer.Pricer } @@ -236,6 +242,22 @@ func prepareServices(services []*Service) error { service.compiledPathRegexp = compiledPathRegexp } + // Compile rate limiters. 
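+		// An invalid pathregex is reported here, while the services
+		// are being prepared, rather than surfacing only at request
+		// time.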
+		service.compiledRateLimits = make(
+			[]*compiledRateLimit, 0, len(service.RateLimits),
+		)
+		for i := range service.RateLimits {
+			rl := &service.RateLimits[i]
+			if err := rl.compile(); err != nil {
+				return fmt.Errorf("error compiling rate "+
+					"limit for path %s: %w",
+					rl.PathRegexp, err)
+			}
+			service.compiledRateLimits = append(
+				service.compiledRateLimits, rl.compiled,
+			)
+		}
+
 		service.compiledAuthWhitelistPaths = make(
 			[]*regexp.Regexp, 0, len(service.AuthWhitelistPaths),
 		)

From 2a7c5afaf0adf0f59690af7f75471f337805d2b7 Mon Sep 17 00:00:00 2001
From: Slyghtning
Date: Fri, 22 Aug 2025 17:20:35 +0200
Subject: [PATCH 2/4] proxy: adjust sample-conf.yaml with rate limit params

---
 sample-conf.yaml | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/sample-conf.yaml b/sample-conf.yaml
index aaa247d..0c95520 100644
--- a/sample-conf.yaml
+++ b/sample-conf.yaml
@@ -188,6 +188,19 @@ services:
     authwhitelistpaths:
       - '^/freebieservice.*$'
 
+    # Optional per-endpoint rate limits using a token bucket.
+    # Each entry matches a path regex and defines how many requests are
+    # allowed per time window, with an optional burst.
+    ratelimits:
+      - pathregex: '^/looprpc.SwapServer/LoopOutTerms.*$'
+        requests: 5
+        per: 1s
+        burst: 5
+      - pathregex: '^/looprpc.SwapServer/LoopOutQuote.*$'
+        requests: 2
+        per: 1s
+        burst: 2
+
     # A list of regular expressions for path that will skip invoice creation,
     # but still try to do the l402 authentication. This is useful for streaming
     # services, as they are not supported to be the initial request to receive

From dd83f832023fd893eb437b29f039ca0036c81f51 Mon Sep 17 00:00:00 2001
From: Slyghtning
Date: Tue, 26 Aug 2025 13:35:21 +0200
Subject: [PATCH 3/4] proxy: L402-scoped rate limits

This commit introduces per-L402 rate limits. Consumption of a service by
one L402 doesn't eat into the rate bucket of another L402.
---
 README.md            | 27 ++++++++++++++++
 proxy/proxy.go       | 15 +++++----
 proxy/ratelimiter.go | 77 ++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 106 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index d9b0a68..8c84897 100644
--- a/README.md
+++ b/README.md
@@ -99,3 +99,30 @@ Notes:
 - If multiple ratelimits match a request path, all must allow the request; the strictest rule will effectively apply.
 - If requests or burst are set to 0 or negative, safe defaults are used (requests defaults to 1; burst defaults to requests).
 - If per is omitted or 0, it defaults to 1s.
+
+### L402-scoped rate limiting
+
+In addition to path-based limits, Aperture now enforces rate limits on a
+per-L402 key basis when an authenticated L402 request is present.
+
+How it works:
+- Key derivation: For requests that include L402 auth headers, Aperture extracts the preimage (via the Authorization, Grpc-Metadata-Macaroon, or Macaroon headers) and derives a stable key from the preimage hash. Each unique L402 key gets its own token bucket for every matching rate limit rule.
+- Fallback to global: If no L402 key can be derived (unauthenticated or missing preimage), the rule’s global limiter is used for all such requests.
+- Multiple matching rules: If multiple rate limit entries match a path, each rule is checked independently per L402 key; the request must pass all of them.
+
+Headers recognized for L402 key extraction:
+- Authorization: "L402 <macaroon>:<preimage>" (also supports legacy "LSAT ...").
+- Grpc-Metadata-Macaroon: "<macaroon>" (the preimage is read from a macaroon caveat).
+- Macaroon: "<macaroon>" (the preimage is read from a macaroon caveat).
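+
+For illustration, the key is derived from the payment preimage roughly as
+follows (a simplified sketch of the logic in proxy/proxy.go; the helper name
+is hypothetical):
+
+```go
+// l402KeyFromRequest derives a stable rate limit key from the L402
+// preimage carried in the request headers. An empty key means the
+// request falls back to the rule's global limiter.
+func l402KeyFromRequest(r *http.Request) string {
+	if _, preimage, err := l402.FromHeader(&r.Header); err == nil {
+		return preimage.Hash().String()
+	}
+
+	return ""
+}
+```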
+
+Operational notes:
+- Isolation: Authenticated users (distinct L402 keys) do not interfere with each other’s token buckets. A surge from one key won’t consume tokens of another.
+- Unauthenticated traffic: Shares the global bucket per rule. Heavy unauthenticated traffic can still be throttled by the global limiter.
+- Memory/scale: Per-key limiters are kept in an in-memory map per process and currently do not expire. In high-churn environments with many unique L402 keys, this map may grow over time. Consider process restarts or external rate limiting if necessary.
+- Retry-After: When throttled, Aperture computes a suggested delay without consuming a token and sets Retry-After accordingly (minimum 1s), enabling clients to back off.
+
+Example scenario:
+- Suppose a rule allows 5 rps (burst 5) for path
+  "^/looprpc.SwapServer/LoopOutQuote.*$". Two different L402 users (A and B)
+  each get their own 5 rps budget. Unauthenticated requests to the same path
+  share one global 5 rps budget.
\ No newline at end of file
diff --git a/proxy/proxy.go b/proxy/proxy.go
index 4a1699a..e335f83 100644
--- a/proxy/proxy.go
+++ b/proxy/proxy.go
@@ -10,7 +10,6 @@ import (
 	"os"
 	"strconv"
 	"strings"
-	"time"
 
 	"github.com/lightninglabs/aperture/auth"
 	"github.com/lightninglabs/aperture/l402"
@@ -168,23 +167,25 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	// Apply per-endpoint rate limits, if configured.
+	// Apply per-endpoint rate limits, if configured. Determine the L402 key
+	// (preimage hash) if present; otherwise fall back to the global limiter.
+	var l402Key string
+	if _, preimage, err := l402.FromHeader(&r.Header); err == nil {
+		l402Key = preimage.Hash().String()
+	}
 	for _, rl := range target.compiledRateLimits {
 		if !rl.re.MatchString(r.URL.Path) {
 			continue
 		}
 
 		// Fast path: allow if a token is available now.
-		if rl.allow() {
+		if rl.allowFor(l402Key) {
 			continue
 		}
 
 		// Otherwise, compute suggested retry delay without consuming
 		// tokens.
-		res := rl.limiter.Reserve()
-		if res.OK() {
-			delay := res.Delay()
-			res.CancelAt(time.Now())
+		if delay, ok := rl.reserveDelay(l402Key); ok {
 			if delay > 0 {
 				// As seconds; for sub-second delays we still
 				// send 1 second.
diff --git a/proxy/ratelimiter.go b/proxy/ratelimiter.go
index 0f81d01..b9e3929 100644
--- a/proxy/ratelimiter.go
+++ b/proxy/ratelimiter.go
@@ -2,6 +2,7 @@ package proxy
 
 import (
 	"regexp"
+	"sync"
 	"time"
 
 	"golang.org/x/time/rate"
 )
@@ -31,8 +32,23 @@ type RateLimit struct {
 }
 
 type compiledRateLimit struct {
-	re      *regexp.Regexp
+	// Mutex guards the l402Limiters map below.
+	sync.Mutex
+
+	// re is the regular expression used to match the path of the URL.
+	re *regexp.Regexp
+
+	// limiter is used when no per-L402 key can be derived.
 	limiter *rate.Limiter
+
+	// limit is the steady rate applied to each per-L402 key limiter.
+	limit rate.Limit
+
+	// burst is the burst size allowed in addition to the steady rate.
+	burst int
+
+	// l402Limiters maps each L402 key to its own limiter.
+	l402Limiters map[string]*rate.Limiter
 }
 
 // compile prepares the regular expression and the limiter.
@@ -57,13 +73,62 @@ func (r *RateLimit) compile() error {
 
 	// rate.Every(per/requests) creates an average rate of requests
 	// per 'per'.
- lim := rate.NewLimiter(rate.Every(per/time.Duration(requests)), burst) - r.compiled = &compiledRateLimit{re: re, limiter: lim} + limit := rate.Every(per / time.Duration(requests)) + lim := rate.NewLimiter(limit, burst) + r.compiled = &compiledRateLimit{ + re: re, + limiter: lim, + limit: limit, + burst: burst, + l402Limiters: make(map[string]*rate.Limiter), + } return nil } -// allow returns true if the rate limit permits an event now. -func (c *compiledRateLimit) allow() bool { - return c.limiter.Allow() +// allowFor returns true if the rate limit permits an event now for the given +// key. If the key is empty, the global limiter is used. +func (c *compiledRateLimit) allowFor(key string) bool { + if key == "" { + return c.limiter.Allow() + } + l := c.getOrCreate(key) + + return l.Allow() +} + +// reserveDelay reserves a token on the limiter for the given key and returns +// the suggested delay. Callers can use the delay to set Retry-After without +// consuming tokens. +func (c *compiledRateLimit) reserveDelay(key string) (time.Duration, bool) { + var l *rate.Limiter + if key == "" { + l = c.limiter + } else { + l = c.getOrCreate(key) + } + + res := l.Reserve() + if !res.OK() { + return 0, false + } + + delay := res.Delay() + res.CancelAt(time.Now()) + + return delay, true +} + +func (c *compiledRateLimit) getOrCreate(key string) *rate.Limiter { + c.Lock() + defer c.Unlock() + + if l, ok := c.l402Limiters[key]; ok { + return l + } + + l := rate.NewLimiter(c.limit, c.burst) + c.l402Limiters[key] = l + + return l } From 627ad01e998418a5df70c91643e38c6b2dd7654e Mon Sep 17 00:00:00 2001 From: Slyghtning Date: Tue, 26 Aug 2025 15:28:36 +0200 Subject: [PATCH 4/4] proxy: rate limiter tests --- proxy/ratelimiter_integration_test.go | 347 ++++++++++++++++++++++++++ proxy/ratelimiter_test.go | 158 ++++++++++++ 2 files changed, 505 insertions(+) create mode 100644 proxy/ratelimiter_integration_test.go create mode 100644 proxy/ratelimiter_test.go diff --git a/proxy/ratelimiter_integration_test.go b/proxy/ratelimiter_integration_test.go new file mode 100644 index 0000000..27afb29 --- /dev/null +++ b/proxy/ratelimiter_integration_test.go @@ -0,0 +1,347 @@ +package proxy_test + +import ( + "crypto/tls" + "encoding/base64" + "fmt" + "io" + "net" + "net/http" + "path" + "testing" + "time" + + "github.com/lightninglabs/aperture/auth" + "github.com/lightninglabs/aperture/proxy" + proxytest "github.com/lightninglabs/aperture/proxy/testdata" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/metadata" + "google.golang.org/grpc/status" + "gopkg.in/macaroon.v2" +) + +// buildAuthHeader constructs an Authorization: L402 header with a valid +// macaroon and a given preimage hex string. The macaroon content isn't +// validated by the proxy for key derivation, only parsed. +func buildAuthHeader(t *testing.T, preimageHex string) string { + t.Helper() + + dummyMac, err := macaroon.New( + []byte("key"), []byte("id"), "loc", macaroon.LatestVersion, + ) + require.NoError(t, err) + + macBytes, err := dummyMac.MarshalBinary() + require.NoError(t, err) + + macStr := base64.StdEncoding.EncodeToString(macBytes) + + return fmt.Sprintf("L402 %s:%s", macStr, preimageHex) +} + +func TestHTTPRateLimit_RetryAfterAndCORS(t *testing.T) { + // Configure a service with a rate limit for path /http/limited. 
+ services := []*proxy.Service{{ + Address: testTargetServiceAddress, + HostRegexp: testHostRegexp, + PathRegexp: testPathRegexpHTTP, + Protocol: "http", + Auth: "off", + RateLimits: []proxy.RateLimit{{ + PathRegexp: "^/http/limited.*$", + Requests: 1, + Per: 500 * time.Millisecond, + Burst: 1, + }}, + }} + + mockAuth := auth.NewMockAuthenticator() + p, err := proxy.New(mockAuth, services, []string{}) + require.NoError(t, err) + + // Start proxy and backend servers. + srv := &http.Server{ + Addr: testProxyAddr, + Handler: http.HandlerFunc(p.ServeHTTP), + } + go func() { _ = srv.ListenAndServe() }() + t.Cleanup(func() { _ = srv.Close() }) + + backend := &http.Server{Addr: testTargetServiceAddress} + go func() { _ = startBackendHTTP(backend) }() + t.Cleanup(func() { _ = backend.Close() }) + + time.Sleep(100 * time.Millisecond) + + client := &http.Client{} + url := fmt.Sprintf("http://%s/http/limited", testProxyAddr) + + // First request allowed. + resp, err := client.Get(url) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + _ = resp.Body.Close() + + // The second immediate request should be rate limited. + resp, err = client.Get(url) + require.NoError(t, err) + + defer resp.Body.Close() + require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) + + // Retry-After should be set and sub-second rounded up to at least 1. + ra := resp.Header.Get("Retry-After") + require.Equal(t, "1", ra) + + // Ensure the CORS headers are present. + require.Equal(t, "*", resp.Header.Get("Access-Control-Allow-Origin")) + require.NotEmpty(t, resp.Header.Get("Access-Control-Allow-Methods")) + + // Check the html body message. + b, _ := io.ReadAll(resp.Body) + require.Equal(t, "rate limit exceeded\n", string(b)) + + // After waiting 500ms, the request should succeed again. + time.Sleep(500 * time.Millisecond) + resp, err = client.Get(url) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + _ = resp.Body.Close() + + // Test whole-second accuracy: set per=2s and check Retry-After=2. + services[0].RateLimits = []proxy.RateLimit{{ + PathRegexp: "^/http/limited.*$", + Requests: 1, + Per: 2 * time.Second, + Burst: 1, + }} + require.NoError(t, p.UpdateServices(services)) + + resp, err = client.Get(url) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + _ = resp.Body.Close() + + resp, err = client.Get(url) + require.NoError(t, err) + defer resp.Body.Close() + require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) + + // Due to integer truncation on fractional seconds, Retry-After may be + // "1" if the computed delay is slightly under 2s. 
+ val := resp.Header.Get("Retry-After") + require.Contains(t, []string{"1", "2"}, val) +} + +func TestHTTPRateLimit_MultipleRules_Strictest(t *testing.T) { + services := []*proxy.Service{{ + Address: testTargetServiceAddress, + HostRegexp: testHostRegexp, + PathRegexp: testPathRegexpHTTP, + Protocol: "http", + Auth: "off", + RateLimits: []proxy.RateLimit{{ + PathRegexp: "^/http/limited.*$", + Requests: 2, + Per: time.Second, + Burst: 2, + }, { + PathRegexp: "^/http/limited.*$", + Requests: 1, + Per: time.Second, + Burst: 1, + }}, + }} + mockAuth := auth.NewMockAuthenticator() + p, err := proxy.New(mockAuth, services, []string{}) + require.NoError(t, err) + + srv := &http.Server{ + Addr: testProxyAddr, + Handler: http.HandlerFunc(p.ServeHTTP), + } + go func() { _ = srv.ListenAndServe() }() + t.Cleanup(func() { _ = srv.Close() }) + + backend := &http.Server{ + Addr: testTargetServiceAddress, + } + go func() { _ = startBackendHTTP(backend) }() + t.Cleanup(func() { _ = backend.Close() }) + + time.Sleep(100 * time.Millisecond) + + client := &http.Client{} + url := fmt.Sprintf("http://%s/http/limited", testProxyAddr) + + // The first request should be allowed by both rules. + resp, _ := client.Get(url) + require.Equal(t, http.StatusOK, resp.StatusCode) + _ = resp.Body.Close() + + // The second request should be rate limited by the strictest rule. + resp, err = client.Get(url) + require.NoError(t, err) + defer resp.Body.Close() + require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) +} + +func TestHTTPRateLimit_PerIdentityIsolationAndGlobal(t *testing.T) { + services := []*proxy.Service{{ + Address: testTargetServiceAddress, + HostRegexp: testHostRegexp, + PathRegexp: testPathRegexpHTTP, + Protocol: "http", + Auth: "off", + RateLimits: []proxy.RateLimit{{ + PathRegexp: "^/http/limited.*$", + Requests: 1, + Per: time.Second, + Burst: 1, + }}, + }} + mockAuth := auth.NewMockAuthenticator() + p, err := proxy.New(mockAuth, services, []string{}) + require.NoError(t, err) + + srv := &http.Server{ + Addr: testProxyAddr, + Handler: http.HandlerFunc(p.ServeHTTP), + } + go func() { _ = srv.ListenAndServe() }() + t.Cleanup(func() { _ = srv.Close() }) + + backend := &http.Server{ + Addr: testTargetServiceAddress, + } + go func() { _ = startBackendHTTP(backend) }() + t.Cleanup(func() { _ = backend.Close() }) + + time.Sleep(100 * time.Millisecond) + + client := &http.Client{} + url := fmt.Sprintf("http://%s/http/limited", testProxyAddr) + preA := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + preB := "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + authA := buildAuthHeader(t, preA) + authB := buildAuthHeader(t, preB) + + // A: first allowed, second denied. + req, _ := http.NewRequest("GET", url, nil) + req.Header.Set("Authorization", authA) + resp, _ := client.Do(req) + require.Equal(t, http.StatusOK, resp.StatusCode) + _ = resp.Body.Close() + + // Immediate second request should be denied. + req, _ = http.NewRequest("GET", url, nil) + req.Header.Set("Authorization", authA) + resp, err = client.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) + + // B: should be allowed independently. 
+ req, _ = http.NewRequest("GET", url, nil) + req.Header.Set("Authorization", authB) + resp, _ = client.Do(req) + require.Equal(t, http.StatusOK, resp.StatusCode) + _ = resp.Body.Close() + + // No identity (global bucket): first is allowed, then denied; and + // subsequent anonymous request shares same bucket. + resp, _ = client.Get(url) + require.Equal(t, http.StatusOK, resp.StatusCode) + err = resp.Body.Close() + require.NoError(t, err) + + resp, err = client.Get(url) + require.NoError(t, err) + defer resp.Body.Close() + require.Equal(t, http.StatusTooManyRequests, resp.StatusCode) +} + +func TestGRPCRateLimit_ResponsesAndCORS(t *testing.T) { + // Start TLS infra like runGRPCTest. + certFile := path.Join(t.TempDir(), "proxy.cert") + keyFile := path.Join(t.TempDir(), "proxy.key") + cp, creds, certData, err := genCertPair(certFile, keyFile) + require.NoError(t, err) + + // gRPC server + httpListener, err := net.Listen("tcp", testProxyAddr) + require.NoError(t, err) + tlsListener := tls.NewListener( + httpListener, configFromCert(&certData, cp), + ) + t.Cleanup(func() { _ = tlsListener.Close() }) + + services := []*proxy.Service{{ + Address: testTargetServiceAddress, + HostRegexp: testHostRegexp, + PathRegexp: testPathRegexpGRPC, + Protocol: "https", + TLSCertPath: certFile, + Auth: "off", + RateLimits: []proxy.RateLimit{{ + PathRegexp: "^/proxy_test\\.Greeter/SayHello.*$", + Requests: 1, + Per: 2 * time.Second, + Burst: 1, + }}, + }} + + mockAuth := auth.NewMockAuthenticator() + p, err := proxy.New(mockAuth, services, []string{}) + require.NoError(t, err) + + srv := &http.Server{ + Addr: testProxyAddr, + Handler: http.HandlerFunc(p.ServeHTTP), + TLSConfig: configFromCert(&certData, cp), + } + go func() { _ = srv.Serve(tlsListener) }() + t.Cleanup(func() { _ = srv.Close() }) + + // Start backend gRPC server. + serverOpts := []grpc.ServerOption{ + grpc.Creds(credentials.NewTLS(configFromCert(&certData, cp))), + } + backend := grpc.NewServer(serverOpts...) + go func() { _ = startBackendGRPC(backend) }() + t.Cleanup(func() { backend.Stop() }) + + // Dial client. + conn, err := grpc.Dial( + testProxyAddr, grpc.WithTransportCredentials(creds), + ) + require.NoError(t, err) + client := proxytest.NewGreeterClient(conn) + + // First call allowed. + _, err = client.SayHello( + t.Context(), &proxytest.HelloRequest{Name: "x"}, + ) + require.NoError(t, err) + + // The second immediate call should be rate-limited. + var hdrMD, trMD metadata.MD + _, err = client.SayHello( + t.Context(), &proxytest.HelloRequest{Name: "x"}, + grpc.Header(&hdrMD), grpc.Trailer(&trMD), + ) + require.Error(t, err) + + st, _ := status.FromError(err) + require.Equal(t, "rate limit exceeded", st.Message()) + + // CORS headers should be present in either headers or trailers. + vals := hdrMD.Get("Access-Control-Allow-Origin") + if len(vals) == 0 { + vals = trMD.Get("Access-Control-Allow-Origin") + } + require.NotEmpty(t, vals) + require.Equal(t, "*", vals[0]) +} diff --git a/proxy/ratelimiter_test.go b/proxy/ratelimiter_test.go new file mode 100644 index 0000000..6332d21 --- /dev/null +++ b/proxy/ratelimiter_test.go @@ -0,0 +1,158 @@ +package proxy + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// helper to quickly compile a RateLimit for tests. +func mustCompile(t *testing.T, rl RateLimit) *compiledRateLimit { + t.Helper() + require.NoError(t, rl.compile()) + return rl.compiled +} + +func TestRateLimiter_BurstAndSteadyRate(t *testing.T) { + t.Parallel() + // 1 request every 100ms, burst 2. 
+ rl := mustCompile(t, RateLimit{ + PathRegexp: ".*", + Requests: 1, + Per: 100 * time.Millisecond, + Burst: 2, + }) + + L402Key := "userA" + + // Burst allows two requests. + require.True(t, rl.allowFor(L402Key)) + require.True(t, rl.allowFor(L402Key)) + + // Third should be immediately denied. + require.False(t, rl.allowFor(L402Key)) + + // Suggested retry delay should come from the steady rate ~100ms. + delay, ok := rl.reserveDelay(L402Key) + require.True(t, ok) + require.GreaterOrEqual(t, delay, 95*time.Millisecond) + require.Less(t, delay, 200*time.Millisecond) + + // Wait for the delay to pass, then check again. + time.Sleep(delay) + require.True(t, rl.allowFor(L402Key)) + require.False(t, rl.allowFor(L402Key)) +} + +func TestRateLimiter_ReserveDelayDoesNotConsume(t *testing.T) { + t.Parallel() + // 1 request every 200ms, burst 1. Use global limiter (empty L402 key). + rl := mustCompile(t, RateLimit{ + PathRegexp: ".*", + Requests: 1, + Per: 200 * time.Millisecond, + Burst: 1, + }) + + // Consume the single burst token. + require.True(t, rl.allowFor("")) + + // The next one is denied. + require.False(t, rl.allowFor("")) + + // Compute delay twice; should not consume tokens or increase delay + // beyond a single interval. The first delay should be close to 200ms. + d1, ok := rl.reserveDelay("") + require.True(t, ok) + require.GreaterOrEqual(t, d1, 180*time.Millisecond) + require.Less(t, d1, 300*time.Millisecond) + + // Immediately compute the delay again. The delay should be roughly the + // same (since we canceled the reservation and did not consume a token). + d2, ok := rl.reserveDelay("") + require.True(t, ok) + + // Allow for some scheduler jitter; ensure the second delay isn't + // inflated by another interval. + require.Less(t, d2, d1+50*time.Millisecond) + + // After the original delay has passed, we should be allowed again. + time.Sleep(d1) + require.True(t, rl.allowFor("")) +} + +func TestRateLimiter_MultipleRules_StrictestGoverns(t *testing.T) { + t.Parallel() + // Two rules match the same path: + // lenient allows burst 2, strict allows burst 1. + strictRateLimit := RateLimit{ + PathRegexp: ".*", + Requests: 1, + Per: 200 * time.Millisecond, + Burst: 1} + strict := mustCompile(t, strictRateLimit) + + lenientRateLimit := RateLimit{ + PathRegexp: ".*", + Requests: 10, + Per: 200 * time.Millisecond, + Burst: 2, + } + lenient := mustCompile(t, lenientRateLimit) + + key := "userA" + + allowsAll := func() bool { + return strict.allowFor(key) && lenient.allowFor(key) + } + + // The first request passes both. + require.True(t, allowsAll()) + + // The second request should be denied by strict rule even though + // lenient would allow it. The overall decision is to deny. + require.False(t, allowsAll()) + + // Suggested retry delay should come from the strict rule ~200ms. + delay, ok := strict.reserveDelay(key) + require.True(t, ok) + require.GreaterOrEqual(t, delay, 180*time.Millisecond) + require.Less(t, delay, 300*time.Millisecond) +} + +func TestRateLimiter_PerIdentityIsolationVsGlobal(t *testing.T) { + t.Parallel() + // 1 rps, burst 1. Separate identities shouldn't affect each other. + rl := mustCompile( + t, RateLimit{ + PathRegexp: ".*", + Requests: 1, + Per: time.Second, + Burst: 1}, + ) + + // Two distinct identities. + L402KeyA := "A" + L402KeyB := "B" + + // Both have their own burst token available. + require.True(t, rl.allowFor(L402KeyA)) + require.True(t, rl.allowFor(L402KeyB)) + + // The second immediate call for A should be denied while B is still + // unaffected. 
require.False(t, rl.allowFor(L402KeyA))
+
+	// B is denied here only because B consumed its own token above, not
+	// because of A's requests.
+	require.False(t, rl.allowFor(L402KeyB))
+
+	// Global limiter: separate instance with its own bucket.
+	rl2 := mustCompile(t, RateLimit{
+		PathRegexp: ".*",
+		Requests:   1,
+		Per:        time.Second,
+		Burst:      1,
+	})
+	require.True(t, rl2.allowFor(""))  // consume global burst
+	require.False(t, rl2.allowFor("")) // no more global tokens
+}