lightninglabs · hieblmi · Aug 22, 2025 · Aug 22, 2025 · Aug 26, 2025 · Aug 26, 2025
diff --git a/README.md b/README.md
@@ -50,3 +50,79 @@ services and APIs.
   compare with `sample-conf.yaml`.
 * Start aperture without any command line parameters (`./aperture`), all configuration
   is done in the `~/.aperture/aperture.yaml` file.
+
+## Per-endpoint rate limiting
+
+Aperture supports per-endpoint rate limiting using a token bucket based on golang.org/x/time/rate.
+Limits are configured per service using regular expressions that match request paths.
+
+Key properties:
+- Scope: per service, per endpoint (path regex).
+- Process local: state is in-memory per Aperture process. In clustered deployments, each instance enforces its own limits.
+- Evaluation: all matching rules are enforced; if any matching rule denies a request, the request is rejected.
+- Protocols: applies to both REST and gRPC requests.
+
+Behavior on limit exceed:
+- HTTP/REST: returns 429 Too Many Requests and sets a Retry-After header (in seconds). Sub-second delays are rounded up to 1 second.
+- gRPC: response uses HTTP/2 headers/trailers with Grpc-Status and Grpc-Message indicating the error (message: "rate limit exceeded").
+- CORS headers are included consistently.
+
+Configuration fields (under a service):
+- pathregex: regular expression matched against the URL path (e.g., "/package.Service/Method").
+- requests: allowed number of requests per window.
+- per: size of the time window (e.g., 1s, 1m). Default: 1s.
+- burst: size of the token bucket, i.e. the maximum number of requests that can be served back-to-back before the steady rate applies. Default: equal to requests.
+
+Example (see sample-conf.yaml for a full example):
+
+```yaml
+services:
+  - name: "service1"
+    hostregexp: '^service1.com$'
+    pathregexp: '^/.*$'
+    address: "127.0.0.1:10009"
+    protocol: https
+
+    # Optional per-endpoint rate limits using a token bucket.
+    ratelimits:
+      - pathregex: '^/looprpc.SwapServer/LoopOutTerms.*$'
+        requests: 5
+        per: 1s
+        burst: 5
+      - pathregex: '^/looprpc.SwapServer/LoopOutQuote.*$'
+        requests: 2
+        per: 1s
+        burst: 2
+```
+
+Notes:
+- If multiple ratelimits match a request path, all must allow the request; the strictest rule will effectively apply.
+- If requests or burst is set to 0 or a negative value, safe defaults are used (requests defaults to 1; burst defaults to requests).
+- If per is omitted or 0, it defaults to 1s.
+
+### L402-scoped rate limiting
+
+Path-based limits are enforced on a per-L402 key basis whenever a request
+carries L402 credentials, so each authenticated client gets its own budget.
+
+How it works:
+- Key derivation: For requests that include L402 auth headers, Aperture extracts the preimage (via Authorization, Grpc-Metadata-Macaroon, or Macaroon headers) and derives a stable key from the preimage hash. Each unique L402 key gets its own token bucket for every matching rate limit rule.
+- Fallback to global: If no L402 key can be derived (unauthenticated or missing preimage), the rule’s global limiter is used for all such requests.
+- Multiple matching rules: If multiple rate limit entries match a path, each rule is checked independently per L402 key; the request must pass all of them.
+
+Headers recognized for L402 key extraction:
+- Authorization: "L402 <macBase64>:<preimageHex>" (also supports legacy "LSAT ...").
+- Grpc-Metadata-Macaroon: "<macHex>" (preimage is read from macaroon caveat).
+- Macaroon: "<macHex>" (preimage is read from macaroon caveat).
+
+Operational notes:
+- Isolation: Authenticated users (distinct L402 keys) do not interfere with each other’s token buckets. A surge from one key won’t consume tokens of another.
+- Unauthenticated traffic: Shares the global bucket per rule. Heavy unauthenticated traffic can still be throttled by the global limiter.
+- Memory/scale: Per-key limiters are kept in an in-memory map per process and currently do not expire. In high-churn environments with many unique L402 keys, this may grow over time. Consider process restarts or external rate-limiting if necessary.
+- Retry-After: When throttled, Aperture computes a suggested delay without consuming a token and sets Retry-After accordingly (minimum 1s), enabling clients to back off.
+
+Example scenario:
+- Suppose a rule allows 5 rps (burst 5) for path
+  "^/looprpc.SwapServer/LoopOutTerms.*$", as in the example configuration
+  above. Two different L402 users (A and B) each get their own 5 rps budget.
+  Unauthenticated requests to the same path share one global 5 rps budget.
diff --git a/proxy/proxy.go b/proxy/proxy.go
@@ -167,10 +167,49 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	// Apply per-endpoint rate limits, if configured. Determine the L402 key
+	// (preimage hash) if present; otherwise fall back to the global
+	// limiter.
+	var l402Key string
+	if _, preimage, err := l402.FromHeader(&r.Header); err == nil {
+		l402Key = preimage.Hash().String()
+	}
+	for _, rl := range target.compiledRateLimits {
+		if !rl.re.MatchString(r.URL.Path) {
+			continue
+		}
+
+		// Fast path: allow if a token is available now.
+		if rl.allowFor(l402Key) {
+			continue
+		}
+
+		// Otherwise, compute suggested retry delay without consuming
+		// tokens.
+		if delay, ok := rl.reserveDelay(l402Key); ok {
+			if delay > 0 {
+				// As seconds; for sub-second delays we still
+				// send 1 second.
+				secs := int(delay.Seconds())
+				if secs == 0 {
+					secs = 1
+				}
+				w.Header().Set(
+					"Retry-After", strconv.Itoa(secs),
+				)
+			}
+		}
+		addCorsHeaders(w.Header())
+		sendDirectResponse(
+			w, r, http.StatusTooManyRequests, "rate limit exceeded",
+		)
+
+		return
+	}
+
 	resourceName := target.ResourceName(r.URL.Path)
 
-	// Determine auth level required to access service and dispatch request
-	// accordingly.
+	// Determine the auth level required to access the service and dispatch
+	// the request accordingly.
 	authLevel := target.AuthRequired(r)
 	skipInvoiceCreation := target.SkipInvoiceCreation(r)
 	switch {

diff --git a/proxy/ratelimiter.go b/proxy/ratelimiter.go
@@ -0,0 +1,134 @@
+package proxy
+
+import (
+	"regexp"
+	"sync"
+	"time"
+
+	"golang.org/x/time/rate"
+)
+
+// RateLimit defines a per-endpoint rate limit using a token bucket: Requests
+// events are allowed per Per window, with bursts of up to Burst events.
+// Example YAML:
+//
+//	ratelimits:
+//	  - pathregex: '^/looprpc.SwapServer/LoopOutQuote.*$'
+//	    requests: 5
+//	    per: 1s
+//	    burst: 5
+//
+// Defaults applied by compile:
+//   - If requests is 0 or negative, it defaults to 1.
+//   - If burst is 0 or negative, it defaults to requests.
+//   - If per is 0, it defaults to 1s.
+//
+// Note: All limits are in-memory and per-process.
+type RateLimit struct {
+	PathRegexp string        `long:"pathregex" description:"Regular expression to match the path of the URL against for rate limiting" yaml:"pathregex"`
+	Requests   int           `long:"requests" description:"Number of requests allowed per time window" yaml:"requests"`
+	Per        time.Duration `long:"per" description:"Size of the time window (e.g., 1s, 1m)" yaml:"per"`
+	Burst      int           `long:"burst" description:"Burst size allowed in addition to steady rate" yaml:"burst"`
+
+	// compiled is internal state populated by compile; it is nil until
+	// compile has succeeded.
+	compiled *compiledRateLimit
+}
+
+// compiledRateLimit is the runtime form of a RateLimit as built by
+// RateLimit.compile: the compiled path regex, one global limiter, and the
+// parameters and cache used for per-L402 key limiters.
+type compiledRateLimit struct {
+	// Mutex protects the l402Limiters map.
+	sync.Mutex
+
+	// re is the regular expression used to match the path of the URL.
+	re *regexp.Regexp
+
+	// limiter is the global limiter, used when no per-L402 key can be
+	// derived (i.e. the key is empty).
+	limiter *rate.Limiter
+
+	// limit is the rate given to every per-L402 key limiter created by
+	// getOrCreate.
+	limit rate.Limit
+
+	// burst is the burst size given to every per-L402 key limiter created
+	// by getOrCreate.
+	burst int
+
+	// l402Limiters is a map of per-L402 key limiters.
+	l402Limiters map[string]*rate.Limiter
+}
+
+// compile builds the runtime state for this rule and stores it in r.compiled.
+//
+// Defaults are applied before anything is built: a non-positive Per becomes
+// 1s, a non-positive Requests becomes 1, and a non-positive Burst becomes
+// Requests. An error is returned, and r.compiled is left untouched, if
+// PathRegexp is not a valid regular expression.
+func (r *RateLimit) compile() error {
+	per := r.Per
+	if per <= 0 {
+		// A negative window must not reach the limiter: a non-positive
+		// interval translates to an infinite rate, which would
+		// silently disable the limit.
+		per = time.Second
+	}
+	requests := r.Requests
+	if requests <= 0 {
+		requests = 1
+	}
+	burst := r.Burst
+	if burst <= 0 {
+		burst = requests
+	}
+
+	re, err := regexp.Compile(r.PathRegexp)
+	if err != nil {
+		return err
+	}
+
+	// Derive the rate (events per second) in floating point. Dividing the
+	// durations as integers truncates, and once requests exceeds the
+	// number of nanoseconds in per the interval becomes 0, which would
+	// again mean "unlimited".
+	limit := rate.Limit(float64(requests) / per.Seconds())
+	r.compiled = &compiledRateLimit{
+		re:           re,
+		limiter:      rate.NewLimiter(limit, burst),
+		limit:        limit,
+		burst:        burst,
+		l402Limiters: make(map[string]*rate.Limiter),
+	}
+
+	return nil
+}
+
+// allowFor reports whether an event may happen right now for the given key,
+// taking a token from the matching limiter if so. An empty key selects the
+// global limiter; any other key selects (and lazily creates) that key's own
+// limiter.
+func (c *compiledRateLimit) allowFor(key string) bool {
+	lim := c.limiter
+	if key != "" {
+		lim = c.getOrCreate(key)
+	}
+
+	return lim.Allow()
+}
+
+// reserveDelay reserves a token on the limiter for the given key and returns
+// the suggested delay. Callers can use the delay to set Retry-After without
+// consuming tokens.
+func (c *compiledRateLimit) reserveDelay(key string) (time.Duration, bool) {
+	var l *rate.Limiter
+	if key == "" {
+		l = c.limiter
+	} else {
+		l = c.getOrCreate(key)
+	}
+
+	res := l.Reserve()
+	if !res.OK() {
+		return 0, false
+	}
+
+	delay := res.Delay()
+	res.CancelAt(time.Now())
+
+	return delay, true
+}
+
+// getOrCreate returns the limiter belonging to the given L402 key. On first
+// use of a key a new limiter with the rule's limit and burst is created and
+// stored in l402Limiters. The map is accessed while holding the embedded
+// mutex.
+func (c *compiledRateLimit) getOrCreate(key string) *rate.Limiter {
+	c.Lock()
+	defer c.Unlock()
+
+	lim, found := c.l402Limiters[key]
+	if !found {
+		lim = rate.NewLimiter(c.limit, c.burst)
+		c.l402Limiters[key] = lim
+	}
+
+	return lim
+}