diff --git a/sentry_sdk/probe.py b/sentry_sdk/probe.py new file mode 100644 index 0000000000..9e3c8c3d42 --- /dev/null +++ b/sentry_sdk/probe.py @@ -0,0 +1,122 @@ +import dis +import linecache +import random +import sys +import time +import types +from bytecode import Bytecode, Instr +from dataclasses import dataclass, field +from pathlib import Path +from sentry_sdk.utils import logger + + +@dataclass +class LogPoint: + id: int + source_file: str + line_number: int + message_template: str + tags: dict[str, str] = field(default_factory=dict) + sample_rate: float | None = None + sample_expression: str | None = None + valid_until: int | None = None + + +class Registry: + def __init__(self): + self.log_points = {} + self.log_point_hooks = {} + + def insert_log_point(self, log_point): + fn = function_at_location(log_point.source_file, log_point.line_number) + code = Bytecode.from_code(fn.__code__) + for i, instr in enumerate(code): + if getattr(instr, "lineno", None) == log_point.line_number: + logger.debug("Injecting log point at index=%s instr=%s", i, instr) + hook_instrs = _hook_instructions(log_point.id) + code[i:i] = hook_instrs + self.log_point_hooks[log_point.id] = (i, i+len(hook_instrs)) + break + fn.__code__ = code.to_code() + start_index, end_index = self.log_point_hooks[log_point.id] + logger.debug("Injected log point into function %s at start_index=%s end_index=%s", fn, start_index, end_index) + self.log_points[log_point.id] = log_point + + def remove_log_point(self, log_point_id): + log_point = self.log_points[log_point_id] + fn = function_at_location(log_point.source_file, log_point.line_number) + code = Bytecode.from_code(fn.__code__) + start_index, end_index = self.log_point_hooks[log_point_id] + logger.debug("Ejecting log point at start_index=%s end_index=%s", start_index, end_index) + del code[start_index:end_index] + fn.__code__ = code.to_code() + logger.debug("Ejected log point from function %s", fn) + del self.log_point_hooks[log_point_id] + del self.log_points[log_point_id] + + +registry = Registry() + + +def _hook_instructions(log_point_id): + # sentry_sdk.probe._handle_log_point(log_point_id, locals()) + return [ + Instr("LOAD_GLOBAL", (False, "sentry_sdk")), + Instr("LOAD_ATTR", (False, "probe")), + Instr("LOAD_ATTR", (True, "_handle_log_point")), + Instr("LOAD_CONST", log_point_id), + Instr("LOAD_GLOBAL", (True, "locals")), + Instr("CALL", 0), + Instr("CALL", 2), + Instr("POP_TOP"), + ] + + +def _handle_log_point(log_point_id, local_vars): + import sentry_sdk + + logger.debug("Handling log point id=%s, local_vars=%s", log_point_id, local_vars) + log_point = registry.log_points[log_point_id] + + if log_point.valid_until is not None and log_point.valid_until < time.time(): + logger.debug("Log point id=%s not valid", log_point_id) + return + + if log_point.sample_rate is not None and log_point.sample_rate < random.uniform(0, 1): + logger.debug("Log point id=%s not sampled: rate", log_point_id) + return + + if ( + log_point.sample_expression is not None and + not eval(log_point.sample_expression, globals={}, locals=local_vars) # 😬 + ): + logger.debug("Log point id=%s not sampled: expression", log_point_id) + return + + attributes = { + f"locals.{key}": value + for key, value in local_vars.items() + } | { + "log_point.source_file": log_point.source_file, + "log_point.line_number": log_point.line_number, + "log_point.sample_rate": log_point.sample_rate, + "log_point.valid_until": log_point.valid_until, + "log_point.code_context": linecache.getline(log_point.source_file, log_point.line_number), + } | log_point.tags + + sentry_sdk.logger.info(log_point.message_template, attributes=attributes) + + +def function_at_location(source_file, line_number): + module = next( + iter( + m for m + in sys.modules.values() + if getattr(m, "__file__", "").lower() == source_file + ) + ) + for fn in module.__dict__.values(): + if isinstance(fn, types.FunctionType): + for _, line in dis.findlinestarts(fn.__code__): + if line == line_number: + return fn diff --git a/sentry_sdk/transport.py b/sentry_sdk/transport.py index e904081959..ea4c747cd2 100644 --- a/sentry_sdk/transport.py +++ b/sentry_sdk/transport.py @@ -1,10 +1,15 @@ from abc import ABC, abstractmethod import io +import json import os import gzip +import random import socket import ssl +import sys +import threading import time +import traceback import warnings from datetime import datetime, timedelta, timezone from collections import defaultdict @@ -23,6 +28,7 @@ from sentry_sdk.utils import Dsn, logger, capture_internal_exceptions from sentry_sdk.worker import BackgroundWorker from sentry_sdk.envelope import Envelope, Item, PayloadRef +from sentry_sdk import probe from typing import TYPE_CHECKING, cast, List, Dict @@ -201,6 +207,7 @@ def __init__(self, options): self._worker = BackgroundWorker(queue_size=options["transport_queue_size"]) self._auth = self.parsed_dsn.to_auth("sentry.python/%s" % VERSION) self._disabled_until = {} # type: Dict[Optional[EventDataCategory], datetime] + self._log_point_ids = set() # We only use this Retry() class for the `get_retry_after` method it exposes self._retry = urllib3.util.Retry() self._discarded_events = defaultdict( @@ -317,6 +324,19 @@ def _update_rate_limits(self, response): seconds=retry_after ) + def _update_log_points(self, response): + data = json.loads(response.data) + + log_points = [probe.LogPoint(**lp) for lp in data.get("log_points", [])] + log_point_ids = {lp.id for lp in log_points} + if self._log_point_ids != log_point_ids: + logger.debug("Updating log points...") + for log_point_id in self._log_point_ids: + probe.registry.remove_log_point(log_point_id) + self._log_point_ids = log_point_ids + for log_point in log_points: + probe.registry.insert_log_point(log_point) + def _send_request( self, body, @@ -354,6 +374,7 @@ def record_loss(reason): try: self._update_rate_limits(response) + self._update_log_points(response) if response.status == 429: # if we hit a 429. Something was rate limited but we already diff --git a/setup.py b/setup.py index 11b02cbca8..0d37835b89 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ def get_file_text(file_name): install_requires=[ "urllib3>=1.26.11", "certifi", + "bytecode", ], extras_require={ "aiohttp": ["aiohttp>=3.5"],