diff --git a/synapse/logging/scopecontextmanager.py b/synapse/logging/scopecontextmanager.py
index feaadc4d87a..4c3cc109b2d 100644
--- a/synapse/logging/scopecontextmanager.py
+++ b/synapse/logging/scopecontextmanager.py
@@ -155,7 +155,9 @@ def close(self) -> None:
 
         if self._finish_on_close:
             self.span.finish()
-        self.logcontext.scope = None
+            # The tracing scope/span should stick to the logcontext if it's active,
+            # so we only clear it if we're finishing the span.
+            self.logcontext.scope = None
 
         if self._enter_logcontext:
             self.logcontext.__exit__(None, None, None)
diff --git a/tests/logging/test_opentracing.py b/tests/logging/test_opentracing.py
index 2f389f7f44f..3d39b5eaf0f 100644
--- a/tests/logging/test_opentracing.py
+++ b/tests/logging/test_opentracing.py
@@ -19,13 +19,15 @@
 #
 #
 
-from typing import Awaitable, Optional, cast
+from typing import Awaitable, Dict, Optional, cast
 
 from twisted.internet import defer
 from twisted.internet.testing import MemoryReactorClock
 
 from synapse.logging.context import (
     LoggingContext,
+    PreserveLoggingContext,
+    current_context,
     make_deferred_yieldable,
     run_in_background,
 )
@@ -45,7 +47,6 @@
 except ImportError:
     jaeger_client = None  # type: ignore
 
-
 try:
     import opentracing
 
@@ -54,6 +55,7 @@
     opentracing = None  # type: ignore
     LogContextScopeManager = None  # type: ignore
 
+import logging
 
 from tests.unittest import TestCase
 
@@ -224,6 +226,167 @@ async def root() -> None:
             [scopes[1].span, scopes[2].span, scopes[0].span],
         )
 
+    def test_run_in_background_active_scope_still_available(self) -> None:
+        """
+        Test that tasks running via `run_in_background` still have access to the
+        active tracing scope.
+
+        This is a regression test for a previous Synapse issue where the tracing
+        scope would `__exit__` and close before the `run_in_background` task
+        completed, and our own previous custom `_LogContextScope.close(...)` would
+        clear `LoggingContext.scope`, preventing further tracing spans from having
+        the correct parent.
+        """
+        reactor, clock = get_clock()
+
+        scope_map: Dict[str, opentracing.Scope] = {}
+
+        callback_finished = False
+
+        async def async_task() -> None:
+            nonlocal callback_finished
+            try:
+                logger.info("asdf async_task1 - %s", current_context().scope)
+                root_scope = scope_map["root"]
+                root_context = cast(jaeger_client.SpanContext, root_scope.span.context)
+
+                self.assertEqual(
+                    self._tracer.active_span,
+                    root_scope.span,
+                    "expected to inherit the root tracing scope from where this was run",
+                )
+
+                # Return control back to the reactor thread and wait an arbitrary
+                # amount
+                await clock.sleep(4)
+
+                logger.info("asdf async_task2 - %s", current_context().scope)
+
+                # This is a key part of what we're testing! In a previous version of
+                # Synapse, we would lose the active span at this point.
+                self.assertEqual(
+                    self._tracer.active_span,
+                    root_scope.span,
+                    "expected to still have a root tracing scope/span active",
+                )
+
+                # For completeness' sake, let's also trace more sub-tasks here and
+                # assert they have the correct span parents as well (root).
+
+                # Start tracing some other sub-task.
+                #
+                # This is a key part of what we're testing! In a previous version of
+                # Synapse, it would have the incorrect span parents.
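+                # (`start_active_span` activates the new scope via the tracer's
+                # `LogContextScopeManager`, which stores it on the current
+                # `LoggingContext` - the same `logcontext.scope` field that the
+                # `scopecontextmanager.py` fix above stops clearing too early.)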
+                scope = start_active_span(
+                    "task1",
+                    tracer=self._tracer,
+                )
+                scope_map["task1"] = scope
+
+                # Ensure the span parent is pointing to the root scope
+                context = cast(jaeger_client.SpanContext, scope.span.context)
+                self.assertEqual(
+                    context.parent_id,
+                    root_context.span_id,
+                    "expected task1 parent to be the root span",
+                )
+
+                # Ensure that the active span is our new sub-task now
+                self.assertEqual(self._tracer.active_span, scope.span)
+                # Return control back to the reactor thread and wait an arbitrary
+                # amount
+                await clock.sleep(4)
+                # We should still see the active span as the scope wasn't closed yet
+                self.assertEqual(self._tracer.active_span, scope.span)
+                # Close the scope using the context manager lifecycle
+                # (`scope.close()`)
+                with scope:
+                    pass
+            finally:
+                # When exceptions happen, we still want to mark the callback as
+                # finished so that the test can complete and we see the underlying
+                # error.
+                callback_finished = True
+
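+        # `root` mimics Synapse's request handling: it opens the root scope, fires
+        # `async_task` off in the background, and lets the scope's context manager
+        # exit while the task is still paused on its first `clock.sleep(4)`.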
+        async def root() -> None:
+            with start_active_span(
+                "root span",
+                tracer=self._tracer,
+                # We will close this off later. We're basically just mimicking the
+                # same pattern for how we handle requests. We pass the span off to
+                # the request for it to finish.
+                finish_on_close=False,
+            ) as root_scope:
+                scope_map["root"] = root_scope
+                self.assertEqual(self._tracer.active_span, root_scope.span)
+
+                logger.info("asdf root1 - %s", current_context().scope)
+
+                # Fire-and-forget a task
+                #
+                # XXX: The root scope context manager will `__exit__` before this
+                # task completes.
+                run_in_background(async_task)
+
+                logger.info("asdf root2 - %s", current_context().scope)
+
+                # Because we used `run_in_background`, the active span should still
+                # be the root.
+                self.assertEqual(self._tracer.active_span, root_scope.span)
+
+            logger.info("asdf root3 - %s", current_context().scope)
+
+            # We shouldn't see any active spans outside of the scope
+            self.assertIsNone(self._tracer.active_span)
+
+        with LoggingContext(name="root logcontext", server_name="test_server"):
+            # Start the test off
+            d_root = defer.ensureDeferred(root())
+
+            # Pump the reactor until the tasks complete
+            #
+            # We're manually pumping the reactor (and causing any pending callbacks
+            # to be called) so we need to be in the sentinel logcontext to avoid
+            # leaking our current logcontext into the reactor (which would then get
+            # picked up and associated with the next thing the reactor does). `with
+            # PreserveLoggingContext()` will reset the logcontext to the sentinel
+            # while we're pumping the reactor in the block and return us back to our
+            # current logcontext after the block.
+            with PreserveLoggingContext():
+                reactor.pump((2,) * 8)
+                self.successResultOf(d_root)
+
+            self.assertTrue(
+                callback_finished,
+                "Callback never finished which means the test probably didn't wait long enough",
+            )
+
+            # After we see all of the tasks are done (like a request when it
+            # `_finished_processing`), let's finish our root span
+            scope_map["root"].span.finish()
+
+            # Sanity check again: We shouldn't see any active spans leftover in this
+            # context.
+            self.assertIsNone(self._tracer.active_span)
+
+        # The spans should be reported in order of their finishing: task1, then
+        # root.
+        #
+        # We use `assertIncludes` just as an easier way to see if items are missing
+        # or added. We assert the order just below.
+        self.assertIncludes(
+            set(self._reporter.get_spans()),
+            {
+                scope_map["task1"].span,
+                scope_map["root"].span,
+            },
+            exact=True,
+        )
+        # This is where we actually assert the correct order
+        self.assertEqual(
+            self._reporter.get_spans(),
+            [
+                scope_map["task1"].span,
+                scope_map["root"].span,
+            ],
+        )
+
     def test_trace_decorator_sync(self) -> None:
         """
         Test whether we can use `@trace_with_opname` (`@trace`) and `@tag_args`