Skip to content

Commit d4af297

Browse files
Refactor Histogram metrics to be homeserver-scoped (#18724)
Bulk refactor `Histogram` metrics to be homeserver-scoped. We also add lints to make sure that new `Histogram` metrics don't sneak in without using the `server_name` label (`SERVER_NAME_LABEL`). Part of #18592 ### Testing strategy 1. Add the `metrics` listener in your `homeserver.yaml` ```yaml listeners: # This is just showing how to configure metrics either way # # `http` `metrics` resource - port: 9322 type: http bind_addresses: ['127.0.0.1'] resources: - names: [metrics] compress: false # `metrics` listener - port: 9323 type: metrics bind_addresses: ['127.0.0.1'] ``` 1. Start the homeserver: `poetry run synapse_homeserver --config-path homeserver.yaml` 1. Fetch `http://localhost:9322/_synapse/metrics` and/or `http://localhost:9323/metrics` 1. Observe response includes the TODO metrics with the `server_name` label ### Todo - [x] Wait for #18656 to merge ### Dev notes ``` LoggingDatabaseConnection make_conn make_pool make_fake_db_pool ``` ### Pull Request Checklist <!-- Please read https://element-hq.github.io/synapse/latest/development/contributing_guide.html before submitting your pull request --> * [x] Pull request is based on the develop branch * [x] Pull request includes a [changelog file](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#changelog). The entry should: - Be a short description of your change which makes sense to users. "Fixed a bug that prevented receiving messages from other servers." instead of "Moved X method from `EventStore` to `EventWorkerStore`.". - Use markdown where necessary, mostly for `code blocks`. - End with either a period (.) or an exclamation mark (!). - Start with a capital letter. - Feel free to credit yourself, by adding a sentence "Contributed by @github_username." or "Contributed by [Your Name]." to the end of the entry. * [x] [Code style](https://element-hq.github.io/synapse/latest/code_style.html) is correct (run the [linters](https://element-hq.github.io/synapse/latest/development/contributing_guide.html#run-the-linters))
1 parent 31a38f5 commit d4af297

29 files changed

+262
-89
lines changed

changelog.d/18724.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Refactor `Histogram` metrics to be homeserver-scoped.

scripts-dev/mypy_synapse_plugin.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ def get_function_signature_hook(
6161
) -> Optional[Callable[[FunctionSigContext], FunctionLike]]:
6262
if fullname in (
6363
"prometheus_client.metrics.Counter",
64+
"prometheus_client.metrics.Histogram",
6465
"prometheus_client.metrics.Gauge",
6566
# TODO: Add other prometheus_client metrics that need checking as we
6667
# refactor, see https://github.com/element-hq/synapse/issues/18592

synapse/_scripts/review_recent_signups.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,21 @@
2929

3030
from synapse.config._base import (
3131
Config,
32+
ConfigError,
3233
RootConfig,
3334
find_config_files,
3435
read_config_files,
3536
)
3637
from synapse.config.database import DatabaseConfig
38+
from synapse.config.server import ServerConfig
3739
from synapse.storage.database import DatabasePool, LoggingTransaction, make_conn
3840
from synapse.storage.engines import create_engine
3941

4042

4143
class ReviewConfig(RootConfig):
42-
"A config class that just pulls out the database config"
44+
"A config class that just pulls out the server and database config"
4345

44-
config_classes = [DatabaseConfig]
46+
config_classes = [ServerConfig, DatabaseConfig]
4547

4648

4749
@attr.s(auto_attribs=True)
@@ -148,6 +150,10 @@ def main() -> None:
148150
config_dict = read_config_files(config_files)
149151
config.parse_config_dict(config_dict, "", "")
150152

153+
server_name = config.server.server_name
154+
if not isinstance(server_name, str):
155+
raise ConfigError("Must be a string", ("server_name",))
156+
151157
since_ms = time.time() * 1000 - Config.parse_duration(config_args.since)
152158
exclude_users_with_email = config_args.exclude_emails
153159
exclude_users_with_appservice = config_args.exclude_app_service
@@ -159,7 +165,12 @@ def main() -> None:
159165

160166
engine = create_engine(database_config.config)
161167

162-
with make_conn(database_config, engine, "review_recent_signups") as db_conn:
168+
with make_conn(
169+
db_config=database_config,
170+
engine=engine,
171+
default_txn_name="review_recent_signups",
172+
server_name=server_name,
173+
) as db_conn:
163174
# This generates a type of Cursor, not LoggingTransaction.
164175
user_infos = get_recent_users(
165176
db_conn.cursor(),

synapse/_scripts/synapse_port_db.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,14 @@ def build_db_store(
672672
engine = create_engine(db_config.config)
673673

674674
hs = MockHomeserver(self.hs_config)
675-
676-
with make_conn(db_config, engine, "portdb") as db_conn:
675+
server_name = hs.hostname
676+
677+
with make_conn(
678+
db_config=db_config,
679+
engine=engine,
680+
default_txn_name="portdb",
681+
server_name=server_name,
682+
) as db_conn:
677683
engine.check_database(
678684
db_conn, allow_outdated_version=allow_outdated_version
679685
)

synapse/api/auth/msc3861_delegated.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
inject_request_headers,
4848
start_active_span,
4949
)
50+
from synapse.metrics import SERVER_NAME_LABEL
5051
from synapse.synapse_rust.http_client import HttpClient
5152
from synapse.types import Requester, UserID, create_requester
5253
from synapse.util import json_decoder
@@ -62,7 +63,7 @@
6263
introspection_response_timer = Histogram(
6364
"synapse_api_auth_delegated_introspection_response",
6465
"Time taken to get a response for an introspection request",
65-
["code"],
66+
labelnames=["code", SERVER_NAME_LABEL],
6667
)
6768

6869

@@ -341,17 +342,23 @@ async def _introspect_token(
341342
)
342343
except HttpResponseException as e:
343344
end_time = self._clock.time()
344-
introspection_response_timer.labels(e.code).observe(end_time - start_time)
345+
introspection_response_timer.labels(
346+
code=e.code, **{SERVER_NAME_LABEL: self.server_name}
347+
).observe(end_time - start_time)
345348
raise
346349
except Exception:
347350
end_time = self._clock.time()
348-
introspection_response_timer.labels("ERR").observe(end_time - start_time)
351+
introspection_response_timer.labels(
352+
code="ERR", **{SERVER_NAME_LABEL: self.server_name}
353+
).observe(end_time - start_time)
349354
raise
350355

351356
logger.debug("Fetched token from MAS")
352357

353358
end_time = self._clock.time()
354-
introspection_response_timer.labels(200).observe(end_time - start_time)
359+
introspection_response_timer.labels(
360+
code=200, **{SERVER_NAME_LABEL: self.server_name}
361+
).observe(end_time - start_time)
355362

356363
resp = json_decoder.decode(resp_body.decode("utf-8"))
357364

synapse/federation/federation_server.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
pdu_process_time = Histogram(
123123
"synapse_federation_server_pdu_process_time",
124124
"Time taken to process an event",
125+
labelnames=[SERVER_NAME_LABEL],
125126
)
126127

127128
last_pdu_ts_metric = Gauge(
@@ -1324,9 +1325,9 @@ async def _process_incoming_pdus_in_room_inner(
13241325
origin, event.event_id
13251326
)
13261327
if received_ts is not None:
1327-
pdu_process_time.observe(
1328-
(self._clock.time_msec() - received_ts) / 1000
1329-
)
1328+
pdu_process_time.labels(
1329+
**{SERVER_NAME_LABEL: self.server_name}
1330+
).observe((self._clock.time_msec() - received_ts) / 1000)
13301331

13311332
next = await self._get_next_nonspam_staged_event_for_room(
13321333
room_id, room_version

synapse/federation/sender/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,8 @@ async def handle_event(event: EventBase) -> None:
667667
ts = event_to_received_ts[event.event_id]
668668
assert ts is not None
669669
synapse.metrics.event_processing_lag_by_event.labels(
670-
"federation_sender"
670+
name="federation_sender",
671+
**{SERVER_NAME_LABEL: self.server_name},
671672
).observe((now - ts) / 1000)
672673

673674
async def handle_room_events(events: List[EventBase]) -> None:

synapse/handlers/appservice.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,8 @@ async def start_scheduler() -> None:
187187
assert ts is not None
188188

189189
synapse.metrics.event_processing_lag_by_event.labels(
190-
"appservice_sender"
190+
name="appservice_sender",
191+
**{SERVER_NAME_LABEL: self.server_name},
191192
).observe((now - ts) / 1000)
192193

193194
async def handle_room_events(events: Iterable[EventBase]) -> None:

synapse/handlers/federation.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
from synapse.http.servlet import assert_params_in_dict
7272
from synapse.logging.context import nested_logging_context
7373
from synapse.logging.opentracing import SynapseTags, set_tag, tag_args, trace
74+
from synapse.metrics import SERVER_NAME_LABEL
7475
from synapse.metrics.background_process_metrics import run_as_background_process
7576
from synapse.module_api import NOT_SPAM
7677
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
@@ -90,7 +91,7 @@
9091
backfill_processing_before_timer = Histogram(
9192
"synapse_federation_backfill_processing_before_time_seconds",
9293
"sec",
93-
[],
94+
labelnames=[SERVER_NAME_LABEL],
9495
buckets=(
9596
0.1,
9697
0.5,
@@ -533,9 +534,9 @@ async def try_backfill(domains: StrCollection) -> bool:
533534
# backfill points regardless of `current_depth`.
534535
if processing_start_time is not None:
535536
processing_end_time = self.clock.time_msec()
536-
backfill_processing_before_timer.observe(
537-
(processing_end_time - processing_start_time) / 1000
538-
)
537+
backfill_processing_before_timer.labels(
538+
**{SERVER_NAME_LABEL: self.server_name}
539+
).observe((processing_end_time - processing_start_time) / 1000)
539540

540541
success = await try_backfill(likely_domains)
541542
if success:

synapse/handlers/federation_event.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@
113113
backfill_processing_after_timer = Histogram(
114114
"synapse_federation_backfill_processing_after_time_seconds",
115115
"sec",
116-
[],
116+
labelnames=[SERVER_NAME_LABEL],
117117
buckets=(
118118
0.1,
119119
0.25,
@@ -692,7 +692,9 @@ async def backfill(
692692
if not events:
693693
return
694694

695-
with backfill_processing_after_timer.time():
695+
with backfill_processing_after_timer.labels(
696+
**{SERVER_NAME_LABEL: self.server_name}
697+
).time():
696698
# if there are any events in the wrong room, the remote server is buggy and
697699
# should not be trusted.
698700
for ev in events:

0 commit comments

Comments
 (0)