From 16235414dc6dc0341beb503cd81b70d842dcbe88 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Fri, 12 Sep 2025 17:58:54 +0200 Subject: [PATCH] [ntuple] cache number of entries in reader --- tree/ntuple/inc/ROOT/RNTupleReader.hxx | 24 ++++++++++++------------ tree/ntuple/src/RNTupleReader.cxx | 1 + 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RNTupleReader.hxx b/tree/ntuple/inc/ROOT/RNTupleReader.hxx index 3ef4ab0dc479e..8c3e09c7f7de8 100644 --- a/tree/ntuple/inc/ROOT/RNTupleReader.hxx +++ b/tree/ntuple/inc/ROOT/RNTupleReader.hxx @@ -82,6 +82,8 @@ private: /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes, /// not on a hot code path. std::optional fCachedDescriptor; + /// We know that the RNTupleReader is always reading a single RNTuple, so the number of entries is fixed. + ROOT::NTupleSize_t fCachedNEntries = kInvalidNTupleIndex; Experimental::Detail::RNTupleMetrics fMetrics; /// If not nullopt, these will be used when creating the model std::optional fCreateModelOptions; @@ -172,21 +174,19 @@ public: ~RNTupleReader(); /// Returns the number of entries in this RNTuple. - /// \attention This method requires locking a mutex, therefore it can become relatively expensive to call repeatedly - /// (even in the absence of contention). Unless necessary, you should not call this method in the condition of a - /// `for` loop. Instead, either call it once and cache the result, use the faster `GetEntryRange()` or, equivalently, - /// use the RNTupleReader directly as an iterator. - /// + /// Note that the recommended way to iterate the RNTuple is using /// ~~~ {.cpp} - /// // BAD for performance: - /// for (auto i = 0u; i < reader->GetNEntries(); ++i) { ... } - /// - /// // GOOD for performance (all equivalent): - /// for (auto i = 0u, n = reader->GetNEntries(); i < n; ++i) { ... } + /// // RECOMMENDED way to iterate an ntuple /// for (auto i : reader->GetEntryRange()) { ... } - /// for (auto i : *reader) { ... } /// ~~~ - ROOT::NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); } + /// instead of + /// ~~~ {.cpp} + /// // DISCOURAGED way to iterate an ntuple + /// for (auto i = 0u; i < reader->GetNEntries(); ++i) { ... } + /// ~~~ + /// The reason is that determining the number of entries, while currently cheap, may in the future be + /// an expensive operation. + ROOT::NTupleSize_t GetNEntries() const { return fCachedNEntries; } const ROOT::RNTupleModel &GetModel(); std::unique_ptr CreateEntry(); diff --git a/tree/ntuple/src/RNTupleReader.cxx b/tree/ntuple/src/RNTupleReader.cxx index aa3c73bb77ff3..d4aecf4c8adc0 100644 --- a/tree/ntuple/src/RNTupleReader.cxx +++ b/tree/ntuple/src/RNTupleReader.cxx @@ -53,6 +53,7 @@ void ROOT::RNTupleReader::InitPageSource(bool enableMetrics) if (enableMetrics) EnableMetrics(); fSource->Attach(); + fCachedNEntries = fSource->GetNEntries(); } ROOT::RNTupleReader::RNTupleReader(std::unique_ptr model,