From d8f7d1fb93bcd1c6504c1089ab9472ab8a6c2810 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 9 Sep 2025 10:04:50 +0200 Subject: [PATCH 1/5] [ntuple] fix automatic schema evolution of pair/tuple For std::pair and std::tuple, check that the number of subfields in memory and on disk is identical. For 2 subfields, std::pair and std::tuple are interchangible. The pair/tuple elements don't need to be an exact match but are themselves subject to automatic schema evolution. --- tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx | 4 ++ tree/ntuple/src/RFieldMeta.cxx | 31 +++++++++++++ tree/ntuple/test/ntuple_evolution_type.cxx | 47 ++++++++++++++++++++ 3 files changed, 82 insertions(+) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx b/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx index 105765b166126..a0fc3a0986af9 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx @@ -136,6 +136,8 @@ protected: RPairField(std::string_view fieldName, std::array, 2> itemFields, const std::array &offsets); + void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; + public: RPairField(std::string_view fieldName, std::array, 2> itemFields); RPairField(RPairField &&other) = default; @@ -186,6 +188,8 @@ protected: RTupleField(std::string_view fieldName, std::vector> itemFields, const std::vector &offsets); + void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; + public: RTupleField(std::string_view fieldName, std::vector> itemFields); RTupleField(RTupleField &&other) = default; diff --git a/tree/ntuple/src/RFieldMeta.cxx b/tree/ntuple/src/RFieldMeta.cxx index 3ab1e17009010..cbb9b3cb47785 100644 --- a/tree/ntuple/src/RFieldMeta.cxx +++ b/tree/ntuple/src/RFieldMeta.cxx @@ -624,6 +624,21 @@ ROOT::RPairField::RPairField(std::string_view fieldName, std::arrayGetThisOffset()); } +void ROOT::RPairField::ReconcileOnDiskField(const RNTupleDescriptor &desc) +{ + static const std::vector prefixes = {"std::pair<", "std::tuple<"}; + + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); + EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName); + EnsureMatchingTypePrefix(fieldDesc, prefixes); + + const auto nOnDiskSubfields = fieldDesc.GetLinkIds().size(); + if (nOnDiskSubfields != 2) { + throw ROOT::RException( + R__FAIL("invalid number of on-disk subfields for std::pair " + std::to_string(nOnDiskSubfields))); + } +} + //------------------------------------------------------------------------------ ROOT::RProxiedCollectionField::RCollectionIterableOnce::RIteratorFuncs @@ -1151,6 +1166,22 @@ ROOT::RTupleField::RTupleField(std::string_view fieldName, std::vector prefixes = {"std::pair<", "std::tuple<"}; + + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); + EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName); + EnsureMatchingTypePrefix(fieldDesc, prefixes); + + const auto nOnDiskSubfields = fieldDesc.GetLinkIds().size(); + const auto nSubfields = fSubfields.size(); + if (nOnDiskSubfields != nSubfields) { + throw ROOT::RException(R__FAIL("invalid number of on-disk subfields for std::tuple " + + std::to_string(nOnDiskSubfields) + " vs. " + std::to_string(nSubfields))); + } +} + //------------------------------------------------------------------------------ namespace { diff --git a/tree/ntuple/test/ntuple_evolution_type.cxx b/tree/ntuple/test/ntuple_evolution_type.cxx index 5ccfc828baad7..950672f254225 100644 --- a/tree/ntuple/test/ntuple_evolution_type.cxx +++ b/tree/ntuple/test/ntuple_evolution_type.cxx @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -160,3 +161,49 @@ TEST(RNTupleEvolution, CheckVariant) EXPECT_EQ('O', std::get>(v(1))[2]); EXPECT_EQ('T', std::get>(v(1))[3]); } + +TEST(RNTupleEvolution, CheckPairTuple) +{ + FileRaii fileGuard("test_ntuple_evolution_check_pair_tuple.root"); + { + auto model = ROOT::RNTupleModel::Create(); + auto p = model->MakeField>("p"); + auto t2 = model->MakeField>("t2"); + model->MakeField>("t3"); + auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + + *p = {1, 2.0}; + *t2 = {3, 4.0}; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath()); + + try { + reader->GetView>("p"); + FAIL() << "non-matching number of subfields for pair/tuple should throw"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid number of on-disk subfields")); + } + + try { + reader->GetView>("p"); + FAIL() << "non-matching number of subfields for pair/tuple should throw"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid number of on-disk subfields")); + } + + try { + reader->GetView>("t3"); + FAIL() << "non-matching number of subfields for pair/tuple should throw"; + } catch (const ROOT::RException &err) { + EXPECT_THAT(err.what(), testing::HasSubstr("invalid number of on-disk subfields")); + } + + auto t2 = reader->GetView>("t2"); + auto p = reader->GetView>("p"); + EXPECT_EQ(3, t2(0).first); + EXPECT_DOUBLE_EQ(4.0, t2(0).second); + EXPECT_EQ(1, std::get<0>(p(0))); + EXPECT_DOUBLE_EQ(2.0, std::get<1>(p(0))); +} From c3edba537b420d24e3dc0923bc0057e02d4abdaa Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 9 Sep 2025 11:37:51 +0200 Subject: [PATCH 2/5] [ntuple] defriend RClassField from RFieldBase The privileged access of RClassField to RFieldBase is used to modify some properties of its subfields. Make this more targeted through protected methods. One of the methods, CallSetArtificialOn(), will be reused by the record field. --- tree/ntuple/inc/ROOT/RFieldBase.hxx | 6 +++++- tree/ntuple/src/RFieldMeta.cxx | 8 ++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tree/ntuple/inc/ROOT/RFieldBase.hxx b/tree/ntuple/inc/ROOT/RFieldBase.hxx index 73828c58c5ad8..725d9163dadf9 100644 --- a/tree/ntuple/inc/ROOT/RFieldBase.hxx +++ b/tree/ntuple/inc/ROOT/RFieldBase.hxx @@ -85,7 +85,6 @@ This is and can only be partially enforced through C++. */ // clang-format on class RFieldBase { - friend class ROOT::RClassField; // to mark members as artificial friend class ROOT::Experimental::Detail::RRawPtrWriteEntry; // to call Append() friend struct ROOT::Internal::RFieldCallbackInjector; // used for unit tests friend struct ROOT::Internal::RFieldRepresentationModifier; // used for unit tests @@ -421,6 +420,11 @@ protected: static void CallConstructValueOn(const RFieldBase &other, void *where) { other.ConstructValue(where); } static std::unique_ptr GetDeleterOf(const RFieldBase &other) { return other.GetDeleter(); } + /// Allow parents to mark their childs as artificial fields (used in class and record fields) + static void CallSetArtificialOn(RFieldBase &other) { other.SetArtificial(); } + /// Allow class fields to adjust the type alias of their members + static void SetTypeAliasOf(RFieldBase &other, const std::string &alias) { other.fTypeAlias = alias; } + /// Operations on values of complex types, e.g. ones that involve multiple columns or for which no direct /// column type exists. virtual std::size_t AppendImpl(const void *from); diff --git a/tree/ntuple/src/RFieldMeta.cxx b/tree/ntuple/src/RFieldMeta.cxx index cbb9b3cb47785..4f69b5a588255 100644 --- a/tree/ntuple/src/RFieldMeta.cxx +++ b/tree/ntuple/src/RFieldMeta.cxx @@ -174,9 +174,9 @@ ROOT::RClassField::RClassField(std::string_view fieldName, TClass *classp) const auto normTypeName = ROOT::Internal::GetNormalizedUnresolvedTypeName(origTypeName); if (normTypeName == subField->GetTypeName()) { - subField->fTypeAlias = ""; + SetTypeAliasOf(*subField, ""); } else { - subField->fTypeAlias = normTypeName; + SetTypeAliasOf(*subField, normTypeName); } fTraits &= subField->GetTraits(); @@ -190,7 +190,7 @@ ROOT::RClassField::~RClassField() if (fStagingArea) { for (const auto &[_, si] : fStagingItems) { if (!(si.fField->GetTraits() & kTraitTriviallyDestructible)) { - auto deleter = si.fField->GetDeleter(); + auto deleter = GetDeleterOf(*si.fField); deleter->operator()(fStagingArea.get() + si.fOffset, true /* dtorOnly */); } } @@ -467,7 +467,7 @@ void ROOT::RClassField::BeforeConnectPageSource(ROOT::Internal::RPageSource &pag // Iterate over all sub fields in memory and mark those as missing that are not in the descriptor. for (auto &field : fSubfields) { if (regularSubfields.count(field->GetFieldName()) == 0) { - field->SetArtificial(); + CallSetArtificialOn(*field); } } } From cfddaa80e48d7daaf9d8afbeb58d388fc139bef1 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Tue, 9 Sep 2025 11:40:25 +0200 Subject: [PATCH 3/5] [ntuple] fix automatic schema evolution of record fields Ensure that record field evolution supports missing fields, reordered fields, and added fields. Mark added fields as artificial (which was missing before). --- tree/ntuple/src/RField.cxx | 17 +++++++- tree/ntuple/test/ntuple_evolution_shape.cxx | 45 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/tree/ntuple/src/RField.cxx b/tree/ntuple/src/RField.cxx index 5a30d191ad798..a48aa4bbef4ca 100644 --- a/tree/ntuple/src/RField.cxx +++ b/tree/ntuple/src/RField.cxx @@ -27,7 +27,9 @@ #include #include #include +#include #include +#include std::unique_ptr ROOT::RFieldZero::CloneImpl(std::string_view /*newName*/) const { @@ -605,7 +607,20 @@ void ROOT::RRecordField::ReadInClusterImpl(RNTupleLocalIndex localIndex, void *t void ROOT::RRecordField::ReconcileOnDiskField(const RNTupleDescriptor &desc) { - EnsureMatchingOnDiskField(desc.GetFieldDescriptor(GetOnDiskId()), kDiffTypeName | kDiffTypeVersion); + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); + EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName | kDiffTypeVersion); + + // The on-disk ID of subfields is matched by field name. So we inherently support reordering of fields + // and we will ignore extra on-disk fields. + // It remains to mark the extra in-memory fields as artificial. + std::unordered_set onDiskSubfields; + for (const auto &subField : desc.GetFieldIterable(fieldDesc)) { + onDiskSubfields.insert(subField.GetFieldName()); + } + for (auto &f : fSubfields) { + if (onDiskSubfields.count(f->GetFieldName()) == 0) + CallSetArtificialOn(*f); + } } void ROOT::RRecordField::ConstructValue(void *where) const diff --git a/tree/ntuple/test/ntuple_evolution_shape.cxx b/tree/ntuple/test/ntuple_evolution_shape.cxx index ef926875ea8aa..5b81f0ccd111b 100644 --- a/tree/ntuple/test/ntuple_evolution_shape.cxx +++ b/tree/ntuple/test/ntuple_evolution_shape.cxx @@ -1012,3 +1012,48 @@ struct StreamerField { EXPECT_EVALUATE_EQ("ptrStreamerField->fInt", 2); EXPECT_EVALUATE_EQ("ptrStreamerField->fAdded", 137); } + +TEST(RNTupleEvolution, RecordField) +{ + FileRaii fileGuard("test_ntuple_evolution_record_field.root"); + { + std::vector> items; + items.emplace_back(std::make_unique>("f1")); + items.emplace_back(std::make_unique>("f2")); + items.emplace_back(std::make_unique>("f3")); + + auto model = ROOT::RNTupleModel::Create(); + model->AddField(std::make_unique("r", std::move(items))); + + auto writer = ROOT::RNTupleWriter::Recreate(std::move(model), "ntpl", fileGuard.GetPath()); + + auto r = writer->GetModel().GetDefaultEntry().GetPtr("r"); + const auto &rf = static_cast(writer->GetModel().GetConstField("r")); + + auto f1 = reinterpret_cast(reinterpret_cast(r.get()) + rf.GetOffsets()[0]); + auto f3 = reinterpret_cast(reinterpret_cast(r.get()) + rf.GetOffsets()[2]); + *f1 = 'x'; + *f3 = "ROOT"; + + writer->Fill(); + } + + std::vector> items; + items.emplace_back(std::make_unique>("f3")); + items.emplace_back(std::make_unique>>("f4")); + items.emplace_back(std::make_unique>("f1")); + + auto model = ROOT::RNTupleModel::Create(); + model->AddField(std::make_unique("r", std::move(items))); + + auto reader = RNTupleReader::Open(std::move(model), "ntpl", fileGuard.GetPath()); + auto r = reader->GetModel().GetDefaultEntry().GetPtr("r"); + const auto &rf = static_cast(reader->GetModel().GetConstField("r")); + auto f3 = reinterpret_cast(reinterpret_cast(r.get()) + rf.GetOffsets()[0]); + auto f4 = reinterpret_cast *>(reinterpret_cast(r.get()) + rf.GetOffsets()[1]); + auto f1 = reinterpret_cast(reinterpret_cast(r.get()) + rf.GetOffsets()[2]); + reader->LoadEntry(0); + EXPECT_EQ("ROOT", *f3); + EXPECT_TRUE(f4->empty()); + EXPECT_EQ('x', *f1); +} From cee97d02b2adde1e1b9c407ccdc7d0e7a56f5265 Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Mon, 15 Sep 2025 23:19:25 +0200 Subject: [PATCH 4/5] [ntuple] fix cloning of pair and tuple fields Prevent pair and tuple fields from being demoted to record fields by adding their own CloneImpl() overrides. --- tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx | 6 +++++- tree/ntuple/src/RFieldMeta.cxx | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx b/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx index a0fc3a0986af9..db63c2d29413e 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx @@ -73,7 +73,7 @@ protected: std::size_t GetItemPadding(std::size_t baseOffset, std::size_t itemAlignment) const; - std::unique_ptr CloneImpl(std::string_view newName) const final; + std::unique_ptr CloneImpl(std::string_view newName) const override; void ConstructValue(void *where) const final; std::unique_ptr GetDeleter() const final; @@ -136,6 +136,8 @@ protected: RPairField(std::string_view fieldName, std::array, 2> itemFields, const std::array &offsets); + std::unique_ptr CloneImpl(std::string_view newName) const final; + void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; public: @@ -188,6 +190,8 @@ protected: RTupleField(std::string_view fieldName, std::vector> itemFields, const std::vector &offsets); + std::unique_ptr CloneImpl(std::string_view newName) const final; + void ReconcileOnDiskField(const RNTupleDescriptor &desc) final; public: diff --git a/tree/ntuple/src/RFieldMeta.cxx b/tree/ntuple/src/RFieldMeta.cxx index 4f69b5a588255..3d3c7453579e6 100644 --- a/tree/ntuple/src/RFieldMeta.cxx +++ b/tree/ntuple/src/RFieldMeta.cxx @@ -624,6 +624,14 @@ ROOT::RPairField::RPairField(std::string_view fieldName, std::arrayGetThisOffset()); } +std::unique_ptr ROOT::RPairField::CloneImpl(std::string_view newName) const +{ + std::array offsets = {fOffsets[0], fOffsets[1]}; + std::array, 2> itemClones = {fSubfields[0]->Clone(fSubfields[0]->GetFieldName()), + fSubfields[1]->Clone(fSubfields[1]->GetFieldName())}; + return std::unique_ptr(new RPairField(newName, std::move(itemClones), offsets)); +} + void ROOT::RPairField::ReconcileOnDiskField(const RNTupleDescriptor &desc) { static const std::vector prefixes = {"std::pair<", "std::tuple<"}; @@ -1166,6 +1174,16 @@ ROOT::RTupleField::RTupleField(std::string_view fieldName, std::vector ROOT::RTupleField::CloneImpl(std::string_view newName) const +{ + std::vector> itemClones; + itemClones.reserve(fSubfields.size()); + for (const auto &f : fSubfields) { + itemClones.emplace_back(f->Clone(f->GetFieldName())); + } + return std::unique_ptr(new RTupleField(newName, std::move(itemClones), fOffsets)); +} + void ROOT::RTupleField::ReconcileOnDiskField(const RNTupleDescriptor &desc) { static const std::vector prefixes = {"std::pair<", "std::tuple<"}; From 42e5bbcc52c8c3329d1c33d92b8d2b5c0be2722b Mon Sep 17 00:00:00 2001 From: Jakob Blomer Date: Mon, 15 Sep 2025 15:01:58 +0200 Subject: [PATCH 5/5] [ntuple] clarify/improve record field reconciliation Record fields are used for 1) untyped records 2) emulated classes and as 3) base class for the pair and tuple fields. These three cases use different constructors and have different reconciliation logic wrt. to the on-disk information. Make this more clear through code comments. Shortcut reconciliation for emulated fields. --- tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx | 5 +++++ tree/ntuple/src/RField.cxx | 7 +++++++ 2 files changed, 12 insertions(+) diff --git a/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx b/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx index db63c2d29413e..921d724c2cfc9 100644 --- a/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx +++ b/tree/ntuple/inc/ROOT/RField/RFieldRecord.hxx @@ -63,6 +63,7 @@ class RRecordField : public RFieldBase { /// If `emulatedFromType` is non-empty, this field was created as a replacement for a ClassField that we lack a /// dictionary for and reconstructed from the on-disk information. + /// Used by the public constructor and by Internal::CreateEmulatedRecordField(). RRecordField(std::string_view fieldName, std::vector> itemFields, std::string_view emulatedFromType); @@ -82,6 +83,8 @@ protected: void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final; void ReadInClusterImpl(RNTupleLocalIndex localIndex, void *to) final; + /// Used by RPairField and RTupleField descendants. These descendants have their own logic to attach the subfields + /// that ensure that the resulting memory layout matches std::pair or std::tuple, resp. RRecordField(std::string_view fieldName, std::string_view typeName); void AttachItemFields(std::vector> itemFields); @@ -106,6 +109,8 @@ protected: public: /// Construct a RRecordField based on a vector of child fields. The ownership of the child fields is transferred /// to the RRecordField instance. + /// The resulting field uses a memory layout for its values as if there was a struct consisting of the passed + /// item fields. RRecordField(std::string_view fieldName, std::vector> itemFields); RRecordField(RRecordField &&other) = default; RRecordField &operator=(RRecordField &&other) = default; diff --git a/tree/ntuple/src/RField.cxx b/tree/ntuple/src/RField.cxx index a48aa4bbef4ca..5cfb0641844c0 100644 --- a/tree/ntuple/src/RField.cxx +++ b/tree/ntuple/src/RField.cxx @@ -607,6 +607,13 @@ void ROOT::RRecordField::ReadInClusterImpl(RNTupleLocalIndex localIndex, void *t void ROOT::RRecordField::ReconcileOnDiskField(const RNTupleDescriptor &desc) { + if (fTraits & kTraitEmulatedField) { + // The field has been explicitly constructed following the on-disk information. No further reconcilation needed. + return; + } + // Note that the RPairField and RTupleField descendants have their own reconcilation logic + R__ASSERT(GetTypeName().empty()); + const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId()); EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName | kDiffTypeVersion);