Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions include/podio/DatamodelRegistry.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef PODIO_DATAMODELREGISTRY_H
#define PODIO_DATAMODELREGISTRY_H

#include "podio/SchemaEvolution.h"
#include "podio/podioVersion.h"

#include <optional>
Expand Down Expand Up @@ -100,8 +101,25 @@ class DatamodelRegistry {
/// @returns The name of the datamodel
const std::string& getDatamodelName(size_t index) const;

/// Get the version of the datamodel with the given name
///
/// @note This will return the version of the datamodel library **not the
/// schema version of the datamodel**. Use getSchemaVersion to obtain the
/// schema version.
///
/// @param name The name of the datamodel
///
/// @returns The version of the datamodel if the datamodel is known to the
/// registry or an empty optional otherwise
std::optional<podio::version::Version> getDatamodelVersion(const std::string& name) const;

/// Get the schema version of the datamodel with the given name
///
/// @note The schema version is extracted from the datamodel definition when
/// the datamodel is registered. If no schema version can be extracted from
/// the definition, the stored value is 0.
///
/// @param name The name of the datamodel
///
/// @returns The schema version of the datamodel if the datamodel is known to the
/// registry or an empty optional otherwise
std::optional<podio::SchemaVersionT> getSchemaVersion(const std::string& name) const;

/// Register a datamodel and return its index in the registry.
///
/// This is the hook that is called during dynamic loading of an EDM to
Expand Down Expand Up @@ -133,6 +151,8 @@ class DatamodelRegistry {
std::unordered_map<std::string_view, RelationNames> m_relations{};

std::unordered_map<std::string, podio::version::Version> m_datamodelVersions{};

/// The schema versions of the registered datamodels, keyed by datamodel name
std::unordered_map<std::string, podio::SchemaVersionT> m_schemaVersions{};
};
} // namespace podio

Expand Down
4 changes: 2 additions & 2 deletions include/podio/ROOTReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ class ROOTReader {
const std::vector<std::string>& collsToRead);

/// Get / read the buffers at index iColl in the passed category information
podio::CollectionReadBuffers getCollectionBuffers(CategoryInfo& catInfo, size_t iColl, bool reloadBranches,
unsigned int localEntry);
std::optional<podio::CollectionReadBuffers> getCollectionBuffers(CategoryInfo& catInfo, size_t iColl,
bool reloadBranches, unsigned int localEntry);

std::unique_ptr<TChain> m_metaChain{nullptr}; ///< The metadata tree
std::unordered_map<std::string, CategoryInfo> m_categories{}; ///< All categories
Expand Down
15 changes: 15 additions & 0 deletions include/podio/utilities/DatamodelRegistryIOHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,18 @@

#include <set>
#include <string>
#include <string_view>
#include <tuple>
#include <vector>

namespace podio {
namespace detail {
/// Extract schema version from a JSON datamodel definition
///
/// The definition is not parsed as JSON. Instead it is searched textually for
/// the "schema_version" key and the digits that follow it.
///
/// @param definition The JSON definition string
/// @returns The schema version found in the definition, or 0 if not found
podio::SchemaVersionT extractSchemaVersion(const std::string_view definition);
} // namespace detail

/// Helper class to collect the datamodel (JSON) definitions that should be
/// written.
Expand Down Expand Up @@ -63,6 +71,13 @@ class DatamodelDefinitionHolder {

std::optional<podio::version::Version> getDatamodelVersion(const std::string& name) const;

/// Get the schema version for the given datamodel name by extracting it from
/// the stored datamodel definition.
///
/// @note std::nullopt is only returned when getDatamodelDefinition yields the
/// empty JSON object `{}` for the name. A definition from which no schema
/// version can be extracted results in an optional holding 0.
///
/// @param name The name of the datamodel
/// @returns The schema version if the datamodel is available, or std::nullopt otherwise
std::optional<podio::SchemaVersionT> getSchemaVersion(const std::string& name) const;

protected:
MapType m_availEDMDefs{};
VersionList m_edmVersions{};
Expand Down
9 changes: 9 additions & 0 deletions src/DatamodelRegistry.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "podio/DatamodelRegistry.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"

#include <algorithm>
#include <iostream>
Expand All @@ -21,6 +22,7 @@ size_t DatamodelRegistry::registerDatamodel(std::string name, std::string_view d

if (it == m_definitions.cend()) {
int index = m_definitions.size();
m_schemaVersions.emplace(name, detail::extractSchemaVersion(definition));
m_definitions.emplace_back(std::move(name), definition);

for (const auto& [typeName, relations, vectorMembers] : relationNames) {
Expand Down Expand Up @@ -104,4 +106,11 @@ std::optional<podio::version::Version> DatamodelRegistry::getDatamodelVersion(co
return std::nullopt;
}

/// Look up the schema version that has been stored for the datamodel with the
/// given name. Names that are not known to the registry yield an empty
/// optional.
std::optional<podio::SchemaVersionT> DatamodelRegistry::getSchemaVersion(const std::string& name) const {
  const auto entry = m_schemaVersions.find(name);
  if (entry == m_schemaVersions.end()) {
    return std::nullopt;
  }
  return entry->second;
}

} // namespace podio
40 changes: 40 additions & 0 deletions src/DatamodelRegistryIOHelpers.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,40 @@
#include "podio/utilities/DatamodelRegistryIOHelpers.h"

#include <algorithm>
#include <charconv>
#include <iterator>

namespace podio {
namespace detail {
/// Extract the schema version from a JSON datamodel definition without doing a
/// full JSON parse.
///
/// The definition is searched for the quoted key "schema_version". The first
/// occurrence that is followed (after optional whitespace) by a colon is taken
/// to be the key, and the run of decimal digits after the colon (again after
/// optional whitespace) is converted to the schema version. Occurrences that
/// are not followed by a colon (e.g. the text appearing as a string value) are
/// skipped.
///
/// @param definition The JSON definition string
/// @returns The schema version found in the definition, or 0 if the key is not
///          present, is not followed by digits, or the digits do not fit into
///          podio::SchemaVersionT
podio::SchemaVersionT extractSchemaVersion(const std::string_view definition) {
  constexpr std::string_view schemaVersionKey = "\"schema_version\"";

  // Plain ASCII classification on purpose: passing a (possibly negative) char
  // to std::isspace / std::isdigit is undefined behavior and the result would
  // additionally depend on the current C locale
  const auto isSpace = [](const char c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f';
  };
  const auto isDigit = [](const char c) { return c >= '0' && c <= '9'; };
  const auto skipSpaces = [&](std::string_view::size_type pos) {
    while (pos < definition.size() && isSpace(definition[pos])) {
      ++pos;
    }
    return pos;
  };

  for (auto keyPos = definition.find(schemaVersionKey); keyPos != std::string_view::npos;
       keyPos = definition.find(schemaVersionKey, keyPos + 1)) {
    const auto colonPos = skipSpaces(keyPos + schemaVersionKey.size());
    if (colonPos >= definition.size() || definition[colonPos] != ':') {
      // Not used as a key here, keep looking
      continue;
    }

    // Delimit the digits that make up the value
    const auto start = skipSpaces(colonPos + 1);
    auto end = start;
    while (end < definition.size() && isDigit(definition[end])) {
      ++end;
    }

    // std::from_chars reports an empty range as well as out-of-range values
    // via the error code, in which case we fall through to the default
    podio::SchemaVersionT schemaVersion = 0;
    const auto [ptr, ec] = std::from_chars(definition.data() + start, definition.data() + end, schemaVersion);
    if (ec == std::errc{}) {
      return schemaVersion;
    }
    break;
  }

  // Return 0 if no valid schema version found
  return 0;
}
} // namespace detail

void DatamodelDefinitionCollector::registerDatamodelDefinition(const podio::CollectionBase* coll,
const std::string& name) {
Expand Down Expand Up @@ -57,4 +89,12 @@ std::optional<podio::version::Version> DatamodelDefinitionHolder::getDatamodelVe
return std::nullopt;
}

/// Get the schema version of the datamodel with the given name by extracting
/// it from the definition that getDatamodelDefinition yields for that name.
///
/// @param name The name of the datamodel
/// @returns std::nullopt if the definition is the empty JSON object `{}`,
///          otherwise the result of detail::extractSchemaVersion (which is 0
///          if the definition carries no extractable schema version)
std::optional<podio::SchemaVersionT> DatamodelDefinitionHolder::getSchemaVersion(const std::string& name) const {
  // Bind by const reference: this avoids copying the (potentially large) JSON
  // definition if getDatamodelDefinition hands out a reference, and is still
  // valid through lifetime extension if it returns by value
  const auto& definition = getDatamodelDefinition(name);
  if (definition == "{}") {
    return std::nullopt;
  }
  return detail::extractSchemaVersion(definition);
}

} // namespace podio
10 changes: 8 additions & 2 deletions src/RNTupleReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "podio/DatamodelRegistry.h"
#include "podio/GenericParameters.h"
#include "podio/utilities/RootHelpers.h"

#include "ioUtils.h"
#include "rootUtils.h"

#include <ROOT/RError.hxx>
Expand Down Expand Up @@ -94,6 +96,10 @@ void RNTupleReader::openFiles(const std::vector<std::string>& filenames) {
}
m_datamodelHolder = DatamodelDefinitionHolder(std::move(edm), std::move(edmVersions));

for (const auto& warning : io_utils::checkEDMVersionsReadable(m_datamodelHolder)) {
std::cerr << "WARNING: " << warning << std::endl;
}

auto availableCategoriesField = m_metadata->GetView<std::vector<std::string>>(root_utils::availableCategories);
m_availableCategories = availableCategoriesField(0);
}
Expand Down Expand Up @@ -179,13 +185,13 @@ std::unique_ptr<ROOTFrameData> RNTupleReader::readEntry(const std::string& categ
const auto& collType = coll.dataType;
const auto& bufferFactory = podio::CollectionBufferFactory::instance();
const auto maybeBuffers = bufferFactory.createBuffers(collType, coll.schemaVersion, coll.isSubset);
const auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{});

if (!maybeBuffers) {
std::cout << "WARNING: Buffers couldn't be created for collection " << coll.name << " of type " << coll.dataType
std::cerr << "WARNING: Buffers couldn't be created for collection " << coll.name << " of type " << coll.dataType
<< " and schema version " << coll.schemaVersion << std::endl;
return nullptr;
}
const auto& collBuffers = maybeBuffers.value();

if (coll.isSubset) {
const auto brName = root_utils::subsetBranch(coll.name);
Expand Down
25 changes: 20 additions & 5 deletions src/ROOTReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "podio/GenericParameters.h"
#include "podio/podioVersion.h"
#include "podio/utilities/RootHelpers.h"

#include "ioUtils.h"
#include "rootUtils.h"

// ROOT specific includes
Expand Down Expand Up @@ -131,7 +133,11 @@ std::unique_ptr<ROOTFrameData> ROOTReader::readEntry(ROOTReader::CategoryInfo& c
if (!collsToRead.empty() && std::ranges::find(collsToRead, catInfo.storedClasses[i].name) == collsToRead.end()) {
continue;
}
buffers.emplace(catInfo.storedClasses[i].name, getCollectionBuffers(catInfo, i, reloadBranches, localEntry));
auto collBuffers = getCollectionBuffers(catInfo, i, reloadBranches, localEntry);
if (!collBuffers) {
return nullptr;
}
buffers.emplace(catInfo.storedClasses[i].name, collBuffers.value());
}

auto parameters = readEntryParameters(catInfo, reloadBranches, localEntry);
Expand All @@ -140,17 +146,22 @@ std::unique_ptr<ROOTFrameData> ROOTReader::readEntry(ROOTReader::CategoryInfo& c
return std::make_unique<ROOTFrameData>(std::move(buffers), catInfo.table, std::move(parameters));
}

podio::CollectionReadBuffers ROOTReader::getCollectionBuffers(ROOTReader::CategoryInfo& catInfo, size_t iColl,
bool reloadBranches, unsigned int localEntry) {
std::optional<podio::CollectionReadBuffers> ROOTReader::getCollectionBuffers(ROOTReader::CategoryInfo& catInfo,
size_t iColl, bool reloadBranches,
unsigned int localEntry) {
const auto& name = catInfo.storedClasses[iColl].name;
const auto& [collType, isSubsetColl, schemaVersion, index] = catInfo.storedClasses[iColl].info;
auto& branches = catInfo.branches[index];

const auto& bufferFactory = podio::CollectionBufferFactory::instance();
auto maybeBuffers = bufferFactory.createBuffers(collType, schemaVersion, isSubsetColl);

// TODO: Error handling of empty optional
auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{});
if (!maybeBuffers) {
std::cerr << "WARNING: Buffers couldn't be created for collection " << name << " of type " << collType
<< " and schema version " << schemaVersion << std::endl;
return std::nullopt;
}
auto collBuffers = maybeBuffers.value();

if (reloadBranches) {
root_utils::resetBranches(catInfo.chain.get(), branches, name);
Expand Down Expand Up @@ -315,6 +326,10 @@ void ROOTReader::openFiles(const std::vector<std::string>& filenames) {
}

m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs), std::move(edmVersions));

for (const auto& warning : io_utils::checkEDMVersionsReadable(m_datamodelHolder)) {
std::cerr << "WARNING: " << warning << std::endl;
}
}

// Do some work up front for setting up categories and setup all the chains
Expand Down
5 changes: 5 additions & 0 deletions src/SIOReader.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "podio/SIOReader.h"

#include "ioUtils.h"
#include "sioUtils.h"

#include <sio/api.h>
Expand Down Expand Up @@ -129,6 +130,10 @@ void SIOReader::readEDMDefinitions() {
auto datamodelDefs = static_cast<SIOMapBlock<std::string, std::string>*>(blocks[0].get());
auto edmVersions = static_cast<SIOMapBlock<std::string, podio::version::Version>*>(blocks[1].get());
m_datamodelHolder = DatamodelDefinitionHolder(std::move(datamodelDefs->mapData), std::move(edmVersions->mapData));

for (const auto& warning : io_utils::checkEDMVersionsReadable(m_datamodelHolder)) {
std::cerr << "WARNING: " << warning << std::endl;
}
}

} // namespace podio
42 changes: 42 additions & 0 deletions src/ioUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef PODIO_IO_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy
#define PODIO_IO_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy

#include "podio/DatamodelRegistry.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"

#include <string>
#include <vector>

namespace podio::io_utils {

/// Check and verify that all EDM (versions) that have been read from file are
/// readable
///
/// Effectively this boils down to making sure that the EDM schema versions on
/// file are not newer than the ones that have been loaded dynamically.
///
/// @param fileEdms The datamodel definitions that have been read from file
///
/// @returns All warnings that have been found (one message per affected EDM),
///          or an empty vector if nothing suspicious has been found
inline std::vector<std::string> checkEDMVersionsReadable(const podio::DatamodelDefinitionHolder& fileEdms) {
  const auto& registry = podio::DatamodelRegistry::instance();

  std::vector<std::string> warnings{};
  for (const auto& edmName : fileEdms.getAvailableDatamodels()) {
    // We do not expect an empty optional for a name that the holder itself
    // reports as available. However, this is a purely diagnostic check, so a
    // violated expectation is reported as a warning instead of throwing
    // std::bad_optional_access while opening a file.
    const auto fileSchemaVersion = fileEdms.getSchemaVersion(edmName);
    if (!fileSchemaVersion) {
      warnings.emplace_back("EDM '" + edmName + "' exists in file but its schema version could not be determined");
      continue;
    }

    const auto envSchemaVersion = registry.getSchemaVersion(edmName);
    if (!envSchemaVersion) {
      // NOTE(review): This might emit warnings that are harmless. E.g. RNTuple
      // seems to be less eager to pull in EDM libraries that are present in
      // the file but not needed, so this check is potentially too early for
      // that backend - to be investigated.
      warnings.emplace_back("EDM '" + edmName +
                            "' exists in file but does not seem to be loaded from the environment by podio");
      continue;
    }

    if (*envSchemaVersion < *fileSchemaVersion) {
      warnings.emplace_back("EDM '" + edmName + "' exists in file with schema version " +
                            std::to_string(*fileSchemaVersion) + " but podio loaded schema version " +
                            std::to_string(*envSchemaVersion) + " from the environment");
    }
  }

  return warnings;
}
} // namespace podio::io_utils

#endif
46 changes: 46 additions & 0 deletions tests/schema_evolution/code_gen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,49 @@ endif()
# ADD_SCHEMA_EVOLUTION_TEST(datatypes_rename_member WITH_EVOLUTION RNTUPLE NO_GENERATE_MODELS)
# ADD_SCHEMA_EVOLUTION_TEST(components_rename_member WITH_EVOLUTION RNTUPLE NO_GENERATE_MODELS)
# endif()


# The following tests check that trying to read files that have a **newer**
# version of the datamodel than is available from the environment produces a
# warning. First we have to write the file with a new version. We will simply
# use one of the check files of the schema evolution checks above
# Build the writer executable from the components_new_member check source and
# link it against the components_new_member_newModel target
add_executable(write_file_newer_model components_new_member/check.cpp)
target_link_libraries(write_file_newer_model PRIVATE
components_new_member_newModel
podio::podioIO
)
target_include_directories(write_file_newer_model PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}
)
# NOTE(review): PODIO_SCHEMA_EVOLUTION_TEST_WRITE presumably selects the writing
# part of check.cpp and TEST_CASE presumably determines the output file name
# (the reading test opens write_file_newer_model.root) - confirm against
# check.cpp
target_compile_definitions(write_file_newer_model PRIVATE
PODIO_SCHEMA_EVOLUTION_TEST_WRITE
TEST_CASE="write_file_newer_model"
)
# Write a file with a newer version
add_test(NAME write_file_newer_model COMMAND write_file_newer_model)
# Put the new_model build directory on the ROOT_LIBRARY_PATH for writing
set_property(TEST write_file_newer_model
PROPERTY
ENVIRONMENT
ROOT_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}/components_new_member/new_model
LD_LIBRARY_PATH=${PROJECT_BINARY_DIR}/src:$<TARGET_FILE_DIR:ROOT::Tree>:$<$<TARGET_EXISTS:SIO::sio>:$<TARGET_FILE_DIR:SIO::sio>>:$ENV{LD_LIBRARY_PATH}

)
# Run in the components_new_member build directory and register this test as
# the setup step of the write_file_newer_model_setup fixture
set_tests_properties(write_file_newer_model
PROPERTIES
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/components_new_member
FIXTURES_SETUP write_file_newer_model_setup
)
# Try to read back this file with an older version
# podio-dump-tool is run with the old_model build directory on the
# ROOT_LIBRARY_PATH instead of the new_model one
add_test(NAME read_file_older_env_edm_version COMMAND podio-dump-tool write_file_newer_model.root)
set_property(TEST read_file_older_env_edm_version
PROPERTY
ENVIRONMENT
ROOT_LIBRARY_PATH=${CMAKE_CURRENT_BINARY_DIR}/components_new_member/old_model
LD_LIBRARY_PATH=${PROJECT_BINARY_DIR}/src:$<TARGET_FILE_DIR:ROOT::Tree>:$<$<TARGET_EXISTS:SIO::sio>:$<TARGET_FILE_DIR:SIO::sio>>:$ENV{LD_LIBRARY_PATH}
)
# The test requires the write_file_newer_model_setup fixture and only passes if
# the expected schema version mismatch warning is part of the test output
set_tests_properties(read_file_older_env_edm_version
PROPERTIES
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/components_new_member
FIXTURES_REQUIRED write_file_newer_model_setup
PASS_REGULAR_EXPRESSION "WARNING: EDM 'datamodel' exists in file with schema version 2 but podio loaded schema version 1 from the environment"
)
1 change: 1 addition & 0 deletions tools/src/podio-dump-tool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <fmt/ranges.h>

#include <algorithm>
#include <functional>
#include <iterator>
#include <numeric>
#include <ranges>
Expand Down
Loading