
Commit 0c9415b

Author: Olia
Time Profiler for Sofie
1 parent 8fc9ae7 commit 0c9415b

9 files changed: +253 -24 lines changed


tmva/sofie/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
     TMVA/OperatorList.hxx
     TMVA/RModel_Base.hxx
     TMVA/RModel.hxx
+    TMVA/RModelProfiler.hxx
     TMVA/ROperator.hxx
     TMVA/ROperator_BasicUnary.hxx
     TMVA/ROperator_BasicBinary.hxx
@@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
   SOURCES
     src/RModel_Base.cxx
     src/RModel.cxx
+    src/RModelProfiler.cxx
     src/RModel_GNN.cxx
     src/RModel_GraphIndependent.cxx
     src/RFunction.cxx

tmva/sofie/inc/TMVA/RModel.hxx

Lines changed: 6 additions & 2 deletions
@@ -11,12 +11,16 @@ namespace SOFIE {
 
 class RModel final : public RModel_Base {
 
+   friend class RModelProfiler;
+
 private:
    bool fIsInitialized = false;
    bool fIsSubGraph = false;
    int fVerbose = 0;
    int fBatchSize = -1;
    long fReadPos = 0; // reading file position
+   std::string fProfilerGC = "";
+   bool fProfile = false;
 
    OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;
 
@@ -148,8 +152,8 @@ public:
 
    void Initialize(int batchSize = -1, bool verbose = false);
    void Initialize(const std::map<std::string,size_t> & inputParams, bool verbose = false);
-
-   void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
+
+   void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
    void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
    {
       Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
tmva/sofie/inc/TMVA/RModelProfiler.hxx

Lines changed: 42 additions & 0 deletions

@@ -0,0 +1,42 @@
+#ifndef TMVA_SOFIE_RMODELPROFILER
+#define TMVA_SOFIE_RMODELPROFILER
+
+#include "TMVA/RModel.hxx"
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+/// \class RModelProfiler
+/// \brief A helper class to generate profiled inference code for an RModel.
+///
+/// This class instruments the generated C++ code to measure the execution
+/// time of each operator. It is invoked when RModel::Generate is called
+/// with the Options::kProfile flag.
+class RModelProfiler {
+private:
+   RModel &fModel;
+
+   void GenerateUtilityFunctions();
+
+public:
+   // The profiler must be constructed with a model to work on.
+   RModelProfiler() = delete;
+   RModelProfiler(RModel &model);
+   ~RModelProfiler() = default;
+
+   // There is no point in copying or moving an RModelProfiler.
+   RModelProfiler(const RModelProfiler &other) = delete;
+   RModelProfiler(RModelProfiler &&other) = delete;
+   RModelProfiler &operator=(const RModelProfiler &other) = delete;
+   RModelProfiler &operator=(RModelProfiler &&other) = delete;
+
+   // Main function to generate the profiled code.
+   void Generate();
+};
+
+} // namespace SOFIE
+} // namespace Experimental
+} // namespace TMVA
+
+#endif // TMVA_SOFIE_RMODELPROFILER
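
For orientation, a minimal usage sketch of the new flag (not part of this commit): the Options::kProfile overload of Generate comes from this diff, while the parser call, file names, and OutputGenerated step follow the usual SOFIE workflow and are assumptions here.

    #include "TMVA/RModelParser_ONNX.hxx"

    using namespace TMVA::Experimental::SOFIE;

    RModelParser_ONNX parser;
    RModel model = parser.Parse("model.onnx");     // hypothetical input file
    // kProfile makes Generate() delegate the inference-code generation
    // to RModelProfiler (see RModel.cxx below).
    model.Generate(Options::kProfile);
    model.OutputGenerated("model_profiled.hxx");   // hypothetical output header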

tmva/sofie/inc/TMVA/RModel_Base.hxx

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ enum class Options {
    kRootBinaryWeightFile = 0x4,
    kGNN = 0x8,
    kGNNComponent = 0x10,
+   kProfile = 0x20,
 };
 
 // Optimization levels inspired by ONNXRuntime.
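
Because Options is a bit-flag enum, kProfile can in principle be combined with other flags through the underlying-type overload of Generate shown in RModel.hxx above. A hedged sketch (the model object and the particular flag combination are assumptions; whether a given combination is meaningful depends on the surrounding generation logic):

    #include "TMVA/RModel.hxx"
    #include <type_traits>

    using TMVA::Experimental::SOFIE::Options;
    using Opt_t = std::underlying_type_t<Options>;

    // Combine profiling with a ROOT binary weight file at the underlying type.
    model.Generate(static_cast<Opt_t>(Options::kProfile) |
                   static_cast<Opt_t>(Options::kRootBinaryWeightFile));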

tmva/sofie/inc/TMVA/ROperator.hxx

Lines changed: 3 additions & 0 deletions
@@ -37,6 +37,9 @@ public:
    //virtual void Forward_blas() = 0;
    virtual ~ROperator(){}
 
+   std::string name = "UnnamedOperator";
+   const std::string &GetOperatorName() { return name; };
+
 protected:
 
    const std::string SP = "   "; ///< space used to correctly indent the generated C++ code

tmva/sofie/src/RModel.cxx

Lines changed: 29 additions & 20 deletions
@@ -9,6 +9,7 @@
 #endif
 
 #include "TMVA/RModel.hxx"
+#include "TMVA/RModelProfiler.hxx"
 #include "TMVA/SOFIE_common.hxx"
 
 namespace TMVA {
@@ -850,7 +851,7 @@ void RModel::GenerateSessionCode()
       CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx);
    }
 
-   // to check remaining unused fragments after memory allocation (lesser the better)
+   // to check remaining unused fragments after memory allocation (lesser the better)
    // for (const auto &it: fIntermediateMemoryInfo.available_stack){
    //    std::cout<<"chunk_idx: "<<it.first<<", chunk_size: "<<it.second<<"\n";
    // }
@@ -878,13 +879,13 @@
    // Generate code for Session constructor
    if (fUseSession) {
       std::string sessionName = "Session";
-      if (fIsSubGraph)
+      if (fIsSubGraph)
         sessionName += "_" + fName;
      // add here specific operator code that needs to define session data members
      fGC += "\n";
      for (size_t id = 0; id < fOperators.size(); id++) {
         std::string opName = std::to_string(id);
-         fGC += fOperators[id]->GenerateSessionMembersCode(opName);
+         fGC += fOperators[id]->GenerateSessionMembersCode(opName);
      }
      fGC += "\n";
      // here add initialization and reading of weight tensors
@@ -930,23 +931,28 @@ void RModel::GenerateSessionCode()
       fGC += "}\n\n";
    }
 
-   fGC += doInferSignature + "{\n";
-   fGC += "\n";
+   if (fProfile) {
+      RModelProfiler profiler(*this);
+      profiler.Generate();
+      fGC += fProfilerGC;
+   } else {
+      fGC += doInferSignature + "{\n";
+      fGC += "\n";
 
-   // generate the inference code
-   if (fVerbose)
-      std::cout << "Generating main inference code for " << fName << std::endl;
+      // generate the inference code
+      if (fVerbose)
+         std::cout << "Generating main inference code for " << fName << std::endl;
 
-   if (fOutputTensorNames.size() == 0)
-      throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
+      if (fOutputTensorNames.size() == 0)
+         throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
 
-   for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
-      if (fVerbose)
+      for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
+         if (fVerbose)
            std::cout << "Generating code for operator .... " << op_idx << std::endl;
-      fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
-   }
+         fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
+      }
 
-   fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
+      fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
 
    for (std::string const &name : fOutputTensorNames) {
      // need to check is size is the same (don't want to return a vector with
@@ -957,7 +963,8 @@ void RModel::GenerateSessionCode()
      fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
    }
 
-   fGC += "}\n\n";
+      fGC += "}\n\n";
+   }
 
    // generate the inference overload that returns an output struct
    GenerateOutput();
@@ -970,9 +977,11 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
 
 void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose)
 {
+   bool profile = (options & static_cast<std::underlying_type_t<Options>>(Options::kProfile));
    fVerbose = verbose;
    fBatchSize = batchSize;
    fReadPos = pos;
+   fProfile = profile;
 
    // session flag is used in operator initialize
    if (static_cast<std::underlying_type_t<Options>>(Options::kNoSession) & options) {
@@ -992,9 +1001,9 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
            "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class");
    }
 
-   if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
+   if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
      fIsGNN = true;
-   if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
+   if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
      fIsGNNComponent = true;
 
    // initialize the model including all operators and sub-graphs
@@ -1008,13 +1017,13 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
 
    // generate first code for the subgraphs
    for (auto &graph : fSubGraphs) {
-      if (fVerbose)
+      if (fVerbose)
         std::cout << "generate session code for subgraph " << graph->fName << std::endl;
      graph->GenerateSessionCode();
      fGC += graph->fGC;
    }
 
-   if (fVerbose)
+   if (fVerbose)
      std::cout << "generate Main session code - model " << fName << std::endl;
 
    // generate main session code

tmva/sofie/src/RModelProfiler.cxx

Lines changed: 161 additions & 0 deletions
@@ -0,0 +1,161 @@
+#include "TMVA/RModelProfiler.hxx"
+#include "TMVA/SOFIE_common.hxx"
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+// The constructor now just registers the necessary C++ libraries.
+RModelProfiler::RModelProfiler(RModel &model) : fModel(model)
+{
+   fModel.AddNeededStdLib("chrono");   // for timing operators
+   fModel.AddNeededStdLib("vector");   // for storing profiling results
+   fModel.AddNeededStdLib("string");   // for operator names
+   fModel.AddNeededStdLib("map");      // for the results map
+   fModel.AddNeededStdLib("iostream"); // for printing results
+   fModel.AddNeededStdLib("iomanip");  // for printing results
+}
+
+// This function generates the helper functions inside the Session struct.
+void RModelProfiler::GenerateUtilityFunctions()
+{
+   auto &gc = fModel.fProfilerGC;
+
+   // Generate PrintProfilingResults function
+   gc += "   void PrintProfilingResults() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         std::cout << \"No profiling results to display.\" << std::endl;\n";
+   gc += "         return;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      std::cout << \"\\n\" << std::string(50, '=') << std::endl;\n";
+   gc += "      std::cout << \"   AVERAGE PROFILING RESULTS\" << std::endl;\n";
+   gc += "      std::cout << std::string(50, '=') << std::endl;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         double sum = 0.0;\n";
+   gc += "         for (double time : op.second) {\n";
+   gc += "            sum += time;\n";
+   gc += "         }\n";
+   gc += "         double average = sum / op.second.size();\n";
+   gc += "         std::cout << \"   \" << std::left << std::setw(20) << op.first\n";
+   gc += "                   << \": \" << std::fixed << std::setprecision(6) << average << \" us\"\n";
+   gc += "                   << \" (over \" << op.second.size() << \" runs)\" << std::endl;\n";
+   gc += "      }\n";
+   gc += "      std::cout << std::string(50, '=') << \"\\n\" << std::endl;\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate ResetProfilingResults function
+   gc += "   void ResetProfilingResults() {\n";
+   gc += "      fProfilingResults.clear();\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate GetOpAvgTime function
+   gc += "   std::map<std::string, double> GetOpAvgTime() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         return {};\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      std::map<std::string, double> avg;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         double mean = 0.0;\n";
+   gc += "         for (double time : op.second) {\n";
+   gc += "            mean += time;\n";
+   gc += "         }\n";
+   gc += "         mean /= op.second.size();\n";
+   gc += "         avg[op.first] = mean;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      return avg;\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate GetOpVariance function
+   gc += "   std::map<std::string, double> GetOpVariance() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         return {};\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      std::map<std::string, double> variance;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         // Var[X] = E[X^2] - E[X]^2\n";
+   gc += "         double mean = 0.0, mean2 = 0.0;\n";
+   gc += "         for (double time : op.second) {\n";
+   gc += "            mean += time;\n";
+   gc += "            mean2 += time * time;\n";
+   gc += "         }\n";
+   gc += "         mean /= op.second.size();\n";
+   gc += "         mean2 /= op.second.size();\n";
+   gc += "         variance[op.first] = mean2 - mean * mean;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      return variance;\n";
+   gc += "   }\n";
+}
+
+// Main generation function for the profiler.
+void RModelProfiler::Generate()
+{
+   // Clear the profiler's code string to start fresh.
+   fModel.fProfilerGC.clear();
+   auto &gc = fModel.fProfilerGC;
+
+   // 1. Add the data member to the Session struct to store results.
+   gc += "public:\n";
+   gc += "   // Maps an operator name to a vector of its execution times (in microseconds).\n";
+   gc += "   std::map<std::string, std::vector<double>> fProfilingResults;\n\n";
+
+   // 2. Generate and add the utility functions like PrintProfilingResults.
+   GenerateUtilityFunctions();
+
+   // 3. Generate the signature for the profiled doInfer method.
+   std::string doInferSignature = fModel.GenerateInferSignature();
+   if (!doInferSignature.empty()) doInferSignature += ", ";
+   for (auto const &name : fModel.GetOutputTensorNames()) {
+      doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ",";
+   }
+   if (!fModel.GetOutputTensorNames().empty()) {
+      doInferSignature.back() = ' ';
+   }
+   gc += "void doInfer(" + doInferSignature + ") {\n";
+
+   // 4. Generate the body of the doInfer method with timing instrumentation.
+   gc += "   // Timer variable for profiling\n";
+   gc += "   std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n";
+   gc += "   tp_overall_start = std::chrono::steady_clock::now();\n\n";
+
+   for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) {
+      const auto& op = fModel.fOperators[op_idx];
+      gc += "   // -- Profiling for operator " + op->name + " --\n";
+      gc += "   tp_start = std::chrono::steady_clock::now();\n\n";
+
+      // Add the actual operator inference code
+      gc += op->Generate(std::to_string(op_idx));
+
+      // Add the code to stop the timer and store the result
+      gc += "\n   fProfilingResults[\"" + op->name + "\"].push_back(\n";
+      gc += "      std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
+      gc += "         std::chrono::steady_clock::now() - tp_start).count());\n\n";
+   }
+
+   // 5. Generate the code to fill the output tensors.
+   gc += "   using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
+   for (std::string const &name : fModel.GetOutputTensorNames()) {
+      bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0;
+      std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name)))
+                                     : ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name));
+      gc += "   FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
+   }
+
+   gc += "\n   // -- Record overall inference time --\n";
+   gc += "   fProfilingResults[\"Overall_Time\"].push_back(\n";
+   gc += "      std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
+   gc += "         std::chrono::steady_clock::now() - tp_overall_start).count());\n";
+
+   gc += "}\n\n"; // End of doInfer function
+}
+
+} // namespace SOFIE
+} // namespace Experimental
+} // namespace TMVA
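
An illustrative sketch of consuming the generated header: the Session namespace and name, the doInfer argument types, and the tensor sizes depend on the concrete model and are assumptions based on the code emitted above.

    #include "model_profiled.hxx"   // header produced with Options::kProfile (name assumed)
    #include <vector>

    int main()
    {
       TMVA_SOFIE_model::Session session;          // generated Session (namespace assumed)

       std::vector<float> input(16);               // input length assumed
       std::vector<float> output;

       for (int i = 0; i < 100; ++i)
          session.doInfer(input.data(), output);   // appends one timing per operator per call

       session.PrintProfilingResults();            // average per-operator time in microseconds
       auto avg = session.GetOpAvgTime();          // std::map<std::string, double>
       auto var = session.GetOpVariance();         // per-operator variance of the timings
       session.ResetProfilingResults();
       return 0;
    }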
