Skip to content
This repository was archived by the owner on Jan 7, 2023. It is now read-only.

Commit 4bcc47f

Browse files
committed
Merge pull request #216 from ndawe/master
[MRG] tree2array: enable tree caching and add cache_size argument
2 parents c0dbd3b + 74b8887 commit 4bcc47f

File tree

6 files changed

+2330
-2230
lines changed

6 files changed

+2330
-2230
lines changed

root_numpy/_tree.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,8 @@ def root2array(filenames,
101101
stop=None,
102102
step=None,
103103
include_weight=False,
104-
weight_name='weight'):
104+
weight_name='weight',
105+
cache_size=-1):
105106
"""Convert trees in ROOT files into a numpy structured array.
106107
107108
Refer to the type conversion table :ref:`here <conversion_table>`.
@@ -130,6 +131,10 @@ def root2array(filenames,
130131
Include a column containing the tree weight.
131132
weight_name : str, optional (default='weight')
132133
The field name for the weight column if ``include_weight=True``.
134+
cache_size : int, optional (default=-1)
135+
Set the size (in bytes) of the TTreeCache used while reading a TTree. A
136+
value of -1 uses ROOT's default cache size. A value of 0 disables the
137+
cache.
133138
134139
Examples
135140
--------
@@ -197,12 +202,13 @@ def root2array(filenames,
197202
else:
198203
flatten = False
199204

200-
arr = _librootnumpy.root2array_fromFname(
205+
arr = _librootnumpy.root2array_fromfile(
201206
filenames, treename, branches,
202207
selection,
203208
start, stop, step,
204209
include_weight,
205-
weight_name)
210+
weight_name,
211+
cache_size)
206212

207213
if flatten:
208214
# select single column
@@ -218,7 +224,8 @@ def root2rec(filenames,
218224
stop=None,
219225
step=None,
220226
include_weight=False,
221-
weight_name='weight'):
227+
weight_name='weight',
228+
cache_size=-1):
222229
"""View the result of :func:`root2array` as a record array.
223230
224231
Notes
@@ -236,7 +243,8 @@ def root2rec(filenames,
236243
branches, selection,
237244
start, stop, step,
238245
include_weight,
239-
weight_name).view(np.recarray)
246+
weight_name,
247+
cache_size).view(np.recarray)
240248

241249

242250
def tree2array(tree,
@@ -246,7 +254,8 @@ def tree2array(tree,
246254
stop=None,
247255
step=None,
248256
include_weight=False,
249-
weight_name='weight'):
257+
weight_name='weight',
258+
cache_size=-1):
250259
"""Convert a tree into a numpy structured array.
251260
252261
Refer to the type conversion table :ref:`here <conversion_table>`.
@@ -271,6 +280,10 @@ def tree2array(tree,
271280
Include a column containing the tree weight.
272281
weight_name : str, optional (default='weight')
273282
The field name for the weight column if ``include_weight=True``.
283+
cache_size : int, optional (default=-1)
284+
Set the size (in bytes) of the TTreeCache used while reading a TTree. A
285+
value of -1 uses ROOT's default cache size. A value of 0 disables the
286+
cache.
274287
275288
See Also
276289
--------
@@ -289,11 +302,12 @@ def tree2array(tree,
289302
else:
290303
flatten = False
291304

292-
arr = _librootnumpy.root2array_fromCObj(
305+
arr = _librootnumpy.root2array_fromtree(
293306
cobj, branches, selection,
294307
start, stop, step,
295308
include_weight,
296-
weight_name)
309+
weight_name,
310+
cache_size)
297311

298312
if flatten:
299313
# select single column
@@ -308,7 +322,8 @@ def tree2rec(tree,
308322
stop=None,
309323
step=None,
310324
include_weight=False,
311-
weight_name='weight'):
325+
weight_name='weight',
326+
cache_size=-1):
312327
"""View the result of :func:`tree2array` as a record array.
313328
314329
Notes
@@ -329,7 +344,8 @@ def tree2rec(tree,
329344
stop=stop,
330345
step=step,
331346
include_weight=include_weight,
332-
weight_name=weight_name).view(np.recarray)
347+
weight_name=weight_name,
348+
cache_size=cache_size).view(np.recarray)
333349

334350

335351
def array2tree(arr, name='tree', tree=None):

root_numpy/src/Column.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@ class FormulaColumn: public Column
5858

5959
void* GetValuePointer()
6060
{
61-
// required, as in TTreePlayer
62-
formula->GetNdata();
61+
formula->GetNdata(); // required, as in TTreePlayer
6362
value[0] = formula->EvalInstance(0);
6463
return value;
6564
}
@@ -97,13 +96,13 @@ class BranchColumn: public Column
9796

9897
int GetLen()
9998
{
100-
// get len of this block (in unit of element)
99+
// Get length of this block (number of elements)
101100
return leaf->GetLen();
102101
}
103102

104103
int GetCountLen()
105104
{
106-
// get count leaf value
105+
// Get count leaf value
107106
TLeaf* count_leaf = leaf->GetLeafCount();
108107
if (count_leaf != NULL)
109108
{
@@ -114,7 +113,7 @@ class BranchColumn: public Column
114113

115114
int GetSize()
116115
{
117-
// get size of this block in bytes
116+
// Get size of this block in bytes
118117
return leaf->GetLenType() * leaf->GetLen();
119118
}
120119

root_numpy/src/TreeChain.h

Lines changed: 67 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,13 @@
99
#include <TBranch.h>
1010
#include <TLeaf.h>
1111
#include <TTreeFormula.h>
12+
#include <TTreeCache.h>
1213

1314
#include <iostream>
14-
#include <iomanip>
15-
#include <fstream>
16-
#include <cstdlib>
17-
#include <cstdio>
18-
#include <cassert>
1915
#include <string>
2016
#include <map>
2117
#include <vector>
22-
#include <set>
18+
#include <utility>
2319

2420
#include "Column.h"
2521
#include "util.h"
@@ -47,20 +43,21 @@ class TreeChain
4743
{
4844
public:
4945

50-
TreeChain(TTree* fChain):
51-
fChain(fChain),
52-
ientry(0)
46+
TreeChain(TTree* tree, long long cache_size):
47+
tree(tree),
48+
itree(-1),
49+
ientry(0),
50+
cache_size(cache_size)
5351
{
54-
fCurrent = -1;
55-
notifier = new MiniNotify(fChain->GetNotify());
56-
fChain->SetNotify(notifier);
52+
notifier = new MiniNotify(tree->GetNotify());
53+
tree->SetNotify(notifier);
5754
}
5855

5956
~TreeChain()
6057
{
61-
fChain->SetNotify(notifier->oldnotify);
58+
tree->SetNotify(notifier->oldnotify);
6259

63-
// Delete TTreeFormula
60+
// Delete all TTreeFormula
6461
std::vector<TTreeFormula*>::iterator fit;
6562
for (fit = formulae.begin(); fit != formulae.end(); ++fit)
6663
{
@@ -78,22 +75,21 @@ class TreeChain
7875
// Enable all branches since we don't know yet which branches are
7976
// required by the formulae. The branches must be activated when a
8077
// TTreeFormula is initially created.
81-
fChain->SetBranchStatus("*", true);
82-
//fChain->SetCacheSize(10000000);
78+
tree->SetBranchStatus("*", true);
8379
}
8480
return (int)load;
8581
}
8682

8783
long long LoadTree(long long entry)
8884
{
89-
long long load = fChain->LoadTree(entry);
85+
long long load = tree->LoadTree(entry);
9086
if (load < 0)
9187
{
9288
return load;
9389
}
94-
if (fChain->GetTreeNumber() != fCurrent)
90+
if (tree->GetTreeNumber() != itree)
9591
{
96-
fCurrent = fChain->GetTreeNumber();
92+
itree = tree->GetTreeNumber();
9793
}
9894
if (notifier->notified)
9995
{
@@ -111,53 +107,81 @@ class TreeChain
111107

112108
void InitBranches()
113109
{
114-
// The branches must be activated when a TTreeFormula is initially created.
110+
// Prepare() must be called before InitBranches()
111+
TFile* file = NULL;
112+
TTreeCache* cache = NULL;
115113
TBranch* branch;
116114
TLeaf* leaf;
117115
std::string bname, lname;
118116
LeafCache::iterator it;
119117

120-
// Only the required branches will be added to the cache below
121-
fChain->DropBranchFromCache("*", true);
118+
// Set the cache size. This should create a new cache if one does not
119+
// already exist.
120+
tree->SetCacheSize(cache_size);
121+
122+
// Get and set up the cache
123+
file = tree->GetCurrentFile();
124+
if (file)
125+
{
126+
cache = dynamic_cast<TTreeCache*>(file->GetCacheRead(tree));
127+
}
128+
if (cache)
129+
{
130+
cache->ResetCache();
131+
cache->StartLearningPhase();
132+
}
122133

123134
for (it = leafcache.begin(); it != leafcache.end(); ++it)
124135
{
125-
bname = it->first.first;
126-
lname = it->first.second;
127-
branch = fChain->GetBranch(bname.c_str());
128-
leaf = branch->FindLeaf(lname.c_str());
136+
leaf = it->second->leaf;
137+
branch = leaf->GetBranch();
129138

130139
// Make the branch active and cache it
131140
branch->SetStatus(true);
132-
fChain->AddBranchToCache(branch, true);
133-
// and the length leaf as well
141+
if (cache)
142+
{
143+
tree->AddBranchToCache(branch, true);
144+
}
134145

146+
// ... and the length leaf as well
135147
// TODO: Does this work if the user doesn't want the length column
136148
// in the output structure?
137149
TLeaf* leafCount = leaf->GetLeafCount();
138150
if (leafCount != NULL)
139151
{
140152
branch = leafCount->GetBranch();
141153
branch->SetStatus(true);
142-
fChain->AddBranchToCache(branch, true);
154+
if (cache)
155+
{
156+
tree->AddBranchToCache(branch, true);
157+
}
143158
}
144159
}
145160

146161
// Activate all branches used by the formulae
147-
int ncodes, n;
162+
int ncodes, icode;
148163
std::vector<TTreeFormula*>::iterator fit;
149164
for (fit = formulae.begin(); fit != formulae.end(); ++fit)
150165
{
151166
ncodes = (*fit)->GetNcodes();
152-
for (n = 0; n < ncodes; ++n)
167+
for (icode = 0; icode < ncodes; ++icode)
153168
{
154-
branch = (*fit)->GetLeaf(n)->GetBranch();
169+
branch = (*fit)->GetLeaf(icode)->GetBranch();
155170
// Branch may be a TObject split across multiple
156171
// subbranches. These must be activated recursively.
157172
activate_branch_recursive(branch);
158-
fChain->AddBranchToCache(branch, true);
173+
if (cache)
174+
{
175+
tree->AddBranchToCache(branch, true);
176+
}
159177
}
160178
}
179+
if (cache)
180+
{
181+
// Stop the cache learning phase since we have included a fixed set
182+
// of branches
183+
cache->StopLearningPhase();
184+
}
161185
}
162186

163187
int GetEntry(long long entry)
@@ -234,18 +258,19 @@ class TreeChain
234258
{
235259
bname = it->first.first;
236260
lname = it->first.second;
237-
branch = fChain->FindBranch(bname.c_str());
261+
branch = tree->FindBranch(bname.c_str());
238262
if (branch == NULL)
239263
{
240264
std::cerr << "WARNING: cannot find branch " << bname
241-
<< std::endl;
265+
<< std::endl;
242266
continue;
243267
}
244268
leaf = branch->FindLeaf(lname.c_str());
245269
if (leaf == NULL)
246270
{
247271
std::cerr << "WARNING: cannot find leaf " << lname
248-
<< " for branch " << bname << std::endl;
272+
<< " for branch " << bname
273+
<< std::endl;
249274
continue;
250275
}
251276
it->second->SetLeaf(leaf, true);
@@ -273,7 +298,7 @@ class TreeChain
273298
const std::string& leaf_name,
274299
BranchColumn* column)
275300
{
276-
BL bl = make_pair(branch_name, leaf_name);
301+
BranchLeaf bl = make_pair(branch_name, leaf_name);
277302
leafcache.insert(make_pair(bl, column));
278303
}
279304

@@ -299,15 +324,16 @@ class TreeChain
299324
TObject* oldnotify;
300325
};
301326

302-
TTree* fChain;
303-
int fCurrent;
327+
TTree* tree;
328+
int itree;
304329
long long ientry;
330+
long long cache_size;
305331
MiniNotify* notifier;
306332
std::vector<TTreeFormula*> formulae;
307333

308334
// Branch name to leaf name association
309-
typedef std::pair<std::string, std::string> BL;
310-
typedef std::map<BL, BranchColumn*> LeafCache;
335+
typedef std::pair<std::string, std::string> BranchLeaf;
336+
typedef std::map<BranchLeaf, BranchColumn*> LeafCache;
311337

312338
// Column pointer cache to update leaves
313339
// when new file is loaded in the chain

0 commit comments

Comments
 (0)