Skip to content

Commit 903dbc7

Browse files
authored
fix process allocation (#24721)
1 parent 8f51162 commit 903dbc7

File tree

6 files changed

+90
-66
lines changed

6 files changed

+90
-66
lines changed

ydb/core/tx/columnshard/counters/duplicate_filtering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ TDuplicateFilteringCounters::TDuplicateFilteringCounters()
66
, MergeRowsAccepted(TBase::GetDeriviative("DuplicateFiltering/SourcesMerging/RowsAccepted"))
77
, MergeRowsRejected(TBase::GetDeriviative("DuplicateFiltering/SourcesMerging/RowsRejected"))
88
, MergeRowsBulkAccepted(TBase::GetDeriviative("DuplicateFiltering/SourcesMerging/RowsBulkAccepted"))
9-
, IntersectingPortionsPerRequest(TBase::GetHistogram("DuplicateFiltering/IntersectingPortions", NMonitoring::ExponentialHistogram(18, 2, 8)))
9+
, IntersectingPortionsPerRequest(TBase::GetHistogram("DuplicateFiltering/IntersectingPortions", NMonitoring::ExponentialHistogram(18, 2, 1)))
1010
, FilterCacheHits(TBase::GetDeriviative("DuplicateFiltering/FilterCache/Hits"))
1111
, FilterCacheMisses(TBase::GetDeriviative("DuplicateFiltering/FilterCache/Misses"))
1212
{

ydb/core/tx/columnshard/engines/reader/simple_reader/duplicates/context.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,15 @@ namespace NKikimr::NOlap::NReader::NSimple::NDuplicateFiltering {
44

55
// Keeps the originating filter request. The memory guards that used to be built here are now built by TFilterBuildingGuard.
TFilterAccumulator::TFilterAccumulator(const TEvRequestFilter::TPtr& request)
66
: OriginalRequest(request)
7-
, ProcessGuard(NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildProcessGuard(GetStageFeatures()))
7+
{
8+
AFL_VERIFY(!!OriginalRequest); // the request handle must be non-null
9+
}
10+
11+
// Builds the three memory-manager guards as a chain: process guard -> scope guard -> group guard.
// The initializers depend on the member declaration order in context.h (ProcessGuard, ScopeGuard, GroupGuard),
// because each guard is created from the previously initialized one.
TFilterBuildingGuard::TFilterBuildingGuard()
12+
: ProcessGuard(NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildProcessGuard(GetStageFeatures()))
813
, ScopeGuard(ProcessGuard->BuildScopeGuard(1)) // NOTE(review): the meaning of the literal 1 is not visible here — presumably a scope id; confirm
914
, GroupGuard(ScopeGuard->BuildGroupGuard())
1015
{
11-
AFL_VERIFY(!!OriginalRequest);
1216
}
1317

1418
} // namespace NKikimr::NOlap::NReader::NSimple::NDuplicateFiltering

ydb/core/tx/columnshard/engines/reader/simple_reader/duplicates/context.h

Lines changed: 33 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,35 @@
88

99
namespace NKikimr::NOlap::NReader::NSimple::NDuplicateFiltering {
1010

11+
// Bundles the process/scope/group memory-manager guards used while building a filter.
// The ids exposed below are what callers pass to TDeduplicationMemoryLimiterOperator::SendToAllocation.
class TFilterBuildingGuard: TMoveOnly {
12+
private:
13+
const std::shared_ptr<NGroupedMemoryManager::TProcessGuard> ProcessGuard; // declared first: ScopeGuard is built from it in the constructor
14+
const std::shared_ptr<NGroupedMemoryManager::TScopeGuard> ScopeGuard; // declared second: GroupGuard is built from it in the constructor
15+
const std::shared_ptr<NGroupedMemoryManager::TGroupGuard> GroupGuard;
16+
17+
// Per-stage limits handed to BuildProcessGuard. The list is built once (function-local static),
// but the function returns by value, so every call copies the vector of shared_ptr.
static std::vector<std::shared_ptr<NGroupedMemoryManager::TStageFeatures>> GetStageFeatures() {
18+
static const std::vector<std::shared_ptr<NGroupedMemoryManager::TStageFeatures>> StageFeatures = {
19+
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildStageFeatures("INTERSECTIONS", 10000000), // 10^7: 10 MB (~9.5 MiB), assuming the limit is in bytes
20+
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildStageFeatures("ACCESSORS", 100000000), // 10^8: 100 MB (~95.4 MiB), assuming the limit is in bytes
21+
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildStageFeatures("COLUMN_DATA", 10000000000), // 10^10: 10 GB (~9.3 GiB), assuming the limit is in bytes
22+
};
23+
return StageFeatures;
24+
}
25+
26+
public:
27+
ui64 GetMemoryProcessId() const { // id reported by the owned process guard
28+
return ProcessGuard->GetProcessId();
29+
}
30+
ui64 GetMemoryScopeId() const { // id reported by the owned scope guard
31+
return ScopeGuard->GetScopeId();
32+
}
33+
ui64 GetMemoryGroupId() const { // id reported by the owned group guard
34+
return GroupGuard->GetGroupId();
35+
}
36+
37+
TFilterBuildingGuard(); // defined in context.cpp; creates all three guards
38+
};
39+
1140
class TFilterAccumulator: TMoveOnly {
1241
public:
1342
enum class EFetchingStage {
@@ -18,9 +47,6 @@ class TFilterAccumulator: TMoveOnly {
1847

1948
private:
2049
const TEvRequestFilter::TPtr OriginalRequest;
21-
const std::shared_ptr<NGroupedMemoryManager::TProcessGuard> ProcessGuard;
22-
const std::shared_ptr<NGroupedMemoryManager::TScopeGuard> ScopeGuard;
23-
const std::shared_ptr<NGroupedMemoryManager::TGroupGuard> GroupGuard;
2450
bool Done = false;
2551

2652
std::vector<std::optional<NArrow::TColumnFilter>> Filters;
@@ -48,15 +74,6 @@ class TFilterAccumulator: TMoveOnly {
4874
}
4975

5076
public:
51-
static std::vector<std::shared_ptr<NGroupedMemoryManager::TStageFeatures>> GetStageFeatures() {
52-
static const std::vector<std::shared_ptr<NGroupedMemoryManager::TStageFeatures>> StageFeatures = {
53-
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildStageFeatures("INTERSECTIONS", 10000000), // 10 MiB
54-
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildStageFeatures("ACCESSORS", 100000000), // 100 MiB
55-
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::BuildStageFeatures("COLUMN_DATA", 10000000000), // 10 GiB
56-
};
57-
return StageFeatures;
58-
}
59-
6077
void SetIntervalsCount(const ui32 cnt) {
6178
AFL_VERIFY(Filters.empty());
6279
AFL_VERIFY(cnt);
@@ -106,16 +123,6 @@ class TFilterAccumulator: TMoveOnly {
106123
return sb;
107124
}
108125

109-
ui64 GetMemoryProcessId() const {
110-
return ProcessGuard->GetProcessId();
111-
}
112-
ui64 GetMemoryScopeId() const {
113-
return ScopeGuard->GetScopeId();
114-
}
115-
ui64 GetMemoryGroupId() const {
116-
return GroupGuard->GetGroupId();
117-
}
118-
119126
ui64 GetDataSize() const {
120127
return Filters.capacity() * sizeof(std::optional<NArrow::TColumnFilter>);
121128
}
@@ -135,14 +142,15 @@ class TBuildFilterContext: NColumnShard::TMonitoringObjectsCounter<TBuildFilterC
135142
YDB_READONLY_DEF(std::shared_ptr<NColumnFetching::TColumnDataManager>, ColumnDataManager);
136143
YDB_READONLY_DEF(std::shared_ptr<NDataAccessorControl::IDataAccessorsManager>, DataAccessorsManager);
137144
YDB_READONLY_DEF(std::shared_ptr<NColumnShard::TDuplicateFilteringCounters>, Counters);
145+
YDB_READONLY_DEF(std::unique_ptr<TFilterBuildingGuard>, RequestGuard);
138146
std::shared_ptr<NGroupedMemoryManager::TAllocationGuard> SelfMemory;
139147

140148
public:
141149
TBuildFilterContext(const TActorId owner, const std::shared_ptr<TFilterAccumulator>& context, TPortionIndex&& portions,
142150
std::vector<std::pair<TColumnDataSplitter::TBorder, TColumnDataSplitter::TBorder>>&& intervals, const TFieldByColumn& columns,
143151
const std::shared_ptr<arrow::Schema>& pkSchema, const std::shared_ptr<NColumnFetching::TColumnDataManager>& columnDataManager,
144152
const std::shared_ptr<NDataAccessorControl::IDataAccessorsManager>& dataAccessorsManager,
145-
const std::shared_ptr<NColumnShard::TDuplicateFilteringCounters>& counters,
153+
const std::shared_ptr<NColumnShard::TDuplicateFilteringCounters>& counters, std::unique_ptr<TFilterBuildingGuard>&& requestGuard,
146154
const std::shared_ptr<NGroupedMemoryManager::TAllocationGuard>& contextMemory)
147155
: Owner(owner)
148156
, Context(context)
@@ -153,6 +161,7 @@ class TBuildFilterContext: NColumnShard::TMonitoringObjectsCounter<TBuildFilterC
153161
, ColumnDataManager(columnDataManager)
154162
, DataAccessorsManager(dataAccessorsManager)
155163
, Counters(counters)
164+
, RequestGuard(std::move(requestGuard))
156165
, SelfMemory(contextMemory)
157166
{
158167
AFL_VERIFY(Owner);
@@ -163,7 +172,7 @@ class TBuildFilterContext: NColumnShard::TMonitoringObjectsCounter<TBuildFilterC
163172
AFL_VERIFY(ColumnDataManager);
164173
AFL_VERIFY(DataAccessorsManager);
165174
AFL_VERIFY(Counters);
166-
// AFL_VERIFY(SelfMemory); // may be null
175+
AFL_VERIFY(SelfMemory);
167176
}
168177

169178
std::set<ui32> GetFetchingColumnIds() const {

ydb/core/tx/columnshard/engines/reader/simple_reader/duplicates/manager.cpp

Lines changed: 39 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,9 @@ class TColumnDataAccessorFetching: public IDataAccessorRequestsSubscriber {
102102
mem += accessor->GetColumnRawBytes(Request.GetFetchingColumnIds(), false);
103103
}
104104

105-
auto context = Request.GetContext();
106-
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::SendToAllocation(context->GetMemoryProcessId(), context->GetMemoryScopeId(),
107-
context->GetMemoryGroupId(), { std::make_shared<TColumnDataAllocation>(std::move(Request), mem) },
108-
(ui64)TFilterAccumulator::EFetchingStage::COLUMN_DATA);
105+
// NOTE(review): the first three arguments read Request.GetRequestGuard() while the fourth argument moves Request
// in the same call. C++ leaves the evaluation order of function arguments unspecified, so if TColumnDataAllocation's
// constructor moves the context (and with it the unique_ptr RequestGuard), the id reads may dereference a moved-from
// guard. Consider hoisting the three ids into locals first, as Handle(TEvFilterRequestResourcesAllocated) does
// before its own SendToAllocation call.
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::SendToAllocation(Request.GetRequestGuard()->GetMemoryProcessId(),
106+
Request.GetRequestGuard()->GetMemoryScopeId(), Request.GetRequestGuard()->GetMemoryGroupId(),
107+
{ std::make_shared<TColumnDataAllocation>(std::move(Request), mem) }, (ui64)TFilterAccumulator::EFetchingStage::COLUMN_DATA);
109108
}
110109
virtual const std::shared_ptr<const TAtomicCounter>& DoGetAbortionFlag() const override {
111110
return Request.GetContext()->GetRequest()->Get()->GetAbortionFlag();
@@ -161,22 +160,25 @@ class TPortionIntersectionsAllocation: public NGroupedMemoryManager::IAllocation
161160
private:
162161
TActorId Owner;
163162
std::shared_ptr<TFilterAccumulator> Request;
163+
YDB_READONLY_DEF(std::unique_ptr<TFilterBuildingGuard>, RequestGuard);
164164

165165
private:
166166
virtual void DoOnAllocationImpossible(const TString& errorMessage) override {
167167
Request->Abort(TStringBuilder() << "cannot allocate memory: " << errorMessage);
168168
}
169169
// Called when the requested memory has been granted: forwards the request, the allocation guard and the
// request-level guard bundle to Owner in a TEvFilterRequestResourcesAllocated event.
virtual bool DoOnAllocated(std::shared_ptr<NGroupedMemoryManager::TAllocationGuard>&& guard,
170170
const std::shared_ptr<NGroupedMemoryManager::IAllocation>& /*allocation*/) override {
171-
TActorContext::AsActorContext().Send(Owner, new NPrivate::TEvFilterRequestResourcesAllocated(Request, guard));
171+
// RequestGuard is moved into the event, so this allocation object holds a null RequestGuard afterwards.
TActorContext::AsActorContext().Send(Owner, new NPrivate::TEvFilterRequestResourcesAllocated(Request, guard, std::move(RequestGuard)));
172172
return true;
173173
}
174174

175175
public:
176-
TPortionIntersectionsAllocation(const TActorId& owner, const std::shared_ptr<TFilterAccumulator>& request, const ui64 mem)
176+
TPortionIntersectionsAllocation(const TActorId& owner, const std::shared_ptr<TFilterAccumulator>& request, const ui64 mem,
177+
std::unique_ptr<TFilterBuildingGuard>&& requestGuard)
177178
: NGroupedMemoryManager::IAllocation(mem)
178179
, Owner(owner)
179180
, Request(request)
181+
, RequestGuard(std::move(requestGuard))
180182
{
181183
}
182184
};
@@ -200,26 +202,35 @@ TDuplicateManager::TDuplicateManager(const TSpecialReadContext& context, const s
200202
{
201203
}
202204

203-
void TDuplicateManager::Handle(const TEvRequestFilter::TPtr& ev) {
204-
auto constructor = std::make_shared<TFilterAccumulator>(ev);
205-
TPortionInfo::TConstPtr mainPortion = Portions->GetPortionVerified(constructor->GetRequest()->Get()->GetSourceId());
206-
static constexpr ui64 LOW_INTERSECTIONS_LIMIT = 10;
205+
// Walks the portions in Intervals that intersect the range built from begin/end and returns EachIntersection's result.
// The callback asks to continue only for the first visited portion and returns false on the second one.
// NOTE(review): presumably EachIntersection returns false when the callback stopped the walk, making the result
// "at most one portion intersects the range" — confirm against TPortionIntervalTree.
bool TDuplicateManager::IsExclusiveInterval(const NArrow::TSimpleRow& begin, const NArrow::TSimpleRow& end) const {
207206
ui64 intersectionsCount = 0;
208-
Intervals.EachIntersection(TPortionIntervalTree::TRange(mainPortion->IndexKeyStart(), true, mainPortion->IndexKeyEnd(), true),
207+
return Intervals.EachIntersection(TPortionIntervalTree::TRange(begin, true, end, true), // presumably the two `true` flags mark both bounds inclusive — TODO confirm
209208
[&intersectionsCount](const TPortionIntervalTree::TRange& /*interval*/, const std::shared_ptr<TPortionInfo>& /*portion*/) {
210209
++intersectionsCount;
211-
return intersectionsCount <= LOW_INTERSECTIONS_LIMIT;
210+
return intersectionsCount == 1; // true for the first portion seen, false as soon as a second one is seen
212211
});
213-
ExpectedIntersectionCount = std::max(ExpectedIntersectionCount, intersectionsCount);
214-
if (intersectionsCount <= LOW_INTERSECTIONS_LIMIT) {
215-
Send(SelfId(), new NPrivate::TEvFilterRequestResourcesAllocated(constructor, nullptr));
216-
} else {
217-
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::SendToAllocation(constructor->GetMemoryProcessId(),
218-
constructor->GetMemoryScopeId(), constructor->GetMemoryGroupId(),
219-
{ std::make_shared<TPortionIntersectionsAllocation>(
220-
SelfId(), constructor, TBuildFilterContext::GetApproximateDataSize(ExpectedIntersectionCount)) },
221-
(ui64)TFilterAccumulator::EFetchingStage::INTERSECTIONS);
212+
}
213+
214+
// Entry point for a filter request. If IsExclusiveInterval accepts the source portion's key range, the request is
// answered immediately without asking for memory; otherwise a TFilterBuildingGuard is created and an INTERSECTIONS-stage
// allocation is requested through it.
void TDuplicateManager::Handle(const TEvRequestFilter::TPtr& ev) {
215+
auto constructor = std::make_shared<TFilterAccumulator>(ev);
216+
TPortionInfo::TConstPtr mainPortion = Portions->GetPortionVerified(constructor->GetRequest()->Get()->GetSourceId());
217+
if (IsExclusiveInterval(mainPortion->IndexKeyStart(), mainPortion->IndexKeyEnd())) {
218+
auto filter = NArrow::TColumnFilter::BuildAllowFilter(); // presumably an "accept all rows" filter — confirm against TColumnFilter
219+
filter.Add(true, mainPortion->GetRecordsCount()); // one `true` run covering every record of the portion
220+
constructor->SetIntervalsCount(1);
221+
constructor->AddFilter(0, std::move(filter));
222+
AFL_VERIFY(constructor->IsDone()); // the single interval has its filter, so the accumulator is expected to be complete
223+
Counters->OnFilterRequest(1);
224+
Counters->OnRowsMerged(0, 0, mainPortion->GetRecordsCount());
225+
return;
222226
}
227+
228+
auto task = std::make_shared<TPortionIntersectionsAllocation>(
229+
// NOTE(review): this change removes the std::max update of ExpectedIntersectionCount that used to run in this
// handler, which was the only update visible in this view; confirm the member is still maintained elsewhere,
// otherwise the size estimate below is computed from its initial value.
SelfId(), constructor, TBuildFilterContext::GetApproximateDataSize(ExpectedIntersectionCount), std::make_unique<TFilterBuildingGuard>());
230+
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::SendToAllocation(task->GetRequestGuard()->GetMemoryProcessId(),
231+
task->GetRequestGuard()->GetMemoryScopeId(), task->GetRequestGuard()->GetMemoryGroupId(), { task }, // task is copied, not moved, so the guard reads are safe in any argument order
232+
(ui64)TFilterAccumulator::EFetchingStage::INTERSECTIONS);
233+
return; // redundant: last statement of a void function
223234
}
224235

225236
void TDuplicateManager::StartIntervalProcessing(const THashMap<ui64, TPortionInfo::TConstPtr>& intersectingPortions,
@@ -271,6 +282,7 @@ void TDuplicateManager::StartIntervalProcessing(const THashMap<ui64, TPortionInf
271282
void TDuplicateManager::Handle(const NPrivate::TEvFilterRequestResourcesAllocated::TPtr& ev) {
272283
std::shared_ptr<TFilterAccumulator> constructor = ev->Get()->GetRequest();
273284
std::shared_ptr<NGroupedMemoryManager::TAllocationGuard> memoryGuard = ev->Get()->ExtractAllocationGuard();
285+
auto requestGuard = ev->Get()->ExtractRequestGuard();
274286

275287
THashMap<ui64, TPortionInfo::TConstPtr> intersectingPortions;
276288
const std::shared_ptr<const TPortionInfo>& mainPortion = Portions->GetPortionVerified(constructor->GetRequest()->Get()->GetSourceId());
@@ -290,16 +302,6 @@ void TDuplicateManager::Handle(const NPrivate::TEvFilterRequestResourcesAllocate
290302
("source", constructor->GetRequest()->Get()->GetSourceId())("intersecting_portions", intersectingPortions.size());
291303
AFL_VERIFY(intersectingPortions.size());
292304

293-
if (intersectingPortions.size() == 1) {
294-
AFL_VERIFY((*intersectingPortions.begin()).first == mainPortion->GetPortionId());
295-
auto filter = NArrow::TColumnFilter::BuildAllowFilter();
296-
filter.Add(true, mainPortion->GetRecordsCount());
297-
constructor->SetIntervalsCount(1);
298-
constructor->AddFilter(0, std::move(filter));
299-
AFL_VERIFY(constructor->IsDone());
300-
return;
301-
}
302-
303305
THashSet<ui64> portionIdsToFetch;
304306
std::vector<std::pair<TColumnDataSplitter::TBorder, TColumnDataSplitter::TBorder>> intervalsToBuild;
305307
StartIntervalProcessing(intersectingPortions, constructor, portionIdsToFetch, intervalsToBuild);
@@ -314,13 +316,13 @@ void TDuplicateManager::Handle(const NPrivate::TEvFilterRequestResourcesAllocate
314316
AFL_VERIFY(portionIdsToFetch.contains(mainPortion->GetPortionId()))("main_portion", mainPortion->GetPortionId())(
315317
"required_portions", JoinSeq(',', portionIdsToFetch));
316318
TBuildFilterContext columnFetchingRequest(SelfId(), constructor, std::move(portionsToFetch), std::move(intervalsToBuild),
317-
GetFetchingColumns(), PKSchema, ColumnDataManager, DataAccessorsManager, Counters, memoryGuard);
318-
if (memoryGuard) {
319-
memoryGuard->Update(columnFetchingRequest.GetDataSize());
320-
}
319+
GetFetchingColumns(), PKSchema, ColumnDataManager, DataAccessorsManager, Counters, std::move(requestGuard), memoryGuard);
320+
memoryGuard->Update(columnFetchingRequest.GetDataSize());
321321
const ui64 mem = TColumnDataAccessorFetching::GetRequiredMemory(columnFetchingRequest, LastSchema);
322-
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::SendToAllocation(constructor->GetMemoryProcessId(),
323-
constructor->GetMemoryScopeId(), constructor->GetMemoryGroupId(),
322+
const ui64 processId = columnFetchingRequest.GetRequestGuard()->GetMemoryProcessId();
323+
const ui64 scopeId = columnFetchingRequest.GetRequestGuard()->GetMemoryScopeId();
324+
const ui64 groupId = columnFetchingRequest.GetRequestGuard()->GetMemoryGroupId();
325+
NGroupedMemoryManager::TDeduplicationMemoryLimiterOperator::SendToAllocation(processId, scopeId, groupId,
324326
{ std::make_shared<TDataAccessorAllocation>(std::move(columnFetchingRequest), mem) },
325327
(ui64)TFilterAccumulator::EFetchingStage::ACCESSORS);
326328
}

ydb/core/tx/columnshard/engines/reader/simple_reader/duplicates/manager.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ class TDuplicateManager: public NActors::TActor<TDuplicateManager> {
9393
return std::make_shared<TPortionStore>(std::move(portions));
9494
}
9595

96+
bool IsExclusiveInterval(const NArrow::TSimpleRow& begin, const NArrow::TSimpleRow& end) const;
97+
9698
private:
9799
STATEFN(StateMain) {
98100
switch (ev->GetTypeRewrite()) {

ydb/core/tx/columnshard/engines/reader/simple_reader/duplicates/private_events.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,25 @@ class TEvFilterRequestResourcesAllocated
1313
private:
1414
YDB_READONLY_DEF(std::shared_ptr<TFilterAccumulator>, Request);
1515
std::shared_ptr<NGroupedMemoryManager::TAllocationGuard> AllocationGuard;
16+
std::unique_ptr<TFilterBuildingGuard> RequestGuard;
1617

1718
public:
18-
TEvFilterRequestResourcesAllocated(
19-
const std::shared_ptr<TFilterAccumulator>& request, const std::shared_ptr<NGroupedMemoryManager::TAllocationGuard>& guard)
19+
// Carries the request, its allocation guard and the request-level guard bundle back to the manager.
// Ownership of requestGuard is taken by the event. Only RequestGuard is verified to be non-null here.
TEvFilterRequestResourcesAllocated(const std::shared_ptr<TFilterAccumulator>& request,
20+
const std::shared_ptr<NGroupedMemoryManager::TAllocationGuard>& guard, std::unique_ptr<TFilterBuildingGuard>&& requestGuard)
2021
: Request(request)
2122
, AllocationGuard(guard)
23+
, RequestGuard(std::move(requestGuard))
2224
{
25+
AFL_VERIFY(RequestGuard);
2326
}
2427

2528
std::shared_ptr<NGroupedMemoryManager::TAllocationGuard>&& ExtractAllocationGuard() {
2629
return std::move(AllocationGuard);
2730
}
31+
// Hands the stored guard bundle to the caller. The return type is an rvalue reference to the member, so ownership
// leaves the event only when the caller move-constructs or move-assigns from the result (e.g. `auto g = ...;`).
// The guard is checked with AFL_VERIFY, so it must still be present when this is called.
std::unique_ptr<TFilterBuildingGuard>&& ExtractRequestGuard() {
32+
AFL_VERIFY(RequestGuard);
33+
return std::move(RequestGuard);
34+
}
2835
};
2936

3037
class TEvFilterConstructionResult

0 commit comments

Comments
 (0)