2
2
#include " construct_join_graph.h"
3
3
#include " factories.h"
4
4
#include < ydb/library/yql/dq/comp_nodes/ut/utils/utils.h>
5
+ #include < yql/essentials/minikql/computation/mkql_computation_node_holders.h>
5
6
6
7
namespace {
7
8
TVector<ui64> GenerateKeyColumn (i32 size, i32 seed) {
@@ -13,10 +14,10 @@ TVector<ui64> GenerateKeyColumn(i32 size, i32 seed) {
13
14
return keyCoumn;
14
15
}
15
16
16
- NKikimr::NMiniKQL::TInnerJoinDescription PrepareCommonDescription (NKikimr::NMiniKQL::TDqSetup<false >* setup) {
17
+ NKikimr::NMiniKQL::TInnerJoinDescription PrepareSameSizeTables (NKikimr::NMiniKQL::TDqSetup<false >* setup) {
17
18
NKikimr::NMiniKQL::TInnerJoinDescription descr;
18
19
descr.Setup = setup;
19
- const int size = 1 << 14 ;
20
+ const int size = 1 << 16 ;
20
21
21
22
std::tie (descr.LeftSource .ColumnTypes , descr.LeftSource .ValuesList ) = ConvertVectorsToRuntimeTypesAndValue (
22
23
*setup, GenerateKeyColumn (size, 123 ), TVector<ui64>(size, 111 ), TVector<TString>(size, " meow" ));
@@ -25,10 +26,31 @@ NKikimr::NMiniKQL::TInnerJoinDescription PrepareCommonDescription(NKikimr::NMini
25
26
return descr;
26
27
}
27
28
29
+ NKikimr::NMiniKQL::TInnerJoinDescription PrepareSmallRightTable (NKikimr::NMiniKQL::TDqSetup<false >* setup) {
30
+ NKikimr::NMiniKQL::TInnerJoinDescription descr;
31
+ descr.Setup = setup;
32
+ const int leftSize = 1 << 16 ;
33
+ const int rightSize = leftSize >> 7 ;
34
+ std::tie (descr.LeftSource .ColumnTypes , descr.LeftSource .ValuesList ) = ConvertVectorsToRuntimeTypesAndValue (
35
+ *setup, GenerateKeyColumn (leftSize, 123 ), TVector<ui64>(leftSize, 111 ), TVector<TString>(leftSize, " meow" ));
36
+ std::tie (descr.RightSource .ColumnTypes , descr.RightSource .ValuesList ) = ConvertVectorsToRuntimeTypesAndValue (
37
+ *setup, GenerateKeyColumn (rightSize, 111 ), TVector<TString>(rightSize, " woo" ));
38
+ return descr;
39
+ }
40
+
28
41
struct TTestResult {
29
42
TRunResult Run;
30
43
TString TestName;
31
44
};
45
+
46
+ int LineSize (NKikimr::NMiniKQL::ETestedJoinAlgo algo, std::span<const NYql::NUdf::TUnboxedValue> line) {
47
+ if (NKikimr::NMiniKQL::IsBlockJoin (algo)) {
48
+ return NKikimr::NMiniKQL::TArrowBlock::From (line.back ()).GetDatum ().scalar_as <arrow::UInt64Scalar>().value ;
49
+ } else {
50
+ return 1 ;
51
+ }
52
+ }
53
+
32
54
} // namespace
33
55
34
56
void NKikimr::NMiniKQL::RunJoinsBench (const TRunParams& params, TTestResultCollector& printout) {
@@ -44,31 +66,38 @@ void NKikimr::NMiniKQL::RunJoinsBench(const TRunParams& params, TTestResultColle
44
66
{NYKQL::ETestedJoinAlgo::kScalarMap , " ScalarMap" },
45
67
{NYKQL::ETestedJoinAlgo::kBlockMap , " BlockMap" },
46
68
};
69
+ TVector<std::pair<NYKQL::TInnerJoinDescription, std::string_view>> inputs = {
70
+ {PrepareSameSizeTables (&setup), " SameSizeTables" },
71
+ {PrepareSmallRightTable (&setup), " SmallRight" },
72
+ };
47
73
48
- for (auto [algo, name] : cases) {
49
- NYKQL::TInnerJoinDescription descr = PrepareCommonDescription (&setup);
50
- descr.LeftSource .KeyColumnIndexes = keyColumns;
51
- descr.RightSource .KeyColumnIndexes = keyColumns;
52
- THolder<NKikimr::NMiniKQL::IComputationGraph> wideStreamGraph = ConstructInnerJoinGraphStream (algo, descr);
53
- NYql::NUdf::TUnboxedValue wideStream = wideStreamGraph->GetValue ();
54
- std::vector<NYql::NUdf::TUnboxedValue> fetchBuff;
55
- i32 cols = NKikimr::NMiniKQL::ResultColumnCount (algo, descr);
56
- fetchBuff.resize (cols);
57
- Cerr << " Compute graph result for algorithm '" << name << " '" ;
74
+ for (auto [algo, algo_name] : cases) {
75
+ for (auto [descr, descr_name] : inputs) {
76
+ descr.LeftSource .KeyColumnIndexes = keyColumns;
77
+ descr.RightSource .KeyColumnIndexes = keyColumns;
58
78
59
- NYql::NUdf::EFetchStatus fetchStatus;
60
- i64 lineCount = 0 ;
61
- const auto graphTimeStart = GetThreadCPUTime ();
79
+ THolder<NKikimr::NMiniKQL::IComputationGraph> wideStreamGraph = ConstructInnerJoinGraphStream (algo, descr);
80
+ NYql::NUdf::TUnboxedValue wideStream = wideStreamGraph->GetValue ();
81
+ std::vector<NYql::NUdf::TUnboxedValue> fetchBuff;
82
+ ui32 cols = NKikimr::NMiniKQL::ResultColumnCount (algo, descr);
83
+ fetchBuff.resize (cols);
84
+ Cerr << " Compute graph result for algorithm '" << algo_name << " ' and input data '" << descr_name << " '" ;
62
85
63
- while ((fetchStatus = wideStream.WideFetch (fetchBuff.data (), cols)) != NYql::NUdf::EFetchStatus::Finish) {
64
- if (fetchStatus == NYql::NUdf::EFetchStatus::Ok) {
65
- ++lineCount;
86
+ NYql::NUdf::EFetchStatus fetchStatus;
87
+ i64 lineCount = 0 ;
88
+ const auto graphTimeStart = GetThreadCPUTime ();
89
+
90
+ while ((fetchStatus = wideStream.WideFetch (fetchBuff.data (), cols)) != NYql::NUdf::EFetchStatus::Finish) {
91
+ if (fetchStatus == NYql::NUdf::EFetchStatus::Ok) {
92
+ lineCount += LineSize (algo, {fetchBuff.data (), cols});
93
+ }
66
94
}
67
- }
68
- TRunResult thisNodeResult;
95
+ TRunResult thisNodeResult;
69
96
70
- thisNodeResult.ResultTime = GetThreadCPUTimeDelta (graphTimeStart);
71
- Cerr << " . Output line count(block considered to be 1 line): " << lineCount << Endl;
72
- printout.SubmitMetrics (params, thisNodeResult, name.data (), false , false );
97
+ thisNodeResult.ResultTime = GetThreadCPUTimeDelta (graphTimeStart);
98
+ Cerr << " . Output line count(block considered to be 1 line): " << lineCount << Endl;
99
+ std::string testname = std::string{algo_name} + " _" + std::string{descr_name};
100
+ printout.SubmitMetrics (params, thisNodeResult, testname.data (), false , false );
101
+ }
73
102
}
74
103
}
0 commit comments