Skip to content

Commit 7fde8d8

Browse files
committed
GH-46411: [C++] Implemented dataset option in Meson
1 parent 9b96bdb commit 7fde8d8

File tree

4 files changed

+226
-12
lines changed

4 files changed

+226
-12
lines changed

cpp/meson.build

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,24 +52,28 @@ if git_description == '' and not meson.is_subproject()
5252
endif
5353

5454
needs_benchmarks = get_option('benchmarks').enabled()
55-
needs_compute = get_option('compute').enabled()
5655
needs_csv = get_option('csv').enabled()
56+
needs_dataset = get_option('dataset').enabled()
5757
needs_azure = get_option('azure').enabled()
5858
needs_gcs = get_option('gcs').enabled()
5959
needs_hdfs = get_option('hdfs').enabled()
60+
needs_opentelemetry = false
61+
needs_orc = false
6062
needs_parquet = get_option('parquet').enabled()
6163
needs_parquet_encryption = get_option('parquet_require_encryption').enabled()
6264
needs_s3 = get_option('s3').enabled()
6365
needs_filesystem = (get_option('filesystem').enabled()
6466
or needs_azure
67+
or needs_dataset
6568
or needs_gcs
6669
or needs_hdfs
6770
or needs_parquet_encryption
6871
or needs_s3
6972
)
7073
needs_integration = get_option('integration').enabled()
7174
needs_tests = get_option('tests').enabled()
72-
needs_acero = get_option('acero').enabled()
75+
needs_acero = get_option('acero').enabled() or needs_dataset
76+
needs_compute = get_option('compute').enabled() or needs_acero
7377
needs_flight = get_option('flight').enabled()
7478
needs_ipc = (get_option('ipc').enabled()
7579
or needs_tests
@@ -112,3 +116,11 @@ if needs_parquet
112116
subdir('examples/parquet')
113117
endif
114118
endif
119+
120+
if needs_dataset
121+
# Unlike the CMake configuration we need to add dataset support in the top level
122+
# because it potentially requires parquet, which in turn requires arrow.
123+
# When included in the subdir('src/arrow') call with parquet enabled, you end up
124+
# with a circular dependency
125+
subdir('src/arrow/dataset')
126+
endif

cpp/meson.options

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ option(
3939
description: 'Build all Arrow Compute kernels',
4040
)
4141
option('csv', type: 'feature', description: 'Build the Arrow CSV Parser Module')
42+
option(
43+
'dataset',
44+
type: 'feature',
45+
description: 'Build the Arrow Dataset Modules',
46+
)
4247
option(
4348
'filesystem',
4449
type: 'feature',

cpp/src/arrow/acero/meson.build

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,16 @@ arrow_acero_dep = declare_dependency(
9090
)
9191
meson.override_dependency('arrow-acero', arrow_acero_dep)
9292

93-
arrow_acero_testing_sources = ['test_nodes.cc', 'test_util_internal.cc']
93+
arrow_acero_test_sources = ['test_nodes.cc', 'test_util_internal.cc']
94+
arrow_acero_test_lib = static_library(
95+
'arrow-acero-testing',
96+
sources: arrow_acero_test_sources,
97+
dependencies: [arrow_acero_dep, arrow_compute_test_dep],
98+
)
99+
arrow_acero_test_dep = declare_dependency(
100+
link_with: [arrow_acero_test_lib],
101+
dependencies: [arrow_acero_dep, arrow_compute_test_dep],
102+
)
94103

95104
arrow_acero_tests = {
96105
'plan-test': {'sources': ['plan_test.cc', 'test_nodes_test.cc']},
@@ -114,8 +123,8 @@ arrow_acero_tests = {
114123
foreach key, val : arrow_acero_tests
115124
exc = executable(
116125
'arrow-acero-@0@'.format(key),
117-
sources: val['sources'] + arrow_acero_testing_sources,
118-
dependencies: [arrow_acero_dep, arrow_compute_test_dep],
126+
sources: val['sources'],
127+
dependencies: [arrow_acero_test_dep],
119128
)
120129
test(key, exc)
121130
endforeach
@@ -137,13 +146,8 @@ arrow_acero_benchmarks = {
137146
foreach key, val : arrow_acero_benchmarks
138147
exc = executable(
139148
key,
140-
sources: val['sources'] + arrow_acero_testing_sources,
141-
dependencies: [
142-
arrow_acero_dep,
143-
arrow_compute_test_dep,
144-
arrow_benchmark_dep,
145-
gmock_dep,
146-
],
149+
sources: val['sources'],
150+
dependencies: [arrow_acero_test_dep, arrow_benchmark_dep, gmock_dep],
147151
)
148152
benchmark(key, exc)
149153
endforeach

cpp/src/arrow/dataset/meson.build

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Public headers are installed unconditionally (including the optional
# format headers such as file_orc.h/file_parquet.h), matching the
# header set shipped by the CMake build.
install_headers(
    [
        'api.h',
        'dataset.h',
        'dataset_writer.h',
        'discovery.h',
        'file_base.h',
        'file_csv.h',
        'file_ipc.h',
        'file_json.h',
        'file_orc.h',
        'file_parquet.h',
        'parquet_encryption_config.h',
        'partition.h',
        'plan.h',
        'projector.h',
        'scanner.h',
        'type_fwd.h',
        'visibility.h',
    ],
    subdir: 'arrow/dataset',
)

# Core sources that are always built; format-specific sources are
# appended below based on the enabled feature options.
arrow_dataset_srcs = files(
    'dataset.cc',
    'dataset_writer.cc',
    'discovery.cc',
    'file_base.cc',
    'file_ipc.cc',
    'partition.cc',
    'plan.cc',
    'projector.cc',
    'scan_node.cc',
    'scanner.cc',
)

arrow_dataset_deps = [arrow_acero_dep, arrow_compute_dep]
# Requires entries for the generated arrow-dataset.pc; extended in lockstep
# with arrow_dataset_deps for dependencies that ship their own .pc file.
arrow_pkgconfig_requires = ['arrow-acero', 'arrow-compute']
if needs_csv
    arrow_dataset_srcs += ['file_csv.cc']
endif

if needs_json
    arrow_dataset_srcs += ['file_json.cc']
endif

if needs_orc
    arrow_dataset_srcs += ['file_orc.cc']
endif

if needs_parquet
    arrow_dataset_srcs += ['file_parquet.cc']
    arrow_dataset_deps += [parquet_dep]
    arrow_pkgconfig_requires += ['parquet']
endif

if needs_opentelemetry
    arrow_dataset_deps += [opentelemetry_dep]
endif

arrow_dataset_lib = library(
    'arrow_dataset',
    sources: arrow_dataset_srcs,
    dependencies: arrow_dataset_deps,
    # Visibility macros mirror the CMake build: ARROW_DS_STATIC disables
    # dllimport/dllexport annotations for static linking, ARROW_DS_EXPORTING
    # marks symbols for export while building the shared library.
    cpp_static_args: ['-DARROW_DS_STATIC'],
    cpp_shared_args: ['-DARROW_DS_EXPORTING'],
    gnu_symbol_visibility: 'inlineshidden',
)

# Consumers of the static library must also see ARROW_DS_STATIC so the
# headers do not declare dllimport symbols.
arrow_dataset_args = []
if get_option('default_library') == 'static'
    arrow_dataset_args += ['-DARROW_DS_STATIC']
endif

arrow_dataset_dep = declare_dependency(
    link_with: [arrow_dataset_lib],
    dependencies: arrow_dataset_deps,
    compile_args: arrow_dataset_args,
)
meson.override_dependency('arrow-dataset', arrow_dataset_dep)

# Mirror the compile-args choice in the pkg-config file: public Cflags for
# a static default, Cflags.private otherwise.
pkg_config_cflags = get_option('default_library') == 'static' ? '-DARROW_DS_STATIC' : ''
pkg_config_cflags_private = get_option('default_library') != 'static' ? '-DARROW_DS_STATIC' : ''
pkg.generate(
    arrow_dataset_lib,
    filebase: 'arrow-dataset',
    name: 'Apache Arrow Dataset',
    description: 'Apache Arrow Dataset provides an API to read and write semantic datasets stored in different locations and formats.',
    extra_cflags: [pkg_config_cflags],
    requires: arrow_pkgconfig_requires,
    variables: {'Cflags.private': pkg_config_cflags_private},
)

# Shared test scaffolding; the disabler() fallback short-circuits the test
# executables below when testing is disabled.
if needs_testing
    arrow_dataset_testing_lib = static_library(
        'arrow_dataset_testing',
        sources: ['test_util_internal.cc'],
        dependencies: [arrow_dataset_dep, arrow_acero_test_dep],
    )
    arrow_dataset_test_dep = declare_dependency(
        link_with: [arrow_dataset_testing_lib],
        dependencies: [arrow_dataset_dep, arrow_acero_test_dep],
    )
else
    arrow_dataset_test_dep = disabler()
endif

dataset_tests = {
    'dataset': {'sources': ['dataset_test.cc']},
    'dataset_writer': {'sources': ['dataset_writer_test.cc']},
    'discovery': {'sources': ['discovery_test.cc']},
    'file_ipc': {'sources': ['file_ipc_test.cc']},
    'file': {'sources': ['file_test.cc']},
    'partition': {'sources': ['partition_test.cc']},
    'scanner': {'sources': ['scanner_test.cc']},
    'subtree': {'sources': ['subtree_test.cc']},
    'write_node': {'sources': ['write_node_test.cc']},
}

if needs_csv
    dataset_tests += {'file_csv': {'sources': ['file_csv_test.cc']}}
endif

if needs_json
    dataset_tests += {
        'file_json': {
            'sources': ['file_json_test.cc'],
            'dependencies': [rapidjson_dep],
        },
    }
endif

if needs_orc
    dataset_tests += {
        'file_orc': {'sources': ['file_orc_test.cc'], 'dependencies': [orc_dep]},
    }
endif

if needs_parquet
    dataset_tests += {'file_parquet': {'sources': ['file_parquet_test.cc']}}
    if needs_parquet_encryption
        dataset_tests += {
            'file_parquet_encryption': {
                'sources': [
                    'file_parquet_encryption_test.cc',
                    meson.project_source_root() / 'src/parquet/encryption/test_in_memory_kms.cc',
                ],
            },
        }
    endif
endif

foreach key, value : dataset_tests
    test_name = 'arrow-dataset-@0@'.format(key.replace('_', '-'))
    exc = executable(
        test_name,
        sources: value['sources'],
        # BUG FIX: the original referenced an undefined variable `val` here;
        # the foreach binds the dict value as `value`, so configuration would
        # fail with "Unknown variable" on the first test executable.
        dependencies: [arrow_dataset_test_dep, value.get('dependencies', [])],
    )
    test(test_name, exc)
endforeach

dataset_benchmarks = ['file', 'scanner']
foreach benchmark : dataset_benchmarks
    benchmark_name = f'arrow-dataset-@benchmark@-benchmark'
    exc = executable(
        benchmark_name,
        # Benchmarks reuse the *_test.cc sources, built against the
        # benchmark dependency instead of the test scaffolding.
        sources: [f'@benchmark@_test.cc'],
        dependencies: [
            arrow_dataset_dep,
            arrow_benchmark_dep,
            arrow_compute_core_test_dep,
        ],
    )
    benchmark(benchmark_name, exc)
endforeach

0 commit comments

Comments
 (0)