Skip to content

Commit 03453e2

Browse files
authored
Test cases for schema change due to empty batch (#314)
1 parent 9ed9069 commit 03453e2

File tree

131 files changed

+854
-0
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

131 files changed

+854
-0
lines changed

conf/plugin-templates/dfs-storage-plugin.template

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@
4545
"writable" : true,
4646
"defaultInputFormat" : "parquet"
4747
},
48+
"drillTestDirSchemaChangeEmptyBatch" : {
49+
"location" : "/drill/testdata/schema_change_empty_batch/",
50+
"writable" : true,
51+
"defaultInputFormat" : null
52+
},
4853
"Join" : {
4954
"location" : "/drill/testdata/join",
5055
"writable" : true,

conf/plugin-templates/mfs-storage-plugin.template

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@
5959
"location":"/drill/testdata/tpcds_sf100/maprdb/json/views",
6060
"writable":true,
6161
"defaultInputFormat":"maprdb"
62+
},
63+
"drillTestDirSchemaChangeEmptyBatch" : {
64+
"location" : "/drill/testdata/schema_change_empty_batch/",
65+
"writable" : true,
66+
"defaultInputFormat" : "maprdb"
6267
}
6368
},
6469
"formats":{

framework/pom.xml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,19 @@
126126
<outputDirectory>${project.basedir}/resources/Functional/Failing/data-shapes/wide-columns/5000/1000rows/parquet/</outputDirectory>
127127
</configuration>
128128
</execution>
129+
<execution>
130+
<id>schema_change_empty_batch</id>
131+
<phase>prepare-package</phase>
132+
<goals>
133+
<goal>wget</goal>
134+
</goals>
135+
<configuration>
136+
<url>https://s3.amazonaws.com/apache-drill/files/schema_change_empty_batch.tgz</url>
137+
<outputFileName>schema_change_empty_batch.tgz</outputFileName>
138+
<unpack>true</unpack>
139+
<outputDirectory>${project.basedir}/resources/Datasources/schema_change_empty_batch/</outputDirectory>
140+
</configuration>
141+
</execution>
129142
<execution>
130143
<id>func-passing-widestrings-large</id>
131144
<phase>prepare-package</phase>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/sh
#
# Copies the local HBase data files for the schema_change_empty_batch tests
# into the distributed file system, unless the target directory is already
# there.
#
# Uses DRILL_TESTDATA (DFS destination root) and DRILL_TEST_DATA_DIR (local
# resources root); both are expected to come from
# conf/drillTestConfig.properties, which is resolved relative to the current
# working directory.

# '.' rather than 'source': this script runs under /bin/sh, and 'source' is a
# bash extension that strict POSIX shells such as dash do not provide.
. conf/drillTestConfig.properties

set -x
set -e

# Abort early instead of operating on paths rooted at '/' when the
# configuration did not define the variables.
: "${DRILL_TESTDATA:?DRILL_TESTDATA is not set}"
: "${DRILL_TEST_DATA_DIR:?DRILL_TEST_DATA_DIR is not set}"

local_dir="${DRILL_TEST_DATA_DIR}/Datasources/schema_change_empty_batch/data/hbase"
dfs_dir="${DRILL_TESTDATA}/schema_change_empty_batch/hbase/data"

# Test the exit status directly; wrapping the command in $( ) would try to
# execute whatever it printed on stdout.
if ! hadoop fs -test -d "${dfs_dir}"; then

  hadoop fs -mkdir -p "${dfs_dir}"

  # The glob stays outside the quotes so the shell still expands it.
  hadoop fs -put "${local_dir}"/* "${dfs_dir}/"

fi

set +x
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/bin/bash
#
# Schema change empty batch tests on HBase tables.
#
# Drops and recreates the four HBase tables named below through 'hbase shell',
# then inserts ten rows into the last one.

table1="schema_change_empty_batch_part"
table2="schema_change_empty_batch_partsupp"
table3="schema_change_empty_batch_empty"
table4="browser_action2"

#######################################
# Drops a table and creates it again with the given pre-split keys.
# Each statement is piped into its own 'hbase shell' session, so a failing
# disable/drop (for example on a table that does not exist yet) cannot stop
# the create that follows.
# Arguments:
#   $1 - table name
#   $2 - column family name
#   $3 - split keys, already formatted as the inside of an HBase shell list
# Outputs:
#   [INFO] progress lines plus the hbase shell output on stdout
#######################################
recreate_table() {
  local table=$1
  local family=$2
  local splits=$3

  echo "[INFO] Dropping existing '${table}'"
  echo "disable '${table}' ; drop '${table}'" | hbase shell

  echo "[INFO] Creating '${table}'"
  echo "create '${table}', '${family}', {SPLITS => [${splits}]}" | hbase shell
}

# Create HBase Tables

recreate_table "${table1}" 'data' "'1', '10000', '15000', '25000', '99999'"
recreate_table "${table2}" 'data' "'1', '10000', '50000', '75000', '100000', '999999'"
recreate_table "${table3}" 'data' "'1', '1000'"
recreate_table "${table4}" 'v' "'0','1','2','3','4','5','6','7','8','9'"

echo "[INFO] Inserting rows into '${table4}'"

# Row keys 1..10, each with v:e0 = abc<key>. The puts use ${table4} instead of
# repeating the literal table name, and are sent through a single
# 'hbase shell' session rather than starting the shell once per row.
for row in {1..10}; do
  echo "put '${table4}', '${row}','v:e0', 'abc${row}'"
done | hbase shell
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
 * Part data: read the pipe-delimited part file and store it in the
 * 'schema_change_empty_batch_part' HBase table.
 */
part_rows = LOAD '${DRILL_TESTDATA}/schema_change_empty_batch/hbase/part.tbl'
    USING PigStorage('|')
    AS (
        row_key:long,
        p_partkey:long,
        p_name:chararray,
        p_mfgr:chararray,
        p_brand:chararray,
        p_type:chararray,
        p_size:int,
        p_container:chararray,
        p_retailprice:double,
        p_comment:chararray
    );

-- The first field (row_key) is used implicitly as the HBase row key; the
-- remaining fields map, in order, onto the 'data' column family below.
STORE part_rows INTO 'schema_change_empty_batch_part'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('
data:p_partkey,
data:p_name,
data:p_mfgr,
data:p_brand,
data:p_type,
data:p_size,
data:p_container,
data:p_retailprice,
data:p_comment
');

/*
 * Partsupp data: read the pipe-delimited partsupp file and store it in the
 * 'schema_change_empty_batch_partsupp' HBase table.
 */
partsupp_rows = LOAD '${DRILL_TESTDATA}/schema_change_empty_batch/hbase/partsupp.tbl'
    USING PigStorage('|')
    AS (
        row_key:long,
        ps_partkey:long,
        ps_suppkey:long,
        ps_availqty:int,
        ps_supplycost:double,
        ps_comment:chararray
    );

-- As above, row_key becomes the HBase row key implicitly.
STORE partsupp_rows INTO 'schema_change_empty_batch_partsupp'
    USING org.apache.pig.backend.hadoop.hbase.HBaseStorage('
data:ps_partkey,
data:ps_suppkey,
data:ps_availqty,
data:ps_supplycost,
data:ps_comment
');
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/bash
#
# Loads the part and partsupp data sets into their HBase tables with the
# HBase MapReduce Import tool, reading from the DFS directory under
# ${DRILL_TESTDATA}/schema_change_empty_batch/hbase/data.
#
# DRILL_TESTDATA is expected to come from conf/drillTestConfig.properties,
# which is resolved relative to the current working directory.
source conf/drillTestConfig.properties

set -e

# Without this guard an unset variable would silently turn the import source
# into /schema_change_empty_batch/... at the file system root.
: "${DRILL_TESTDATA:?DRILL_TESTDATA is not set}"

import_root="${DRILL_TESTDATA}/schema_change_empty_batch/hbase/data"

# Table schema_change_empty_batch_<name> is loaded from <import_root>/<name>/.
for name in part partsupp; do
  hbase org.apache.hadoop.hbase.mapreduce.Import \
    "schema_change_empty_batch_${name}" "${import_root}/${name}/"
done
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/sh
#
# Runs the HBase setup steps for the schema_change_empty_batch tests in
# order: copyFiles.sh, createTables.sh, loadTables.sh. Because of 'set -e'
# the sequence stops at the first step that exits non-zero.
#
# DRILL_TEST_DATA_DIR is expected to come from
# conf/drillTestConfig.properties, which is resolved relative to the current
# working directory.

# '.' rather than 'source': this script runs under /bin/sh, and 'source' is a
# bash extension that strict POSIX shells such as dash do not provide.
. conf/drillTestConfig.properties

set -x
set -e

# Fail with a clear message rather than trying to execute
# /Datasources/... when the configuration did not define the variable.
: "${DRILL_TEST_DATA_DIR:?DRILL_TEST_DATA_DIR is not set}"

hbase_scripts="${DRILL_TEST_DATA_DIR}/Datasources/schema_change_empty_batch/hbase"

"${hbase_scripts}/copyFiles.sh"
"${hbase_scripts}/createTables.sh"
"${hbase_scripts}/loadTables.sh"

set +x
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/bash
#
# Prepares the JSON data for the schema_change_empty_batch tests:
#   1. Locally, splits part.json and partsupp.json into multi-file
#      directories and adds zero-length .json files next to them.
#   2. Copies the resulting directories (plus part_small) into the
#      distributed file system.
# Each phase is skipped when its marker directory already exists.
#
# DRILL_TEST_DATA_DIR (local resources root) and DRILL_TESTDATA (DFS root)
# are expected to come from conf/drillTestConfig.properties, which is
# resolved relative to the current working directory.
source conf/drillTestConfig.properties

set -x
set -e

# Abort early instead of operating on paths rooted at '/' when the
# configuration did not define the variables.
: "${DRILL_TEST_DATA_DIR:?DRILL_TEST_DATA_DIR is not set}"
: "${DRILL_TESTDATA:?DRILL_TESTDATA is not set}"

json_dir="${DRILL_TEST_DATA_DIR}/Datasources/schema_change_empty_batch/data/json"
dfs_dir="${DRILL_TESTDATA}/schema_change_empty_batch/json"

if [[ ! -d "${json_dir}/part" ]]; then

  for name in part partsupp empty; do
    echo "Creating directory ${json_dir}/${name}"
    # -p keeps a re-run working when an earlier, interrupted run already
    # created some of the sibling directories.
    mkdir -p "${json_dir}/${name}"
  done

  echo "Creating source files"

  # split names its chunks <prefix>aa, <prefix>ab, ...
  split -l 5000 "${json_dir}/part.json" "${json_dir}/part/part"
  split -l 20000 "${json_dir}/partsupp.json" "${json_dir}/partsupp/partsupp"

  # Give every chunk that has no extension yet a .json suffix.
  find "${json_dir}/part/" -type f ! -name "*.*" -exec mv {} {}.json \;
  find "${json_dir}/partsupp/" -type f ! -name "*.*" -exec mv {} {}.json \;

  echo "Creating empty files"

  # Brace expansion must stay outside the quotes to be expanded.
  # NOTE(review): touch only yields an empty file when the name is not
  # already taken by a split chunk; presumably the inputs split into at most
  # five chunks (aa..ae) -- confirm against the data set.
  touch "${json_dir}/part/parta"{f..h}.json
  touch "${json_dir}/partsupp/partsuppa"{f..h}.json
  touch "${json_dir}/empty/empty"{a..d}.json

fi

# Test the exit status directly; wrapping the command in $( ) would try to
# execute whatever it printed on stdout.
if ! hadoop fs -test -d "${dfs_dir}"; then

  echo "Copying to hadoop"

  hadoop fs -mkdir -p "${dfs_dir}"

  # part_small is not created by this script; it is expected to exist
  # already under ${json_dir}.
  for name in part partsupp empty part_small; do
    hadoop fs -put "${json_dir}/${name}" "${dfs_dir}/"
  done

fi

set +x
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/sh
#
# Copies the local HBase data files for the schema_change_empty_batch tests
# into the maprdb/binary/data directory of the distributed file system,
# unless that directory is already there.
#
# Uses DRILL_TESTDATA (DFS destination root) and DRILL_TEST_DATA_DIR (local
# resources root); both are expected to come from
# conf/drillTestConfig.properties, which is resolved relative to the current
# working directory.

# '.' rather than 'source': this script runs under /bin/sh, and 'source' is a
# bash extension that strict POSIX shells such as dash do not provide.
. conf/drillTestConfig.properties

set -x
set -e

# Abort early instead of operating on paths rooted at '/' when the
# configuration did not define the variables.
: "${DRILL_TESTDATA:?DRILL_TESTDATA is not set}"
: "${DRILL_TEST_DATA_DIR:?DRILL_TEST_DATA_DIR is not set}"

# The source is the same local hbase data directory used for the HBase tables.
local_dir="${DRILL_TEST_DATA_DIR}/Datasources/schema_change_empty_batch/data/hbase"
dfs_dir="${DRILL_TESTDATA}/schema_change_empty_batch/maprdb/binary/data"

# Test the exit status directly; wrapping the command in $( ) would try to
# execute whatever it printed on stdout.
if ! hadoop fs -test -d "${dfs_dir}"; then
  hadoop fs -mkdir -p "${dfs_dir}"

  # The glob stays outside the quotes so the shell still expands it.
  hadoop fs -put "${local_dir}"/* "${dfs_dir}/"
fi

set +x

0 commit comments

Comments
 (0)