Skip to content

Commit 7c5ea70

Browse files
prasadns14 authored and agirish committed
MD-2954: Fixed TPCDS100 hive datagen script (#337)
1 parent 56fb0ab commit 7c5ea70

File tree

6 files changed

+60317
-60281
lines changed

6 files changed

+60317
-60281
lines changed

framework/resources/Datasources/hive_storage/change_metadata2.ddl

Lines changed: 27 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -34,45 +34,45 @@ ALTER TABLE remove_columns2 REPLACE COLUMNS (
3434
ALTER TABLE modify_columntypes1 REPLACE COLUMNS (
3535
int_col string,
3636
bigint_col varchar(50),
37-
date_col int,
38-
time_col bigint,
37+
date_col date,
38+
time_col string,
3939
timestamp_col varchar(20),
40-
interval_col date,
41-
varchar_col int,
42-
float_col int,
43-
double_col float,
44-
bool_col int
40+
interval_col string,
41+
varchar_col string,
42+
float_col float,
43+
double_col double,
44+
bool_col boolean
4545
);
4646

4747
ALTER TABLE modify_columntypes2 REPLACE COLUMNS (
4848
int_col float,
49-
bigint_col int,
49+
bigint_col bigint,
5050
date_col varchar(15),
51-
time_col timestamp,
52-
timestamp_col date,
53-
interval_col boolean,
51+
time_col string,
52+
timestamp_col timestamp,
53+
interval_col string,
5454
varchar_col varchar(5),
5555
float_col string,
56-
double_col bigint,
57-
bool_col float
56+
double_col double,
57+
bool_col boolean
5858
);
5959

6060
alter table seq_modify_columntypes1 replace columns (
6161
l_orderkey float,
6262
l_partkey double,
6363
l_suppkey varchar(1),
6464
l_linenumber string,
65-
l_quantity bigint,
66-
l_extendedprice int,
67-
l_discount float,
65+
l_quantity double,
66+
l_extendedprice double,
67+
l_discount double,
6868
l_tax varchar(20),
69-
l_returnflag boolean,
70-
l_linestatus int,
71-
l_shipdate timestamp,
69+
l_returnflag string,
70+
l_linestatus string,
71+
l_shipdate date,
7272
l_commitdate string,
73-
l_receiptdate int,
73+
l_receiptdate date,
7474
l_shipinstruct varchar(2),
75-
l_shipmode boolean,
75+
l_shipmode string,
7676
l_comment double
7777
);
7878
ALTER TABLE dpp_compressed_mktevents ADD COLUMNS (newcol STRING);
@@ -87,7 +87,10 @@ ALTER TABLE changemetadata_orders_partitioned ADD PARTITION (year=1997) location
8787

8888
ALTER TABLE changemetadata_orders_partitioned1 replace columns (
8989
o_orderkey INT,
90-
o_custkey INT,
90+
o_custkey INT
91+
);
92+
93+
ALTER TABLE changemetadata_orders_partitioned1 add columns (
9194
o_orderdate STRING,
9295
o_orderpriority STRING,
9396
o_clerk STRING,
@@ -103,9 +106,9 @@ ALTER TABLE changemetadata_orders_partitioned2 replace columns (
103106
o_custkey STRING,
104107
o_orderstatus STRING,
105108
o_totalprice DOUBLE,
106-
o_orderdate BOOLEAN,
109+
o_orderdate STRING,
107110
o_orderpriority VARCHAR(1),
108111
o_clerk DOUBLE,
109112
o_shippriority INT
110113
);
111-
ALTER TABLE changemetadata_orders_partitioned2 DROP IF EXISTS PARTITION(year=1990)
114+
ALTER TABLE changemetadata_orders_partitioned2 DROP IF EXISTS PARTITION(year=1990);

framework/resources/Datasources/hive_storage/tpcds100/tpcds100_hive.ddl

Lines changed: 73 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,9 @@ create external table tpcds100_parquet.customer
624624
c_email_address string,
625625
c_last_review_date string
626626
)
627-
STORED AS PARQUET;
627+
STORED AS PARQUET
628+
LOCATION '/drill/testdata/tpcds_sf100/parquet/customer'
629+
TBLPROPERTIES ("parquet.page.size"="4096","parquet.block.size"="12288");
628630

629631
drop table if exists tpcds100_parquet.customer_address;
630632
create external table tpcds100_parquet.customer_address
@@ -643,7 +645,9 @@ create external table tpcds100_parquet.customer_address
643645
ca_gmt_offset int,
644646
ca_location_type string
645647
)
646-
STORED AS PARQUET;
648+
STORED AS PARQUET
649+
LOCATION '/drill/testdata/tpcds_sf100/parquet/customer_address'
650+
TBLPROPERTIES ("parquet.dictionary.page.size"="65536");
647651

648652
drop table if exists tpcds100_parquet.customer_demographics;
649653
create external table tpcds100_parquet.customer_demographics
@@ -658,7 +662,9 @@ create external table tpcds100_parquet.customer_demographics
658662
cd_dep_employed_count int,
659663
cd_dep_college_count int
660664
)
661-
STORED AS PARQUET;
665+
STORED AS PARQUET
666+
LOCATION '/drill/testdata/tpcds_sf100/parquet/customer_demographics'
667+
TBLPROPERTIES ("parquet.enable.dictionary"="false", "parquet.compression"="SNAPPY", "parquet.page.size"="4096","parquet.block.size"="12288");
662668

663669
drop table if exists tpcds100_parquet.household_demographics;
664670
create external table tpcds100_parquet.household_demographics
@@ -669,15 +675,17 @@ create external table tpcds100_parquet.household_demographics
669675
hd_dep_count int,
670676
hd_vehicle_count int
671677
)
672-
STORED AS PARQUET;
678+
STORED AS PARQUET
679+
LOCATION '/drill/testdata/tpcds_sf100/parquet/household_demographics'
680+
TBLPROPERTIES ("parquet.compression"="GZIP");
673681

674682
drop table if exists tpcds100_parquet.item;
675-
create external table tpcds100_parquet.item
683+
create external table tpcds100_parquet.item
676684
(
677685
i_item_sk int,
678686
i_item_id string,
679-
i_rec_start_date string,
680-
i_rec_end_date string,
687+
i_rec_start_date date,
688+
i_rec_end_date date,
681689
i_item_desc string,
682690
i_current_price double,
683691
i_wholesale_cost double,
@@ -697,7 +705,8 @@ create external table tpcds100_parquet.item
697705
i_manager_id int,
698706
i_product_name string
699707
)
700-
STORED AS PARQUET;
708+
STORED AS PARQUET
709+
LOCATION '/drill/testdata/tpcds_sf100/parquet/item';
701710

702711
drop table if exists tpcds100_parquet.promotion;
703712
create external table tpcds100_parquet.promotion
@@ -722,7 +731,8 @@ create external table tpcds100_parquet.promotion
722731
p_purpose string,
723732
p_discount_active string
724733
)
725-
STORED AS PARQUET;
734+
STORED AS PARQUET
735+
LOCATION '/drill/testdata/tpcds_sf100/parquet/promotion';
726736

727737
drop table if exists tpcds100_parquet.time_dim;
728738
create external table tpcds100_parquet.time_dim (
@@ -737,7 +747,9 @@ create external table tpcds100_parquet.time_dim (
737747
t_sub_shift string,
738748
t_meal_time string
739749
)
740-
STORED AS PARQUET;
750+
STORED AS PARQUET
751+
LOCATION '/drill/testdata/tpcds_sf100/parquet/time_dim'
752+
TBLPROPERTIES ("parquet.enable.dictionary"="false", "parquet.compression"="GZIP", "parquet.block.size"="32768", "parquet.page.size"="1024");
741753

742754
drop table if exists tpcds100_parquet.date_dim;
743755
create external table tpcds100_parquet.date_dim (
@@ -770,10 +782,12 @@ create external table tpcds100_parquet.date_dim (
770782
d_current_quarter string,
771783
d_current_year string
772784
)
773-
STORED AS PARQUET;
785+
STORED AS PARQUET
786+
LOCATION '/drill/testdata/tpcds_sf100/parquet/date_dim'
787+
TBLPROPERTIES ("parquet.enable.dictionary"="false", "parquet.page.size"="134217728");
774788

775789
drop table if exists tpcds100_parquet.store;
776-
create external table tpcds100_parquet.store (
790+
create external table tpcds100_parquet.store (
777791
s_store_sk int,
778792
s_store_id string,
779793
s_rec_start_date string,
@@ -804,10 +818,12 @@ create external table tpcds100_parquet.store (
804818
s_gmt_offset double,
805819
s_tax_precentage double
806820
)
807-
STORED AS PARQUET;
821+
STORED AS PARQUET
822+
LOCATION '/drill/testdata/tpcds_sf100/parquet/store'
823+
TBLPROPERTIES ("parquet.dictionary.page.size"="32768", "parquet.block.size"="32768", "parquet.page.size"="32768");
808824

809825
drop table if exists tpcds100_parquet.store_sales;
810-
create external table tpcds100_parquet.store_sales (
826+
create external table tpcds100_parquet.store_sales (
811827
ss_sold_date_sk int,
812828
ss_sold_time_sk int,
813829
ss_item_sk int,
@@ -825,17 +841,18 @@ create external table tpcds100_parquet.store_sales (
825841
ss_ext_discount_amt double,
826842
ss_ext_sales_price double,
827843
ss_ext_wholesale_cost double,
828-
ss_ext_list_price double,
844+
ss_ext_list_price double,
829845
ss_ext_tax double,
830846
ss_coupon_amt double,
831847
ss_net_paid double,
832848
ss_net_paid_inc_tax double,
833849
ss_net_profit double
834850
)
835-
STORED AS PARQUET;
851+
STORED AS PARQUET
852+
LOCATION '/drill/testdata/tpcds_sf100/parquet/store_sales';
836853

837854
drop table if exists tpcds100_parquet.warehouse;
838-
create external table tpcds100_parquet.warehouse (
855+
create external table tpcds100_parquet.warehouse (
839856
w_warehouse_sk int,
840857
w_warehouse_id string,
841858
w_warehouse_name string,
@@ -851,7 +868,8 @@ create external table tpcds100_parquet.warehouse (
851868
w_country string,
852869
w_gmt_offset double
853870
)
854-
STORED AS PARQUET;
871+
STORED AS PARQUET
872+
LOCATION '/drill/testdata/tpcds_sf100/parquet/warehouse';
855873

856874
drop table if exists tpcds100_parquet.ship_mode;
857875
create external table tpcds100_parquet.ship_mode (
@@ -862,26 +880,29 @@ create external table tpcds100_parquet.ship_mode (
862880
sm_carrier string,
863881
sm_contract string
864882
)
865-
STORED AS PARQUET;
883+
STORED AS PARQUET
884+
LOCATION '/drill/testdata/tpcds_sf100/parquet/ship_mode';
866885

867886
drop table if exists tpcds100_parquet.reason;
868887
create external table tpcds100_parquet.reason (
869888
r_reason_sk int,
870889
r_reason_id string,
871890
r_reason_desc string
872891
)
873-
STORED AS PARQUET;
892+
STORED AS PARQUET
893+
LOCATION '/drill/testdata/tpcds_sf100/parquet/reason';
874894

875895
drop table if exists tpcds100_parquet.income_band;
876-
create external table tpcds100_parquet.income_band (
896+
create external table tpcds100_parquet.income_band (
877897
ib_income_band_sk int,
878898
ib_lower_bound int,
879899
ib_upper_bound int
880900
)
881-
STORED AS PARQUET;
901+
STORED AS PARQUET
902+
LOCATION '/drill/testdata/tpcds_sf100/parquet/income_band';
882903

883904
drop table if exists tpcds100_parquet.call_center;
884-
create external table tpcds100_parquet.call_center (
905+
create external table tpcds100_parquet.call_center (
885906
cc_call_center_sk int,
886907
cc_call_center_id string,
887908
cc_rec_start_date string,
@@ -914,10 +935,11 @@ create external table tpcds100_parquet.call_center (
914935
cc_gmt_offset double,
915936
cc_tax_percentage double
916937
)
917-
STORED AS PARQUET;
938+
STORED AS PARQUET
939+
LOCATION '/drill/testdata/tpcds_sf100/parquet/call_center';
918940

919941
drop table if exists tpcds100_parquet.web_site;
920-
create external table tpcds100_parquet.web_site (
942+
create external table tpcds100_parquet.web_site (
921943
web_site_sk int,
922944
web_site_id string,
923945
web_rec_start_date string,
@@ -945,10 +967,11 @@ create external table tpcds100_parquet.web_site (
945967
web_gmt_offset double,
946968
web_tax_percentage double
947969
)
948-
STORED AS PARQUET;
970+
STORED AS PARQUET
971+
LOCATION '/drill/testdata/tpcds_sf100/parquet/web_site';
949972

950973
drop table if exists tpcds100_parquet.store_returns;
951-
create external table tpcds100_parquet.store_returns (
974+
create external table tpcds100_parquet.store_returns (
952975
sr_returned_date_sk int,
953976
sr_return_time_sk int,
954977
sr_item_sk int,
@@ -970,10 +993,11 @@ create external table tpcds100_parquet.store_returns (
970993
sr_store_credit double,
971994
sr_net_loss double
972995
)
973-
STORED AS PARQUET;
996+
STORED AS PARQUET
997+
LOCATION '/drill/testdata/tpcds_sf100/parquet/store_returns';
974998

975999
drop table if exists tpcds100_parquet.web_page;
976-
create external table tpcds100_parquet.web_page (
1000+
create external table tpcds100_parquet.web_page (
9771001
wp_web_page_sk int,
9781002
wp_web_page_id string,
9791003
wp_rec_start_date string,
@@ -989,7 +1013,8 @@ create external table tpcds100_parquet.web_page (
9891013
wp_image_count int,
9901014
wp_max_ad_count int
9911015
)
992-
STORED AS PARQUET;
1016+
STORED AS PARQUET
1017+
LOCATION '/drill/testdata/tpcds_sf100/parquet/web_page';
9931018

9941019
drop table if exists tpcds100_parquet.catalog_page;
9951020
create external table tpcds100_parquet.catalog_page (
@@ -1003,19 +1028,23 @@ create external table tpcds100_parquet.catalog_page (
10031028
cp_description string,
10041029
cp_type string
10051030
)
1006-
STORED AS PARQUET;
1031+
STORED AS PARQUET
1032+
LOCATION '/drill/testdata/tpcds_sf100/parquet/catalog_page'
1033+
TBLPROPERTIES ("parquet.dictionary.page.size"="5242880", "parquet.page.size"="262144", "parquet.block.size"="262144");
10071034

10081035
drop table if exists tpcds100_parquet.inventory;
1009-
create external table tpcds100_parquet.inventory (
1036+
create external table tpcds100_parquet.inventory (
10101037
inv_date_sk int,
10111038
inv_item_sk int,
10121039
inv_warehouse_sk int,
10131040
inv_quantity_on_hand int
10141041
)
1015-
STORED AS PARQUET;
1042+
STORED AS PARQUET
1043+
LOCATION '/drill/testdata/tpcds_sf100/parquet/inventory'
1044+
TBLPROPERTIES ("parquet.block.size"="10485760");
10161045

10171046
drop table if exists tpcds100_parquet.catalog_returns;
1018-
create external table tpcds100_parquet.catalog_returns (
1047+
create external table tpcds100_parquet.catalog_returns (
10191048
cr_returned_date_sk int,
10201049
cr_returned_time_sk int,
10211050
cr_item_sk int,
@@ -1044,10 +1073,11 @@ create external table tpcds100_parquet.catalog_returns (
10441073
cr_store_credit double,
10451074
cr_net_loss double
10461075
)
1047-
STORED AS PARQUET;
1076+
STORED AS PARQUET
1077+
LOCATION '/drill/testdata/tpcds_sf100/parquet/catalog_returns';
10481078

10491079
drop table if exists tpcds100_parquet.web_returns;
1050-
create external table tpcds100_parquet.web_returns (
1080+
create table tpcds100_parquet.web_returns (
10511081
wr_returned_date_sk int,
10521082
wr_returned_time_sk int,
10531083
wr_item_sk int,
@@ -1073,7 +1103,8 @@ create external table tpcds100_parquet.web_returns (
10731103
wr_account_credit double,
10741104
wr_net_loss double
10751105
)
1076-
STORED AS PARQUET;
1106+
STORED AS PARQUET
1107+
LOCATION '/drill/testdata/tpcds_sf100/parquet/web_returns';
10771108

10781109
drop table if exists tpcds100_parquet.web_sales;
10791110
create external table tpcds100_parquet.web_sales (
@@ -1112,10 +1143,11 @@ create external table tpcds100_parquet.web_sales (
11121143
ws_net_paid_inc_ship_tax double,
11131144
ws_net_profit double
11141145
)
1115-
STORED AS PARQUET;
1146+
STORED AS PARQUET
1147+
LOCATION '/drill/testdata/tpcds_sf100/parquet/web_sales';
11161148

11171149
drop table if exists tpcds100_parquet.catalog_sales;
1118-
create external table tpcds100_parquet.catalog_sales (
1150+
create external table tpcds100_parquet.catalog_sales (
11191151
cs_sold_date_sk int,
11201152
cs_sold_time_sk int,
11211153
cs_ship_date_sk int,
@@ -1151,4 +1183,5 @@ create external table tpcds100_parquet.catalog_sales (
11511183
cs_net_paid_inc_ship_tax double,
11521184
cs_net_profit double
11531185
)
1154-
STORED AS PARQUET;
1186+
STORED AS PARQUET
1187+
LOCATION '/drill/testdata/tpcds_sf100/parquet/catalog_sales';

0 commit comments

Comments
 (0)