LoginSignup
0
0

More than 1 year has passed since last update.

TPC-DS tools for Apache Impala

Last updated at Posted at 2022-12-13

参考資料:
https://github.com/cloudera/impala-tpcds-kit

1. テストデータを事前に準備

# Install the JDK and Maven.
sudo yum install -y \
  java-1.8.0-openjdk-devel \
  maven
# Install git plus the C toolchain and helper utilities.
sudo yum install -y \
  git gcc make flex bison byacc \
  curl unzip patch


# Fetch the kit, then build inside its tpcds-gen/ subdirectory.
git clone https://github.com/cloudera/impala-tpcds-kit.git
cd impala-tpcds-kit/tpcds-gen/
make


# Create /user/root in HDFS (acting as the hdfs user) and hand it to root.
# -p keeps this step re-runnable: without it, mkdir fails when the directory
# already exists.
# NOTE(review): presumably needed because the generator job is run as root — confirm.
sudo -u hdfs hdfs dfs -mkdir -p /user/root
sudo -u hdfs hdfs dfs -chown root:root /user/root


# Run the generator jar from target/ (presumably produced by the make step).
# -d names the directory that the table definitions later in this article use
# as the parent of their locations.
# NOTE(review): the directory is called sf10000 but -s is 10; presumably -s is
# the scale factor and -p the number of parallel tasks — confirm against the
# kit's README before renaming anything.
hadoop jar target/tpcds-gen-1.0-SNAPSHOT.jar -d /tmp/tpc-ds/sf10000/ -p 10 -s 10


# Recursively copy the store_sales and catalog_sales directories to S3.
# --exclude "*" followed by --include "data*" limits the copy to files whose
# names start with "data".
# The bucket name is a placeholder; substitute your own bucket.
# NOTE(review): the sources are paths relative to the current directory; the
# step that makes the generated data available there is not shown — confirm.
aws s3 cp store_sales s3://秘密のBucket/store_sales/ --recursive --exclude "*" --include "data*"
aws s3 cp catalog_sales s3://秘密のBucket/catalog_sales/ --recursive --exclude "*" --include "data*"

2. 準備したTextデータをImpala Schemaに入れる


-- Create the schema for the text-format tables if it is missing, then make it
-- the current schema so the unqualified table names below resolve into it.
create schema if not exists zzeng_tpcds_60_text;
use zzeng_tpcds_60_text;


-- External text table over the catalog_sales files under /tmp/tpc-ds/sf10000/
-- (the directory passed to the generator with -d).
-- "if not exists" makes this statement safe to re-run, in line with the
-- "create schema if not exists" used for the schema itself.
-- Because the table is external, dropping it leaves the data files in place.
-- Delimited text is mapped to columns by position, so the column order must
-- stay in sync with the field order of the '|'-separated files.
-- 'serialization.null.format'='' makes empty fields read back as NULL.
create external table if not exists catalog_sales (
  cs_sold_date_sk int,
  cs_sold_time_sk int,
  cs_ship_date_sk int,
  cs_bill_customer_sk int,
  cs_bill_cdemo_sk int,
  cs_bill_hdemo_sk int,
  cs_bill_addr_sk int,
  cs_ship_customer_sk int,
  cs_ship_cdemo_sk int,
  cs_ship_hdemo_sk int,
  cs_ship_addr_sk int,
  cs_call_center_sk int,
  cs_catalog_page_sk int,
  cs_ship_mode_sk int,
  cs_warehouse_sk int,
  cs_item_sk int,
  cs_promo_sk int,
  cs_order_number bigint,
  cs_quantity int,
  cs_wholesale_cost decimal(7,2),
  cs_list_price decimal(7,2),
  cs_sales_price decimal(7,2),
  cs_ext_discount_amt decimal(7,2),
  cs_ext_sales_price decimal(7,2),
  cs_ext_wholesale_cost decimal(7,2),
  cs_ext_list_price decimal(7,2),
  cs_ext_tax decimal(7,2),
  cs_coupon_amt decimal(7,2),
  cs_ext_ship_cost decimal(7,2),
  cs_net_paid decimal(7,2),
  cs_net_paid_inc_tax decimal(7,2),
  cs_net_paid_inc_ship decimal(7,2),
  cs_net_paid_inc_ship_tax decimal(7,2),
  cs_net_profit decimal(7,2)
)
row format delimited fields terminated by '|'
stored as textfile
location '/tmp/tpc-ds/sf10000/catalog_sales'
tblproperties ('serialization.null.format'='')
;

-- External text table over the store_sales files under /tmp/tpc-ds/sf10000/
-- (the directory passed to the generator with -d).
-- "if not exists" makes this statement safe to re-run, in line with the
-- "create schema if not exists" used for the schema itself.
-- Because the table is external, dropping it leaves the data files in place.
-- Delimited text is mapped to columns by position, so the column order must
-- stay in sync with the field order of the '|'-separated files.
-- 'serialization.null.format'='' makes empty fields read back as NULL.
create external table if not exists store_sales (
  ss_sold_date_sk int,
  ss_sold_time_sk int,
  ss_item_sk int,
  ss_customer_sk int,
  ss_cdemo_sk int,
  ss_hdemo_sk int,
  ss_addr_sk int,
  ss_store_sk int,
  ss_promo_sk int,
  ss_ticket_number bigint,
  ss_quantity int,
  ss_wholesale_cost decimal(7,2),
  ss_list_price decimal(7,2),
  ss_sales_price decimal(7,2),
  ss_ext_discount_amt decimal(7,2),
  ss_ext_sales_price decimal(7,2),
  ss_ext_wholesale_cost decimal(7,2),
  ss_ext_list_price decimal(7,2),
  ss_ext_tax decimal(7,2),
  ss_coupon_amt decimal(7,2),
  ss_net_paid decimal(7,2),
  ss_net_paid_inc_tax decimal(7,2),
  ss_net_profit decimal(7,2)
)
row format delimited fields terminated by '|'
stored as textfile
location '/tmp/tpc-ds/sf10000/store_sales'
tblproperties ('serialization.null.format'='')
;
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0