ICode9

精准搜索请尝试: 精确搜索
首页 > 其他分享> 文章详细

Hudi-通过Hive查询hudi表数据

2022-03-03 22:02:45  阅读:262  来源: 互联网

标签:Hudi EXISTS db didi Hive tbl hudi 2017


环境准备

集成 jar 包:将 hudi-hadoop-mr-bundle-0.10.1.jar 放入 $HIVE_HOME/lib 目录下。

建外部表

-- Create the database only when it is missing so this script can be re-run
-- without failing on an "already exists" error.
CREATE DATABASE IF NOT EXISTS db_hudi;

-- Make db_hudi the current database for the statements that follow.
USE db_hudi;

-- External Hive table mapped onto the files under /hudi-warehouse/tbl_didi_haikou.
-- Rows are read through Hudi's HoodieParquetInputFormat; the table is
-- partitioned by date_str, and partitions are registered separately.
CREATE EXTERNAL TABLE IF NOT EXISTS tbl_hudi_didi (
    order_id            BIGINT,
    product_id          INT,
    city_id             INT,
    district            INT,
    county              INT,
    type                INT,
    combo_type          INT,
    traffic_type        INT,
    passenger_count     INT,
    driver_product_id   INT,
    start_dest_distance INT,
    arrive_time         STRING,
    departure_time      STRING,
    pre_total_fee       DOUBLE,
    normal_time         STRING,
    bubble_trace_id     STRING,
    product_1level      INT,
    dest_lng            DOUBLE,
    dest_lat            DOUBLE,
    starting_lng        DOUBLE,
    starting_lat        DOUBLE,
    ts                  BIGINT,
    partitionpath       STRING
)
PARTITIONED BY (date_str STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS
    INPUTFORMAT  'org.apache.hudi.hadoop.HoodieParquetInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION '/hudi-warehouse/tbl_didi_haikou';

手动加入分区

-- Register each daily partition by hand, pointing date_str at the matching
-- directory under the table location. IF NOT EXISTS keeps every statement
-- safe to re-run.

-- May 2017
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-22')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-22';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-23')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-23';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-24')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-24';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-25')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-25';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-26')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-26';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-27')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-27';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-28')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-28';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-29')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-29';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-30')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-30';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-5-31')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-5-31';

-- June 2017
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-1')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-1';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-2')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-2';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-3')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-3';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-4')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-4';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-5')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-5';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-6')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-6';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-7')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-7';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-8')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-8';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-9')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-9';
ALTER TABLE db_hudi.tbl_hudi_didi
    ADD IF NOT EXISTS PARTITION (date_str = '2017-6-10')
    LOCATION '/hudi-warehouse/tbl_didi_haikou/2017-6-10';

查看分区

-- List the partitions currently registered for the table.
SHOW PARTITIONS db_hudi.tbl_hudi_didi;

指标统计

-- Development/testing only: set the run mode to local mode.
SET hive.exec.mode.local.auto=true;

-- NOTE(review): these three look like the task-count, input-size (bytes) and
-- input-file-count limits for automatic local mode - confirm against the
-- Hive configuration reference for the Hive version in use.
SET hive.exec.mode.local.auto.tasks.max=10;
SET hive.exec.mode.local.auto.inputbytes.max=88801103;
SET hive.exec.mode.local.auto.input.files.max=50;

-- NOTE(review): presumably needed so the full-table aggregations below can
-- run without a partition filter - confirm.
SET hive.mapred.mode=nonstrict;
-- Metric 1: order type statistics.
-- Counts orders per product_id, then maps each id to its display label;
-- any id outside 1-4 (including NULL) is reported as "未知".
WITH product_counts AS (
    SELECT
        product_id,
        COUNT(*) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY product_id
)
SELECT
    CASE
        WHEN product_id = 1 THEN "滴滴专车"
        WHEN product_id = 2 THEN "滴滴企业专车"
        WHEN product_id = 3 THEN "滴滴快车"
        WHEN product_id = 4 THEN "滴滴企业快车"
        ELSE "未知"
    END AS order_type,
    total
FROM product_counts;

-- Metric 2: order timeliness statistics.
-- Counts orders per type, then labels type 0 as "实时" and type 1 as "预约";
-- every other value (including NULL) is reported as "未知".
WITH type_counts AS (
    SELECT
        type,
        COUNT(*) AS total
    FROM db_hudi.tbl_hudi_didi
    GROUP BY type
)
SELECT
    CASE
        WHEN type = 0 THEN "实时"
        WHEN type = 1 THEN "预约"
        ELSE "未知"
    END AS order_type,
    total
FROM type_counts;

-- Metric 3: order traffic type statistics.
-- One output row per distinct traffic_type with its order count.
SELECT
    orders.traffic_type,
    COUNT(*) AS total
FROM db_hudi.tbl_hudi_didi AS orders
GROUP BY orders.traffic_type;

-- Metric 5: order price statistics.
-- Splits pre_total_fee into price ranges and counts the orders in each one
-- using CASE WHEN inside SUM (conditional aggregation).
--
-- pre_total_fee is a DOUBLE, so the ranges must be contiguous: with integer
-- BETWEEN bounds (0-15, 16-30, ...) a fee such as 15.5 or 30.2 matches no
-- range and is silently dropped from every count. Each bucket is therefore
-- "greater than the previous upper bound, up to and including its own".
-- Column names are kept as before for compatibility; they are
-- backtick-quoted because they start with a digit.
-- NULL or negative fees are not counted in any bucket (unchanged).
SELECT
    SUM(
        CASE WHEN pre_total_fee >= 0 AND pre_total_fee <= 15 THEN 1 ELSE 0 END
    ) AS `0_15`,
    SUM(
        CASE WHEN pre_total_fee > 15 AND pre_total_fee <= 30 THEN 1 ELSE 0 END
    ) AS `16_30`,
    SUM(
        CASE WHEN pre_total_fee > 30 AND pre_total_fee <= 50 THEN 1 ELSE 0 END
    ) AS `31_50`,
    SUM(
        CASE WHEN pre_total_fee > 50 AND pre_total_fee <= 100 THEN 1 ELSE 0 END
    ) AS `51_100`,
    SUM(
        CASE WHEN pre_total_fee > 100 THEN 1 ELSE 0 END
    ) AS `100_`
FROM db_hudi.tbl_hudi_didi;

 

 

 

标签:Hudi,EXISTS,db,didi,Hive,tbl,hudi,2017
来源: https://www.cnblogs.com/EnzoDin/p/15962045.html

本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享;
2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关;
3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关;
4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除;
5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。

专注分享技术,共同学习,共同进步。侵权联系[81616952@qq.com]

Copyright (C)ICode9.com, All Rights Reserved.

ICode9版权所有