初学者求HQL实现下面需求

bangge138 2017-10-24 09:18:14


请问这样日期重合的要怎样实现?
我目前思路 将每个活动的时间区间展开到具体日期 然后再count(distinct 日期) 但是不知道如何实现
...全文
634 4 打赏 收藏 转发到动态 举报
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
卜塔 2018-01-26
  • 打赏
  • 举报
回复
写的特别好,请问hive怎么设置查询结果中用网格隔开,我刚安装的hive查询结果特别乱。多谢
引用 3 楼 u011512492 的回复:

vim sales.txt
# paste the following rows into sales.txt: id,brand,startdate,enddate (yyyyMMdd strings)
1,nike,20180901,20180905
2,nike,20180903,20180906
3,nike,20180909,20180915
4,oppo,20180804,20180805
5,oppo,20180804,20180815
6,vivo,20180815,20180821
7,vivo,20180902,20180912

# upload the file to HDFS
hdfs dfs -put sales.txt /data/

# create the hive table; dates are stored as plain yyyyMMdd strings, parsed later in queries
create table t_sales(id int,brand string,startdate string,enddate string)
row format delimited
fields terminated by ',';

# load the data -- NOTE(review): 'load data inpath' moves the HDFS file, so a re-run needs a re-upload; confirm
load data inpath '/data/sales.txt' into table t_sales;





-- Each promotion with parsed dates and its raw (non-deduplicated) inclusive
-- length in days. The yyyyMMdd strings are converted to the
-- 'yyyy-MM-dd HH:mm:ss' form that datediff accepts.
-- Computed columns are aliased so the result is not shown as _c2/_c3/_c4.
select t.id, t.brand,
       from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) start_ts,
       from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) end_ts,
       -- +1 because both endpoints count as promotion days
       datediff(from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')),from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd'))) + 1 days
  from t_sales t;
+-------+----------+----------------------+----------------------+------+--+
| t.id  | t.brand  |         _c2          |         _c3          | _c4  |
+-------+----------+----------------------+----------------------+------+--+
| 1     | nike     | 2018-09-01 00:00:00  | 2018-09-05 00:00:00  | 5    |
| 2     | nike     | 2018-09-03 00:00:00  | 2018-09-06 00:00:00  | 4    |
| 3     | nike     | 2018-09-09 00:00:00  | 2018-09-15 00:00:00  | 7    |
| 4     | oppo     | 2018-08-04 00:00:00  | 2018-08-05 00:00:00  | 2    |
| 5     | oppo     | 2018-08-04 00:00:00  | 2018-08-15 00:00:00  | 12   |
| 6     | vivo     | 2018-08-15 00:00:00  | 2018-08-21 00:00:00  | 7    |
| 7     | vivo     | 2018-09-02 00:00:00  | 2018-09-12 00:00:00  | 11   |
+-------+----------+----------------------+----------------------+------+--+



-- For each promotion, fetch the previous promotion's enddate within the brand.
-- NOTE(review): ordering by startdate is ambiguous when two intervals share a
-- startdate (see the two oppo rows in the result below) -- the tie is broken
-- arbitrarily, so prev_enddate may come from the wrong row; the next step
-- switches to 'order by enddate'.
select v1.*,
       lag(enddate) over(partition by v1.brand order by startdate) prev_enddate
  from (select t.id,
               t.brand,
               -- parse yyyyMMdd strings into timestamp strings datediff accepts
               from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) startdate,
               from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) enddate
          from t_sales t) v1;
+--------+-----------+----------------------+----------------------+----------------------+--+
| v1.id  | v1.brand  |     v1.startdate     |      v1.enddate      |     prev_enddate     |
+--------+-----------+----------------------+----------------------+----------------------+--+
| 1      | nike      | 2018-09-01 00:00:00  | 2018-09-05 00:00:00  | NULL                 |
| 2      | nike      | 2018-09-03 00:00:00  | 2018-09-06 00:00:00  | 2018-09-05 00:00:00  |
| 3      | nike      | 2018-09-09 00:00:00  | 2018-09-15 00:00:00  | 2018-09-06 00:00:00  |
| 5      | oppo      | 2018-08-04 00:00:00  | 2018-08-15 00:00:00  | NULL                 |
| 4      | oppo      | 2018-08-04 00:00:00  | 2018-08-05 00:00:00  | 2018-08-15 00:00:00  |
| 6      | vivo      | 2018-08-15 00:00:00  | 2018-08-21 00:00:00  | NULL                 |
| 7      | vivo      | 2018-09-02 00:00:00  | 2018-09-12 00:00:00  | 2018-08-21 00:00:00  |
+--------+-----------+----------------------+----------------------+----------------------+--+


-- sanity check: datediff accepts 'yyyy-MM-dd HH:mm:ss' strings (expected: 4)
select datediff('2018-09-05 00:00:00','2018-09-01 00:00:00');



-- Per-row contribution of each promotion to the brand's distinct-day total.
-- Fix over the original: compare each interval against the running MAXIMUM
-- enddate of all prior intervals instead of lag(enddate). lag only sees the
-- immediately previous row, so it over-counts the head of an interval that
-- starts before an earlier-seen interval and cannot detect an interval that
-- is fully contained in earlier ones (hence the explicit 'else 0' branch).
select v2.*,
       (case
         -- first interval of the brand, or a gap before it: whole span counts
         when prev_max_enddate is null or startdate > prev_max_enddate then
          datediff(enddate, startdate) + 1
         -- partial overlap: only the days beyond everything already counted
         when enddate > prev_max_enddate then
          datediff(enddate, prev_max_enddate)
         -- completely covered by earlier intervals: contributes nothing
         else 0
       end) days
  from (select v1.*,
               -- running max enddate of all EARLIER intervals of the brand
               max(enddate) over(partition by v1.brand
                                 order by startdate
                                 rows between unbounded preceding and 1 preceding) prev_max_enddate
          from (select t.id,
                       t.brand,
                       -- yyyyMMdd strings -> 'yyyy-MM-dd HH:mm:ss' so datediff works
                       from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) startdate,
                       from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) enddate
                  from t_sales t) v1) v2;
+--------+-----------+----------------------+----------------------+----------------------+-------+--+
| v2.id  | v2.brand  |     v2.startdate     |      v2.enddate      |   v2.prev_enddate    | days  |
+--------+-----------+----------------------+----------------------+----------------------+-------+--+
| 1      | nike      | 2018-09-01 00:00:00  | 2018-09-05 00:00:00  | NULL                 | 5     |
| 2      | nike      | 2018-09-03 00:00:00  | 2018-09-06 00:00:00  | 2018-09-05 00:00:00  | 1     |
| 3      | nike      | 2018-09-09 00:00:00  | 2018-09-15 00:00:00  | 2018-09-06 00:00:00  | 7     |
| 4      | oppo      | 2018-08-04 00:00:00  | 2018-08-05 00:00:00  | NULL                 | 2     |
| 5      | oppo      | 2018-08-04 00:00:00  | 2018-08-15 00:00:00  | 2018-08-05 00:00:00  | 10    |
| 6      | vivo      | 2018-08-15 00:00:00  | 2018-08-21 00:00:00  | NULL                 | 7     |
| 7      | vivo      | 2018-09-02 00:00:00  | 2018-09-12 00:00:00  | 2018-08-21 00:00:00  | 11    |
+--------+-----------+----------------------+----------------------+----------------------+-------+--+



-- Final answer: distinct promotion days per brand, counting each calendar day
-- once even when promotion intervals overlap.
-- Fix over the original: a running max(enddate) over all PRIOR intervals
-- (ordered by startdate) replaces lag(enddate); lag alone under-counts when
-- an interval starts before an earlier-seen one and cannot detect a fully
-- contained interval, hence the explicit 'else 0' branch.
with v3 as
 (select v2.brand,
         (case
           -- first interval of the brand, or a gap: whole span counts
           when prev_max_enddate is null or startdate > prev_max_enddate then
            datediff(enddate, startdate) + 1
           -- partial overlap: only the days beyond everything already counted
           when enddate > prev_max_enddate then
            datediff(enddate, prev_max_enddate)
           -- completely covered by earlier intervals: nothing new
           else 0
         end) days
    from (select v1.*,
                 max(enddate) over(partition by v1.brand
                                   order by startdate
                                   rows between unbounded preceding and 1 preceding) prev_max_enddate
            from (select t.brand,
                         from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) startdate,
                         from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) enddate
                    from t_sales t) v1) v2)
select brand, sum(days) all_days from v3 group by brand;

+--------+-----------+--+
| brand  | all_days  |
+--------+-----------+--+
| nike   | 13        |
| oppo   | 12        |
| vivo   | 18        |
+--------+-----------+--+

bigdata-sb 2018-01-25
  • 打赏
  • 举报
回复

vim sales.txt
# paste the following rows into sales.txt: id,brand,startdate,enddate (yyyyMMdd strings)
1,nike,20180901,20180905
2,nike,20180903,20180906
3,nike,20180909,20180915
4,oppo,20180804,20180805
5,oppo,20180804,20180815
6,vivo,20180815,20180821
7,vivo,20180902,20180912

# upload the file to HDFS
hdfs dfs -put sales.txt /data/

# create the hive table; dates are stored as plain yyyyMMdd strings, parsed later in queries
create table t_sales(id int,brand string,startdate string,enddate string)
row format delimited
fields terminated by ',';

# load the data -- NOTE(review): 'load data inpath' moves the HDFS file, so a re-run needs a re-upload; confirm
load data inpath '/data/sales.txt' into table t_sales;





-- Each promotion with parsed dates and its raw (non-deduplicated) inclusive
-- length in days. The yyyyMMdd strings are converted to the
-- 'yyyy-MM-dd HH:mm:ss' form that datediff accepts.
-- Computed columns are aliased so the result is not shown as _c2/_c3/_c4.
select t.id, t.brand,
       from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) start_ts,
       from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) end_ts,
       -- +1 because both endpoints count as promotion days
       datediff(from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')),from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd'))) + 1 days
  from t_sales t;
+-------+----------+----------------------+----------------------+------+--+
| t.id  | t.brand  |         _c2          |         _c3          | _c4  |
+-------+----------+----------------------+----------------------+------+--+
| 1     | nike     | 2018-09-01 00:00:00  | 2018-09-05 00:00:00  | 5    |
| 2     | nike     | 2018-09-03 00:00:00  | 2018-09-06 00:00:00  | 4    |
| 3     | nike     | 2018-09-09 00:00:00  | 2018-09-15 00:00:00  | 7    |
| 4     | oppo     | 2018-08-04 00:00:00  | 2018-08-05 00:00:00  | 2    |
| 5     | oppo     | 2018-08-04 00:00:00  | 2018-08-15 00:00:00  | 12   |
| 6     | vivo     | 2018-08-15 00:00:00  | 2018-08-21 00:00:00  | 7    |
| 7     | vivo     | 2018-09-02 00:00:00  | 2018-09-12 00:00:00  | 11   |
+-------+----------+----------------------+----------------------+------+--+



-- For each promotion, fetch the previous promotion's enddate within the brand.
-- NOTE(review): ordering by startdate is ambiguous when two intervals share a
-- startdate (see the two oppo rows in the result below) -- the tie is broken
-- arbitrarily, so prev_enddate may come from the wrong row; the next step
-- switches to 'order by enddate'.
select v1.*,
       lag(enddate) over(partition by v1.brand order by startdate) prev_enddate
  from (select t.id,
               t.brand,
               -- parse yyyyMMdd strings into timestamp strings datediff accepts
               from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) startdate,
               from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) enddate
          from t_sales t) v1;
+--------+-----------+----------------------+----------------------+----------------------+--+
| v1.id  | v1.brand  |     v1.startdate     |      v1.enddate      |     prev_enddate     |
+--------+-----------+----------------------+----------------------+----------------------+--+
| 1      | nike      | 2018-09-01 00:00:00  | 2018-09-05 00:00:00  | NULL                 |
| 2      | nike      | 2018-09-03 00:00:00  | 2018-09-06 00:00:00  | 2018-09-05 00:00:00  |
| 3      | nike      | 2018-09-09 00:00:00  | 2018-09-15 00:00:00  | 2018-09-06 00:00:00  |
| 5      | oppo      | 2018-08-04 00:00:00  | 2018-08-15 00:00:00  | NULL                 |
| 4      | oppo      | 2018-08-04 00:00:00  | 2018-08-05 00:00:00  | 2018-08-15 00:00:00  |
| 6      | vivo      | 2018-08-15 00:00:00  | 2018-08-21 00:00:00  | NULL                 |
| 7      | vivo      | 2018-09-02 00:00:00  | 2018-09-12 00:00:00  | 2018-08-21 00:00:00  |
+--------+-----------+----------------------+----------------------+----------------------+--+


-- sanity check: datediff accepts 'yyyy-MM-dd HH:mm:ss' strings (expected: 4)
select datediff('2018-09-05 00:00:00','2018-09-01 00:00:00');



-- Per-row contribution of each promotion to the brand's distinct-day total.
-- Fix over the original: compare each interval against the running MAXIMUM
-- enddate of all prior intervals instead of lag(enddate). lag only sees the
-- immediately previous row, so it over-counts the head of an interval that
-- starts before an earlier-seen interval and cannot detect an interval that
-- is fully contained in earlier ones (hence the explicit 'else 0' branch).
select v2.*,
       (case
         -- first interval of the brand, or a gap before it: whole span counts
         when prev_max_enddate is null or startdate > prev_max_enddate then
          datediff(enddate, startdate) + 1
         -- partial overlap: only the days beyond everything already counted
         when enddate > prev_max_enddate then
          datediff(enddate, prev_max_enddate)
         -- completely covered by earlier intervals: contributes nothing
         else 0
       end) days
  from (select v1.*,
               -- running max enddate of all EARLIER intervals of the brand
               max(enddate) over(partition by v1.brand
                                 order by startdate
                                 rows between unbounded preceding and 1 preceding) prev_max_enddate
          from (select t.id,
                       t.brand,
                       -- yyyyMMdd strings -> 'yyyy-MM-dd HH:mm:ss' so datediff works
                       from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) startdate,
                       from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) enddate
                  from t_sales t) v1) v2;
+--------+-----------+----------------------+----------------------+----------------------+-------+--+
| v2.id  | v2.brand  |     v2.startdate     |      v2.enddate      |   v2.prev_enddate    | days  |
+--------+-----------+----------------------+----------------------+----------------------+-------+--+
| 1      | nike      | 2018-09-01 00:00:00  | 2018-09-05 00:00:00  | NULL                 | 5     |
| 2      | nike      | 2018-09-03 00:00:00  | 2018-09-06 00:00:00  | 2018-09-05 00:00:00  | 1     |
| 3      | nike      | 2018-09-09 00:00:00  | 2018-09-15 00:00:00  | 2018-09-06 00:00:00  | 7     |
| 4      | oppo      | 2018-08-04 00:00:00  | 2018-08-05 00:00:00  | NULL                 | 2     |
| 5      | oppo      | 2018-08-04 00:00:00  | 2018-08-15 00:00:00  | 2018-08-05 00:00:00  | 10    |
| 6      | vivo      | 2018-08-15 00:00:00  | 2018-08-21 00:00:00  | NULL                 | 7     |
| 7      | vivo      | 2018-09-02 00:00:00  | 2018-09-12 00:00:00  | 2018-08-21 00:00:00  | 11    |
+--------+-----------+----------------------+----------------------+----------------------+-------+--+



-- Final answer: distinct promotion days per brand, counting each calendar day
-- once even when promotion intervals overlap.
-- Fix over the original: a running max(enddate) over all PRIOR intervals
-- (ordered by startdate) replaces lag(enddate); lag alone under-counts when
-- an interval starts before an earlier-seen one and cannot detect a fully
-- contained interval, hence the explicit 'else 0' branch.
with v3 as
 (select v2.brand,
         (case
           -- first interval of the brand, or a gap: whole span counts
           when prev_max_enddate is null or startdate > prev_max_enddate then
            datediff(enddate, startdate) + 1
           -- partial overlap: only the days beyond everything already counted
           when enddate > prev_max_enddate then
            datediff(enddate, prev_max_enddate)
           -- completely covered by earlier intervals: nothing new
           else 0
         end) days
    from (select v1.*,
                 max(enddate) over(partition by v1.brand
                                   order by startdate
                                   rows between unbounded preceding and 1 preceding) prev_max_enddate
            from (select t.brand,
                         from_unixtime(unix_timestamp(t.startdate, 'yyyyMMdd')) startdate,
                         from_unixtime(unix_timestamp(t.enddate, 'yyyyMMdd')) enddate
                    from t_sales t) v1) v2)
select brand, sum(days) all_days from v3 group by brand;

+--------+-----------+--+
| brand  | all_days  |
+--------+-----------+--+
| nike   | 13        |
| oppo   | 12        |
| vivo   | 18        |
+--------+-----------+--+

善若止水 2017-11-13
  • 打赏
  • 举报
回复
----测试表的表结构以及数据如下所示: hive> desc sales; OK id int produce_name string start_time date end_time date days int Time taken: 0.354 seconds, Fetched: 5 row(s) hive> select * from sales; OK 1 nike 2011-09-01 2011-09-05 5 2 nike 2011-09-03 2011-09-06 4 3 nike 2011-09-09 2011-09-15 7 4 oppo 2011-08-04 2011-08-05 2 5 oppo 2011-08-04 2011-08-15 12 6 vivo 2011-08-15 2011-08-21 7 7 vivo 2011-09-02 2011-09-12 11 Time taken: 0.223 seconds, Fetched: 7 row(s) ---第一步:求出每个开始时间的前一次促销的开始时间和结束时间 select id,produce_name,start_time,end_time, lag(start_time) over(partition by produce_name order by id) before_start_time, lag(end_time) over(partition by produce_name order by id) before_end_time from sales; ---第一步:执行结果(第一步) 1 nike 2011-09-01 2011-09-05 NULL NULL 2 nike 2011-09-03 2011-09-06 2011-09-01 2011-09-05 3 nike 2011-09-09 2011-09-15 2011-09-03 2011-09-06 4 oppo 2011-08-04 2011-08-05 NULL NULL 5 oppo 2011-08-04 2011-08-15 2011-08-04 2011-08-05 6 vivo 2011-08-15 2011-08-21 NULL NULL 7 vivo 2011-09-02 2011-09-12 2011-08-15 2011-08-21 ---第二步 依据前一次促销的开始时间和结束时间,开始时间统一为最早的,为后面分组做准备 select produce_name,start_time,max(end_time) end_time from (select id,produce_name,case when start_time>=before_start_time and start_time<=before_end_time then before_start_time else start_time end as start_time, end_time from (select id,produce_name,start_time,end_time, lag(start_time) over(partition by produce_name order by id) before_start_time, lag(end_time) over(partition by produce_name order by id) before_end_time from sales) t) d group by produce_name,start_time; --第二步 执行结果(第二步) nike 2011-09-01 2011-09-06 nike 2011-09-09 2011-09-15 oppo 2011-08-04 2011-08-15 vivo 2011-08-15 2011-08-21 vivo 2011-09-02 2011-09-12 ---第三步 依据合并后的开始时间,算出每个时间段内的促销天数之和 select produce_name,start_time,end_time,datediff(end_time,start_time)+1 days from (select produce_name,start_time,max(end_time) end_time from (select id,produce_name,case when start_time>=before_start_time and start_time<=before_end_time then before_start_time else 
start_time end as start_time, end_time from (select id,produce_name,start_time,end_time, lag(start_time) over(partition by produce_name order by id) before_start_time, lag(end_time) over(partition by produce_name order by id) before_end_time from sales) t) d group by produce_name,start_time) e; --执行结果(第三步) nike 2011-09-01 2011-09-06 6 nike 2011-09-09 2011-09-15 7 oppo 2011-08-04 2011-08-15 12 vivo 2011-08-15 2011-08-21 7 vivo 2011-09-02 2011-09-12 11 --第四步:依据产品名称,求出最终的促销天数之和 select produce_name,sum(days) from (select produce_name,start_time,end_time,datediff(end_time,start_time)+1 days from (select produce_name,start_time,max(end_time) end_time from (select id,produce_name,case when start_time>=before_start_time and start_time<=before_end_time then before_start_time else start_time end as start_time, end_time from (select id,produce_name,start_time,end_time, lag(start_time) over(partition by produce_name order by id) before_start_time, lag(end_time) over(partition by produce_name order by id) before_end_time from sales) t) d group by produce_name,start_time) e) f group by produce_name; ---执行结果(第四步) nike 13 oppo 12 vivo 18
zgycsmb 2017-10-30
  • 打赏
  • 举报
回复
没明白你的意思,

20,808

社区成员

发帖
与我相关
我的任务
社区描述
Hadoop生态大数据交流社区,致力于有Hadoop,hive,Spark,Hbase,Flink,ClickHouse,Kafka,数据仓库,大数据集群运维技术分享和交流等。致力于收集优质的博客
社区管理员
  • 分布式计算/Hadoop社区
  • 涤生大数据
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧