20,808
社区成员
发帖
与我相关
我的任务
分享
#准备测试数据
cat /home/hadoop/hemers.txt
2014050201 新街口
2014050202 新街口
2014050203 新街口
2014050204 鼓楼
2014050205 岔路口
2014050206 岔路口
2014050207 新街口
2014050208 新街口
#创建测试数据表
CREATE EXTERNAL TABLE IF NOT EXISTS testdb.hemers(
date STRING,
ts STRING
)ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
#导入数据
LOAD DATA LOCAL INPATH '/home/hadoop/hemers.txt' OVERWRITE INTO TABLE testdb.hemers;
#执行SQL
use testdb;
select t1.date,t1.ts from hemers t1
inner join(
select dt,count(1)ct1,count(distinct ts)ct2 from (
select unix_timestamp(date,'yyyyMMddhh') dt,date,ts from hemers a
union all
select unix_timestamp(date,'yyyyMMddhh')+3600 dt,date,ts from hemers b
)c group by dt
having count(1)=1 or ct2=2
)t2 on unix_timestamp(t1.date,'yyyyMMddhh') =t2.dt;
#输出
2014050201 新街口
2014050204 鼓楼
2014050205 岔路口
2014050207 新街口