数据表不连续数据

weixin_40101242 2017-11-28 02:35:16

现在数据表里有这样的数据
日期小时分钟
2016-01-01 1 0
2016-01-01 1 5
2016-01-01 1 10
. . .
. . .
2016-12-31 24 55
(数据是按日期、小时、分钟连续的)但会存在这样的情况，举例：

也就是2016-08-03 16时 0分数据的前后是缺失的，那么2016-08-03 16时 0分的数据就要被取出存储(称这类数据为A)，
同理，下面许多数据也都是A类数据，A类数据的个数为1时存入a表，在1和3之间存入表b，大于3时存入c表。
请问各位大神，这样的判断并存储的语句怎么谢？小弟感激不尽。

...全文

753 40 打赏收藏转发到动态举报

写回复

用AI写文章

40 条回复

切换为时间正序

请发表友善的回复…

发表回复

weixin_40101242 2017-11-30

打赏
举报

引用 39 楼 superwfei 的回复:

;with tt as (
    select *
	,datediff(minute,'2000-1-1',dateadd(minute,分钟,dateadd(hour,小时,convert(datetime,观测日期))))/5 as tn
    from #t  -- 实际使用吧#t换成自己的表名
),ttt as (
	select *,row_number() over(order by tn) as rid from tt a where not exists(select top 1 1 from tt where tn=a.tn-1)
),tttt as (
	select * from ttt a
	cross apply(
		select count(0) as 最大观测次数 from tt where tn>=a.tn and tn<isnull((select tn from ttt where rid=a.rid+1),100000000)
	) b
)
select * from tttt

换了个写法，这个效率应该高一些，并且不会出现递归超出限制问题

谢谢帮助了，还是没解决，我结帖换个板块问问吧。

文盲老顾 2017-11-29

打赏
举报

;with tt as (
    select *
	,datediff(minute,'2000-1-1',dateadd(minute,分钟,dateadd(hour,小时,convert(datetime,观测日期))))/5 as tn
    from #t  -- 实际使用吧#t换成自己的表名
),ttt as (
	select *,row_number() over(order by tn) as rid from tt a where not exists(select top 1 1 from tt where tn=a.tn-1)
),tttt as (
	select * from ttt a
	cross apply(
		select count(0) as 最大观测次数 from tt where tn>=a.tn and tn<isnull((select tn from ttt where rid=a.rid+1),100000000)
	) b
)
select * from tttt

换了个写法，这个效率应该高一些，并且不会出现递归超出限制问题

文盲老顾 2017-11-29

打赏
举报

用完100次递归了？看看数据是否有重复的，也就是日期、小时、分钟这三项，如果有重复的，追加一个其他条件用以区分这些如果不是因为重复造成的，看看是否有连续超过100次的观测，如果有这种数据，那么递归的写法要调整

weixin_40101242 2017-11-29

打赏
举报

求助

weixin_40101242 2017-11-29

打赏
举报

引用 34 楼 RINK_1 的回复:


--测试数据
if not object_id(N'Tempdb..#T') is null
    drop table #T
Go
Create table #T([日期] Date,[小时] int,[分钟] int)
Insert #T
    select '2016-8-3' ,15,45
    union all
    select '2016-8-3',15,50
    union all
    select '2016-8-3',15,55
    union all
    select '2016-8-3',16,0
    union all
    select '2016-8-19',12,25
    union all
    select '2016-8-19',12,30
    union all
    select '2016-10-1',1,0
    union all
    select '2016-10-1',1,5
    union all
    select '2016-10-1',1,10
    union all
    select '2016-10-9',7,55
    union all
    select '2016-10-9',8,5
    union all
    select '2016-10-9',8,10
    go
    
with cte_1
as
(select *,
dateadd(minute,[分钟],dateadd(HOUR,[小时],cast([日期] as datetime))) as [time]
from #T),

cte_2
as
(select ROW_NUMBER() over (order by (select null)) as seq from master..spt_values A,master..spt_values B
 where A.TYPE='P' and B.type='P' and A.number<1000 and B.number<1000)

select *,
count(1) over (partition by rn_1-rn_2) as 最大观察次数,
case when count(1) over (partition by rn_1-rn_2)=1 then 'A'
     when count(1) over (partition by rn_1-rn_2)>3 then 'C' 
     else 'B' 
     end as type 
from
(select *,
case when isnull(B.time,'')='' then '0' else '1' end as symbol,
ROW_NUMBER() over (order by schedule_time) as rn_1,
ROW_NUMBER() over (partition by case when isnull(B.time,'')='' then '0' else '1' end order by schedule_time) as rn_2
from 
(select DATEADD(MINUTE,(seq-1)*5,min_time) as schedule_time
from (select MIN(time) as min_time,MAX(time) as max_time from cte_1) as A
join cte_2 as B on (DATEDIFF(MINUTE,min_time,max_time)/5)+1>=B.seq) as A
left join cte_1 as B ON A.schedule_time=B.time) as A
where symbol=1
order by schedule_time

大神语句的执行速度特别快，但不是我想要的啊，我其实就是要取出前后缺失数据中间夹着的那些数据，再判断中间的数据有几条，1条是a表，2-3条是b表，大于3条是超表。

weixin_40101242 2017-11-29

打赏
举报

引用 33 楼 superwfei 的回复:

-- 测试数据

create table #t (id int identity,观测日期 varchar(10),小时 int,分钟 int)

 

insert into #t(观测日期,小时,分钟)

select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟

union all

select '2016-8-3',15,50

union all

select '2016-8-3',15,55

union all

select '2016-8-3',16,0

union all

select '2016-8-19',12,25

union all

select '2016-8-19',12,30

union all

select '2016-10-1',1,0

union all

select '2016-10-1',1,5

union all

select '2016-10-1',1,10

union all

select '2016-10-9',7,55

union all

select '2016-10-9',8,5

union all

select '2016-10-9',8,10

-- 测试数据填充完毕

 

;with tt as (

    select *

	,dateadd(minute,分钟,dateadd(hour,小时,convert(datetime,观测日期))) as tm -- 对实际数据追加一个 datetime 类型的字段，用来描述每次观测的时间

	from #t  -- 实际使用吧#t换成自己的表名

),ttt as (

    select *,tm as 观测开始时间,1 as 第几次观测 -- 将当前时间追加为观测开始时间，并记录为第一次观测

	from tt a -- 从追加完观测时间的cte表中获取数据

	where not exists(select top 1 1 from tt where datediff(minute,tm,a.tm)=5) -- 获取条件为不存在比当前观测时间少5分钟的记录，也就是每次观测的第一次观测记录

    union all -- 对cte表 ttt 进行递归查询

    select a.*,b.观测开始时间,b.第几次观测+1 -- 将ttt表中的观测开始时间作为当前记录的观测开始时间，将ttt表中的观测次数加1作为当前观测记录的观测次数

	from tt a,ttt b -- 从tt表和ttt表中获取数据

	where datediff(minute,b.tm,a.tm)=5 -- 获取条件为在ttt表中存在比当前观测时间少5分钟的数据

)

select b.*,最大观测次数 

from (

	select 观测开始时间,max(第几次观测) as 最大观测次数

	from ttt a 

	group by 观测开始时间	-- 根据观测开始时间分组，取每个观测开始时间对应的最大观测次数

) a

cross apply (

	select * from ttt where 观测开始时间=a.观测开始时间 -- 根据观测开始时间补全数据

) b

order by tm

 

drop table #t   -- 删除测试数据临时表

加了注释了。。。。

或许是数据量太大了，执行了5分钟也不行，最后终止了。

RINK_1 2017-11-28

打赏
举报


--测试数据
if not object_id(N'Tempdb..#T') is null
    drop table #T
Go
Create table #T([日期] Date,[小时] int,[分钟] int)
Insert #T
    select '2016-8-3' ,15,45
    union all
    select '2016-8-3',15,50
    union all
    select '2016-8-3',15,55
    union all
    select '2016-8-3',16,0
    union all
    select '2016-8-19',12,25
    union all
    select '2016-8-19',12,30
    union all
    select '2016-10-1',1,0
    union all
    select '2016-10-1',1,5
    union all
    select '2016-10-1',1,10
    union all
    select '2016-10-9',7,55
    union all
    select '2016-10-9',8,5
    union all
    select '2016-10-9',8,10
    go
    
with cte_1
as
(select *,
dateadd(minute,[分钟],dateadd(HOUR,[小时],cast([日期] as datetime))) as [time]
from #T),

cte_2
as
(select ROW_NUMBER() over (order by (select null)) as seq from master..spt_values A,master..spt_values B
 where A.TYPE='P' and B.type='P' and A.number<1000 and B.number<1000)

select *,
count(1) over (partition by rn_1-rn_2) as 最大观察次数,
case when count(1) over (partition by rn_1-rn_2)=1 then 'A'
     when count(1) over (partition by rn_1-rn_2)>3 then 'C' 
     else 'B' 
     end as type 
from
(select *,
case when isnull(B.time,'')='' then '0' else '1' end as symbol,
ROW_NUMBER() over (order by schedule_time) as rn_1,
ROW_NUMBER() over (partition by case when isnull(B.time,'')='' then '0' else '1' end order by schedule_time) as rn_2
from 
(select DATEADD(MINUTE,(seq-1)*5,min_time) as schedule_time
from (select MIN(time) as min_time,MAX(time) as max_time from cte_1) as A
join cte_2 as B on (DATEDIFF(MINUTE,min_time,max_time)/5)+1>=B.seq) as A
left join cte_1 as B ON A.schedule_time=B.time) as A
where symbol=1
order by schedule_time

文盲老顾 2017-11-28

打赏
举报

-- 测试数据
create table #t (id int identity,观测日期 varchar(10),小时 int,分钟 int)
 
insert into #t(观测日期,小时,分钟)
select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟
union all
select '2016-8-3',15,50
union all
select '2016-8-3',15,55
union all
select '2016-8-3',16,0
union all
select '2016-8-19',12,25
union all
select '2016-8-19',12,30
union all
select '2016-10-1',1,0
union all
select '2016-10-1',1,5
union all
select '2016-10-1',1,10
union all
select '2016-10-9',7,55
union all
select '2016-10-9',8,5
union all
select '2016-10-9',8,10
-- 测试数据填充完毕
 
;with tt as (
    select *
	,dateadd(minute,分钟,dateadd(hour,小时,convert(datetime,观测日期))) as tm -- 对实际数据追加一个 datetime 类型的字段，用来描述每次观测的时间
	from #t  -- 实际使用吧#t换成自己的表名
),ttt as (
    select *,tm as 观测开始时间,1 as 第几次观测 -- 将当前时间追加为观测开始时间，并记录为第一次观测
	from tt a -- 从追加完观测时间的cte表中获取数据
	where not exists(select top 1 1 from tt where datediff(minute,tm,a.tm)=5) -- 获取条件为不存在比当前观测时间少5分钟的记录，也就是每次观测的第一次观测记录
    union all -- 对cte表 ttt 进行递归查询
    select a.*,b.观测开始时间,b.第几次观测+1 -- 将ttt表中的观测开始时间作为当前记录的观测开始时间，将ttt表中的观测次数加1作为当前观测记录的观测次数
	from tt a,ttt b -- 从tt表和ttt表中获取数据
	where datediff(minute,b.tm,a.tm)=5 -- 获取条件为在ttt表中存在比当前观测时间少5分钟的数据
)
select b.*,最大观测次数 
from (
	select 观测开始时间,max(第几次观测) as 最大观测次数
	from ttt a 
	group by 观测开始时间	-- 根据观测开始时间分组，取每个观测开始时间对应的最大观测次数
) a
cross apply (
	select * from ttt where 观测开始时间=a.观测开始时间 -- 根据观测开始时间补全数据
) b
order by tm
 
drop table #t   -- 删除测试数据临时表

加了注释了。。。。

文盲老顾 2017-11-28

打赏
举报

比如，你一个指令 select * from 观测记录可以列出6万数据，但你粘贴出来的数据没有这么多我们只能根据你粘贴出来的数据创建临时表，并把你的数据填充进去 insert into #t(a,b,c) select 1,2,3 union all select 2,4,6 和 insert into #t(a,b,c) values(1,2,3),(2,4,6) 的效果是一致的，写法不同而已

weixin_40101242 2017-11-28

打赏
举报

引用 29楼文盲老顾的回复:

with t as (... union all ...) 这个写法仅仅是生成一个无表名的cte临时表，这部分可以忽略就和我 27楼的写法那样，仅仅是因为别人没有你的实际表，只能靠建立临时表、表变量，或者cte表这样的方式来存放测试数据然后才是根据测试数据写的实际指令，你在实际应用中应该忽略测试数据部分，把这个换成你自己的实际内容

好，我再试试。

文盲老顾 2017-11-28

打赏
举报

例如你粘贴了一部分数据，没有提到表名那么别人测试的时候就会自己随意起个临时名代替例如 create table #t(...)，用#t来代替你的实际表参与运算或者像 with t as () 这样的，用 t 来代替你的实际表参与运算也可以declare @t table(...)，用@t来代替实际表这些测试数据基本上都可以忽略，直接换成你自己的实际数据即可

文盲老顾 2017-11-28

打赏
举报

with t as (... union all ...) 这个写法仅仅是生成一个无表名的cte临时表，这部分可以忽略就和我 27楼的写法那样，仅仅是因为别人没有你的实际表，只能靠建立临时表、表变量，或者cte表这样的方式来存放测试数据然后才是根据测试数据写的实际指令，你在实际应用中应该忽略测试数据部分，把这个换成你自己的实际内容

weixin_40101242 2017-11-28

打赏
举报

引用 26 楼 superwfei 的回复:

[quote=引用 24 楼 weixin_40101242 的回复:] [quote=引用 22 楼 superwfei 的回复:] ..... 你的表名叫什么？直接把
引用
select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟 union all select '2016-8-3',15,50 union all select '2016-8-3',15,55 union all select '2016-8-3',16,0 union all select '2016-8-19',12,25 union all select '2016-8-19',12,30 union all select '2016-10-1',1,0 union all select '2016-10-1',1,5 union all select '2016-10-1',1,10 union all select '2016-10-9',7,55 union all select '2016-10-9',8,5 union all select '2016-10-9',8,10
这个部分换成你自己的select 指令

我一个表里六万多条数据，都需要比对一下[/quote] 这个是你表里有多少数据没关系。。。。把别人测试的指令正确的替换成实际指令比较重要[/quote] 我的意思是所有数据都需要union all 吗，那样工作量太大了。原谅我是个小白，大神。

文盲老顾 2017-11-28

打赏
举报

-- 测试数据
create table #t (id int identity,观测日期 varchar(10),小时 int,分钟 int)

insert into #t(观测日期,小时,分钟)
select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟
union all
select '2016-8-3',15,50
union all
select '2016-8-3',15,55
union all
select '2016-8-3',16,0
union all
select '2016-8-19',12,25
union all
select '2016-8-19',12,30
union all
select '2016-10-1',1,0
union all
select '2016-10-1',1,5
union all
select '2016-10-1',1,10
union all
select '2016-10-9',7,55
union all
select '2016-10-9',8,5
union all
select '2016-10-9',8,10
-- 测试数据填充完毕

;with tt as (
    select *,dateadd(minute,分钟,dateadd(hour,小时,convert(datetime,观测日期))) as tm from #t  -- 实际使用吧#t换成自己的表名
),ttt as (
    --select * from tt a 
    --cross apply (select count(0) as 观测次数 from tt where 观测日期=a.观测日期) b
    --where not exists(select top 1 1 from tt where 观测日期=a.观测日期 and tm<a.tm)
    select *,tm as 观测开始时间,1 as 第几次观测 from tt a where not exists(select top 1 1 from tt where datediff(minute,tm,a.tm)=5)
    union all
    select a.*,b.观测开始时间,b.第几次观测+1 from tt a,ttt b where datediff(minute,b.tm,a.tm)=5
)
select b.*,最大观测次数 from (select 观测开始时间,max(第几次观测) as 最大观测次数 from ttt a group by 观测开始时间) a
cross apply (select * from ttt where 观测开始时间=a.观测开始时间) b
order by tm

drop table #t   -- 删除测试数据临时表

这样能看明白了么？

文盲老顾 2017-11-28

打赏
举报

引用 24 楼 weixin_40101242 的回复:

[quote=引用 22 楼 superwfei 的回复:] ..... 你的表名叫什么？直接把
引用
select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟 union all select '2016-8-3',15,50 union all select '2016-8-3',15,55 union all select '2016-8-3',16,0 union all select '2016-8-19',12,25 union all select '2016-8-19',12,30 union all select '2016-10-1',1,0 union all select '2016-10-1',1,5 union all select '2016-10-1',1,10 union all select '2016-10-9',7,55 union all select '2016-10-9',8,5 union all select '2016-10-9',8,10
这个部分换成你自己的select 指令

我一个表里六万多条数据，都需要比对一下[/quote] 这个是你表里有多少数据没关系。。。。把别人测试的指令正确的替换成实际指令比较重要

文盲老顾 2017-11-28

打赏
举报


with t as (
	-- 测试数据开始
    select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟
    union all
    select '2016-8-3',15,50
    union all
    select '2016-8-3',15,55
    union all
    select '2016-8-3',16,0
    union all
    select '2016-8-19',12,25
    union all
    select '2016-8-19',12,30
    union all
    select '2016-10-1',1,0
    union all
    select '2016-10-1',1,5
    union all
    select '2016-10-1',1,10
    union all
    select '2016-10-9',7,55
    union all
    select '2016-10-9',8,5
    union all
    select '2016-10-9',8,10
	-- 测试数据结束
	-- 实际应用将测试数据删除，换成 “select * from yourTable” 即可，yourTable为表名
),tt as (
    select *,dateadd(minute,分钟,dateadd(hour,小时,convert(datetime,观测日期))) as tm from t
),ttt as (
    --select * from tt a 
    --cross apply (select count(0) as 观测次数 from tt where 观测日期=a.观测日期) b
    --where not exists(select top 1 1 from tt where 观测日期=a.观测日期 and tm<a.tm)
    select *,tm as 观测开始时间,1 as 第几次观测 from tt a where not exists(select top 1 1 from tt where datediff(minute,tm,a.tm)=5)
    union all
    select a.*,b.观测开始时间,b.第几次观测+1 from tt a,ttt b where datediff(minute,b.tm,a.tm)=5
)
select b.*,最大观测次数 from (select 观测开始时间,max(第几次观测) as 最大观测次数 from ttt a group by 观测开始时间) a
cross apply (select * from ttt where 观测开始时间=a.观测开始时间) b
order by tm

weixin_40101242 2017-11-28

打赏
举报

引用 22 楼 superwfei 的回复:

..... 你的表名叫什么？直接把
引用
select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟 union all select '2016-8-3',15,50 union all select '2016-8-3',15,55 union all select '2016-8-3',16,0 union all select '2016-8-19',12,25 union all select '2016-8-19',12,30 union all select '2016-10-1',1,0 union all select '2016-10-1',1,5 union all select '2016-10-1',1,10 union all select '2016-10-9',7,55 union all select '2016-10-9',8,5 union all select '2016-10-9',8,10
这个部分换成你自己的select 指令

我一个表里六万多条数据，都需要比对一下

文盲老顾 2017-11-28

打赏
举报

..... 你的表名叫什么？直接把

引用

select '2016-8-3' as 观测日期,15 as 小时,45 as 分钟 union all select '2016-8-3',15,50 union all select '2016-8-3',15,55 union all select '2016-8-3',16,0 union all select '2016-8-19',12,25 union all select '2016-8-19',12,30 union all select '2016-10-1',1,0 union all select '2016-10-1',1,5 union all select '2016-10-1',1,10 union all select '2016-10-9',7,55 union all select '2016-10-9',8,5 union all select '2016-10-9',8,10

这个部分换成你自己的select 指令

weixin_40101242 2017-11-28