筛选数据集

dinoalex 2009-06-18 11:09:29
情况一:
ColA (varchar) ColB (Datetime)
A 2009-06-15 10:58:23
B 2009-06-15 10:58:31
C 2009-06-15 10:58:36
A 2009-06-15 10:58:39
A 2009-06-15 10:59:23
B 2009-06-15 11:00:22
A 2009-06-15 11:00:25

所要结果: ColA相同的三分钟内只要一个 但ColA是A的话,要两个
A 2009-06-15 10:58:23
B 2009-06-15 10:58:31
C 2009-06-15 10:58:36
A 2009-06-15 10:58:39


情况二:
A 2009-06-15 23:58:55
B 2009-06-15 23:59:43
A 2009-06-16 00:00:12
A 2009-06-16 00:00:23

所要结果: 三分钟内只要一个,只是这里的是跨天,如果情况一的可以处理这里的话,可以不理情况二
B 2009-06-15 23:59:43
A 2009-06-16 00:00:23


搞了好几天,不能完全达到效果,请SQL的高手写个mysql存储过程,参数是分钟,即上面的是3分钟,处理这个表A,存到表B,(表A跟表B的结构是一样的)
...全文
82 18 打赏 收藏 转发到动态 举报
写回复
用AI写文章
18 条回复
切换为时间正序
请发表友善的回复…
发表回复
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

[Quote]怎么我的执行的是下面那样呢

| C | 2009-06-15 10:53:36 |
| B | 2009-06-15 10:55:31 |
| C | 2009-06-15 10:57:36 |
| A | 2009-06-15 10:58:23 |
.....
[/Quote]

楼主啊,讲了很多遍了,请提供你的测试用例,否则根本不知道你的问题!
dinoalex 2009-06-19
  • 打赏
  • 举报
回复
== 我再看看
dinoalex 2009-06-19
  • 打赏
  • 举报
回复
怎么我的执行的是下面那样呢

| C | 2009-06-15 10:53:36 |
| B | 2009-06-15 10:55:31 |
| C | 2009-06-15 10:57:36 |
| A | 2009-06-15 10:58:23 |
| A | 2009-06-15 10:58:39 |
| B | 2009-06-15 10:59:31 |
| B | 2009-06-15 11:03:31 |
| A | 2009-06-15 11:04:25 |
| A | 2009-06-15 11:05:25 |
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

按照你六楼的测试了一下,修正了一下程序。

mysql> delimiter //
mysql>
mysql> CREATE PROCEDURE simpleproc (itvl INT)
-> BEGIN
-> DECLARE a,a1 CHAR default '';
-> DECLARE b,b1 Datetime default '1900-01-01 00:00:00';
-> DECLARE c INT DEFAULT 0;
->
-> DECLARE done INT DEFAULT 0;
-> DECLARE cur1 CURSOR FOR SELECT ColA,ColB FROM tx order by ColA,ColB;
-> DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
->
->
-> OPEN cur1;
->
-> delete from ty;
->
-> REPEAT
-> FETCH cur1 INTO a, b;
-> IF a != a1 or TIMEDIFF(b,b1)>itvl*100 THEN
-> insert into ty values (a,b);
-> set a1 = a;
-> set b1 = b;
-> set c = 0;
-> ELSE
-> if a='A' and c=0 then
-> insert into ty values (a,b);
-> set c = 1;
-> end if;
-> END IF;
->
-> UNTIL done END REPEAT;
->
-> END;
-> //
Query OK, 0 rows affected (0.00 sec)

mysql>
mysql> delimiter ;
mysql> select * from tx;
+------+---------------------+
| ColA | ColB |
+------+---------------------+
| B | 2009-06-15 10:58:23 |
| B | 2009-06-15 10:59:23 |
| B | 2009-06-15 11:00:23 |
| B | 2009-06-15 11:01:24 |
| B | 2009-06-15 11:02:23 |
| B | 2009-06-15 11:03:23 |
| B | 2009-06-15 11:04:25 |
+------+---------------------+
7 rows in set (0.00 sec)

mysql> call simpleproc(3);
Query OK, 1 row affected (0.14 sec)


mysql> select * from ty;
+------+---------------------+
| ColA | ColB |
+------+---------------------+
| B | 2009-06-15 10:58:23 |
| B | 2009-06-15 11:01:24 |
| B | 2009-06-15 11:04:25 |
+------+---------------------+
3 rows in set (0.00 sec)

mysql>
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

[Quote]是A的话,就要两条,如三分钟内有多条,就要三分钟内的最早一条和最后一条[/Quote]

建议按三楼的测试数据给出你的答案。
dinoalex 2009-06-19
  • 打赏
  • 举报
回复
三分钟是参数

如参数是三分的话. 下面是B,不是特别处理的A

B 2009-06-15 10:58:23
B 2009-06-15 10:59:23
B 2009-06-15 11:00:23
B 2009-06-15 11:01:24
B 2009-06-15 11:02:23
B 2009-06-15 11:03:23
B 2009-06-15 11:04:25

结果应为

B 2009-06-15 10:58:23
B 2009-06-15 11:01:24
B 2009-06-15 11:04:25

是A的话,就要两条,如三分钟内有多条,就要三分钟内的最早一条和最后一条
wwwwb 2009-06-19
  • 打赏
  • 举报
回复
SELECT A1.cola,A1.colb FROM (
select A.*,timediff(a.colb,b.colb) from tta2 a left join tta2 b
on a.colb + INTERVAL 3 minute =b.colb
and a.cola=b.cola WHERE B.cola IS NULL) A1 LEFT JOIN
(
select A.*,timediff(a.colb,b.colb) from tta2 a left join tta2 b
on a.colb + INTERVAL 3 minute =b.colb
and a.cola=b.cola WHERE B.cola IS NULL) A2

ON A1.cola=A2.cola AND A1.COLB>=A2.COLB GROUP BY A1.cola,A1.colb
HAVING(COUNT(A2.COLA))<=IF(A1.COLA='A',2,1)
v轻扬v 2009-06-19
  • 打赏
  • 举报
回复
忘了要求逢A取2,修改如下:

create database testdb;
use testdb;

create table testtable (cola varchar(2),colb datetime);

create index a on testtable(cola,colb);

insert into testtable values
('A','2009-06-15 10:58:23'),
('B','2009-06-15 10:58:31'),
('C','2009-06-15 10:53:36'),
('C','2009-06-15 10:54:36'),
('C','2009-06-15 10:55:36'),
('C','2009-06-15 10:56:36'),
('C','2009-06-15 10:57:36'),
('C','2009-06-15 10:58:36'),
('C','2009-06-15 10:59:36'),
('A','2009-06-15 10:58:39'),
('A','2009-06-15 10:59:23'),
('B','2009-06-15 11:00:22'),
('B','2009-06-15 10:55:31'),
('B','2009-06-15 10:59:31'),
('B','2009-06-15 11:03:31'),
('B','2009-06-15 11:04:31'),
('A','2009-06-15 11:04:25'),
('A','2009-06-15 11:05:25'),
('A','2009-06-15 11:06:25');

select * from testtable;#显示一下样本数据
set @t=DATE('1900-01-01 00:00:00');#设置一个比表中任何一个时间都小的日期
set @a="";
set @Anum=0;
#以下是记录合并查询语句
select cola,colb,(@Anum:=if(DATE_SUB(colb,INTERVAL 3 MINUTE)<=@t && cola='A',@Anum+1,0)) as
Anum,(@isa:=if(cola='A' && @Anum<2,true,false)) as isa,(@t:=if(DATE_SUB(colb,INTERVAL 3
MINUTE)>=@t || cola!=@a || @isa,colb,@t)) as mergetime,(@a:=cola) from testtable group by cola,mergetime;

drop table testtable;
drop database testdb;
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

楼主的情况二,没看出来什么意思,为什么第一个 A 2009-06-15 23:58:55 为省掉了,基本什么逻辑原理?

另外如一楼所说,你的三分钟的定义到底是什么?特别是当A取二个的时间,比如 t1,t2 两个时间,那下个的三分钟间隔是基于t1还是t2 ? 提供测试用例也是一种学问,需要仔细。否则按照二楼的做法,已经满足你的要求,你应该按合同付款了。

请按下面的数据给出你的正确结果。

A,2009-06-15 10:21:13
A,2009-06-15 10:22:13
A,2009-06-15 10:23:13
A,2009-06-15 10:24:13
A,2009-06-15 10:25:13
A,2009-06-15 10:26:13
A,2009-06-15 10:27:13
A,2009-06-15 10:30:13
A,2009-06-15 10:31:13
A,2009-06-15 10:32:13
A,2009-06-15 10:33:13
A,2009-06-15 10:34:13
A,2009-06-15 10:35:13
A,2009-06-15 10:42:13
A,2009-06-15 10:43:13
A,2009-06-15 10:44:13
A,2009-06-15 10:45:13
A,2009-06-15 10:46:13
A,2009-06-15 10:47:13
A,2009-06-15 10:48:13
A,2009-06-15 23:54:13
A,2009-06-15 23:55:13
A,2009-06-15 23:56:13
A,2009-06-15 23:57:13
A,2009-06-15 23:58:13
A,2009-06-15 23:59:13
A,2009-06-16 24:00:13
A,2009-06-16 24:01:13
A,2009-06-16 24:02:13
A,2009-06-16 24:03:13

ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

mysql> select * from tx;
+------+---------------------+
| ColA | ColB |
+------+---------------------+
| A | 2009-06-15 10:58:23 |
| B | 2009-06-15 10:58:31 |
| C | 2009-06-15 10:58:36 |
| A | 2009-06-15 10:58:39 |
| A | 2009-06-15 10:59:23 |
| B | 2009-06-15 11:00:22 |
| A | 2009-06-15 11:00:25 |
+------+---------------------+
7 rows in set (0.00 sec)

mysql>
mysql> delimiter //
mysql>
mysql> CREATE PROCEDURE simpleproc (itvl INT)
-> BEGIN
-> DECLARE a,a1 CHAR default '';
-> DECLARE b,b1 Datetime default '1900-01-01 00:00:00';
-> DECLARE c INT DEFAULT 0;
->
-> DECLARE done INT DEFAULT 0;
-> DECLARE cur1 CURSOR FOR SELECT ColA,ColB FROM tx order by ColA,ColB;
-> DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
->
->
-> OPEN cur1;
->
-> delete from ty;
->
-> REPEAT
-> FETCH cur1 INTO a, b;
-> IF a != a1 or MINUTE(TIMEDIFF(b,b1))>itvl THEN
-> insert into ty values (a,b);
-> set a1 = a;
-> set b1 = b;
-> set c = 0;
-> ELSE
-> if a='A' and c=0 then
-> insert into ty values (a,b);
-> set c = 1;
-> end if;
-> END IF;
->
-> UNTIL done END REPEAT;
->
-> END;
-> //
Query OK, 0 rows affected (0.01 sec)

mysql>
mysql> delimiter ;
mysql>
mysql> CALL simpleproc(3);
Query OK, 1 row affected (0.13 sec)

mysql> select * from ty;
+------+---------------------+
| ColA | ColB |
+------+---------------------+
| A | 2009-06-15 10:58:23 |
| A | 2009-06-15 10:58:39 |
| B | 2009-06-15 10:58:31 |
| C | 2009-06-15 10:58:36 |
+------+---------------------+
4 rows in set (0.00 sec)

mysql>


v轻扬v 2009-06-19
  • 打赏
  • 举报
回复
三分钟内是什么意思啊?
1、与当前时间相比的三分钟内?
2、每间隔3分钟内,即0-3,3-6,6-9,……,57-60。
3、与colB的值动态偏移的3分钟内。这种情况估计是很费力气。估计楼内要的也是这个吧。

以下是针对第三种情况的test.sql文件内容,已经过测试
————————————————————

create database testdb;
use testdb;

create table testtable (cola varchar(2),colb datetime);

create index a on testtable(cola,colb);

insert into testtable values
('A','2009-06-15 10:58:23'),
('B','2009-06-15 10:58:31'),
('C','2009-06-15 10:53:36'),
('C','2009-06-15 10:54:36'),
('C','2009-06-15 10:55:36'),
('C','2009-06-15 10:56:36'),
('C','2009-06-15 10:57:36'),
('C','2009-06-15 10:58:36'),
('C','2009-06-15 10:59:36'),
('A','2009-06-15 10:58:39'),
('A','2009-06-15 10:59:23'),
('B','2009-06-15 11:00:22'),
('B','2009-06-15 10:55:31'),
('B','2009-06-15 10:59:31'),
('B','2009-06-15 11:03:31'),
('B','2009-06-15 11:04:31'),
('A','2009-06-15 11:04:25'),
('A','2009-06-15 11:05:25'),
('A','2009-06-15 11:06:25');
select * from testtable;#显示一下样本数据
set @t=DATE('1900-01-01 00:00:00');#设置一个比表中任何一个时间都小的日期
set @a="";
#以下是记录合并查询语句
select cola,colb,(@t:=if(DATE_SUB(colb,INTERVAL 3 MINUTE)>=@t || cola!=@a,colb,@t)) as
mergetime,(@a:=cola) from testtable group by cola,mergetime;

dinoalex 2009-06-19
  • 打赏
  • 举报
回复
还有另外两位的帮助,感恩!
dinoalex 2009-06-19
  • 打赏
  • 举报
回复
谢谢ACMAIN_CHM的帮助,感恩!
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

我个人则更喜欢改成这种。

mysql> delimiter //
mysql>
mysql> CREATE PROCEDURE simpleproc (itvl INT)
-> BEGIN
-> DECLARE a,a1,ao CHAR default '';
-> DECLARE b,b1 Datetime default '1900-01-01 00:00:00';
-> DECLARE c INT DEFAULT 0;
->
-> declare sAList varchar(1000);
->
-> DECLARE done INT DEFAULT 0;
-> DECLARE cur1 CURSOR FOR
-> SELECT tx.ColA,tx.ColB,othertable.cloa
-> FROM tx left join othertable on tx.ColA=othertable.cloa
-> order by ColA,ColB;
-> DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
->
-> OPEN cur1;
->
-> delete from ty;
->
-> REPEAT
-> FETCH cur1 INTO a, b, ao;
-> IF a != a1 or TIMEDIFF(b,b1)>itvl*100 THEN
-> insert into ty values (a,b);
-> set a1 = a;
-> set b1 = b;
-> set c = 0;
-> ELSE
-> if ao is not null and c=0 then -- changed
-> insert into ty values (a,b);
-> set c = 1;
-> end if;
-> END IF;
->
-> UNTIL done END REPEAT;
->
-> END;
-> //
Query OK, 0 rows affected (0.00 sec)

mysql>
mysql> delimiter ;
mysql>
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

不能这么改,因为你这不是SQL语句。
一种方法是改成如下:

mysql> delimiter //
mysql>
mysql> CREATE PROCEDURE simpleproc (itvl INT)
-> BEGIN
-> DECLARE a,a1 CHAR default '';
-> DECLARE b,b1 Datetime default '1900-01-01 00:00:00';
-> DECLARE c INT DEFAULT 0;
->
-> declare sAList varchar(1000);
->
-> DECLARE done INT DEFAULT 0;
-> DECLARE cur1 CURSOR FOR SELECT ColA,ColB FROM tx order by ColA,ColB;
-> DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
->
-> select group_concat(cloa) into sAList from othertable; -- changed
->
-> OPEN cur1;
->
-> delete from ty;
->
-> REPEAT
-> FETCH cur1 INTO a, b;
-> IF a != a1 or TIMEDIFF(b,b1)>itvl*100 THEN
-> insert into ty values (a,b);
-> set a1 = a;
-> set b1 = b;
-> set c = 0;
-> ELSE
-> if FIND_IN_SET(a,sAList) and c=0 then -- changed
-> insert into ty values (a,b);
-> set c = 1;
-> end if;
-> END IF;
->
-> UNTIL done END REPEAT;
->
-> END;
-> //
Query OK, 0 rows affected (0.00 sec)

mysql>
mysql> delimiter ;
mysql>
dinoalex 2009-06-19
  • 打赏
  • 举报
回复
就是说可以改成下面的??
[Code=SQL]
mysql> delimiter //
mysql>
mysql> CREATE PROCEDURE simpleproc (itvl INT)
-> BEGIN
-> DECLARE a,a1 CHAR default '';
-> DECLARE b,b1 Datetime default '1900-01-01 00:00:00';
-> DECLARE c INT DEFAULT 0;
->
-> DECLARE done INT DEFAULT 0;
-> DECLARE cur1 CURSOR FOR SELECT ColA,ColB FROM tx order by ColA,ColB;
-> DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
->
->
-> OPEN cur1;
->
-> delete from ty;
->
-> REPEAT
-> FETCH cur1 INTO a, b;
-> IF a != a1 or TIMEDIFF(b,b1)>itvl*100 THEN
-> insert into ty values (a,b);
-> set a1 = a;
-> set b1 = b;
-> set c = 0;
-> ELSE
-> if (a in (select cloa from othertable) and c=0 then //改成这样??
-> insert into ty values (a,b);
-> set c = 1;
-> end if;
-> END IF;
->
-> UNTIL done END REPEAT;
->
-> END;
-> //
[/Code]
ACMAIN_CHM 2009-06-19
  • 打赏
  • 举报
回复

MySQL 支持 select * from t1 where a in (select a from t2) 这种标准的ANSI的SQL语句。
dinoalex 2009-06-19
  • 打赏
  • 举报
回复
对喔,我用的是4楼的数据,呵呵~~

对了,MYSQL可以可以像MSSQL那样,可以 select * from t1 where a in (select a from t2)

因为像A的情况不止一个,都在t2里

56,678

社区成员

发帖
与我相关
我的任务
社区描述
MySQL相关内容讨论专区
社区管理员
  • MySQL
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧