56,687
社区成员
发帖
与我相关
我的任务
分享
mysql> DESC ratedat;
+-------------+---------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-------------+---------------+------+-----+---------+-------+
| trade_id | bigint(20) | NO | PRI | NULL | |
| trade_time | int(10) | NO | | NULL | |
| uid_buy | bigint(20) | NO | MUL | NULL | |
| uid_sell | bigint(20) | NO | MUL | NULL | |
| goods_title | varchar(120) | NO | MUL | NULL | |
| goods_price | decimal(10,2) | NO | | NULL | |
| rate_txt | text | NO | MUL | NULL | |
+-------------+---------------+------+-----+---------+-------+
7 rows in set (0.01 sec)
mysql> DESC ratedat_page;
+-------+------------+------+-----+---------+----------------+
| Field | Type | Null | Key | Default | Extra |
+-------+------------+------+-----+---------+----------------+
| page | int(10) | NO | PRI | NULL | auto_increment |
| id1 | bigint(20) | NO | | NULL | |
| id2 | bigint(20) | NO | | NULL | |
+-------+------------+------+-----+---------+----------------+
3 rows in set (0.00 sec)
1.218750 s,q:9
Array
(
[0] => SELECT MAX(trade_id) AS `idMax`,MIN(`trade_id`) AS `idMin` FROM `ratedat` LIMIT 0,1;
#为了统计ratedat中最大最小来鉴定是否已经执行完毕
[1] => EXPLAIN SELECT 1 FROM `ratedat` LIMIT 0,1;
#其实原先是在[0]语句中的SELECT COUNT()结果发现总是死机,后来才用EXPLAIN 的rows
[2] => SELECT MIN(trade_id) AS id1 FROM ratedat WHERE trade_id>=45270589172168 LIMIT 0,1;
#选出分区的开始id,ratedat_page.id1
[3] => SELECT trade_id AS id2 FROM ratedat WHERE trade_id>=45270589172168 AND trade_id<(45270589182168+90000001000) ORDER BY trade_id ASC LIMIT 9999, 1 ;
#选出分区的结束,因为id是不连续的,所以就会有多次搜寻。每个分区是1万条记录,找的是第10000个,所以用的是LIMIT 9999,1(跳过前9999条,取第10000条)
[4] => SELECT trade_id AS id2 FROM ratedat WHERE trade_id>=45270589172168 AND trade_id<(45270589182168+900000010000) ORDER BY trade_id ASC LIMIT 9999, 1
#第[3]中没有搜索到最大的,因为它里面不够一个分区(1w记录),所以就继续加大搜索。但是表ratedat的id是不连续的,又为了防止扫描过多死机,所以每次都在扫描范围 90000001000 后面多加一个0(即每次把范围扩大10倍)
[5] => SELECT trade_id AS id2 FROM ratedat WHERE trade_id>=45270589172168 AND trade_id<(45270589182168+9000000100000) ORDER BY trade_id ASC LIMIT 9999, 1 ;
#第[5]次终于搜索到了最大的
[6] => INSERT INTO ratedat_page SET id1=45270672789725,id2=46703483905514 ;
#第[6]则写入到ratedat_page表
[7] => SELECT 46703483905514+1 AS idStartNext,46703483905514+10000+1 AS idEndNext LIMIT 0,1;
#这一段纯粹是为了加减运算:PHP的整数超过一定大小就会丢失精度,无法准确执行这么大的数值运算,所以交给MySQL来算(有一个gmp扩展可以做大数运算,但这里没有用)
[8] => SELECT CEIL((46703483905515 - 48353669)*100/299922201925902) AS pctAll,
CEIL(currpage/totalpage*100) AS pctCount,currpage,totalpage
FROM (
SELECT MAX(page) AS currpage,
CEIL(10554777/10000) AS totalpage
FROM ratedat_page
) s LIMIT 0,1;
#第[8]语句则也是为了执行加减运算,计算完成比例,剩余比例
)
-- Get the start of this partition range: id1.
-- trade_id is sparse (non-contiguous), so the partition start is the smallest
-- existing id at or above $idStart, not $idStart itself.
SELECT MIN(trade_id) AS id1 FROM tb WHERE trade_id>=$idStart
-- Get the end of this partition range: id2.
-- LIMIT $PerPartition-1,1 skips the first ($PerPartition-1) ids and returns the
-- $PerPartition-th, i.e. the last id of a partition holding $PerPartition rows.
-- trade_id < $idStart+$ScanRange caps the scan window to avoid runaway scans;
-- if fewer than $PerPartition ids fall inside the window this returns no row,
-- and the caller retries with a larger $ScanRange (see queries [3]-[5] above).
SELECT trade_id AS id2 FROM tb WHERE trade_id>=$idStart AND trade_id<$idStart+$ScanRange LIMIT $PerPartition-1,1
-- Build per-buyer spending statistics from the full ratedat table.
-- Shown for reference only: ratedat holds 100M+ rows, so this unbounded
-- GROUP BY over the whole table is too heavy to run in one shot.
INSERT INTO rate_buyer(uid,costcount,costsum,costavg,costmax,costmin)
SELECT uid_buy AS uid,
COUNT(trade_id) AS costcount,
SUM(goods_price) AS costsum,
ROUND(AVG(goods_price),2) AS costavg,
MAX(goods_price) AS costmax,
MIN(goods_price) AS costmin
FROM ratedat
GROUP BY uid_buy
但是由于ratedat表有1亿多条数据,一次性全表聚合太重了,所以才会用WHERE按id区间来限制每次存取的条数。
就变成了如下:
-- Partitioned version of the per-buyer aggregation: restricted to one id range
-- so each run scans roughly one partition (~10k rows) instead of the whole table.
-- $id1/$id2 come from a ratedat_page row. Ranges appear to be stored inclusive
-- on both ends (the next partition starts at id2+1 per query [7] above), so
-- BETWEEN here should not double-count boundary rows.
-- NOTE(review): a buyer whose trades span multiple partitions yields one partial
-- row per partition; presumably the caller merges these — verify against caller.
SELECT uid_buy AS uid,
COUNT(trade_id) AS costcount,
SUM(goods_price) AS costsum,
ROUND(AVG(goods_price),2) AS costavg,
MAX(goods_price) AS costmax,
MIN(goods_price) AS costmin
FROM ratedat WHERE trade_id BETWEEN $id1 AND $id2
GROUP BY uid_buy
$id1 和 $id2 就来自于ratedat_page 中的记录。现在发现生成ratedat_page真的太慢了。
数据量一大,以前经常用的老方法就好像全都失灵了一样。