4,249
社区成员




// Download the category list page and convert it from GB2312 to UTF-8.
$str = iconv('gb2312', 'utf-8', file_get_contents('http://www.newegg.com.cn/CategoryList.htm'));

// Each match runs from an <h2> Category link through the next closing </ul>.
// Group 1 is the numeric category id, group 2 the title attribute.
// The dots in the host name are escaped so they only match literal dots,
// consistent with the SubCategory pattern used below.
preg_match_all(
    "#<h2><a href='http://www\.newegg\.com\.cn/Category/(\d+)\.htm' title='([^']+)[\S\s]+?</ul>#i",
    $str,
    $matches
);

// Map of category id => category title.
print_r(array_combine($matches[1], $matches[2]));
echo '<br>';

// each() was removed in PHP 8.0; foreach visits the same key/value pairs.
foreach ($matches[0] as $key => $val) {
    echo $matches[2][$key] . '<br>';

    // SubCategory links found inside this category's matched block: id => title.
    preg_match_all(
        "#<a href='http://www\.newegg\.com\.cn/SubCategory/(\d+)\.htm' title='([^']+)#i",
        $val,
        $match
    );
    print_r(array_combine($match[1], $match[2]));
    echo '<br>';
}
<?php
// Category page that was downloaded to a local file; its values are read from here.
$url = "CategoryList.htm";
$html = file_get_contents($url);

// Group 1 captures the numeric id from the Category link, group 2 the link text.
$pattern = "/<a\shref='http:\/\/www\.newegg\.com\.cn\/Category\/(\d+)\.htm[^><]+>(.+?)<\/a>/";

preg_match_all($pattern, $html, $found);
print_r($found);
?>
Array
(
[0] => Array
(
[0] => <a href='http://www.newegg.com.cn/Category/711.htm' title='大家电'>大家电</a>
[1] => <a href='http://www.newegg.com.cn/Category/530.htm' title='主机硬件'>主机硬件</a>
[2] => <a href='http://www.newegg.com.cn/Category/536.htm' title='笔记本'>笔记本</a>
[3] => <a href='http://www.newegg.com.cn/Category/1151.htm' title='餐桌厨房'>餐桌厨房</a>
[4] => <a href='http://www.newegg.com.cn/Category/549.htm' title='手机通信'>手机通信</a>
[5] => <a href='http://www.newegg.com.cn/Category/535.htm' title='外设产品'>外设产品</a>
[6] => <a href='http://www.newegg.com.cn/Category/862.htm' title='台式机'>台式机</a>
[7] => <a href='http://www.newegg.com.cn/Category/987.htm' title='家庭影音'>家庭影音</a>
[8] => <a href='http://www.newegg.com.cn/Category/1148.htm' title='家居用品'>家居用品</a>
[9] => <a href='http://www.newegg.com.cn/Category/833.htm' title='手机配件'>手机配件</a>
[10] => <a href='http://www.newegg.com.cn/Category/562.htm' title='厨用电器'>厨用电器</a>
[11] => <a href='http://www.newegg.com.cn/Category/546.htm' title='存储设备'>存储设备</a>
[12] => <a href='http://www.newegg.com.cn/Category/1232.htm' title='服务器'>服务器</a>
[13] => <a href='http://www.newegg.com.cn/Category/1154.htm' title='工具汽配'>工具汽配</a>
[14] => <a href='http://www.newegg.com.cn/Category/543.htm' title='网络设备'>网络设备</a>
[15] => <a href='http://www.newegg.com.cn/Category/852.htm' title='日用电器'>日用电器</a>
[16] => <a href='http://www.newegg.com.cn/Category/1294.htm' title='生活健康'>生活健康</a>
[17] => <a href='http://www.newegg.com.cn/Category/561.htm' title='个人护理'>个人护理</a>
[18] => <a href='http://www.newegg.com.cn/Category/936.htm' title='整机附件'>整机附件</a>
[19] => <a href='http://www.newegg.com.cn/Category/1150.htm' title='玩具宠物'>玩具宠物</a>
[20] => <a href='http://www.newegg.com.cn/Category/1371.htm' title='电脑附件'>电脑附件</a>
[21] => <a href='http://www.newegg.com.cn/Category/1149.htm' title='母婴用品'>母婴用品</a>
[22] => <a href='http://www.newegg.com.cn/Category/556.htm' title='打印耗材'>打印耗材</a>
[23] => <a href='http://www.newegg.com.cn/Category/1153.htm' title='运动户外'>运动户外</a>
[24] => <a href='http://www.newegg.com.cn/Category/544.htm' title='办公设备'>办公设备</a>
[25] => <a href='http://www.newegg.com.cn/Category/563.htm' title='旅行箱包'>旅行箱包</a>
[26] => <a href='http://www.newegg.com.cn/Category/548.htm' title='线缆'>线缆</a>
[27] => <a href='http://www.newegg.com.cn/Category/725.htm' title='正版软件'>正版软件</a>
[28] => <a href='http://www.newegg.com.cn/Category/955.htm' title='钟表首饰'>钟表首饰</a>
[29] => <a href='http://www.newegg.com.cn/Category/1304.htm' title='文化礼品'>文化礼品</a>
[30] => <a href='http://www.newegg.com.cn/Category/550.htm' title='摄影摄像'>摄影摄像</a>
[31] => <a href='http://www.newegg.com.cn/Category/1305.htm' title='家用家纺'>家用家纺</a>
[32] => <a href='http://www.newegg.com.cn/Category/951.htm' title='数码附件'>数码附件</a>
[33] => <a href='http://www.newegg.com.cn/Category/1152.htm' title='服装鞋帽'>服装鞋帽</a>
[34] => <a href='http://www.newegg.com.cn/Category/553.htm' title='影音电子'>影音电子</a>
[35] => <a href='http://www.newegg.com.cn/Category/555.htm' title='GPS导航'>GPS导航</a>
)
[1] => Array
(
[0] => 711
[1] => 530
[2] => 536
[3] => 1151
[4] => 549
[5] => 535
[6] => 862
[7] => 987
[8] => 1148
[9] => 833
[10] => 562
[11] => 546
[12] => 1232
[13] => 1154
[14] => 543
[15] => 852
[16] => 1294
[17] => 561
[18] => 936
[19] => 1150
[20] => 1371
[21] => 1149
[22] => 556
[23] => 1153
[24] => 544
[25] => 563
[26] => 548
[27] => 725
[28] => 955
[29] => 1304
[30] => 550
[31] => 1305
[32] => 951
[33] => 1152
[34] => 553
[35] => 555
)
[2] => Array
(
[0] => 大家电
[1] => 主机硬件
[2] => 笔记本
[3] => 餐桌厨房
[4] => 手机通信
[5] => 外设产品
[6] => 台式机
[7] => 家庭影音
[8] => 家居用品
[9] => 手机配件
[10] => 厨用电器
[11] => 存储设备
[12] => 服务器
[13] => 工具汽配
[14] => 网络设备
[15] => 日用电器
[16] => 生活健康
[17] => 个人护理
[18] => 整机附件
[19] => 玩具宠物
[20] => 电脑附件
[21] => 母婴用品
[22] => 打印耗材
[23] => 运动户外
[24] => 办公设备
[25] => 旅行箱包
[26] => 线缆
[27] => 正版软件
[28] => 钟表首饰
[29] => 文化礼品
[30] => 摄影摄像
[31] => 家用家纺
[32] => 数码附件
[33] => 服装鞋帽
[34] => 影音电子
[35] => GPS导航
)
)
// Fetch the live category list page.
$url = "http://www.newegg.com.cn/CategoryList.htm";
$content = file_get_contents($url);

// Category links: group 1 is the numeric id, group 2 the link text.
$idAndNamePattern = "/<a\shref='http:\/\/www\.newegg\.com\.cn\/Category\/(\d+)\.htm[^><]+>(.+?)<\/a>/";

// The same match is executed and printed twice.
for ($pass = 0; $pass < 2; $pass++) {
    preg_match_all($idAndNamePattern, $content, $match);
    print_r($match);
}

// Major categories: only the link text is captured and printed.
$namePattern = "/<a\shref='http:\/\/www\.newegg\.com\.cn\/Category\/[^><]+>(.+?)<\/a>/";
preg_match_all($namePattern, $content, $categoryLinks);
print_r($categoryLinks[1]);
// Fetch the live category list page.
$url = "http://www.newegg.com.cn/CategoryList.htm";
$content = file_get_contents($url);

// Match the Category links that are closed by </a></h2>.
// Group 1: numeric id, group 2: title attribute, group 3: link text.
// - \s+ replaces [\s.]: inside a character class "." is a literal dot, so the
//   old class also accepted a stray "." between the href and the title.
// - [^']* replaces the greedy (.*) so the title stops at its closing quote
//   instead of running to the last "'>" on the line.
preg_match_all("#Category/(\d+)\.htm'\s+title='([^']*)'>(.*?)</a></h2>#i", $content, $match);

// Only the full matched strings are printed.
print_r($match[0]);