java爬虫爬新浪微博里某个明星的微博,Starting ChromeDriver (v2.7.236900) on port 27871,就自动报错

baidu_35198066 2016-11-21 10:26:41
/**
*
*/
package 新浪微博爬虫;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriverService;
import org.openqa.selenium.remote.DesiredCapabilities;
import org.openqa.selenium.remote.RemoteWebDriver;
/*
from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches",["ignore-certificate-errors"])
driver = webdriver.Chrome(chrome_options=options)
/
/**
* @author Administrator
*
*/
public class Spider {

/**
* @param args
* @throws IOException
* @throws InterruptedException
*/
public static void main(String[] args) throws IOException, InterruptedException {
long waitLoadBaseTime = 10000;
int waitLoadRandomTime = 3000;
Random random = new Random(System.currentTimeMillis());
// 设置 chrome 的路径
System.setProperty(
"webdriver.chrome.driver",
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe");
// 创建一个 ChromeDriver 的接口,用于连接 Chrome
@SuppressWarnings("deprecation")
ChromeDriverService service = new ChromeDriverService.Builder()
.usingDriverExecutable(
new File(
"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe"))
.usingAnyFreePort().build();
service.start();
// 创建一个 Chrome 的浏览器实例
WebDriver driver = new RemoteWebDriver(service.getUrl(),
DesiredCapabilities.chrome());
// 让浏览器访问微博主页
driver.get("http://weibo.com/u/6033327972");
//等待页面动态加载完毕
Thread.sleep(waitLoadBaseTime+random.nextInt(waitLoadRandomTime));
//选择每条微博的整体子模块
List<WebElement> elements = driver.findElements(By.cssSelector("div[action-type=feed_list_item]"));
//选择每条微博的文本内容模块
List<WebElement> elements2 = driver.findElements(By.cssSelector("div[node-type=feed_list_reason],div[node-type=feed_list_content]"));
System.out.println(elements.size());
for (int i = 0; i < elements.size(); i++) {
//展开评论
elements.get(i).findElement(By.cssSelector("a[action-type=fl_comment]")).click();;
Thread.sleep(1000);
}
//评论列表
List<WebElement> elements3 = driver.findElements(By.cssSelector("div[node-type=feed_list_commentList]"));
System.out.println(elements3.size());
int a = 0;
for (int i =0;i<elements2.size()&&a<elements3.size();i++) {
//抓取内容
String content = elements2.get(i).getText();
if (!content.contains("转发微博")) {
System.out.println("content:"+content);
//抓取评论
if (elements3.get(a).getText().isEmpty()) {
System.out.println("comment:no comment");
}else{
System.out.println("comment:"+elements3.get(a).getText());
}
a++;
}
}
driver.quit();
// 关闭 ChromeDriver 接口
service.stop();
}

}



程序已启动,弹出chromediver.exe已停止工作
结束进程
弹出错误
Starting ChromeDriver (v2.7.236900) on port 15302
Exception in thread "main" org.openqa.selenium.remote.UnreachableBrowserException: Error communicating with the remote browser. It may have died.
Build info: version: '2.42.2', revision: '6a6995d', time: '2014-06-03 17:42:30'
System info: host: 'MS-20160515FWNU', ip: '192.168.1.190', os.name: 'Windows 7', os.arch: 'amd64', os.version: '6.1', java.version: '1.8.0_92'
Driver info: driver.version: RemoteWebDriver
at org.openqa.selenium.remote.RemoteWebDriver.execute(RemoteWebDriver.java:593)
at org.openqa.selenium.remote.RemoteWebDriver.get(RemoteWebDriver.java:304)
at 新浪微博爬虫.Spider.main(Spider.java:54)
Caused by: java.net.SocketException: Connection reset
at java.net.SocketInputStream.read(SocketInputStream.java:209)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:136)
at org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:152)
at org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:270)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:140)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:260)
at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:161)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.http.impl.conn.CPoolProxy.invoke(CPoolProxy.java:138)
at com.sun.proxy.$Proxy0.receiveResponseHeader(Unknown Source)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:271)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:123)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:254)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:195)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:85)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:108)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:186)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:72)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:57)
at org.openqa.selenium.remote.HttpCommandExecutor.fallBackExecute(HttpCommandExecutor.java:204)
at org.openqa.selenium.remote.HttpCommandExecutor.execute(HttpCommandExecutor.java:173)
at org.openqa.selenium.remote.RemoteWebDriver.execute(RemoteWebDriver.java:572)
... 2 more

经确认,地址没错。
最新版chrome 最新版本chromediver selenium-2.42.2

求大神解惑!!
...全文
489 6 打赏 收藏 转发到动态 举报
写回复
用AI写文章
6 条回复
切换为时间正序
请发表友善的回复…
发表回复
飞翔中初学者 2017-02-16
  • 打赏
  • 举报
回复
楼主解决了么?我遇到同样的问题
SSHorSSM 2016-11-29
  • 打赏
  • 举报
回复
你都都是大神,我是菜鸟
youthflies 2016-11-29
  • 打赏
  • 举报
回复
用selenium来写爬虫,深深的震惊了。
yuanshuijun 2016-11-24
  • 打赏
  • 举报
回复
遇到相同问题,求大神赐教!
九尾狐的yi巴 2016-11-24
  • 打赏
  • 举报
回复
都这么溜 但我不知道
逗泥丸的平方 2016-11-23
  • 打赏
  • 举报
回复
哇 写爬虫 好厉害.. org.openqa.selenium什么工具包 0-0

50,530

社区成员

发帖
与我相关
我的任务
社区描述
Java相关技术讨论
javaspring bootspring cloud 技术论坛(原bbs)
社区管理员
  • Java相关社区
  • 小虚竹
  • 谙忆
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧