java爬虫,爬出来的网页中文内容是乱码

Zzzzzzt丶 2016-11-21 09:55:28
 package com.lib;

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;


public class HttpGetUtils {
public String get(String url){
String result = "";
try{
CloseableHttpClient httpclient = HttpClients.createDefault();
HttpGet httpget = new HttpGet(url);
CloseableHttpResponse response = httpclient.execute(httpget);
try{
if (response != null && response.getStatusLine().getStatusCode()
== HttpStatus.SC_OK ){
System.out.println(response.getStatusLine());
HttpEntity entity = response.getEntity();
System.out.println(entity.getContentEncoding());
result = readResponse(entity, "UTF-8");
}
}
finally{
httpclient.close();
response.close();
}

}
catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
return result;
}

public String readResponse(HttpEntity entity, String charset){
StringBuffer res = new StringBuffer();
BufferedReader reader = null;
try{
if (entity == null){
return null;
}
else{
reader = new BufferedReader(new InputStreamReader(entity.getContent(),charset));
String line;
while ( (line = reader.readLine()) != null){
line = line + "\n";
res.append(line);
}
}
}
catch (Exception e) {
// TODO: handle exception
e.printStackTrace();
}
finally{
try{
if (reader != null){
reader.close();
}

}
catch(Exception e){
e.toString();
}
}
return res.toString();
}


}
...全文
399 2 打赏 收藏 转发到动态 举报
写回复
用AI写文章
2 条回复
切换为时间正序
请发表友善的回复…
发表回复
xiashengwuyu 2016-11-22
  • 打赏
  • 举报
回复
我觉得你也许可以去看一下浏览器的编码是不是utf-8的编码
baidu_35198066 2016-11-21
  • 打赏
  • 举报
回复
把UTF-8换成其他的试试

50,523

社区成员

发帖
与我相关
我的任务
社区描述
Java相关技术讨论
javaspring bootspring cloud 技术论坛(原bbs)
社区管理员
  • Java相关社区
  • 小虚竹
  • 谙忆
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧