求解析网页~
求解析这个网页http://home.focus.cn/msglist/650000/
小弟解析出来的一直是乱码，代码如下：
/**
 * Fetches a URL with an HTTP GET request and returns the response body as text.
 *
 * <p>The request is sent with browser-like headers (an MSIE 7 User-Agent,
 * {@code Accept-Language: zh-cn}), follows redirects, bypasses caches, and uses a
 * 10 second connect timeout and a 30 second read timeout.
 *
 * <p>The body is decoded with the first of the following that applies:
 * <ol>
 *   <li>{@code htmltype}, when it is not blank;</li>
 *   <li>the {@code charset} parameter of the response {@code Content-Type} header,
 *       when present and supported by this JVM;</li>
 *   <li>the JVM's default charset.</li>
 * </ol>
 * Decoding with the wrong charset is the usual cause of garbled text, so pass
 * {@code htmltype} explicitly (for example {@code "GBK"}) when the server does not
 * declare the page encoding in its headers.
 *
 * @param cookie     value for the {@code Cookie} request header; ignored when null or blank
 * @param htmltype   name of the charset used to decode the body; null or blank
 *                   selects the automatic fallback described above
 * @param httpurl    the http/https URL to fetch
 * @param changeline when {@code Boolean.FALSE}, lines are concatenated with no
 *                   separator; otherwise (including null) every line is
 *                   <em>preceded</em> by {@code "\n"}, so a non-empty result
 *                   starts with a newline
 * @return the decoded response body, or an empty string when the body is empty
 * @throws IOException if the URL is malformed, the connection or read fails, the
 *                     server answers with an error status, or {@code htmltype}
 *                     names an unsupported charset
 */
public static String gethtml(String cookie,String htmltype,String httpurl,Boolean changeline) throws IOException {
    URL urlx = new URL(httpurl);
    HttpURLConnection uc = (HttpURLConnection) urlx.openConnection();
    uc.setDoInput(true);
    uc.setUseCaches(false);
    uc.setRequestMethod("GET");
    uc.setInstanceFollowRedirects(true);
    uc.setConnectTimeout(10*1000);
    uc.setReadTimeout(30*1000);
    uc.setRequestProperty("Cache-Control", "no-cache, must-revalidate");
    uc.setRequestProperty("Connection", "Keep-Alive");
    uc.setRequestProperty("Accept","image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/x-shockwave-flash, */*");
    uc.setRequestProperty("Accept-Language", "zh-cn");
    // Accept-Encoding is deliberately not sent, so a compressed body is not requested;
    // a gzip body is still decoded below in case the server compresses anyway.
    uc.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    uc.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1)");
    if (cookie != null && !cookie.trim().equals("")) {
        uc.setRequestProperty("Cookie", cookie);
    }

    // Only an explicit FALSE suppresses the line separators; null behaves like TRUE.
    boolean keepLineBreaks = changeline == null || changeline.booleanValue();

    StringBuilder vhtml = new StringBuilder();
    // try-with-resources closes the stream even when decoding or reading fails,
    // which also lets the underlying keep-alive connection be released.
    try (java.io.InputStream body = gethtmlOpenBody(uc);
         BufferedReader reader = new BufferedReader(gethtmlOpenReader(body, htmltype, uc.getContentType()))) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (keepLineBreaks) {
                vhtml.append('\n');
            }
            vhtml.append(line);
        }
    }
    return vhtml.toString();
}

/** Opens the response body, unwrapping it when the server reports a gzip Content-Encoding. */
private static java.io.InputStream gethtmlOpenBody(HttpURLConnection uc) throws IOException {
    java.io.InputStream raw = uc.getInputStream();
    String encoding = uc.getContentEncoding();
    if (encoding == null) {
        return raw;
    }
    String normalized = encoding.trim();
    if (!normalized.equalsIgnoreCase("gzip") && !normalized.equalsIgnoreCase("x-gzip")) {
        return raw;
    }
    try {
        return new java.util.zip.GZIPInputStream(raw);
    } catch (IOException e) {
        // The wrapper was never created, so the raw stream must be closed here.
        raw.close();
        throw e;
    }
}

/** Picks the decoder for the body: explicit charset, then Content-Type charset, then JVM default. */
private static InputStreamReader gethtmlOpenReader(java.io.InputStream body, String htmltype, String contentType) throws IOException {
    if (htmltype != null && !htmltype.trim().equals("")) {
        return new InputStreamReader(body, htmltype);
    }
    if (contentType != null) {
        String prefix = "charset=";
        for (String rawParam : contentType.split(";")) {
            String param = rawParam.trim();
            if (!param.regionMatches(true, 0, prefix, 0, prefix.length())) {
                continue;
            }
            String name = param.substring(prefix.length()).trim();
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                if (java.nio.charset.Charset.isSupported(name)) {
                    return new InputStreamReader(body, name);
                }
            } catch (IllegalArgumentException ignored) {
                // Malformed charset name in the header: fall back to the default below.
            }
        }
    }
    return new InputStreamReader(body);
}