HttpURLConnection抓取这个页面乱码

ivan_070 2013-11-15 08:48:49
无论用什么编码都乱码,而且就这个页面有问题,在浏览器中正常,请教一下怎么抓取这个。
http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js

...全文
182 5 打赏 收藏 转发到动态 举报
写回复
用AI写文章
5 条回复
切换为时间正序
请发表友善的回复…
发表回复
tony4geek 2013-11-15
  • 打赏
  • 举报
回复
或者用htmlparser
// Fetch the remote script with the htmlparser library and print what it parsed.
String scriptUrl="http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js";
			Parser htmlParser=new Parser(scriptUrl);
			// Decode the response as UTF-8 (assumes the server really serves UTF-8 — TODO confirm).
			htmlParser.setEncoding("utf-8");
			// A null filter is passed; presumably this returns every parsed node — verify against the htmlparser docs.
			NodeList nodes=htmlParser.parse(null);
			System.out.println(nodes.toHtml());
teemai 2013-11-15
  • 打赏
  • 举报
回复

// Fetch the resource with Jsoup and print its text content.
// timeout(0): presumably "no timeout" in Jsoup — confirm against the Jsoup docs.
// ignoreContentType(true): the target is a .js file, not HTML, so the content-type check is switched off.
Document doc = Jsoup.connect(url)
				.timeout(0)
				.ignoreContentType(true)
				.get();
		System.out.println(doc.text());
teemai 2013-11-15
  • 打赏
  • 举报
回复
为什么我的没问题? 抓出来是这样的

var id_s=new Array( "14971271" ,"14971272" ,"14971273" ,"14971247" ,"14971285" ,"14971274" ,"14971280" ,"14971282" ,"14971283" ,"14971277" ,"14971276" ,"14971279" ,"14971275" ,"14971281" ,"14971284" ,"14971286" ,"14971261" ,"14971245" ,"14971292" ,"14971260" ,"14971265" ,"14971253" ,"14971305" ,"14971306" ,"14971293" ,"14971246" ,"14971252" ,"14971294" ,"14971308" ,"14971307" ,"14971298" ,"14971299" ,"14971296" ,"14971297" ,"14971295" ,"14971303" ,"14971278" ,"14971244" ,"14971240" ,"14971313" ,"14971254" ,"14971302" ,"14971311" ,"14971312" ,"14971314" ,"14971241" ,"14971242" ,"14971255" ,"14971256" ,"14971257" ,"14971287" ,"14971270" ,"14971304" ,"14971288" ,"14971301" ,"14971300" ,"14971243" ,"14971239" ,"14971249" ,"14971267" ,"14971269" ,"14971268" ,"14971266" ,"14971258" ,"14971259" ,"14971238" ,"14971250" ,"14971251" ,"14971248" ,"14971262" ,"14971263" ,"14971264" ,"14971290" ,"14971289" ,"14971291" ,"14971316" ,"14971317" ,"14971310" ,"14971309" ,"14971315" ); //B标签ID、已售完数组 var sold_outArr=new Object(); sold_outArr.b_ids=new Array(); sold_outArr.s_outArr=new Array();//已售完数组 var id_s_ar=new Array(); var id_href=new Array(); var red_cut; var userType;//用户类型 var s_spl=new Array(); //id_sp为分批数,整除时为正确,产生余数时在 var id_sp=(id_s.length-id_s.length%50)/50; var i,c1,c,t,g,brand_idtmp,s_tmp=0; //summery// function change()//遍历清除---已售完标签解决方案 { } $(document).ready(function sethrefs(){ i=0; c=0; g=0; var a=this.location.href; var atmp=a.split("/"); var atmp1=a.split("-"); if(atmp1[1]!=0&&atmp1[1]!=1&&atmp1[1]!=2&&atmp1[1]!=3&&atmp1[1]!=4&&atmp1[1]!=undefined) { brand_idtmp=atmp1[1].split(".")[0]; } else { brand_idtmp=atmp1[2]; } //获取用户类型 var VipNewUser = !!$.Cookie.get('VipNewUser'), vip_new_b_user = !!$.Cookie.get('vip_new_b_user'), vip_new_old_user = !!$.Cookie.get('vip_new_old_user'), userType = 0; if (!vip_new_old_user || VipNewUser) { userType = 0; } else { userType = vip_new_b_user ? 
1 : 1; } // sold_outArr.b_ids=('L_soldout_' + id_s.toString().replace(/,/g, ",L_soldout_")).split(","); $(".wrapper a").each(function(){ if(this.name==""){ id_href[i]="http://shop.vipshop.com/detail-"+brand_idtmp+"-"+id_s[i]+".html"; $(this).attr("id",sold_outArr.b_ids[i]); $(this).attr("href",id_href[i]); $(this).attr("target","_blank"); i++;} }); //已售完 $.ajax ({ url : 'http://stock.vipshop.com/list/', data : { brandId: brand_idtmp, is_old: userType }, cache : true, jsonp: 'callback', jsonpCallback : 'te_pingou', success : function (re) { sold_outArr.s_outArr='#L_soldout_' + re.sold_out.replace(/,/g, ",#L_soldout_"); $(sold_outArr.s_outArr).find("b").show(); }, dataType : 'jsonp' }); /*change();*/ })

tony4geek 2013-11-15
  • 打赏
  • 举报
回复
网页内容经过了 gzip 压缩,需要先解压,再按对应的字符编码解码:
/**
 * Downloads the remote script and prints its text to standard output.
 *
 * <p>The response body is gunzipped when the server reports
 * {@code Content-Encoding: gzip}; otherwise the raw bytes are decoded directly,
 * so an uncompressed response is no longer silently dropped. The reported
 * content encoding is written to standard error.
 *
 * @param args unused
 * @throws Exception if the request fails, the status is not 200, or the body cannot be read
 */
public static void main(String[] args) throws Exception {
		String result = "";
		URL url = new URL("http://h.vimage4.com/upload/actpics/pingou/2013/11m/14/kaizi/sh.js");
		HttpURLConnection conn = (HttpURLConnection) url.openConnection();
		try {
			conn.setConnectTimeout(7 * 1000);
			if (conn.getResponseCode() != 200) {
				throw new RuntimeException("请求url失败");
			}
			String contentEncoding = conn.getContentEncoding();
			// try-with-resources closes the body stream on every path, including failures.
			try (InputStream is = conn.getInputStream()) {
				if ("gzip".equals(contentEncoding)) {
					result = readDataForZgip(is, "utf-8");
				} else {
					// Not gzip-compressed: read the bytes as they are instead of leaving result empty.
					ByteArrayOutputStream raw = new ByteArrayOutputStream();
					byte[] buffer = new byte[1024];
					int len;
					while ((len = is.read(buffer)) != -1) {
						raw.write(buffer, 0, len);
					}
					result = new String(raw.toByteArray(), "utf-8");
				}
			}
			System.err.println("ContentEncoding: " + contentEncoding);
			System.out.println(result);
		} finally {
			// Release the connection even when the request or the read throws.
			conn.disconnect();
		}
	}

	/**
	 * Reads a gzip-compressed stream to the end and decodes the decompressed bytes as text.
	 *
	 * <p>{@code inStream} is always closed before this method returns, including when
	 * the data is not valid gzip or a read fails part-way through.
	 *
	 * @param inStream    stream positioned at the start of gzip data; closed by this method
	 * @param charsetName name of the charset used to decode the decompressed bytes
	 * @return the decompressed content as a string
	 * @throws Exception if the data is not valid gzip, reading fails, or the charset is unsupported
	 */
	public static String readDataForZgip(InputStream inStream,
			String charsetName) throws Exception {
		// inStream is declared as the first resource so it is closed even if the
		// GZIPInputStream constructor rejects the header and throws.
		try (InputStream rawStream = inStream;
				GZIPInputStream gzipStream = new GZIPInputStream(rawStream);
				ByteArrayOutputStream outStream = new ByteArrayOutputStream()) {
			byte[] buffer = new byte[1024];
			int len;
			while ((len = gzipStream.read(buffer)) != -1) {
				outStream.write(buffer, 0, len);
			}
			return new String(outStream.toByteArray(), charsetName);
		}
	}
ivan_070 2013-11-15
  • 打赏
  • 举报
回复
用curl或者迅雷下载下来也是乱码。

62,614

社区成员

发帖
与我相关
我的任务
社区描述
Java 2 Standard Edition
社区管理员
  • Java SE
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧