java获得网页源码 结果却不是指定网页的源码
土匪八号 2012-07-30 12:39:15 代码如下,可以看到通过浏览器查看源代码的方式看到的结果和程序实际获得的结果不一样。网址是 http://weibo.com/u/2694820721
java 源码
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads the resource behind a URL and decodes it to text using the
 * charset declared by the response.
 *
 * <p>The charset is taken, in order of preference, from the
 * {@code Content-Type} header, from the first {@code charset=...}
 * declaration found in the document itself, and finally from
 * {@link #DEFAULT_CHARSET}.
 */
public class URLSource {

    /** Charset used when neither the header nor the document names a usable one. */
    private static final String DEFAULT_CHARSET = "GBK";

    /**
     * Single-byte charset used only to scan the raw bytes for a declaration:
     * every byte maps to exactly one char, so nothing is lost or rejected
     * before the real charset is known.
     */
    private static final Charset SCAN_CHARSET = Charset.forName("ISO-8859-1");

    /**
     * Matches {@code charset=NAME} with optional whitespace and an optional
     * opening quote, which covers both
     * {@code content="text/html; charset=utf-8"} and
     * {@code <meta charset="utf-8">}.
     */
    private static final Pattern CHARSET_PATTERN = Pattern.compile(
            "charset\\s*=\\s*[\"']?\\s*([A-Za-z0-9][A-Za-z0-9._:+-]*)",
            Pattern.CASE_INSENSITIVE);

    /**
     * Fetches the content behind {@code link} and returns it as text.
     *
     * @param link the URL to download
     * @return the decoded content, or {@code null} if the URL is invalid,
     *         the connection cannot be opened, or nothing could be read
     */
    private static String getSource(String link) {
        try {
            URLConnection connection = new URL(link).openConnection();
            // The page could not be opened.
            if (connection == null) {
                return null;
            }

            byte[] raw = download(connection);
            if (raw == null) {
                return null;
            }

            String charset = detectCharset(connection.getContentType(), raw);
            return new String(raw, Charset.forName(charset));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Reads the whole response body.
     *
     * <p>Best effort: if reading fails part-way, the bytes received so far
     * are still returned.
     *
     * @return the bytes read, or {@code null} if reading failed before any
     *         byte arrived
     */
    private static byte[] download(URLConnection connection) {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        InputStream is = null;
        try {
            is = connection.getInputStream();
            byte[] buf = new byte[4096];
            int count;
            while ((count = is.read(buf)) >= 0) {
                os.write(buf, 0, count);
            }
        } catch (Exception e) {
            // Deliberately broad: a partial download is still worth decoding.
            e.printStackTrace();
            if (os.size() == 0) {
                return null;
            }
        } finally {
            // The stream is null when getInputStream() itself failed.
            if (is != null) {
                try {
                    is.close();
                } catch (Exception ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return os.toByteArray();
    }

    /**
     * Picks the charset to decode {@code raw} with.
     *
     * @param contentType the {@code Content-Type} header value, may be {@code null}
     * @param raw         the undecoded document bytes
     * @return the name of a charset supported by this JVM
     */
    private static String detectCharset(String contentType, byte[] raw) {
        String declared = firstSupportedCharset(contentType);
        if (declared == null) {
            declared = firstSupportedCharset(new String(raw, SCAN_CHARSET));
        }
        return declared != null ? declared : DEFAULT_CHARSET;
    }

    /**
     * Returns the first {@code charset=NAME} in {@code text} whose name this
     * JVM supports, or {@code null} if there is none.
     */
    private static String firstSupportedCharset(String text) {
        if (text == null) {
            return null;
        }
        Matcher matcher = CHARSET_PATTERN.matcher(text);
        while (matcher.find()) {
            String name = matcher.group(1);
            if (isSupported(name)) {
                return name;
            }
        }
        return null;
    }

    /** Reports whether {@code name} is a charset this JVM can decode. */
    private static boolean isSupported(String name) {
        try {
            return Charset.isSupported(name);
        } catch (IllegalArgumentException e) {
            // Thrown for syntactically illegal charset names.
            return false;
        }
    }

    /**
     * Prints the source of a sample page.
     *
     * <p>NOTE(review): the request is sent without any cookies or session,
     * so a site that requires a signed-in user will presumably answer with
     * its login page rather than the page a logged-in browser shows.
     */
    public static void main(String[] arg) {
        System.out.println(getSource("http://weibo.com/u/2694820721"));
    }
}
获得的源码如下
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=8" />
<meta name="application-name" content="新浪微博"/>
<meta name="msapplication-tooltip" content="新浪微博" />
<meta name="msapplication-window" content="width=1024;height=768" />
<meta name="msapplication-task" content="name=新浪微博;action-uri=http://weibo.com/;icon-uri=http://img.t.sinajs.cn/t4/style/images/common/favicon/home.ico" />
<meta name="msapplication-task" content="name=我的微博;action-uri=http://weibo.com//profile;icon-uri=http://img.t.sinajs.cn/t4/style/images/common/favicon/mypage.ico" />
<meta name="msapplication-task" content="name=微博广场;action-uri=http://plaza.weibo.com;icon-uri=http://img.t.sinajs.cn/t4/style/images/common/favicon/square.ico" />
<meta content="#2994ce" name="msapplication-navbutton-color" />
<title>新浪微博登录 新浪微博-随时随地分享身边的新鲜事儿</title>
<link href="http://img.t.sinajs.cn/t4/style/css/pages/register/v35loginphp.css?version=88ecb2cf40228fff" type="text/css" rel="stylesheet" />
<script type="text/javascript">
var $CONFIG = {};
$CONFIG['islogin'] = '0';
$CONFIG['oid'] = '';
$CONFIG['onick'] = '';
$CONFIG['inviteCode'] = '';
$CONFIG['entry'] = 'weiyonghu';
$CONFIG['afterupgrade'] = 'false';
$CONFIG['version'] = '88ecb2cf40228fff';
$CONFIG['bigpipe'] = 'true';
$CONFIG['timeDiff'] = (new Date() - 1343623377000);
$CONFIG['product'] = 'v4mblog';
$CONFIG['pageid'] = 'login';
$CONFIG['skin'] = 'default';
$CONFIG['background'] = "";
$CONFIG['scheme'] = "";
$CONFIG['colors_type'] = "0";
$CONFIG['lang'] = 'zh-cn';
$CONFIG['jsPath'] = 'http://js.t.sinajs.cn/t4/';
$CONFIG['cssPath'] = 'http://img.t.sinajs.cn/t4/';
$CONFIG['imgPath'] = 'http://img.t.sinajs.cn/t4/';
$CONFIG['servertime'] = 1343623377;
$CONFIG['any'] = "&wvr=3.6";
$CONFIG['$webim'] = 0;
$CONFIG['location'] = 'commbox';
$CONFIG['miyou'] = 0;
$CONFIG['mJsPath'] = ['http://js{n}.t.sinajs.cn/t4/', 1, 2];
$CONFIG['mCssPath'] = ['http://img{n}.t.sinajs.cn/t4/', 1, 2];
$CONFIG['enterprise']='';
$CONFIG['token'] = 'b85b158144832b8f50aaa02074b1e391';
$CONFIG['redirect'] = 'http%3A%2F%2Fweibo.com%2Fu%2F2694820721';
</script>
<script src="http://js.t.sinajs.cn/STK/js/gaea_1_19.js" type="text/javascript" ></script>
</head>
<body class="MIB_loginphp">
<div class="loginWarp">
<div class="warpB1"></div>
<div class="warpC" id="pl_login_login">
<div class="loginMain">
<div class="loginMain2">
<table class="regTab">
<tr>
<th>还没有新浪微博帐号?</th>
<td class="regBg"><a target="_self" href="http://weibo.com/signup/signup.php?c=http%3A%2F%2Fweibo.com%2Fu%2F2694820721&lang=zh-cn" suda-data="key=weibo_login&value=register">立即注册</a></td>
</tr>
</table>
<!-- 登录组件HTML start -->
<div id="login_form"></div>
<!-- 登录组件HTML end -->
<div class="recommend_area">
<div class="recommend_link">
<div class="recommend_bg_right">
<p><a href="http://weibo.com/z/invite?from=weidenglu" target="_blank">邀请好友开微博,赢大奖!<span>>></span></a></p>
</div>
</div>
</div>
<div class="hezuo_login"> <strong>使用其他账号登录</strong>
<span><i class="hezuo_login1"></i><a class="msnlink" tabindex="6" title="MSN" href="javascript:void(0);" node-type="msn" suda-data="key=weibo_login&value=weibo_login_by_msn">MSN</a></span><b>|</b>
<span><i class="hezuo_login3"></i><a target="_blank" href="http://weibo.com/bind/gotooauth.php?company=189.cn" suda-data="key=weibo_login&value=weibo_login_by_189">天翼</a></span><b>|</b>
<span><i class="hezuo_login4"></i><a target="_blank" href="http://weibo.com/bind/gotooauth.php?company=wo.com.cn" suda-data="key=weibo_login&value=weibo_login_by_wo">联通</a></span><b>|</b>
<span><i class="hezuo_login5"></i><a target="_blank" href="https://openapi.360.cn/oauth2/authorize?client_id=f48a9f02f41ca0ddaf4f01ae523294c7&response_type=code&redirect_uri=http://weibo.com/bind/bind_360.php&scope=basic&display=default" suda-data="key=weibo_login&value=weibo_login_by_360">360</a></span>
</div>
<p class="mobile"><span class="sj"><a target="_blank" href="http://m.weibo.com/web/cellphone.php?topnav=1&wvr=4" suda-data="key=weibo_login&value=goto_weibo">手机玩转新浪微博</a></span></p>
</div>
</div>
</div>
<div class="warpB clearfix">
<div class="bottomLinks">
<div class="MIB_foot_new MIB_txtbr MIB_linkbr">
<div class="lf">
<p class="p1"><a target="_blank" href="http://help.sina.com.cn/p/i_12.html">微博帮助</a><a target="_blank" href="http://weibo.com/k/weijianyi">意见反馈</a><!--<a target="_blank" href="http://weibo.com/verified">微博认证及合作</a>--><a target="_blank" href="http://open.weibo.com/">开放平台</a><a target="_blank" href="http://hr.weibo.com">微博招聘</a><a target="_blank" href="http://news.sina.com.cn/guide/">新浪网导航</a><a target="_blank" href="http://weibo.com/complaint/complaint.php?url=http%3A%2F%2Fweibo.com%2F2006764843"><img class="informIcon" src="http://img.t.sinajs.cn/t4/style/images/common/transparent.gif">不良信息举报</a></p>
<p>客服电话:400 096 0960(个人) 400 098 0980(企业) (按当地市话标准计费)</p>
<p class="p1">北京微梦创科网络技术有限公司 <a href="http://weibo.com/aj/static/jww.html" target="_blank">京网文[2011]0398-130号</a><a href="http://www.miibeian.gov.cn" target="_blank">京ICP证100780号</a></p>
</div>
<div class="rt">
<p>语言:
<select onchange="location.href=this.value" id="lang_select">
<option value="/login?url=http%3A%2F%2Fweibo.com%2Fu%2F2694820721&lang=zh-cn" selected>中文(简体)</option>
<option value="/login?url=http%3A%2F%2Fweibo.com%2Fu%2F2694820721&lang=zh-tw">中文(繁体)</option>
</select>
</p>
<p>Copyright ? 1996-2012 SINA</p>
</div>
<div class="clearit"></div>
</div>
</div>
</div></div><!-- SUDA_CODE_START -->
<script type='text/javascript' charset="utf-8">
(function() {
var doc = document,
wa = doc.createElement('script'),
s = doc.getElementsByTagName('script')[0];
wa.type = 'text/javascript';
wa.charset = 'utf-8';
wa.async = true;
wa.src = ('https:' == doc.location.protocol ? 'https://' : 'http://') + 'js.t.sinajs.cn/open/analytics/js/suda.js?version=88ecb2cf40228fff';
s.parentNode.insertBefore(wa, s);
})();
</script>
<noScript>
<img width="0" height="0" src="http://beacon.sina.com.cn/a.gif?noScript" border="0" alt="" />
</noScript>
<!-- SUDA_CODE_END --><script type="text/javascript" src="http://js.t.sinajs.cn/t35/miniblog/static/js/sso.js?version=88ecb2cf40228fff"></script>
<script type="text/javascript" src="http://js.t.sinajs.cn/t4/apps/secure/js/login/plugin.js?version=88ecb2cf40228fff"></script>
<script type="text/javascript" src="http://js.t.sinajs.cn/t4/home/js/login/login.js?version=88ecb2cf40228fff"></script>
</body>
</html>