模拟登录新浪微博抓取用户数据出错?

halfsuccess 2013-04-23 04:17:05
错误信息如下:
Exception in thread "main" java.lang.NoSuchFieldError: INSTANCE
at org.apache.http.client.utils.URLEncodedUtils.parse(URLEncodedUtils.java:190)
at org.apache.http.client.utils.URIBuilder.parseQuery(URIBuilder.java:95)
at org.apache.http.client.utils.URIBuilder.digestURI(URIBuilder.java:165)
at org.apache.http.client.utils.URIBuilder.<init>(URIBuilder.java:90)
at org.apache.http.client.utils.URIUtils.rewriteURI(URIUtils.java:133)
at org.apache.http.impl.client.DefaultRequestDirector.rewriteRequestURI(DefaultRequestDirector.java:354)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:477)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:858)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:76)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:97)
at Sina.get(Sina.java:150)
at Sina.preLogin(Sina.java:169)
at Sina.login(Sina.java:38)
at Sina.main(Sina.java:247)

源代码如下
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.HashMap;

import javax.script.*;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;

import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;

import org.apache.http.message.BasicNameValuePair;

import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;

/**
 * Command-line helper that simulates a browser login to Sina Weibo
 * (ssologin.js v1.4.5 flow) and prints the logged-in user's home page.
 *
 * <p>Flow: call the prelogin endpoint to obtain {@code servertime},
 * {@code rsakv} and {@code pubkey}, encrypt the password with the JavaScript
 * returned by {@link #getJs()}, POST the login form, then follow the
 * redirect URLs found in the responses.
 *
 * <p>NOTE(review): the {@code NoSuchFieldError: INSTANCE} reported with this
 * code is raised inside HttpClient's own classes
 * ({@code URLEncodedUtils.parse}); that usually indicates mismatched
 * httpclient/httpcore jar versions on the classpath rather than a defect in
 * this class -- confirm against the project's dependencies.
 */
@SuppressWarnings("deprecation")
public class Sina {

    /**
     * Logs in to Sina Weibo and prints the user's home page to stdout.
     *
     * <p>Any failure is reported by printing the stack trace; nothing is
     * thrown to the caller. The HTTP client created here is always shut down
     * before returning.
     *
     * @param u account name (plain text)
     * @param p account password (plain text)
     */
    public static void login(String u, String p) {

        DefaultHttpClient client = new DefaultHttpClient();

        try {
            // The encoded account name is needed by both the prelogin call and the form.
            String su = encodeAccount(u);

            // Fetch pubkey, rsakv, servertime, ... from the prelogin endpoint.
            HashMap<String, String> params = preLogin(su, client);

            /******** login request *********/
            HttpPost post = new HttpPost(
                    "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)");
            post.setHeader("Accept",
                    "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            post.setHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1");
            post.setHeader("Accept-Language", "zh-cn,zh;q=0.5");
            post.setHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
            post.setHeader("Referer",
                    "http://weibo.com/?c=spr_web_sq_firefox_weibo_t001");
            post.setHeader("Content-Type", "application/x-www-form-urlencoded");

            String nonce = makeNonce(6);

            /******** encrypt the password *********/
            ScriptEngineManager sem = new ScriptEngineManager();
            ScriptEngine se = sem.getEngineByName("javascript");
            if (se == null) {
                // getEngineByName returns null when no matching engine is installed.
                throw new IllegalStateException(
                        "No JavaScript script engine is available on this JVM");
            }
            se.eval(getJs());
            if (!(se instanceof Invocable)) {
                // Without Invocable the password cannot be encrypted; posting an
                // empty password would only produce a confusing login failure.
                throw new IllegalStateException(
                        "JavaScript engine does not support function invocation");
            }
            // Calls the script's getpass(password, servertime, nonce, pubkey).
            Object encrypted = ((Invocable) se).invokeFunction("getpass", p,
                    params.get("servertime"), nonce, params.get("pubkey"));
            if (encrypted == null) {
                throw new IllegalStateException("getpass returned no value");
            }
            String pass = encrypted.toString();

            /******** login form *********/
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("encoding", "UTF-8"));
            nvps.add(new BasicNameValuePair("entry", "weibo"));
            nvps.add(new BasicNameValuePair("from", ""));
            nvps.add(new BasicNameValuePair("gateway", "1"));
            nvps.add(new BasicNameValuePair("nonce", nonce));
            nvps.add(new BasicNameValuePair("pagerefer",
                    "http://i.firefoxchina.cn/old/"));
            nvps.add(new BasicNameValuePair("prelt", "111"));
            nvps.add(new BasicNameValuePair("pwencode", "rsa2"));
            nvps.add(new BasicNameValuePair("returntype", "META"));
            nvps.add(new BasicNameValuePair("rsakv", params.get("rsakv")));
            nvps.add(new BasicNameValuePair("savestate", "0"));
            nvps.add(new BasicNameValuePair("servertime", params.get("servertime")));
            nvps.add(new BasicNameValuePair("service", "miniblog"));
            nvps.add(new BasicNameValuePair("sp", pass));
            nvps.add(new BasicNameValuePair("su", su));
            nvps.add(new BasicNameValuePair(
                    "url",
                    "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"));
            nvps.add(new BasicNameValuePair("useticket", "1"));
            nvps.add(new BasicNameValuePair("vsnf", "1"));

            post.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));

            HttpResponse response = client.execute(post);
            String entity = EntityUtils.toString(response.getEntity());

            if (entity.replace("\"", "").indexOf("retcode=0") > -1) {
                // Cut the redirect URL out of the response: it starts at the
                // login.php address and ends right after "code=0" (the tail of
                // "retcode=0").
                int urlStart = entity.indexOf("http://weibo.com/sso/login.php?");
                int codeIdx = urlStart < 0 ? -1 : entity.indexOf("code=0", urlStart);
                if (codeIdx < 0) {
                    throw new IllegalStateException(
                            "Login response does not contain the expected redirect URL");
                }
                String url = entity.substring(urlStart, codeIdx + 6);

                // Follow the real login URL.
                HttpGet getMethod = new HttpGet(url);
                response = client.execute(getMethod);
                entity = EntityUtils.toString(response.getEntity());

                // Nickname; extracted but not used further yet.
                // NOTE(review): the fixed offsets (+14 / -3 / +13) presumably skip
                // the key name plus surrounding quotes and separators -- verify
                // against an actual response.
                String nick = entity.substring(entity.indexOf("displayname") + 14,
                        entity.lastIndexOf("userdomain") - 3).trim();

                url = entity.substring(entity.indexOf("userdomain") + 13,
                        entity.lastIndexOf("\""));
                getMethod = new HttpGet("http://weibo.com/" + url);
                response = client.execute(getMethod);
                entity = EntityUtils.toString(response.getEntity());

                System.out.println(entity);
            } else {
                System.err.println("Login failed: response did not contain retcode=0");
            }

        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release all connections held by the client.
            client.getConnectionManager().shutdown();
        }

    }

    /**
     * Fetches a page with HTTP GET and returns its body as text.
     *
     * <p>The response status line is printed to stdout. The request is
     * aborted afterwards in all cases so the connection is released even when
     * reading the body fails.
     *
     * @param url address to fetch
     * @param client HTTP client used to execute the request
     * @return the response body decoded by {@link #dump(HttpEntity)}
     * @throws IOException if the request fails or the response has no body
     */
    private static String get(String url, DefaultHttpClient client)
            throws IOException {
        HttpGet get = new HttpGet(url);
        try {
            HttpResponse response = client.execute(get);
            System.out.println(response.getStatusLine());
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("Empty response body from " + url);
            }
            return dump(entity);
        } finally {
            get.abort();
        }
    }

    /**
     * Performs the Sina Weibo prelogin request, which returns the values
     * needed to encrypt the password (public key, rsakv, servertime, ...).
     *
     * @param unameBase64 encoded account name as produced by
     *        {@link #encodeAccount(String)}; sent as the {@code su} parameter
     * @param client HTTP client used to execute the request
     * @return map of the parameters parsed from the prelogin response
     * @throws IOException if the request fails
     */
    private static HashMap<String, String> preLogin(String unameBase64,
            DefaultHttpClient client) throws IOException {
        // Base64 output may contain '+', '/' and '=', so it must be URL-encoded
        // before being placed in the query string. The trailing "_" parameter is
        // the current time in milliseconds.
        String url = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo"
                + "&callback=sinaSSOController.preloginCallBack"
                + "&su=" + URLEncoder.encode(unameBase64, "UTF-8")
                + "&rsakt=mod&client=ssologin.js(v1.4.5)"
                + "&_=" + new Date().getTime();
        return getParaFromResult(get(url, client));
    }

    /**
     * Extracts the key/value pairs from the first {@code {...}} object found
     * in a prelogin response.
     *
     * <p>This is a deliberately simple parser: pairs are separated by
     * {@code ','} and each pair is split at its first {@code ':'}, so values
     * may contain colons but not commas. Surrounding double quotes and
     * whitespace are removed from keys and values. Pairs without a colon are
     * skipped.
     *
     * @param result raw response text
     * @return map of parsed parameters (never {@code null})
     * @throws IllegalArgumentException if {@code result} contains no
     *         {@code {...}} object
     */
    private static HashMap<String, String> getParaFromResult(String result) {
        int open = result == null ? -1 : result.indexOf("{");
        int close = open < 0 ? -1 : result.indexOf("}", open + 1);
        if (close < 0) {
            throw new IllegalArgumentException(
                    "Response does not contain a {...} object: " + result);
        }

        HashMap<String, String> hm = new HashMap<String, String>();
        String[] pairs = result.substring(open + 1, close).split(",");
        for (int i = 0; i < pairs.length; i++) {
            // Limit 2: only the first ':' separates key from value.
            String[] kv = pairs[i].split(":", 2);
            if (kv.length < 2) {
                continue;
            }
            hm.put(unquote(kv[0]), unquote(kv[1]));
        }
        return hm;
    }

    /**
     * Trims the token and removes one pair of surrounding double quotes, if
     * present.
     */
    private static String unquote(String token) {
        String t = token.trim();
        if (t.length() >= 2 && t.charAt(0) == '"'
                && t.charAt(t.length() - 1) == '"') {
            return t.substring(1, t.length() - 1);
        }
        return t;
    }

    /**
     * Reads the whole entity body as text using the "utf8" charset.
     *
     * @param entity response entity to read
     * @return the body content
     * @throws IOException if reading fails
     */
    private static String dump(HttpEntity entity) throws IOException {
        BufferedReader br = new BufferedReader(new InputStreamReader(
                entity.getContent(), "utf8"));
        try {
            return IOUtils.toString(br);
        } finally {
            br.close();
        }
    }

    /**
     * Returns the JavaScript source that is evaluated by
     * {@link #login(String, String)} and is expected to define the
     * {@code getpass} function used for "rsa2" password encryption.
     */
    public static String getJs() {
        String js =
        // (a very long JavaScript string literal was omitted here in the original post;
        // it must be restored before this class can compile)
        return js;
    }

    /**
     * Encodes the account name: URL-encode as UTF-8, then Base64-encode.
     *
     * @param account plain account name
     * @return the encoded account name, or an empty string if UTF-8 is
     *         unsupported (the error is printed)
     */
    private static String encodeAccount(String account) {
        String userName = "";
        try {
            // Explicit charset so the result does not depend on the platform default.
            userName = Base64.encodeBase64String(URLEncoder.encode(account,
                    "UTF-8").getBytes("UTF-8"));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return userName;
    }

    /**
     * Generates a random nonce made of upper-case letters and digits.
     *
     * @param len number of characters; values below 1 yield an empty string
     * @return the generated nonce
     */
    private static String makeNonce(int len) {
        String x = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        StringBuilder str = new StringBuilder(Math.max(len, 0));
        for (int i = 0; i < len; i++) {
            str.append(x.charAt((int) (Math.random() * x.length())));
        }
        return str.toString();
    }

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the account name, {@code args[1]} the password
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: java Sina <username> <password>");
            return;
        }
        login(args[0], args[1]);
    }
}
...全文
125 1 打赏 收藏 转发到动态 举报
写回复
用AI写文章
1 条回复
切换为时间正序
请发表友善的回复…
发表回复
Towan 2013-10-28
  • 打赏
  • 举报
回复
最近新浪微博又更新了一番,现在已经是 v1.4.11。想问楼主当初模拟登录后获得数据了吗?我现在登录后只能获得公众名人的信息,但是对于非名人的信息还是获取不到,请问是什么原因?麻烦给予解答,谢谢。

10,606

社区成员

发帖
与我相关
我的任务
社区描述
Web 开发 其他
社区管理员
  • 其他
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧