本人想用C#做一个信息抓取系统，想在新浪网上抓取新闻。前提是我已经在新浪网上登录了。请问应该怎么实现？

summba 2010-01-05 05:21:58

请教各位帅哥们：

本人想用C#做一个信息抓取系统，想在新浪网上抓取新闻。前提是我已经在新浪网上登录了。请问应该怎么实现？

...全文

160 7 打赏收藏转发到动态举报

写回复

用AI写文章

7 条回复

切换为时间正序

请发表友善的回复…

发表回复

hua_88617 2010-01-05

打赏
举报

/// <summary>
/// Sends an HTTP GET to <paramref name="URL"/> with the caller's cookies attached
/// and returns the raw (undecoded) bytes of the response body.
/// </summary>
/// <param name="server">Absolute URI of the site the cookies belong to; used as the cookie scope.</param>
/// <param name="URL">Absolute URL of the page to fetch.</param>
/// <param name="byteRequest">
/// Unused. The request is always a GET, which carries no body; the parameter is
/// kept only so existing callers continue to compile. May be null.
/// </param>
/// <param name="cookie">Cookie header string of the logged-in session. May be null or empty.</param>
/// <param name="header">
/// Receives <paramref name="cookie"/> merged with any <c>Set-Cookie</c> value returned
/// by the server (comma separated), so the caller can pass it to the next request.
/// </param>
/// <returns>The response body as a byte array.</returns>
public static byte[] GetHtmlByBytes(string server, string URL, byte[] byteRequest, string cookie, out string header)
        {
            HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(URL);

            // Attach the session cookies; skip when none were supplied, because
            // CookieContainer.SetCookies rejects a null header.
            CookieContainer cookies = new CookieContainer();
            if (!string.IsNullOrEmpty(cookie))
            {
                cookies.SetCookies(new Uri(server), cookie);
            }
            httpWebRequest.CookieContainer = cookies;

            // Headers chosen to look like a desktop IE6 browser.
            httpWebRequest.ContentType = "application/x-www-form-urlencoded";
            httpWebRequest.Accept = "application/x-shockwave-flash, image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*";
            httpWebRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)";
            httpWebRequest.Headers.Add("Accept-Language", "zh-cn");

            // Automatic redirect handling is left at its default setting.
            httpWebRequest.Method = "GET";
            httpWebRequest.Timeout = 15000;

            // ContentLength is deliberately NOT set: GetResponse throws
            // ProtocolViolationException when a GET request has a Content-Length.

            // Dispose the response (and its stream) even if reading fails, so the
            // underlying connection is returned to the pool.
            using (HttpWebResponse webResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            {
                string setCookie = webResponse.Headers.Get("Set-Cookie");
                if (string.IsNullOrEmpty(setCookie))
                {
                    header = cookie;
                }
                else if (string.IsNullOrEmpty(cookie))
                {
                    header = setCookie;
                }
                else
                {
                    header = cookie + "," + setCookie;
                }

                using (Stream body = webResponse.GetResponseStream())
                {
                    return ReadFully(body);
                }
            }
        }

/// <summary>
/// Drains <paramref name="stream"/> from its current position to the end and
/// returns everything that was read as a single byte array.
/// </summary>
/// <param name="stream">Readable stream to consume; it is not closed by this method.</param>
/// <returns>All bytes read; an empty array when the stream yields no data.</returns>
public static byte[] ReadFully(Stream stream)
        {
            byte[] chunk = new byte[128];
            using (MemoryStream collected = new MemoryStream())
            {
                int count;
                // Read returns 0 (or less) once the end of the stream is reached.
                while ((count = stream.Read(chunk, 0, chunk.Length)) > 0)
                {
                    collected.Write(chunk, 0, count);
                }
                return collected.ToArray();
            }
        }