求指点:关于LINUX C LIBCURL问题

pygmalion 2012-10-11 06:23:46
日前做一个抓取网页的C程序,开始的时候参照了curl官网的一段示例代码:
/* This is an example application source code using the multi interface. */ 

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* somewhat unix-specific */
#include <sys/time.h>
#include <unistd.h>

/* curl stuff */
#include <curl/curl.h>

/*
* Download an HTTP file and upload an FTP file simultaneously.
*/

#define HANDLECOUNT 2
#define HTTP_HANDLE 0
#define FTP_HANDLE 1

int main(void)
{
CURL *handles[HANDLECOUNT];
CURLM *multi_handle;

int still_running; /* keep number of running handles */
int i;

CURLMsg *msg; /* for picking up messages with the transfer status */
int msgs_left; /* how many messages are left */

/* Allocate one CURL handle per transfer */
for (i=0; i<HANDLECOUNT; i++)
handles[i] = curl_easy_init();

/* set the options (I left out a few, you'll get the point anyway) */
curl_easy_setopt(handles[HTTP_HANDLE], CURLOPT_URL, "http://example.com");

curl_easy_setopt(handles[FTP_HANDLE], CURLOPT_URL, "ftp://example.com");
curl_easy_setopt(handles[FTP_HANDLE], CURLOPT_UPLOAD, 1L);

/* init a multi stack */
multi_handle = curl_multi_init();

/* add the individual transfers */
for (i=0; i<HANDLECOUNT; i++)
curl_multi_add_handle(multi_handle, handles[i]);

/* we start some action by calling perform right away */
curl_multi_perform(multi_handle, &still_running);

do {
struct timeval timeout;
int rc; /* select() return code */

fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
int maxfd = -1;

long curl_timeo = -1;

FD_ZERO(&fdread);
FD_ZERO(&fdwrite);
FD_ZERO(&fdexcep);

/* set a suitable timeout to play around with */
timeout.tv_sec = 1;
timeout.tv_usec = 0;

curl_multi_timeout(multi_handle, &curl_timeo);
if(curl_timeo >= 0) {
timeout.tv_sec = curl_timeo / 1000;
if(timeout.tv_sec > 1)
timeout.tv_sec = 1;
else
timeout.tv_usec = (curl_timeo % 1000) * 1000;
}

/* get file descriptors from the transfers */
curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);

/* In a real-world program you OF COURSE check the return code of the
function calls. On success, the value of maxfd is guaranteed to be
greater or equal than -1. We call select(maxfd + 1, ...), specially in
case of (maxfd == -1), we call select(0, ...), which is basically equal
to sleep. */

rc = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout);

switch(rc) {
case -1:
/* select error */
break;
case 0: /* timeout */
default: /* action */
curl_multi_perform(multi_handle, &still_running);
break;
}
} while(still_running);

/* See how the transfers went */
while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
if (msg->msg == CURLMSG_DONE) {
int idx, found = 0;

/* Find out which handle this message is about */
for (idx=0; idx<HANDLECOUNT; idx++) {
found = (msg->easy_handle == handles[idx]);
if(found)
break;
}

switch (idx) {
case HTTP_HANDLE:
printf("HTTP transfer completed with status %d\n", msg->data.result);
break;
case FTP_HANDLE:
printf("FTP transfer completed with status %d\n", msg->data.result);
break;
}
}
}

curl_multi_cleanup(multi_handle);

/* Free the CURL handles */
for (i=0; i<HANDLECOUNT; i++)
curl_easy_cleanup(handles[i]);

return 0;
}


为了提高效率,我使用了多线程,在每个线程里面进行抓取,最后发现运行时间并没有大幅降低。很疑惑,请高手解答一下。
...全文
149 回复 打赏 收藏 转发到动态 举报
写回复
用AI写文章
回复
切换为时间正序
请发表友善的回复…
发表回复

69,382

社区成员

发帖
与我相关
我的任务
社区描述
C语言相关问题讨论
社区管理员
  • C语言
  • 花神庙码农
  • 架构师李肯
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧