18,356
社区成员
发帖
与我相关
我的任务
分享
#include <winsock2.h>
#include <windows.h>
#include <stdio.h>
#include <vector>
#include <string>
#include <afxmt.inl>
using namespace std;
// Size in bytes of the per-request receive buffer (10 MiB).
// Parenthesized so the macro behaves as a single value inside larger
// expressions such as `x % DATA_BUFSIZE` or `n / DATA_BUFSIZE`.
#define DATA_BUFSIZE (1024*1024*10)
#pragma comment(lib,"ws2_32")
// Operation tags stored in PER_IO_OPERATION_DATA::nOperationType.
#define OP_READ 1
#define OP_WRITE 2
// File serial counter; not referenced in the visible part of this file.
static unsigned int nFileSerial=0;
// Per-request I/O context handed to WSARecv and returned through the
// completion port.
struct PER_IO_OPERATION_DATA
{
    // Kept as the first member: the completion handler recovers this
    // structure from the LPOVERLAPPED it gets back from the completion port.
    OVERLAPPED Overlapped;
    // Accumulated response bytes.
    CHAR Buffer[DATA_BUFSIZE];
    // Copy of the requested URL, used when reporting the result.
    CHAR strUrl[100];
    // Number of bytes stored in Buffer so far.
    DWORD nRecvBytes;
    // OP_READ or OP_WRITE.
    int nOperationType;
};
typedef PER_IO_OPERATION_DATA *LPPER_IO_OPERATION_DATA;
// Per-connection data used as the completion key of a socket.
struct HOST_INFO_T
{
    // Connected socket of this request.
    SOCKET Socket;
    // Server address.
    struct sockaddr_in addr;
};
HANDLE g_hCompletion;//完成端口
struct sockaddr_in servAddr;
DWORD WINAPI ServerWorkerThread(LPVOID CompletionPortID);
void PostRequest(const char *pHostName,const char *pUrlAddress)
{
SOCKET cs;
char strRequest[1024];
cs = socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);
if(INVALID_SOCKET==cs)
{
printf("创建套接字失败\n");
return ;
}
if(connect(cs,(sockaddr *)&servAddr,sizeof(servAddr))==SOCKET_ERROR)
{
printf("%d\n",WSAGetLastError());
return ;
}
HOST_INFO_T *pHostInfo;
LPPER_IO_OPERATION_DATA lpHandlerData;
pHostInfo = (HOST_INFO_T *)malloc(sizeof(HOST_INFO_T));
lpHandlerData=(PER_IO_OPERATION_DATA *)malloc(sizeof(PER_IO_OPERATION_DATA));
memset(strRequest,0,sizeof(strRequest));
sprintf_s(strRequest,"GET %s HTTP/1.1\r\nHost: %s\r\nAccept: */*\r\nUser-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)\r\nConnection: close\r\n\r\n",pUrlAddress,pHostName); //取得HTTP报头
send(cs,strRequest,strlen(strRequest),0);
pHostInfo->Socket = cs;
CreateIoCompletionPort((HANDLE)&pHostInfo->Socket,g_hCompletion,(DWORD)pHostInfo,0);
// 投递一个接收请求
memset(lpHandlerData->Buffer,0,sizeof(lpHandlerData->Buffer));
lpHandlerData->nRecvBytes = 0;
lpHandlerData->nOperationType = OP_READ;
strcpy(lpHandlerData->strUrl,pUrlAddress);
WSABUF wsaBuf;
wsaBuf.len = 1024*10;
wsaBuf.buf = lpHandlerData->Buffer;
//memset(&(lpHandlerData->Overlapped),0,sizeof(OVERLAPPED));
DWORD nFlags=0;
DWORD dwTrans=0;
WSARecv(pHostInfo->Socket,&wsaBuf,1,&dwTrans,&nFlags,&(lpHandlerData->Overlapped),NULL);
}
void main(void)
{
HANDLE ThreadHandle[4];
SYSTEM_INFO SystemInfo;
DWORD ThreadID;
WSADATA wsaData;
DWORD Ret=0;
SOCKET cs;
vector<string> vUrlList;
char strHostName[50]="search.51job.com";
FILE *fp=fopen("E:\\test\\UrlList.TXT","r");
char strTemp[255];
while(!feof(fp))
{
memset(strTemp,0,sizeof(strTemp));
fgets(strTemp,sizeof(strTemp),fp);
strTemp[strlen(strTemp)-1]=0;
vUrlList.push_back(strTemp);
}
fclose(fp);
if ((Ret = WSAStartup(0x0202, &wsaData)) != 0)
{
printf("WSAStartup failed with error %d\n", Ret);
return;
}
cs = socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);
if(INVALID_SOCKET==cs)
{
printf("创建套接字失败\n");
return ;
}
struct hostent *newHost;
newHost = gethostbyname(strHostName);
if(newHost==NULL)
{
printf("不能连通远程主机【%s】,%d\n",strHostName,GetLastError());
return;
}
char strIpAddress[20];
memset(strIpAddress,0,sizeof(strIpAddress));
strncpy(strIpAddress,inet_ntoa(*(in_addr *)newHost->h_addr_list[0]),sizeof(strIpAddress));
servAddr.sin_family=AF_INET;
servAddr.sin_addr.s_addr=inet_addr(strIpAddress);
servAddr.sin_port=htons(80);
if((g_hCompletion=CreateIoCompletionPort(INVALID_HANDLE_VALUE,NULL,0,0))==NULL)
{
printf( "CreateIoCompletionPort failed with error: %d\n", GetLastError());
return;
}
GetSystemInfo(&SystemInfo);
for(DWORD i = 0; i < SystemInfo.dwNumberOfProcessors * 2; i++)
{
if ((ThreadHandle[i] = CreateThread(NULL, 0, ServerWorkerThread, (LPVOID)g_hCompletion,0, NULL)) == NULL)
{
printf("CreateThread() failed with error %d\n", GetLastError());
return;
}
}
for(int k=0;k<vUrlList.size();k++)
{
PostRequest((const char *)strHostName,vUrlList[k].c_str());
}
WaitForMultipleObjects(4,ThreadHandle,true,INFINITE);
CloseHandle(ThreadHandle[0]);
CloseHandle(ThreadHandle[1]);
CloseHandle(ThreadHandle[2]);
CloseHandle(ThreadHandle[3]);
}
DWORD WINAPI ServerWorkerThread(LPVOID lpParam)
{
HOST_INFO_T *pHostInfo;
LPPER_IO_OPERATION_DATA lpHandlerData;
BOOL bOK=FALSE;
DWORD dwTrans=0;
// 得到完成端口对象句柄
HANDLE hCompletion = (HANDLE)lpParam;
while(true)
{
bOK = GetQueuedCompletionStatus(hCompletion,&dwTrans,(LPDWORD)&pHostInfo,(LPOVERLAPPED*)&lpHandlerData,INFINITE);
if (dwTrans==0 && (lpHandlerData->nOperationType==OP_READ || lpHandlerData->nOperationType==OP_WRITE))
{
SavHttpPage(lpHandlerData->Buffer);
fprintf(stderr,"成功抓取网页【%s】\n",lpHandlerData->strUrl);
closesocket(pHostInfo->Socket);
free(pHostInfo);
free(lpHandlerData);
continue;
}
DWORD dwflag=0;
switch (lpHandlerData->nOperationType)
{
case OP_READ:
WSABUF wsaData;
lpHandlerData->nRecvBytes+=dwTrans;
wsaData.buf = lpHandlerData->Buffer+lpHandlerData->nRecvBytes;
wsaData.len = 1024*10;
WSARecv(pHostInfo->Socket,&wsaData,1,&dwTrans,&dwflag,&lpHandlerData->Overlapped,NULL);
break;
case OP_WRITE:
break;
}
}
return 0;
}