大文件关键字搜索, 一个8G多的文件单线程只要27秒, 多线程反而要一分多钟???

Sandrer 2019-08-07 11:44:12
加精
是不是代码有问题???

#include "stdafx.h"

static volatile ULONGLONG m_ullKeyCount = 0;
static SRWLOCK m_LockLog;

typedef struct _MAPPINGINFO
{
HANDLE hMap;
LPCSTR pchKey;
LPSTR pchTail;
LPSTR pchHead;
DWORD dwKeyLen;
ULARGE_INTEGER uiStartPoint;
ULARGE_INTEGER uiBlockSize;
} MAPPINGINFO, *PMAPPINGINFO;

#define MAX_BLOCK_SIZE (512 * 1024)

LPCTSTR m_pszPathName = _T("E:\\vm\\Win7_x86\\Win7_x86.vmdk");

static void _log(LPCTSTR fmt, ...)
{
SYSTEMTIME st;
va_list ap;

AcquireSRWLockExclusive(&m_LockLog);

GetLocalTime(&st);
_tprintf_s(_T("%02u:%02u:%02u.%03u >> "), st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);

va_start(ap, fmt);
_vtcprintf_s(fmt, ap);

_tprintf_s(_T("\n"));

ReleaseSRWLockExclusive(&m_LockLog);
}

#ifdef _UNICODE
#define log(fmt, ...) _log(L##fmt, __VA_ARGS__)
#else
#define log(fmt, ...) _log(fmt, __VA_ARGS__)
#endif

static unsigned __stdcall thread_search(void *);

void TestLargeFileRead()
{
UINT i, j, len;
HANDLE hFile;
HANDLE hMap = NULL;
HANDLE *phThreads = NULL;
MAPPINGINFO *pInfo = NULL;
DWORD dwThreadCount = 0;
DWORD dwProcessors = 0;
LPSTR pchHeadTail = NULL;
SYSTEM_INFO si;
ULARGE_INTEGER uiLeftSize;
ULARGE_INTEGER uiFileSize;
ULARGE_INTEGER uiBlockSize;
ULARGE_INTEGER uiBeginPoint;
ULONGLONG ullCompare;
ULONGLONG ullBegin, ullEnd;
CHAR chKey[64];

InitializeSRWLock(&m_LockLog);

hFile = CreateFile(m_pszPathName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
if (hFile == INVALID_HANDLE_VALUE)
{
log("Could not open file, error %u.\n", GetLastError());
goto _exit;
}
log("File Opened.");

if (!GetFileSizeEx(hFile, (PLARGE_INTEGER)&uiFileSize))
{
CloseHandle(hFile);
log("Could not get file size, error %u.", GetLastError());
goto _exit;
}
log("File Size: %I64u, Block Count: %I64u", uiFileSize.QuadPart, (uiFileSize.QuadPart + (MAX_BLOCK_SIZE - 1)) / MAX_BLOCK_SIZE);

hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
CloseHandle(hFile);
if (hMap == NULL)
{
log("Could not mapping file, error %u.\n", GetLastError());
goto _exit;
}
log("File Mapped.");

GetSystemInfo(&si);
dwProcessors = si.dwNumberOfProcessors;
if (uiFileSize.QuadPart < (512 * 1024 * 1024))
si.dwNumberOfProcessors = 1;

// 与系统分配粒度对齐
uiBlockSize.QuadPart = ((uiFileSize.QuadPart / dwProcessors) + si.dwAllocationGranularity - 1) / si.dwAllocationGranularity * si.dwAllocationGranularity;
dwProcessors = (DWORD)((uiFileSize.QuadPart + (uiBlockSize.QuadPart - 1)) / uiBlockSize.QuadPart);

phThreads = (HANDLE *)malloc(sizeof(HANDLE) * dwProcessors);
pInfo = (MAPPINGINFO *)malloc(sizeof(MAPPINGINFO) * dwProcessors);
if (phThreads == NULL || pInfo == NULL)
{
log("Not enough memory.");
goto _exit;
}

_get_key:

printf("Enter Search Key: ");

gets_s(chKey, _countof(chKey));
if (chKey[0] == 0)
goto _exit;

GetSystemTimeAsFileTime((LPFILETIME)&ullBegin);

len = strlen(chKey);

pchHeadTail = (LPSTR)malloc(sizeof(char) * (len + 1) * 2);
if (pchHeadTail == NULL)
{
log("Not enough memory.");
goto _exit;
}

uiBeginPoint.QuadPart = 0;
uiLeftSize = uiFileSize;
dwThreadCount = 0;

for (i = 0; i < dwProcessors; i++)
{
pInfo[i].hMap = hMap;
pInfo[i].pchKey = chKey;
pInfo[i].dwKeyLen = len;
pInfo[i].uiStartPoint = uiBeginPoint;
if (uiLeftSize.QuadPart < uiBlockSize.QuadPart)
pInfo[i].uiBlockSize = uiLeftSize;
else
pInfo[i].uiBlockSize = uiBlockSize;

uiBeginPoint.QuadPart += pInfo[i].uiBlockSize.QuadPart;
uiLeftSize.QuadPart -= pInfo[i].uiBlockSize.QuadPart;

phThreads[i] = (HANDLE)_beginthreadex(NULL, 0, thread_search, &pInfo[i], 0, NULL);
if (phThreads[i] == NULL)
break;

dwThreadCount++;
}

WaitForMultipleObjects(dwThreadCount, phThreads, TRUE, INFINITE);
for (i = 0; i < dwThreadCount; i++)
CloseHandle(phThreads[i]);

// TODO: 将每个分块中的头/尾组合, 然后需要比较是否有关键字
free(pchHeadTail);

GetSystemTimeAsFileTime((LPFILETIME)&ullEnd);

log("Time Used: %u.%04u ms, Key Count: %I64u",
(DWORD)((ullEnd - ullBegin) / 10000),
(DWORD)((ullEnd - ullBegin) % 10000),
m_ullKeyCount);

goto _get_key;

_exit:

if (phThreads != NULL) free(phThreads);
if (pInfo != NULL) free(pInfo);
if (hMap != NULL) CloseHandle(hMap);
}

static unsigned __stdcall thread_search(void *p)
{
UINT i, j;
UINT cmp_len = 0;
PVOID pView = NULL;
PBYTE pData = NULL;
LPCSTR pchKey;
DWORD dwKeyLen;
DWORD dwMapSize;
ULARGE_INTEGER uiMapPoint;
ULARGE_INTEGER uiLeftSize;
PMAPPINGINFO pInfo = (PMAPPINGINFO)p;

log("Handle data from %I64u, size %I64u.", pInfo->uiStartPoint.QuadPart, pInfo->uiBlockSize.QuadPart);

pchKey = pInfo->pchKey;
dwKeyLen = pInfo->dwKeyLen;
uiLeftSize = pInfo->uiBlockSize;
uiMapPoint = pInfo->uiStartPoint;

while (uiLeftSize.QuadPart)
{
dwMapSize = MAX_BLOCK_SIZE;

if (uiLeftSize.QuadPart < (ULONGLONG)dwMapSize)
{
uiMapPoint.QuadPart = pInfo->uiStartPoint.QuadPart + (pInfo->uiBlockSize.QuadPart - (pInfo->uiBlockSize.QuadPart % dwMapSize));
dwMapSize = uiLeftSize.LowPart;
}

pView = MapViewOfFile(pInfo->hMap, FILE_MAP_READ, uiMapPoint.HighPart, uiMapPoint.LowPart, dwMapSize);
if (pView == NULL)
{
log("Could not handle file, error %u.", GetLastError());
goto _exit;
}

uiLeftSize.QuadPart -= dwMapSize;

pData = (PBYTE)pView;
for (i = 0; i < dwMapSize; i++)
{
if (pData[i] == pchKey[cmp_len])
{
for (j = cmp_len + 1; j < dwKeyLen; j++)
{
if (dwMapSize <= i + j)
{
cmp_len = j;
goto _next;
}
else if (pData[i + j] != pchKey[j])
{
cmp_len = 0;
goto _next;
}
}

// '12': 849816
i += dwKeyLen - cmp_len - 1;
InterlockedIncrement64((LONGLONG *)&m_ullKeyCount);
}

cmp_len = 0;

_next:
;
}

uiMapPoint.QuadPart += MAX_BLOCK_SIZE;

UnmapViewOfFile(pView);
pView = NULL;
}

_exit:
if (pView != NULL)
UnmapViewOfFile(pView);

return 0;
}
...全文
3560 57 打赏 收藏 转发到动态 举报
AI 作业
写回复
用AI写文章
57 条回复
切换为时间正序
请发表友善的回复…
发表回复
哟喂老弟 2020-04-14
  • 打赏
  • 举报
回复
大佬带我飞丫
Dnils 2020-04-12
  • 打赏
  • 举报
回复
有时候会这样的,看硬件
造轮子 2020-04-09
  • 打赏
  • 举报
回复
谢谢分享,大佬们辛苦了
youke321zou 2020-04-07
  • 打赏
  • 举报
回复
好复杂的样子
sun79564182 2020-04-05
  • 打赏
  • 举报
回复
坐等高人指点
shbagb 2020-03-29
  • 打赏
  • 举报
回复
好好好好好好好好
万花筒哈哈 2020-03-27
  • 打赏
  • 举报
回复
磁盘i/o接口问题。
pgz11111 2020-03-22
  • 打赏
  • 举报
回复
ok33333333333
rongjack99 2020-03-17
  • 打赏
  • 举报
回复
看不懂!!!
leeming163 2020-03-15
  • 打赏
  • 举报
回复
机器性能差,多线性下性能糟糕
aka没花呗 2020-03-14
  • 打赏
  • 举报
回复
多线程所消耗的比单线程多啊,可能这个更适合单线程运行
向大佬看齐 2020-03-12
  • 打赏
  • 举报
回复
不知道啊啊啊啊啊啊啊啊
sghui002 2020-03-07
  • 打赏
  • 举报
回复
学习了。感谢更新
IHXI 2020-03-06
  • 打赏
  • 举报
回复
多个硬盘RAID
Amazing@@@ 2020-03-05
  • 打赏
  • 举报
回复
增加内存的容量
待续_1006 2020-03-03
  • 打赏
  • 举报
回复
学习了!几位大神都说的不错!
qq_46247784 2020-03-02
  • 打赏
  • 举报
回复
我裂开了看不懂
weixin_44040778 2020-02-27
  • 打赏
  • 举报
回复
多线程不一定干的快。
兼言MKER 2020-02-25
  • 打赏
  • 举报
回复
线程不是越多越好,过多时线程同步会消耗不少的时间片。理论上讲线程越少越快吧
阳光与肉肉 2020-02-24
  • 打赏
  • 举报
回复
这里都是大神,我不会操作,但是对我太重要了,你们谁能加我好友帮帮忙
加载更多回复(37)

19,472

社区成员

发帖
与我相关
我的任务
社区描述
VC/MFC 图形处理/算法
社区管理员
  • 图形处理/算法社区
加入社区
  • 近7日
  • 近30日
  • 至今
社区公告
暂无公告

试试用AI创作助手写篇文章吧