16,550
社区成员
发帖
与我相关
我的任务
分享int nX =strData.Find(_T(","),nIndex+nRand);
int nY = strData.Find(_T("。"),nIndex+nRand);CString CFileProcessDlg::Data(CString strData)
{
CString strMax;
CString strMin;
GetDlgItem(IDC_EDIT1)->GetWindowText(strMin);
GetDlgItem(IDC_EDIT2)->GetWindowText(strMax);
CString strNewData;
CString strTemp;
int nMax =_ttoi(strMax);
int nMin =_ttoi(strMin);
int nLen = strData.GetLength();
int nIndex = 0;
int nRand =0;
int nNum = 0;
for(int i=0;nIndex<nLen;i++)
{
nRand =rand()/(double)RAND_MAX *(nMax - nMin) + nMin;
int nX =strData.Find(",",nRand);
int nY = strData.Find("。",nRand);
if(nX<nY)
nNum =nX;
else
nNum =nY;
if(nNum<1)
nNum = nRand;
strTemp =strData.Mid(nIndex,nNum);
nIndex+=nNum+1;
strTemp+="\n";
strNewData += strTemp ;
}
return strNewData ;
}
CStringA ProcessData(LPCSTR strData, LPCWSTR Split, int nMin, int nMax)
{
CStringA strNewData;
int icp = CP_THREAD_ACP;
int iWchLen = MultiByteToWideChar(icp, 0, strData, -1, NULL, 0);
WCHAR *pWchData = new WCHAR[iWchLen + 1];
MultiByteToWideChar(icp, 0, strData, -1, pWchData, iWchLen);
pWchData[iWchLen] = 0;
int nIndex = 0;
while(nIndex<iWchLen-1)
{
int nRand = (int)(rand()/(double)RAND_MAX *(nMax - nMin) + nMin);
WCHAR *pWStrData = pWchData + nIndex;
WCHAR *pSub = wcsstr(pWStrData + nRand, Split);
int iNewLen;
if(pSub == NULL)
iNewLen = wcslen(pWStrData);
else
iNewLen = (int)(INT_PTR)((pSub + wcslen(Split)) - pWStrData);
int imbLen = WideCharToMultiByte(icp, 0, pWStrData, iNewLen, NULL, 0, NULL, NULL);
CHAR *szTemp = new CHAR[imbLen + 3];
WideCharToMultiByte(icp, 0, pWStrData, iNewLen, szTemp, imbLen, NULL, NULL);
szTemp[imbLen + 0] = '\r';
szTemp[imbLen + 1] = '\n';
szTemp[imbLen + 2] = '\0';
nIndex += iNewLen;
strNewData += szTemp;
OutputDebugStringA(szTemp);
delete [] szTemp;
}
delete []pWchData;
return strNewData ;
}
void CDlg4Dlg::OnButton1()
{
CFileDialog dlg(TRUE);
if(dlg.DoModal() == IDOK)
{
try
{
CFile file(dlg.GetPathName(), CFile::modeRead);
int iLen = (int)file.GetLength();
CHAR *pSrc = new CHAR[iLen + 1];
file.Read(pSrc, iLen);
pSrc[iLen] = 0;
file.Close();
CStringA szRet = ProcessData(pSrc, L"。", 50, 100);
OutputDebugStringA(szRet);
OutputDebugStringA("\r\n");
delete [] pSrc;
}
catch(CFileException *e)
{
e->ReportError();
e->Delete();
}
}
}
汉字全部都提取出来了 里面还有很多符号和数字 就是分段出的问题。。。 先把文本检索一次,把每一个汉字都提出来,然后再分段
先把文本检索一次,把每一个汉字都提出来,然后再分段
strNewData += strTemp 之前把 strTemp TRACE出来, 并设置断点,F5运行分析
或者转成UNICODE 再分段
或者转成UNICODE 再分段