703
社区成员
发帖
与我相关
我的任务
分享
strFind = "<h3 id=\"Actor\" class=\"lh20 px14 mt30\">演员 Actor:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length(), 8428);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("演员:" + CrnRemoveHTMLTag(strTemp));
MemoActor->Lines->Add(CrnRemoveHTMLTag(strTemp));
OutputLog("正在获取网页数据...");
String strUrl = “http://movie.mtime.com/56856” ;
TMemoryStream *ms = new TMemoryStream;
IdHTTP1->Get(strUrl, ms);
LPSTR lpBuf = new char[ms->Size];
ms->Position = 0;
ms->Read(lpBuf, ms->Size);
delete ms;
String strText = Utf8ToAnsi(AnsiString(lpBuf));
delete []lpBuf;
OutputLog("网页数据获取完毕, 正在分析页面元素...");
String strFind, strTemp;
int nPos;
// 分析 上映日期
strFind = "<strong class=\"bold\">上映日期:</strong>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 256);
strTemp = strTemp.SubString(1, strTemp.Pos("<a href") - 1);
OutputLog("上映日期:" + CrnRemoveHTMLTag(strTemp));
MovieTime->Text = CrnRemoveHTMLTag(strTemp);
// 分析 国家/地区
strText = strText.SubString(nPos, strText.Length());
strFind = "<strong>国家/地区:</strong>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 1024);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("国家/地区:" + CrnRemoveHTMLTag(strTemp));
MovieCountry->Text = CrnRemoveHTMLTag(strTemp);
// 分析 类型
strText = strText.SubString(nPos, strText.Length());
strFind = "<strong>类型:</strong>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 1024);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("类型:" + CrnRemoveHTMLTag(strTemp));
//---------------------------------------------------------------------------
strUrl = “http://movie.mtime.com/56856/company_credits.html” ;
TMemoryStream *ms2 = new TMemoryStream;
IdHTTP1->Get(strUrl, ms2);
lpBuf = new char[ms2->Size];
ms2->Position = 0;
ms2->Read(lpBuf, ms2->Size);
delete ms2;
strText = Utf8ToAnsi(AnsiString(lpBuf));
delete []lpBuf;
// 制作公司
strFind = "<h3 class=\"bold lh20 px14\">制作公司:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 2048);
strTemp = strTemp.SubString(1, strTemp.Pos("</h3>") - 1);
OutputLog("制作公司:" + CrnRemoveHTMLTag(strTemp));
//发行公司
strText = strText.SubString(nPos, strText.Length());
strFind = "<h3 class=\"bold lh20 px14\">发行公司:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 2048);
strTemp = strTemp.SubString(1, strTemp.Pos("</h3>") - 1);
OutputLog("发行公司:" + CrnRemoveHTMLTag(strTemp));
//--------------------------------------------------------------------
strUrl = “http://movie.mtime.com/56856/fullcredits.html” ;
TMemoryStream *ms3 = new TMemoryStream;
IdHTTP1->Get(strUrl, ms3);
lpBuf = new char[ms3->Size];
ms3->Position = 0;
ms3->Read(lpBuf, ms3->Size);
delete ms3;
strText = Utf8ToAnsi(AnsiString(lpBuf));
delete []lpBuf;
// 导演
strFind ="<h3 id=\"Director\" class=\"lh20 px14 mt30\">导演 Director:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 1024);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("导演:" + CrnRemoveHTMLTag(strTemp));
//编剧
strText = strText.SubString(nPos, strText.Length());
strFind = "<h3 id=\"Writer\" class=\"lh20 px14 mt30\">编剧 Writer:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 2048);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("编剧:" + CrnRemoveHTMLTag(strTemp));
//演员
strFind = "<h3 id=\"Writer\" class=\"lh20 px14 mt30\">演员 Actor:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length() + 1, 3072);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("演员:" + CrnRemoveHTMLTag(strTemp));
OutputLog("分析完成.");
// 导演
strFind = "<h3 id=\"Director\" class=\"lh20 px14 mt30\">导演 Director:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length(), 256);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("导演:" + CrnRemoveHTMLTag(strTemp));
MemoDirector->Lines->Add(CrnRemoveHTMLTag(strTemp));
//编剧
strText = strText.SubString(nPos, strText.Length());
strFind = "<h3 id=\"Writer\" class=\"lh20 px14 mt30\">编剧 Writer:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length(), 1048);
strTemp = strTemp.SubString(1, strTemp.Pos("</ul>") - 1);
OutputLog("编剧:" + CrnRemoveHTMLTag(strTemp));
MemoWriter->Lines->Add(CrnRemoveHTMLTag(strTemp));
//演员
strText = strText.SubString(nPos, strText.Length());
strFind = "<h3 id=\"Actor\" class=\"lh20 px14 mt30\">演员 Actor:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length(), 8428);
strTemp = strTemp.SubString(1, strTemp.Pos("</em></li></ul>") - 1);
OutputLog("演员:" + CrnRemoveHTMLTag(strTemp));
MemoActor->Lines->Add(CrnRemoveHTMLTag(strTemp));
// Removes HTML tags from a fragment and returns the remaining text.
//
// strHtmlText: the HTML fragment to clean (taken by value; the caller's string
//              is not modified).
// Returns: the text with every "<...>" tag removed, each "</li>" turned into a
//          line break, surrounding whitespace trimmed, tabs removed and
//          non-breaking-space entities replaced by plain spaces.
String __fastcall CrnRemoveHTMLTag(String strHtmlText)
{
    // List items become separate lines. This was added for one specific page
    // and may affect the result for other pages.
    strHtmlText = StringReplace(strHtmlText, "</li>", "\r\n", TReplaceFlags() << rfReplaceAll);

    String strResult;
    int nOpen;
    while ((nOpen = strHtmlText.Pos("<")) > 0)
    {
        // Keep the text in front of the tag.
        strResult += strHtmlText.SubString(1, nOpen - 1);

        // Look for the closing '>' only AFTER the '<'. Searching the whole
        // string would pick up a stray '>' in the preceding text and emit that
        // text twice.
        String strRest = strHtmlText.SubString(nOpen + 1, strHtmlText.Length());
        const int nClose = strRest.Pos(">");
        if (nClose == 0)
        {
            // Unterminated tag (e.g. the fragment was cut mid-tag): drop it.
            // Without this check the loop would never make progress.
            strHtmlText = "";
            break;
        }
        strHtmlText = strRest.SubString(nClose + 1, strRest.Length());
    }
    strResult += strHtmlText;

    strResult = StringReplace(strResult.Trim(), "\t", "", TReplaceFlags() << rfReplaceAll);
    // Replace the non-breaking-space entity with a plain space.
    strResult = StringReplace(strResult, "&nbsp;", " ", TReplaceFlags() << rfReplaceAll);
    // The page also contains the entity without its trailing semicolon
    // (apparently a typo by the page author), so handle that form as well.
    strResult = StringReplace(strResult, "&nbsp", " ", TReplaceFlags() << rfReplaceAll);
    return strResult;
}
// ---------------------------------------------------------------------------
//
// Click handler: downloads the company credits page, extracts the
// production-company list and writes it to the log.
//
// Sender: the control that raised the event (unused).
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    OutputLog("正在获取网页数据...");

    String strText;
    TMemoryStream *ms = new TMemoryStream;
    LPSTR lpBuf = NULL;
    try
    {
        IdHTTP1->Get("http://movie.mtime.com/56856/company_credits.html", ms);

        // One extra byte for the terminating NUL: AnsiString(const char*)
        // reads up to the first NUL, so an unterminated buffer would be read
        // past its end.
        const int nSize = static_cast<int>(ms->Size);
        lpBuf = new char[nSize + 1];
        ms->Position = 0;
        ms->Read(lpBuf, nSize);
        lpBuf[nSize] = '\0';

        strText = Utf8ToAnsi(AnsiString(lpBuf));
    }
    __finally
    {
        // Release the buffer and the stream even when the request throws.
        delete []lpBuf;
        delete ms;
    }

    OutputLog("网页数据获取完毕, 正在分析页面元素...");

    String strFind, strTemp;
    int nPos;

    // Production companies: text after the heading, up to the next "<h3 ".
    // NOTE: Pos() returns 0 when the heading is absent; the slice is then taken
    // from the start of the page and is not meaningful.
    strFind = "<h3 class=\"bold lh20 px14\">制作公司:</h3>";
    nPos = strText.Pos(strFind);
    strTemp = strText.SubString(nPos + strFind.Length(), 1024);
    strTemp = strTemp.SubString(1, strTemp.Pos("<h3 ") - 1);
    OutputLog("制作公司:" + CrnRemoveHTMLTag(strTemp));

    OutputLog("分析完成.");
}
16:29:33 正在获取网页数据...
16:29:35 网页数据获取完毕, 正在分析页面元素...
16:29:35 制作公司:盛日影业公司[英国]
华纳兄弟影片公司[美国]
16:29:35 分析完成.
OutputLog("正在获取网页数据...");
TMemoryStream *ms = new TMemoryStream;
IdHTTP1->Get("http://movie.mtime.com/56856/fullcredits.html", ms);
LPSTR lpBuf = new char[ms->Size];
ms->Position = 0;
ms->Read(lpBuf, ms->Size);
delete ms;
String strText = Utf8ToAnsi(AnsiString(lpBuf));
delete []lpBuf;
OutputLog("网页数据获取完毕, 正在分析页面元素...");
String strFind, strTemp;
int nPos;
// 分析 导演
strFind = "<h3 id=\"Director\" class=\"lh20 px14 mt30\">导演 Director:</h3>";
nPos = strText.Pos(strFind);
strTemp = strText.SubString(nPos + strFind.Length(), 256);
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);
OutputLog("导演:" + CrnRemoveHTMLTag(strTemp));
OutputLog("分析完成.");
23:14:22 正在获取网页数据...
23:14:23 网页数据获取完毕, 正在分析页面元素...
23:14:23 导演:大卫·叶茨 David Yates
23:14:23 分析完成.
// Director lookup using a heading whose attributes are written in a different
// order (class first, then an unquoted id).
strFind = "<h3 class=\"lh20 px14 mt30\" id=Director>导演 Director:</h3>";
nPos = strText.Pos(strFind);
// Take up to 256 characters after the heading, skipping the first character
// that follows it.
strTemp = strText.SubString(nPos + strFind.Length() + 1, 256);
// Keep only the part in front of the first </li>.
strTemp = strTemp.SubString(1, strTemp.Pos("</li>") - 1);