蛙蛙请教:把一段c算法代码转换成c#代码。
蛙蛙请教:把一段c算法代码转换成c#代码。
这是一段剪贴板的数据转换算法代码,请帮忙把UTF8ToHtml转换成c#代码,我转了一下,根本不能用。
再帮忙写下注释,简单说一下原理,谢谢。
原文如下:
I was working on a project where I had to paste into textbox HTML, copied from the Browser. A quick search on "HTML Clipboard Format" in MSDN gives you an article that thoroughly explained how HTML is kept in the Clipboard. Unfortunately, this article tells you that it's kept in UTF-8 format without explaining how to convert from UTF-8 back to HTML. So I had do some research on my own.
UTF-8 is the format that allows using Unicode characters in ASCII text by embedding a special token, &#code;, into the text, where the code is Unicode code (in decimal format) for the symbol. For some symbols there are special names. An example " " is " "... You can jump to the specification if you need more examples.
Here is a UTF8ToHtml function, which converts from UTF-8 to HTML. The algorithm is not explained, but you can read more about it here.
//utf8 - pointer to UTF8 formatted text. dwSize - size of UTF8 text; ptr is the pointer to Output buffer.
//The OnClickedPastehtml is the handler for BN_CLICK event of the button in Dialog box. IDC_TEXT is the multiline text box.
void UTF8ToHtml(BYTE *utf8, DWORD dwSize, CHAR *ptr )
{
int code;
BYTE *end = utf8 + dwSize;
while( utf8 < end )
{
code = 0;
if( (*utf8 & 0xF0) == 0xF0 )
{
code = (((*utf8)&0x0F) << 18) | (((*(utf8+1))
& 0x7F)<<12) | (((*(utf8+2)) & 0x7F)<<6)
| ((*(utf8+3)) & 0x7F );
utf8+=3;
}
else
{
if( (*utf8 & 0xE0) == 0xE0 )
{
code = (((*utf8)&0x1F) << 12) | (((*(utf8+1))
& 0x7F)<<6 ) | ((*(utf8+2)) & 0x7F );
utf8+=2;
}
else
{
if( (*utf8 & 0xC0) == 0xC0 )
{
code = (((*utf8)&0x3F) << 6) | ((*(utf8+1)) & 0x7F) ;
utf8+=1;
}
}
}
if( code == 0 )
{
*ptr = *utf8;
}
else
{
char s[10];
switch(code)
{
case 160:
strcpy(s, "& ");
break;
case 34:
strcpy(s, "&");
break;
case 36:
strcpy( s, "&&");
break;
case 60:
strcpy( s, "&<");
break;
case 62:
strcpy( s, "&>");
break;
default:
sprintf( s, "&#%d;", code );
break;
}
strcpy( ptr, s );
ptr += strlen(s)-1;
}
utf8++;
ptr++;
}
*ptr = 0;
}
LRESULT CDialog::OnClickedPastehtml( WORD wNotifyCode,
WORD wID,
HWND hWndCtl,
BOOL& bHandled)
{
if (!OpenClipboard() )
return 0;
UINT uHtmlFormat = RegisterClipboardFormat("HTML Format");
UINT uFormat = uHtmlFormat;
if( IsClipboardFormatAvailable( uHtmlFormat ) == FALSE )
{
if( IsClipboardFormatAvailable( CF_TEXT ) == FALSE )
return 0;
uFormat = CF_TEXT;
}
HGLOBAL hglb;
LPTSTR lptstr;
hglb = GetClipboardData(uFormat);
if (hglb != NULL)
{
lptstr = (LPTSTR)GlobalLock(hglb);
if (lptstr != NULL)
{
char *ptr1 = strstr( lptstr, "<!--StartFragment-->");
if( ptr1 != 0 )
{
ptr1 += 20;
char * ptr2 = strstr( lptstr, "<!--EndFragment-->");
int iSize = (ptr2 - ptr1);
char * tmp = (char*)_alloca( iSize *2);
UTF8ToHtml((BYTE*)ptr1, iSize, tmp );
//memcpy(tmp, ptr1, iSize );
//tmp[iSize] = 0;
SetDlgItemText(IDC_TEXT, tmp );
}
else
SetDlgItemText(IDC_TEXT, lptstr );
GlobalUnlock(hglb);
}
}
CloseClipboard();
return 0;
}
这是我转换的代码
private void FTopMost_DragDrop(object sender, DragEventArgs e)
{
strCont = e.Data.GetData(DataFormats.Html, true).ToString();
int start = strCont.IndexOf("<!--StartFragment-->");
int end = strCont.IndexOf("<!--EndFragment-->");
string s = strCont.Substring(start + 20, end - start - 20);
FNewPost f = new FNewPost();
f.HTML = UTF8ToHtml(s);
f.Show();
f.Activate();
}
string UTF8ToHtml(string utf8)
{
string ptr = null;
int code;
for (int i = 0; i < utf8.Length; i++ )
{
code = 0;
if ((utf8[i]) == 0xF0)
{
code = (((utf8[i]) & 0x0F) << 18) | ((((utf8[i] + 1)) & 0x7F) << 12) | ((((utf8[i] + 2)) & 0x7F) << 6) | (((utf8[i] + 3)) & 0x7F);
i += 3;
}
else
{
if ((utf8[i]) == 0xE0)
{
code = (((utf8[i]) & 0x1F) << 12) | ((((utf8[i] + 1)) & 0x7F) << 6) | (((utf8[i] + 2)) & 0x7F);
i += 2;
}
else
{
if ((utf8[i]) == 0xC0)
{
code = (((utf8[i]) & 0x3F) << 6) | (((utf8[i] + 1)) & 0x7F);
i += 1;
}
}
}
if (code == 0)
{
ptr += utf8[i];
}
else
{
string s = null;
switch (code)
{
case 160:
s = "& ";
break;
case 34:
s = "&";
break;
case 36:
s = "&&";
break;
case 60:
s = "&<";
break;
case 62:
s = "&>";
break;
default:
//sprintf( s, "&#%d;", code );
s = "&" + code.ToString();
break;
}
ptr += s;
}
}
return ptr;
}
这是一段剪贴板数据,当然这里没有中文
Version:0.9
StartHTML:71
EndHTML:170
StartFragment:140
EndFragment:160
StartSelection:140
EndSelection:160
<!DOCTYPE>
<HTML>
<HEAD>
<TITLE>The HTML Clipboard</TITLE>
<BASE HREF="http://sample/specs">
</HEAD>
<BODY>
<!--StartFragment -->
<P>The Fragment</P>
<!--EndFragment -->
</BODY>
</HTML>