取汉字首字母C/C++代码

iamknight 2004-05-09 08:55:04
查了好多资料,找到了不少取汉字首字母的方法,却没有发现现成可用的C/C++源代码。因为正好用到,就自己写了一个,在此贴出共享,以向提供参考资料的大虾们表示敬意。

/*
 * Lookup tables used by GetFirstLetter() in this file.
 *
 * NOTE(review): identifiers beginning with an underscore followed by an
 * uppercase letter are reserved for the implementation; the names are kept
 * unchanged here so that existing references keep working.
 */

/*
 * Letters emitted for the table-driven range. _FirstLetter[j] is the result
 * for every sector/position code that is >= _SecPosValue[j] and below the
 * next threshold.
 */
const char _FirstLetter[] = {'A', 'B','C','D','E','F','G','H','J','K','L','M','N','O','P','Q','R','S','T','W','X','Y','Z'};

/*
 * Ascending thresholds compared against (sector * 100 + position);
 * one entry per element of _FirstLetter.
 */
const int _SecPosValue[] = {1601,1637,1833,2078,2274,2302,2433,2594,2787,3106,3212,3472,3635,3722,3730,3858,4027,4086,4390,4558,4684,4925,5249 };

/*
 * Backing storage for _SecondSecTable: one letter per character, indexed by
 * (sector - 56) * 94 + position - 1. The array is initialised at compile
 * time instead of being built with new/strcpy/strcat at file scope (which is
 * not valid C or C++ and never released the allocation). The size stays at
 * 4000 bytes so that every index GetFirstLetter() may use (0..3007) is in
 * bounds; bytes past the end of the text are zero.
 * NOTE(review): some entries are '[' rather than a letter - presumably
 * characters without a known initial; confirm against the data's origin.
 */
static char SecondSecTableStorage[4000] =
    "CJWGNSPGCGNE[Y[BTYYZDXYKYGT[JNNJQMBSGZSCYJSYY[PGKBZGY[YWJKGKLJYWKPJQHY[W[DZLSGMRYPYWWCCKZNKYYGTTNJJNYKKZYT"
    "CJNMCYLQLYPYQFQRPZSLWBTGKJFYXJWZLTBNCXJJJJTXDTTSQZYCDXXHGCK[PHFFSS[YBGXLPPBYLL[HLXS[ZM[JHSOJNGHDZQYKLGJHSGQZHXQGKEZZWYSCSCJXY"
    "EYXADZPMDSSMZJZQJYZC[J[WQJBYZPXGZNZCPWHKXHQKMWFBPBYDTJZZKQHYLYGXFPTYJYYZPSZLFCHMQSHGMXXSXJ[[DCSBBQBEFSJYHXWGZKPYLQBGLDLCCTNMA"
    "YDDKSSNGYCSGXLYZAYBNPTSDKDYLHGYMYLCXPY[JNDQJWXQXFYYFJLEJPZRXCCQWQQSBNKYMGPLBMJRQCFLNYMYQMSQYRBCJTHZTQFRXQHXMJJCJLXQGJMSHZKBSW"
    "YEMYLTXFSYDSWLYCJQXSJNQBSCTYHBFTDCYZDJWYGHQFRXWCKQKXEBPTLPXJZSRMEBWHJLBJSLYYSMDXLCLQKXLHXJRZJMFQHXHWYWSBHTRXXGLHQHFNM[YKLDYXZ"
    "PYLGG[MTCFPAJJZYLJTYANJGBJPLQGDZYQYAXBKYSECJSZNSLYZHSXLZCGHPXZHZNYTDSBCJKDLZAYFMYDLEBBGQYZKXGLDNDNYSKJSHDLYXBCGHXYPKDJMMZNGMM"
    "CLGWZSZXZJFZNMLZZTHCSYDBDLLSCDDNLKJYKJSYCJLKWHQASDKNHCSGANHDAASHTCPLCPQYBSDMPJLPZJOQLCDHJJYSPRCHN[NNLHLYYQYHWZPTCZGWWMZFFJQQQ"
    "QYXACLBHKDJXDGMMYDJXZLLSYGXGKJRYWZWYCLZMSSJZLDBYD[FCXYHLXCHYZJQ[[QAGMNYXPFRKSSBJLYXYSYGLNSCMHZWWMNZJJLXXHCHSY[[TTXRYCYXBYHCSM"
    "XJSZNPWGPXXTAYBGAJCXLY[DCCWZOCWKCCSBNHCPDYZNFCYYTYCKXKYBSQKKYTQQXFCWCHCYKELZQBSQYJQCCLMTHSYWHMKTLKJLYCXWHEQQHTQH[PQ[QSCFYMNDM"
    "GBWHWLGSLLYSDLMLXPTHMJHWLJZYHZJXHTXJLHXRSWLWZJCBXMHZQXSDZPMGFCSGLSXYMJSHXPJXWMYQKSMYPLRTHBXFTPMHYXLCHLHLZYLXGSSSSTCLSLDCLRPBH"
    "ZHXYYFHB[GDMYCNQQWLQHJJ[YWJZYEJJDHPBLQXTQKWHLCHQXAGTLXLJXMSL[HTZKZJECXJCJNMFBY[SFYWYBJZGNYSDZSQYRSLJPCLPWXSDWEJBJCBCNAYTWGMPA"
    "PCLYQPCLZXSBNMSGGFNZJJBZSFZYNDXHPLQKZCZWALSBCCJX[YZGWKYPSGXFZFCDKHJGXDLQFSGDSLQWZKXTMHSBGZMJZRGLYJBPMLMSXLZJQQHZYJCZYDJWBMYKL"
    "DDPMJEGXYHYLXHLQYQHKYCWCJMYYXNATJHYCCXZPCQLBZWWYTWBQCMLPMYRJCCCXFPZNZZLJPLXXYZTZLGDLDCKLYRZZGQTGJHHGJLJAXFGFJZSLCFDQZLCLGJDJC"
    "SNZLLJPJQDCCLCJXMYZFTSXGCGSBRZXJQQCTZHGYQTJQQLZXJYLYLBCYAMCSTYLPDJBYREGKLZYZHLYSZQLZNWCZCLLWJQJJJKDGJZOLBBZPPGLGHTGZXYGHZMYCN"
    "QSYCYHBHGXKAMTXYXNBSKYZZGJZLQJDFCJXDYGJQJJPMGWGJJJPKQSBGBMMCJSSCLPQPDXCDYYKY[CJDDYYGYWRHJRTGZNYQLDKLJSZZGZQZJGDYKSHPZMTLCPWNJ"
    "AFYZDJCNMWESCYGLBTZCGMSSLLYXQSXSBSJSBBSGGHFJLYPMZJNLYYWDQSHZXTYYWHMZYHYWDBXBTLMSYYYFSXJC[DXXLHJHF[SXZQHFZMZCZTQCXZXRTTDJHNNYZ"
    "QQMNQDMMG[YDXMJGDHCDYZBFFALLZTDLTFXMXQZDNGWQDBDCZJDXBZGSQQDDJCMBKZFFXMKDMDSYYSZCMLJDSYNSBRSKMKMPCKLGDBQTFZSWTFGGLYPLLJZHGJ[GY"
    "PZLTCSMCNBTJBQFKTHBYZGKPBBYMTDSSXTBNPDKLEYCJNYDDYKZDDHQHSDZSCTARLLTKZLGECLLKJLQJAQNBDKKGHPJTZQKSECSHALQFMMGJNLYJBBTMLYZXDCJPL"
    "DLPCQDHZYCBZSCZBZMSLJFLKRZJSNFRGJHXPDHYJYBZGDLQCSEZGXLBLGYXTWMABCHECMWYJYZLLJJYHLG[DJLSLYGKDZPZXJYYZLWCXSZFGWYYDLYHCLJSCMBJHB"
    "LYZLYCBLYDPDQYSXQZBYTDKYXJY[CNRJMPDJGKLCLJBCTBJDDBBLBLCZQRPPXJCJLZCSHLTOLJNMDDDLNGKAQHQHJGYKHEZNMSHRP[QQJCHGMFPRXHJGDYCHGHLYR"
    "ZQLCYQJNZSQTKQJYMSZSWLCFQQQXYFGGYPTQWLMCRNFKKFSYYLQBMQAMMMYXCTPSHCPTXXZZSMPHPSHMCLMLDQFYQXSZYYDYJZZHQPDSZGLSTJBCKBXYQZJSGPSXQ"
    "ZQZRQTBDKYXZKHHGFLBCSMDLDGDZDBLZYYCXNNCSYBZBFGLZZXSWMSCCMQNJQSBDQSJTXXMBLTXZCLZSHZCXRQJGJYLXZFJPHYMZQQYDFQJJLZZNZJCDGZYGCTXMZ"
    "YSCTLKPHTXHTLBJXJLXSCDQXCBBTJFQZFSLTJBTKQBXXJJLJCHCZDBZJDCZJDCPRNPQCJPFCZLCLZXZDMXMPHJSGZGSZZQLYLWTJPFSYASMCJBTZKYCWMYTCSJJLJ"
    "CQLWZMALBXYFBPNLSFHTGJWEJJXXGLLJSTGSHJQLZFKCGNNNSZFDEQFHBSAQTGYLBXMMYGSZLDYDQMJJRGBJTKGDHGKBLQKBDMBYLXWCXYTTYBKMRTJZXQJBHLMHM"
    "JJZMQASLDCYXYQDLQCAFYWYXQHZ.";

/* Public handle to the table above; same name and type as before. */
char * _SecondSecTable = SecondSecTableStorage;

/*
 * GetFirstLetter - build the string of initial letters for a byte string.
 *
 * Bytes in the range 1..127 are copied to the result unchanged. Any other
 * byte is treated as the lead byte of a two-byte character: the lead and
 * trail bytes are each reduced by 160 to a sector and a position code, and
 *   - if sector * 100 + position lies in 1601..5589, the letter comes from
 *     the _SecPosValue / _FirstLetter threshold tables;
 *   - otherwise, if (sector - 56) * 94 + position - 1 lies in 0..3007, the
 *     letter is read from _SecondSecTable at that offset;
 *   - otherwise nothing is emitted for that two-byte pair.
 *
 * src:     NUL-terminated input; NULL is treated as an empty string.
 * returns: pointer to an internal static buffer holding at most 255
 *          characters plus the terminator. The buffer is overwritten by the
 *          next call, so copy the result if it must be kept; the function
 *          is therefore not reentrant. The caller must not free it.
 *
 * Changes from the earlier version: the result no longer lives in an
 * automatic array (returning that was a dangling pointer), output is
 * truncated at 255 characters instead of overrunning the buffer, and a lead
 * byte with no trail byte at the end of the input is dropped.
 */
char* GetFirstLetter(char * src)
{
    enum { RET_CAPACITY = 256, LAST_LETTER_INDEX = 22 };
    static char ret[RET_CAPACITY];
    size_t srcLen;
    size_t i;
    int iCount = 0;

    memset(ret, '\0', sizeof ret);
    if (src == NULL)
    {
        return ret;
    }

    /* Measure once rather than on every loop iteration. */
    srcLen = strlen(src);

    /* Stop as soon as the result is full; the last slot stays '\0'. */
    for (i = 0; i < srcLen && iCount < RET_CAPACITY - 1; i++)
    {
        /* Work on unsigned byte values so the result does not depend on
         * whether plain char is signed on this platform. */
        unsigned char lead = (unsigned char)src[i];
        unsigned char trail;
        int iSectorCode;
        int iPositionCode;
        int iSecPosCode;

        if (lead < 128)
        {
            /* Single-byte character: copy through. */
            ret[iCount] = src[i];
            iCount++;
            continue;
        }

        if (i + 1 >= srcLen)
        {
            /* Lead byte without a trail byte: do not consume the NUL. */
            break;
        }
        trail = (unsigned char)src[i + 1];

        iSectorCode = (int)lead - 160;
        iPositionCode = (int)trail - 160;
        if (iSectorCode < 0)
        {
            iSectorCode += 256;
        }
        if (iPositionCode < 0)
        {
            iPositionCode += 256;
        }
        iSecPosCode = iSectorCode * 100 + iPositionCode;

        if ((iSecPosCode > 1600) && (iSecPosCode < 5590))
        {
            /* Scan thresholds from the highest down; the first one that is
             * not above the code selects the letter. */
            int j;
            for (j = LAST_LETTER_INDEX; j >= 0; j--)
            {
                if (iSecPosCode >= _SecPosValue[j])
                {
                    ret[iCount] = _FirstLetter[j];
                    iCount++;
                    break;
                }
            }
        }
        else
        {
            int iOffset = (iSectorCode - 56) * 94 + iPositionCode - 1;
            if ((iOffset >= 0) && (iOffset <= 3007))
            {
                ret[iCount] = _SecondSecTable[iOffset];
                iCount++;
            }
        }

        /* Skip the trail byte that was consumed together with the lead. */
        i++;
    }

    ret[RET_CAPACITY - 1] = '\0';
    return ret;
}

本代码参考自PB相关源代码,
与原代码受同样的限制。
原代码说明如下:
//xuejun, 19990821
//Function name : uf_GetFirstLetter
//Used to : 返回给定汉字串的首字母串,即声母串
//Input Arguments: as_InputString - string , 给定的汉字串
//Return Value : ls_ReturnString - String , 给定的汉字串的声母串,一律为小写
//Notice : 1. 此方法基于汉字的国标汉字库区位编码的有效性,不符合此编码的
// 系统此函数无效!
// 2. 若汉字串含有非汉字字符,如图形符号或ASCII码,则这些非汉字字符
// 将保持不变.
//Sample : ls_rtn = uf_GetFirstLetter("中华人民共和国")
// ls_rtn will be : zhrmghg

同时,本程序限制生成的结果字符串最多不能超过255个字符。
...全文
49 点赞 收藏 1
写回复
1 条回复
切换为时间正序
当前发帖距今超过3年,不再开放新的回复
发表回复
code8238 2004-05-11
up
回复
相关推荐
发帖
资源
创建于2007-09-28

2581

社区成员

VC/MFC 资源
申请成为版主
帖子事件
创建了帖子
2004-05-09 08:55
社区公告
暂无公告