今天看到xuejun的一個取漢字首字符的函數(shù),試用了一下,感覺很好用,不敢獨享,拿出來跟大家分享:
$PBExportHeader$uf_getfirstletter.srf
$PBExportComments$ 返回給定漢字串的首字母串, xuejun , 19990821
global type uf_getfirstletter from function_object
end type
forward prototypes
global function string uf_getfirstletter (string as_inputstring)
end prototypes
global function string uf_getfirstletter (string as_inputstring);//Function name : uf_GetFirstLetter
//Used to : 返回給定漢字串的首字母串,即聲母串
//Input Arguments: as_InputString - string , 給定的漢字串
//Return Value : ls_ReturnString - String , 給定的漢字串的聲母串,一律為小寫
//Notice : 1. 此方法基于漢字的國標(biāo)漢字庫區(qū)位編碼的有效性,不符合此編碼的
// 系統(tǒng)此函數(shù)無效!
// 2. 若漢字串含有非漢字字符,如圖形符號或ASCII碼,則這些非漢字字符
// 將保持不變.
//Sample : ls_rtn = uf_GetFirstLetter("中華人民共和國")
// ls_rtn will be : zhrmghg
//Scripts:
char lc_FirstLetter[23] //存放國標(biāo)一級漢字不同讀音的起始區(qū)位碼對應(yīng)讀音
string ls_ch //臨時單元
string ls_SecondSecTable //存放所有國標(biāo)二級漢字讀音
string ls_ReturnStr //返回串
integer li_SecPosValue[23] //存放國標(biāo)一級漢字不同讀音的起始區(qū)位碼
integer i , j
integer li_SectorCode //漢字區(qū)碼
integer li_PositionCode //漢字位碼
integer li_SecPosCode //漢字區(qū)位碼
integer li_offset //二級字庫偏移量
//Set initial value
li_SecPosValue[]={1601,1637,1833,2078,2274,2302,2433,2594,2787,3106,3212,3472,3635,3722,3730,3858,4027,4086,4390,4558,4684,4925,5249 }
lc_FirstLetter[] = {"A", "B","C","D","E","F","G","H","J","K","L","M","N","O","P","Q","R","S","T","W","X","Y","Z"}</P><P> ls_SecondSecTable="CJWGNSPGCGNE[Y[BTYYZDXYKYGT[JNNJQMBSGZSCYJSYY[PGKBZGY[YWJKGKLJYWKPJQHY[W[DZLSGMRYPYWWCCKZNKYYGTTNJJNYKKZYTCJNMCYLQLYPYQFQRPZSLWBTGKJFYXJWZLTBNCXJJJJTXDTTSQZYCDXXHGCK[PHFFSS[YBGXLPPBYLL[HLXS[ZM[JHSOJNGHDZQYKLGJHSGQZHXQGKEZZWYSCSCJXYEYXADZPMDSSMZJZQJYZC[J[WQJBYZPXGZNZCPWHKXHQKMWFBPBYDTJZZKQHYLYGXFPTYJYYZPSZLFCHMQSHGMXXSXJ[[DCSBBQBEFSJYHXWGZKPYLQBGLDLCCTNMAYDDKSSNGYCSGXLYZAYBNPTSDKDYLHGYMYLCXPY[JNDQJWXQXFYYFJLEJPZRXCCQWQQSBNKYMGPLBMJRQCFLNYMYQMSQYRBCJTHZTQFRXQHXMJJCJLXQGJMSHZKBSWYEMYLTXFSYDSWLYCJQXSJNQBSCTYHBFTDCYZDJWYGHQFRXWCKQKXEBPTLPXJZSRMEBWHJLBJSLYYSMDXLCLQKXLHXJRZJMFQHXHWYWSBHTRXXGLHQHFNM[YKLDYXZPYLGG[MTCFPAJJZYLJTYANJGBJPLQGDZYQYAXBKYSECJSZNSLYZHSXLZCGHPXZHZNYTDSBCJKDLZAYFMYDLEBBGQYZKXGLDNDNYSKJSHDLYXBCGHXYPKDJMMZNGMMCLGWZSZXZJFZNMLZZTHCSYDBDLLSCDDNLKJYKJSYCJLKWHQASDKNHCSGANHDAASHTCPLCPQYBSDMPJLPZJOQLCDHJJYSPRCHN[NNLHLYYQYHWZPTCZGWWMZFFJQQQQYXACLBHKDJXDGMMYDJXZLLSYGXGKJRYWZWYCLZMSSJZLDBYD[FCXYHLXCHYZJQ[[QAGMNYXPFRKSSBJLYXYSYGLNSCMHZWWMNZJJLXXHCHSY[[TTXRYCYXBYHCSMXJSZNPWGPXXTAYBGAJCXLY[DCCWZOCWKCCSBNHCPDYZNFCYYTYCKXKYBSQKKYTQQXFCWCHCYKELZQBSQYJQCCLMTHSYWHMKTLKJLYCXWHEQQHTQH[PQ[QSCFYMNDMGBWHWLGSLLYSDLMLXPTHMJHWLJZYHZJXHTXJLHXRSWLWZJCBXMHZQXSDZPMGFCSGLSXYMJSHXPJXWMYQKSMYPLRTHBXFTPMHYXLCHLHLZYLXGSSSSTCLSLDCLRPBHZHXYYFHB[GDMYCNQQWLQHJJ[YWJZYEJJDHPBLQXTQKWHLCHQXAGTLXLJXMSL[HTZKZJECXJCJNMFBY[SFYWYBJZGNYSDZSQYRSLJPCLPWXSDWEJBJCBCNAYTWGMPAPCLYQPCLZXSBNMSGGFNZJJBZSFZYNDXHPLQKZCZWALSBCCJX[YZGWKYPSGXFZFCDKHJGXDLQFSGDSLQWZKXTMHSBGZMJZRGLYJBPMLMSXLZJQQHZYJCZYDJWBMYKLDDPMJEGXYHYLXHLQYQHKYCWCJMYYXNATJHYCCXZPCQLBZWWYTWBQCMLPMYRJCCCXFPZNZZLJPLXXYZTZLGDLDCKLYRZZGQTGJHHGJLJAXFGFJZSLCFDQZLCLGJDJCSNZLLJPJQDCCLCJXMYZFTSXGCGSBRZXJQQCTZHGYQTJQQLZXJYLYLBCYAMCSTYLPDJBYREGKLZYZHLYSZQLZNWCZCLLWJQJJJKDGJZOLBBZPPGLGHTGZXYGHZMYCNQSYCYHBHGXKAMTXYXNBSKYZZGJZLQJDFCJXDYGJQJJPMGWGJJJPKQSBGBMMCJSSCLPQPDXCDYYKY[CJDDYYGYWRHJRTGZNYQLDKLJSZZGZQZJGDYKSHPZMTLCPWNJAFYZDJCNMWESCYGLBTZCGMSSLLYXQSXSBSJSBBSGGHFJLYPMZJNLYYWDQSHZXTYYWHMZYHYWDBXBTLMSYYYFSXJC[DXXLHJHF[SXZQHFZMZCZTQCXZXRTTDJHNNYZQQMNQDMMG[YDXMJGDHCDYZBFFALLZTDLTFXMXQZDNGWQDBDCZJDXBZGSQQDDJCMBKZFFXMKDMDSYYSZCMLJDSYNSBRSKMKMPCKLGDBQTFZSWTFGGLYPLLJZHGJ[GYPZLTCSMCNBTJBQFKTHBYZGKPBBYMTDSSXTBNPDKLEYCJNYDDYKZDDHQHSDZSCTARLLTKZLGECLLKJLQJAQNBDKKGHPJTZQKSECSHALQFMMGJNLYJBBTMLYZXDCJPLDLPCQDHZYCBZSCZBZMSLJFLKRZJSNFRGJHXPDHYJYBZGDLQCSEZGXLBLGYXTWMABCHECMWYJYZLLJJYHLG[DJLSLYGKDZPZXJYYZLWCXSZFGWYYDLYHCLJSCMBJHBLYZLYCBLYDPDQYSXQZBYTDKYXJY[CNRJMPDJGKLCLJBCTBJDDBBLBLCZQRPPXJCJLZCSHLTOLJNMDDDLNGKAQHQHJGYKHEZNMSHRP[QQJCHGMFPRXHJGDYCHGHLYRZQLCYQJNZSQTKQJYMSZSWLCFQQQXYFGGYPTQWLMCRNFKKFSYYLQBMQAMMMYXCTPSHCPTXXZZSMPHPSHMCLMLDQFYQXSZYYDYJZZHQPDSZGLSTJBCKBXYQZJSGPSXQZQZRQTBDKYXZKHHGFLBCSMDLDGDZDBLZYYCXNNCSYBZBFGLZZXSWMSCCMQNJQSBDQSJTXXMBLTXZCLZSHZCXRQJGJYLXZFJPHYMZQQYDFQJJLZZNZJCDGZYGCTXMZYSCTLKPHTXHTLBJXJLXSCDQXCBBTJFQZFSLTJBTKQBXXJJLJCHCZDBZJDCZJDCPRNPQCJPFCZLCLZXZDMXMPHJSGZGSZZQLYLWTJPFSYASMCJBTZKYCWMYTCSJJLJCQLWZMALBXYFBPNLSFHTGJWEJJXXGLLJSTGSHJQLZFKCGNNNSZFDEQFHBSAQTGYLBXMMYGSZLDYDQMJJRGBJTKGDHGKBLQKBDMBYLXWCXYTTYBKMRTJZXQJBHLMHMJJZMQASLDCYXYQDLQCAFYWYXQHZ"
//Get it !
ls_ReturnStr = ""
For i=1 to Len(as_InputString) //依次處理as_InputString中每個字符
ls_ch=Mid(as_InputString , i , 1)
If Asc(ls_ch)<128 then // 非漢字
ls_returnStr = ls_returnStr+ls_ch // 不變
Else // 是漢字
ls_ch = Mid(as_InputString , i , 2) // 取出此漢字
li_SectorCode = Asc(Left(ls_ch, 1)) - 160 //區(qū)碼
li_PositionCode = Asc(Right(ls_ch, 1)) - 160 //位碼
li_SecPosCode = li_SectorCode*100 + li_PositionCode // 區(qū)位碼
If li_SecPosCode>1600 and li_SecPosCode<5590 then // 第一個字符
For j=23 to 1 Step -1 // 找聲母
If li_SecPosCode>=li_SecPosValue[j] then
ls_returnStr = ls_returnStr + lc_FirstLetter[j]
Exit
End if
Next
Else // 第一個字符
li_offset = (li_SectorCode - 56 ) *94 + li_PositionCode - 1 // 計算偏移量
If li_offset>=0 and li_offset<=3007 then //二區(qū)漢字
ls_returnStr = ls_returnStr + Mid(ls_SecondSecTable, li_offset , 1) //取出此字聲母
End if
End if
i = i+1 // 指向下一個漢字
End if
Next // 處理完畢
//Return result
Return lower( ls_returnStr ) //返回 as_InputString 的聲母串
end function
$PBExportHeader$uf_getfirstletter.srf
$PBExportComments$ 返回給定漢字串的首字母串, xuejun , 19990821
global type uf_getfirstletter from function_object
end type
forward prototypes
global function string uf_getfirstletter (string as_inputstring)
end prototypes
global function string uf_getfirstletter (string as_inputstring);//Function name : uf_GetFirstLetter
//Used to : 返回給定漢字串的首字母串,即聲母串
//Input Arguments: as_InputString - string , 給定的漢字串
//Return Value : ls_ReturnString - String , 給定的漢字串的聲母串,一律為小寫
//Notice : 1. 此方法基于漢字的國標(biāo)漢字庫區(qū)位編碼的有效性,不符合此編碼的
// 系統(tǒng)此函數(shù)無效!
// 2. 若漢字串含有非漢字字符,如圖形符號或ASCII碼,則這些非漢字字符
// 將保持不變.
//Sample : ls_rtn = uf_GetFirstLetter("中華人民共和國")
// ls_rtn will be : zhrmghg
//Scripts:
char lc_FirstLetter[23] //存放國標(biāo)一級漢字不同讀音的起始區(qū)位碼對應(yīng)讀音
string ls_ch //臨時單元
string ls_SecondSecTable //存放所有國標(biāo)二級漢字讀音
string ls_ReturnStr //返回串
integer li_SecPosValue[23] //存放國標(biāo)一級漢字不同讀音的起始區(qū)位碼
integer i , j
integer li_SectorCode //漢字區(qū)碼
integer li_PositionCode //漢字位碼
integer li_SecPosCode //漢字區(qū)位碼
integer li_offset //二級字庫偏移量
//Set initial value
li_SecPosValue[]={1601,1637,1833,2078,2274,2302,2433,2594,2787,3106,3212,3472,3635,3722,3730,3858,4027,4086,4390,4558,4684,4925,5249 }
lc_FirstLetter[] = {"A", "B","C","D","E","F","G","H","J","K","L","M","N","O","P","Q","R","S","T","W","X","Y","Z"}</P><P> ls_SecondSecTable="CJWGNSPGCGNE[Y[BTYYZDXYKYGT[JNNJQMBSGZSCYJSYY[PGKBZGY[YWJKGKLJYWKPJQHY[W[DZLSGMRYPYWWCCKZNKYYGTTNJJNYKKZYTCJNMCYLQLYPYQFQRPZSLWBTGKJFYXJWZLTBNCXJJJJTXDTTSQZYCDXXHGCK[PHFFSS[YBGXLPPBYLL[HLXS[ZM[JHSOJNGHDZQYKLGJHSGQZHXQGKEZZWYSCSCJXYEYXADZPMDSSMZJZQJYZC[J[WQJBYZPXGZNZCPWHKXHQKMWFBPBYDTJZZKQHYLYGXFPTYJYYZPSZLFCHMQSHGMXXSXJ[[DCSBBQBEFSJYHXWGZKPYLQBGLDLCCTNMAYDDKSSNGYCSGXLYZAYBNPTSDKDYLHGYMYLCXPY[JNDQJWXQXFYYFJLEJPZRXCCQWQQSBNKYMGPLBMJRQCFLNYMYQMSQYRBCJTHZTQFRXQHXMJJCJLXQGJMSHZKBSWYEMYLTXFSYDSWLYCJQXSJNQBSCTYHBFTDCYZDJWYGHQFRXWCKQKXEBPTLPXJZSRMEBWHJLBJSLYYSMDXLCLQKXLHXJRZJMFQHXHWYWSBHTRXXGLHQHFNM[YKLDYXZPYLGG[MTCFPAJJZYLJTYANJGBJPLQGDZYQYAXBKYSECJSZNSLYZHSXLZCGHPXZHZNYTDSBCJKDLZAYFMYDLEBBGQYZKXGLDNDNYSKJSHDLYXBCGHXYPKDJMMZNGMMCLGWZSZXZJFZNMLZZTHCSYDBDLLSCDDNLKJYKJSYCJLKWHQASDKNHCSGANHDAASHTCPLCPQYBSDMPJLPZJOQLCDHJJYSPRCHN[NNLHLYYQYHWZPTCZGWWMZFFJQQQQYXACLBHKDJXDGMMYDJXZLLSYGXGKJRYWZWYCLZMSSJZLDBYD[FCXYHLXCHYZJQ[[QAGMNYXPFRKSSBJLYXYSYGLNSCMHZWWMNZJJLXXHCHSY[[TTXRYCYXBYHCSMXJSZNPWGPXXTAYBGAJCXLY[DCCWZOCWKCCSBNHCPDYZNFCYYTYCKXKYBSQKKYTQQXFCWCHCYKELZQBSQYJQCCLMTHSYWHMKTLKJLYCXWHEQQHTQH[PQ[QSCFYMNDMGBWHWLGSLLYSDLMLXPTHMJHWLJZYHZJXHTXJLHXRSWLWZJCBXMHZQXSDZPMGFCSGLSXYMJSHXPJXWMYQKSMYPLRTHBXFTPMHYXLCHLHLZYLXGSSSSTCLSLDCLRPBHZHXYYFHB[GDMYCNQQWLQHJJ[YWJZYEJJDHPBLQXTQKWHLCHQXAGTLXLJXMSL[HTZKZJECXJCJNMFBY[SFYWYBJZGNYSDZSQYRSLJPCLPWXSDWEJBJCBCNAYTWGMPAPCLYQPCLZXSBNMSGGFNZJJBZSFZYNDXHPLQKZCZWALSBCCJX[YZGWKYPSGXFZFCDKHJGXDLQFSGDSLQWZKXTMHSBGZMJZRGLYJBPMLMSXLZJQQHZYJCZYDJWBMYKLDDPMJEGXYHYLXHLQYQHKYCWCJMYYXNATJHYCCXZPCQLBZWWYTWBQCMLPMYRJCCCXFPZNZZLJPLXXYZTZLGDLDCKLYRZZGQTGJHHGJLJAXFGFJZSLCFDQZLCLGJDJCSNZLLJPJQDCCLCJXMYZFTSXGCGSBRZXJQQCTZHGYQTJQQLZXJYLYLBCYAMCSTYLPDJBYREGKLZYZHLYSZQLZNWCZCLLWJQJJJKDGJZOLBBZPPGLGHTGZXYGHZMYCNQSYCYHBHGXKAMTXYXNBSKYZZGJZLQJDFCJXDYGJQJJPMGWGJJJPKQSBGBMMCJSSCLPQPDXCDYYKY[CJDDYYGYWRHJRTGZNYQLDKLJSZZGZQZJGDYKSHPZMTLCPWNJAFYZDJCNMWESCYGLBTZCGMSSLLYXQSXSBSJSBBSGGHFJLYPMZJNLYYWDQSHZXTYYWHMZYHYWDBXBTLMSYYYFSXJC[DXXLHJHF[SXZQHFZMZCZTQCXZXRTTDJHNNYZQQMNQDMMG[YDXMJGDHCDYZBFFALLZTDLTFXMXQZDNGWQDBDCZJDXBZGSQQDDJCMBKZFFXMKDMDSYYSZCMLJDSYNSBRSKMKMPCKLGDBQTFZSWTFGGLYPLLJZHGJ[GYPZLTCSMCNBTJBQFKTHBYZGKPBBYMTDSSXTBNPDKLEYCJNYDDYKZDDHQHSDZSCTARLLTKZLGECLLKJLQJAQNBDKKGHPJTZQKSECSHALQFMMGJNLYJBBTMLYZXDCJPLDLPCQDHZYCBZSCZBZMSLJFLKRZJSNFRGJHXPDHYJYBZGDLQCSEZGXLBLGYXTWMABCHECMWYJYZLLJJYHLG[DJLSLYGKDZPZXJYYZLWCXSZFGWYYDLYHCLJSCMBJHBLYZLYCBLYDPDQYSXQZBYTDKYXJY[CNRJMPDJGKLCLJBCTBJDDBBLBLCZQRPPXJCJLZCSHLTOLJNMDDDLNGKAQHQHJGYKHEZNMSHRP[QQJCHGMFPRXHJGDYCHGHLYRZQLCYQJNZSQTKQJYMSZSWLCFQQQXYFGGYPTQWLMCRNFKKFSYYLQBMQAMMMYXCTPSHCPTXXZZSMPHPSHMCLMLDQFYQXSZYYDYJZZHQPDSZGLSTJBCKBXYQZJSGPSXQZQZRQTBDKYXZKHHGFLBCSMDLDGDZDBLZYYCXNNCSYBZBFGLZZXSWMSCCMQNJQSBDQSJTXXMBLTXZCLZSHZCXRQJGJYLXZFJPHYMZQQYDFQJJLZZNZJCDGZYGCTXMZYSCTLKPHTXHTLBJXJLXSCDQXCBBTJFQZFSLTJBTKQBXXJJLJCHCZDBZJDCZJDCPRNPQCJPFCZLCLZXZDMXMPHJSGZGSZZQLYLWTJPFSYASMCJBTZKYCWMYTCSJJLJCQLWZMALBXYFBPNLSFHTGJWEJJXXGLLJSTGSHJQLZFKCGNNNSZFDEQFHBSAQTGYLBXMMYGSZLDYDQMJJRGBJTKGDHGKBLQKBDMBYLXWCXYTTYBKMRTJZXQJBHLMHMJJZMQASLDCYXYQDLQCAFYWYXQHZ"
//Get it !
ls_ReturnStr = ""
For i=1 to Len(as_InputString) //依次處理as_InputString中每個字符
ls_ch=Mid(as_InputString , i , 1)
If Asc(ls_ch)<128 then // 非漢字
ls_returnStr = ls_returnStr+ls_ch // 不變
Else // 是漢字
ls_ch = Mid(as_InputString , i , 2) // 取出此漢字
li_SectorCode = Asc(Left(ls_ch, 1)) - 160 //區(qū)碼
li_PositionCode = Asc(Right(ls_ch, 1)) - 160 //位碼
li_SecPosCode = li_SectorCode*100 + li_PositionCode // 區(qū)位碼
If li_SecPosCode>1600 and li_SecPosCode<5590 then // 第一個字符
For j=23 to 1 Step -1 // 找聲母
If li_SecPosCode>=li_SecPosValue[j] then
ls_returnStr = ls_returnStr + lc_FirstLetter[j]
Exit
End if
Next
Else // 第一個字符
li_offset = (li_SectorCode - 56 ) *94 + li_PositionCode - 1 // 計算偏移量
If li_offset>=0 and li_offset<=3007 then //二區(qū)漢字
ls_returnStr = ls_returnStr + Mid(ls_SecondSecTable, li_offset , 1) //取出此字聲母
End if
End if
i = i+1 // 指向下一個漢字
End if
Next // 處理完畢
//Return result
Return lower( ls_returnStr ) //返回 as_InputString 的聲母串
end function