-- (Forum page header captured with this paste: 37,719 community members;
-- Post; Related to me; My tasks; Share)
--Coder Кефин Ханович Китанский
--2015-5
--民国一百零四年 后清和谐十年
--- Read an open file line by line and hand each separator-terminated word
-- to vappendtoDict.
--
-- Every line is decoded with Utf8to32 into an array of code points. Code
-- points are collected into a word until the separator U+3001 (12289) is
-- met; the collected word is then passed to vappendtoDict and a fresh word
-- is started.
--
-- NOTE(review): whatever follows the last separator on a line is dropped
-- when the line ends, because it is never passed to vappendtoDict -- confirm
-- that every word in the input is terminated by a separator.
-- NOTE(review): vappendtoDict is not defined in this part of the file; it is
-- presumably a global that stores the word -- verify.
--
-- @param hfilehandle open file handle; only its lines() method is used
local function readfilebylines(hfilehandle)
  local SEPARATOR = 12289 -- U+3001, the comma that delimits words

  for rawline in hfilehandle:lines() do
    local codepoints = Utf8to32(rawline)
    local pending = {}

    for index = 1, #codepoints do
      local codepoint = codepoints[index]
      if codepoint ~= SEPARATOR then
        pending[#pending + 1] = codepoint
      else
        -- Separator reached: emit the finished word and start a new one.
        vappendtoDict(pending)
        pending = {}
      end
    end
  end
end
--- Entry point: open "test.txt", skip a leading UTF-8 byte order mark if one
-- is present, and feed the file to readfilebylines.
--
-- Raises an error carrying the io.open message when the file cannot be
-- opened. The file handle is closed even when readfilebylines fails; the
-- original error is then re-raised unchanged.
local function main()
  local UTF8_BOM = "\239\187\191" -- bytes EF BB BF

  local hfilehandle = assert(io.open("test.txt", "r"))

  -- Only discard the first three bytes when they really are a BOM;
  -- otherwise rewind so that no content is lost.
  if hfilehandle:read(3) ~= UTF8_BOM then
    hfilehandle:seek("set", 0)
  end

  local ok, err = pcall(readfilebylines, hfilehandle)
  hfilehandle:close()
  if not ok then
    error(err, 0) -- level 0: keep the original message untouched
  end
end
--http://lua-users.org/wiki/LuaUnicode
--- Decode a UTF-8 byte string into an array of code points.
--
-- Plain integer arithmetic is used instead of the `bit32` library, so the
-- function does not depend on that library being available.
--
-- Limitations: continuation bytes are not validated, a byte in the range
-- 0x80-0xBF in lead position is treated as a two-byte lead, and a sequence
-- cut short by the end of the string contributes its partially accumulated
-- value.
--
-- @param utf8str string to decode (asserted to be a string)
-- @return array of code point numbers followed by a terminating 0
--         (the empty string yields {0})
-- Raises "invalid UTF-8 character sequence" for a lead byte >= 0xF8.
function Utf8to32(utf8str)
  assert(type(utf8str) == "string")

  local res = {}
  local seq = 0   -- bytes still expected for the character being decoded
  local val = nil -- code point accumulated so far

  for i = 1, #utf8str do
    local c = string.byte(utf8str, i)
    if seq == 0 then
      -- A new character starts here: store the one just completed
      -- (there is none yet on the very first byte).
      if val ~= nil then
        res[#res + 1] = val
      end

      -- Sequence length from the lead byte; `modulus` is 2^(8-seq), so
      -- `c % modulus` keeps exactly the low (8-seq) bits of the lead byte.
      local modulus
      if c < 0x80 then
        seq, modulus = 1, 0x80
      elseif c < 0xE0 then
        seq, modulus = 2, 0x40
      elseif c < 0xF0 then
        seq, modulus = 3, 0x20
      elseif c < 0xF8 then
        seq, modulus = 4, 0x10
      else
        error("invalid UTF-8 character sequence")
      end
      val = c % modulus
    else
      -- Continuation byte: shift left by six bits and add its low six bits.
      val = val * 64 + c % 64
    end
    seq = seq - 1
  end

  if val ~= nil then
    res[#res + 1] = val
  end
  res[#res + 1] = 0 -- terminator expected by existing callers
  return res
end
-- Use tounicode to turn a code point back into UTF-8 text, e.g. for printing.
--- Encode a single code point as a UTF-8 string.
--
-- Values below 128 are returned as a single byte via string.char. Larger
-- values are encoded as two bytes (up to 0x7FF), three bytes (up to 0xFFFF)
-- or four bytes (up to 0x1FFFFF).
--
-- @param decimal code point as a number
-- @return the UTF-8 encoded string, or "" when decimal is above 0x1FFFFF
function tounicode(decimal)
  if decimal < 128 then
    return string.char(decimal)
  end

  -- Lead-byte marker and number of continuation bytes for the value's range.
  local lead, continuation
  if decimal <= 0x7FF then
    lead, continuation = 192, 1
  elseif decimal <= 0xFFFF then
    lead, continuation = 224, 2
  elseif decimal <= 0x1FFFFF then
    lead, continuation = 240, 3
  else
    return "" -- outside every supported range: nothing to encode
  end

  local charbytes = {}
  -- Fill continuation bytes from last to first, peeling six bits at a time.
  for b = continuation + 1, 2, -1 do
    local mod = decimal % 64
    decimal = (decimal - mod) / 64
    charbytes[b] = string.char(128 + mod)
  end
  -- The remaining high bits go into the lead byte.
  charbytes[1] = string.char(lead + decimal)

  return table.concat(charbytes)
end
--http://stackoverflow.com/questions/7983574/how-to-write-a-unicode-symbol-in-lua