37,720
社区成员
发帖
与我相关
我的任务
分享
# coding=utf-8
import re

# Sample text that mixes Chinese words with English ones.
string = u"应用 application software 软件"
# Each match is a maximal run of characters in the range U+4E00..U+9FA5,
# so the English words and the spaces act as separators.
rlt = re.findall(u"[\u4e00-\u9fa5]+", string)
print(rlt)
for value in rlt:
    print(value)
# Output (as shown by Python 2):
# [u'\u5e94\u7528', u'\u8f6f\u4ef6']
# 应用
# 软件
# -*- coding: utf-8 -*-
"""
Spyder Editor
write data to xls, 2016.7.24
Converts data in other formats into xls data that the Turing Robot (图灵机器人) can recognize.
"""
import os
import xlwt
import re
# Module-level list of [leading ASCII text, remaining text] pairs; the script
# section below fills it line by line and then passes it to write_excel().
knowledge = []
def set_style(name, height, bold=False):
    """Build an xlwt cell style whose font uses the given settings.

    Args:
        name: Font face name, stored in ``font.name``.
        height: Font height, stored in ``font.height``. xlwt expresses this
            in twips (1/20 of a point), so 200 corresponds to 10pt.
        bold: Whether the font is bold. Defaults to False.

    Returns:
        A new ``xlwt.XFStyle`` whose ``font`` is the configured font.
    """
    style = xlwt.XFStyle()  # start from a default style
    font = xlwt.Font()      # font object to attach to the style
    font.name = name
    font.bold = bold
    # xlwt spells this attribute "colour_index". The previous spelling
    # "color_index" only created an unused attribute, so the colour was
    # never applied.
    # NOTE(review): index 4 should be blue in the default palette - confirm.
    font.colour_index = 4
    font.height = height
    style.font = font
    return style
def write_excel(knowledge, filename='answer.xls'):
    """Write two-column records to the first sheet of a new .xls workbook.

    Args:
        knowledge: Sequence of records; element 0 of each record goes to
            column 0 and element 1 to column 1, one record per row.
        filename: Path of the workbook to save. Defaults to 'answer.xls',
            the path this function always used before.
    """
    # Create the workbook; byte strings handed to it are treated as UTF-8.
    workbook = xlwt.Workbook(encoding='utf-8')
    # Create the single sheet that receives the data.
    data_sheet = workbook.add_sheet('first')
    for row, entry in enumerate(knowledge):
        data_sheet.write(row, 0, entry[0])
        data_sheet.write(row, 1, entry[1])
    # Save the file.
    workbook.save(filename)
    # Single-argument call form prints the same text on Python 2 and 3.
    print("successful write!")
# Scratch workbook and sheet. In the visible file they are referenced only by
# the xlwt sample code quoted inside the string literal further down.
x=xlwt.Workbook()
s1=x.add_sheet('sheet1')
# Matches the first character that lies outside the ASCII range.
_FIRST_NON_ASCII = re.compile(u"[^\x00-\x7f]")


def split_at_first_non_ascii(line):
    """Split text in two at its first non-ASCII character.

    Args:
        line: Decoded (unicode) text.

    Returns:
        A ``(head, tail)`` tuple. ``head`` is the leading all-ASCII part and
        ``tail`` starts at the first non-ASCII character. When the text is
        pure ASCII, ``head`` is the whole text and ``tail`` is empty.
    """
    found = _FIRST_NON_ASCII.search(line)
    index = found.start() if found else len(line)
    return line[:index], line[index:]


if __name__ == '__main__':
    # Local import keeps this section self-contained. io.open decodes while
    # reading on both Python 2 and Python 3, so no per-line decode/encode
    # round trip is needed.
    import io

    print("中文")
    # The context manager closes know.txt even if a line fails to decode.
    with io.open("know.txt", encoding="gb2312") as info:
        for line in info:
            # Drop the line terminator so it does not end up inside a cell.
            line = line.rstrip(u"\r\n")
            # Cut at the first non-ASCII character; the two halves become the
            # two spreadsheet columns.
            yingstr, hanstr = split_at_first_non_ascii(line)
            print(len(yingstr))  # position of the cut
            print(hanstr)
            knowledge.append([yingstr, hanstr])
    write_excel(knowledge)
'''
The snippets below show a few more ways to work with Excel files through the xlwt library; hopefully they come in handy.
style1=xlwt.XFStyle() # style object
style1.font.colour_index=30 # font foreground colour (the original note says red)
style1.font.bold=True # bold
style1.pattern.pattern=1 # solid fill
style1.pattern.pattern_fore_colour=2 # fill colour: red
style2=xlwt.easyxf('font:italic on;pattern:pattern solid,fore-colour yellow')
# quick way to build a style
# format of the argument string:
#"class1:key1 value1,key2 value2;class2:k1 v1,k2 v2;"
s1.write(0,0,"Hello",style1)# write a string
s1.write(1,0,True,style2)# write a boolean
s1.write(2,0,3.1415926);s1.write(2,1,-5);s1.write(2,2,xlwt.Formula("2*A3*ABS(B3)"));
# compute a value with a formula
s1.write(3,0,'right',xlwt.easyxf("align:horiz right"))
# adjust the alignment
x.save('example.xls') # save
'''
# Forum reply ("give this a try"): the same pattern applied to text that has
# no separator between the English and the Chinese part.
# The u prefixes matter on Python 2, where "\u4e00" inside a plain byte string
# is not an escape sequence and the character class would not mean what it says.
re.findall(u"[\u4e00-\u9fa5]+", u"application software应用软件")