37,718
社区成员
发帖
与我相关
我的任务
分享
#!/usr/bin/python
# -*- coding: utf-8 -*-
我会这么写
#!/usr/bin/env python
#-*- coding=utf-8 -*-
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
# Sample input passed to both implementations: punctuation, digits, ASCII
# letters, non-ASCII characters, quotes, and a backslash.
s = '!@#4A5^&*9&^4DFGhj<论坛>\'ds\\regex\"\'__>Z...*^8,-0-=._'
# Using regular expressions
def reprocess_line(sText):
    """Normalize a line to lowercase ASCII letters, '0' digits, '.' and ','.

    Every character other than an ASCII letter, an ASCII digit, '.' or ','
    is removed; each remaining digit is replaced by '0'; the result is
    lowercased.

    Args:
        sText: The text to normalize.

    Returns:
        The normalized, lowercased string.
    """
    # Raw strings keep regex escapes out of Python's own string-escape
    # handling; '.' needs no escaping inside a character class.
    disallowed = re.compile(r'[^a-zA-Z0-9.,]')
    digit = re.compile(r'\d')
    stripped = disallowed.sub('', sText)
    # Digits are masked after filtering, so only ASCII digits can remain here.
    return digit.sub('0', stripped).lower()
# The most straightforward approach
def reprocess_line1(sText):
    """Normalize a line with an explicit character-by-character filter.

    ASCII letters, '.' and ',' are kept, every ASCII digit becomes '0',
    all other characters are dropped, and the result is lowercased.

    Args:
        sText: The text to normalize. A byte string is decoded as UTF-8;
            text that is already decoded is used unchanged.

    Returns:
        The normalized, lowercased text.

    Raises:
        UnicodeDecodeError: If a byte string is not valid UTF-8.
    """
    # Decode only byte strings: decoding already-decoded text raises
    # TypeError, and the `unicode` builtin does not exist on Python 3.
    if isinstance(sText, bytes):
        sText = sText.decode('utf-8')
    kept = []
    for ch in sText:
        # Explicit character ranges replace the former `65^26`-style bounds,
        # which used XOR where addition was meant and were right only
        # because the operands share no set bits.
        if 'A' <= ch <= 'Z' or 'a' <= ch <= 'z' or ch in ('.', ','):
            kept.append(ch)
        elif '0' <= ch <= '9':
            # Every digit is masked to '0'.
            kept.append('0')
    return ''.join(kept).lower()
# Run both implementations on the sample string. The parenthesized
# single-argument form of print behaves the same on Python 2 and Python 3.
print(reprocess_line(s))
print(reprocess_line1(s))