python 列表操作求助

ai527518 2012-06-07 10:55:31
> 13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S]
> 13:39:41.990650 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:41.990652 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016317 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016322 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016324 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016774 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016878 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016886 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016888 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016891 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016894 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.018229 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.372091 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.699621 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.710105 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.710353 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [P.]
> 13:39:42.721820 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.721851 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.755950 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.756030 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:43.137791 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

数据格式如上
第一步 把IP和Flags之间的数据 相同的归类到一起
第二步 以[S]为分割 将数据分隔开
第三步 对分割开的数据 取时间的最大值和最小值进行做差 即把[S]之间的数据的时间进行做差
最后输出ip x.x.x.x > x.x.x.x 时间差

之前某位大牛帮我写过一个类似的 试着修改 不过还是没搞明白是怎么把IP相同的归类到一起

下面是之前某大牛帮写的类似的

#!/usr/bin/python
# encoding: utf-8

import re
import datetime
import sys

# Record layout: "YYYY-MM-DD HH:MM:SS <src ipv4> <target ipv4> <port>", with a
# single whitespace character between fields.  The pattern is compiled in
# verbose mode, so the line breaks inside the pattern text are ignored.
patt = re.compile(
    r'''
(?P<dt>\d{4}\-\d{2}\-\d{2}\s\d{2}:\d{2}:\d{2})\s
(?P<src>\d+(\.\d+){3})\s
(?P<tag>\d+(\.\d+){3})\s
(?P<port>\d+)
''',
    re.VERBOSE | re.IGNORECASE | re.UNICODE,
)

def dataReader(filename):
    """Yield the named regex groups of every matching line in *filename*.

    Each line is stripped of surrounding whitespace and matched against the
    module-level ``patt``; lines that do not match are skipped.

    :param filename: path of the text file to read.
    :returns: generator of dicts as returned by ``match.groupdict()``.
    """
    with open(filename, 'rt') as handle:
        for ln in handle:
            m = patt.match(ln.strip())
            if m:
                yield m.groupdict()

def s2dt(s, fmt='%Y-%m-%d %H:%M:%S'):
    """Parse the string *s* into a ``datetime.datetime``.

    :param s: timestamp text.
    :param fmt: ``strptime`` format describing *s*.
    :returns: the parsed ``datetime.datetime``.
    :raises ValueError: if *s* does not match *fmt* (raised by ``strptime``).
    """
    return datetime.datetime.strptime(s, fmt)

def dataCollector(filename):
    """Group the timestamps found in *filename* by (src, tag, port).

    :param filename: path handed to ``dataReader``.
    :returns: dict mapping ``(src, tag, port)`` string tuples to the list of
        parsed timestamps seen for that key, in file order.
    """
    collector = {}
    for d in dataReader(filename):
        key = (d['src'], d['tag'], d['port'])
        # setdefault creates the list the first time a key is seen and
        # returns the existing one afterwards.
        collector.setdefault(key, []).append(s2dt(d['dt']))
    return collector

def delta(timelist, min_gap=10):
    """Summarise the gaps between consecutive timestamps in *timelist*.

    The timestamps are ordered, the gap in seconds between each neighbouring
    pair is computed, gaps shorter than *min_gap* are discarded, and the
    remaining gaps are handed to ``countdlist``.

    :param timelist: iterable of ``datetime.datetime`` values; it is not
        modified.
    :param min_gap: gaps below this many seconds are ignored (default 10).
    :returns: whatever ``countdlist`` returns for the kept gaps; ``None``
        when no gap qualifies, including for an empty or one-element input.
    """
    # Work on a sorted copy so the caller's list is neither reordered nor
    # shortened.
    ordered = sorted(timelist)
    dlist = []
    for prev, cur in zip(ordered, ordered[1:]):
        d = (cur - prev).total_seconds()
        if d >= min_gap:
            dlist.append(d)
    return countdlist(dlist)


def countdlist(dlist):
    """Return the most frequent value of *dlist* with its count and share.

    :param dlist: iterable of hashable, mutually comparable values.
    :returns: ``None`` for empty input, otherwise a tuple
        ``(count, value, percent)`` where *percent* is the share of *count*
        in the total number of items, formatted with two decimals.  When
        several values share the highest count, the largest value wins.
    """
    dlist = list(dlist)
    if not dlist:
        return None
    counts = {}
    for d in dlist:
        counts[d] = counts.get(d, 0) + 1
    # Tuples compare by count first, then by value, so max() picks the most
    # frequent value and breaks ties towards the larger one.
    cnt, dur = max((n, d) for d, n in counts.items())
    return cnt, dur, '%.2f' % (100. * cnt / len(dlist))


# For every (src, tag, port) key collected from the data file, print the key
# followed by the summary returned by delta(); keys without a summary are
# skipped.
for category, timelist in dataCollector(r'/home/ip.data').items():
    buff = delta(timelist)
    if buff is not None:
        # Single pre-formatted argument: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (category, buff))

...全文
172 4 打赏 收藏 转发到动态 举报
AI 作业
写回复
用AI写文章
4 条回复
切换为时间正序
请发表友善的回复…
发表回复
Rlay_2 2012-06-07
  • 打赏
  • 举报
回复
代码里用到了字典的 setdefault 方法,它会把相同 IP 对应的内容放进同一个 list 里。给个例子:

from time import ctime
# Demo: collect one timestamp string per occurrence of each IP address.
IP = ['10.85.141.84', '10.85.141.85', '10.85.141.84', '10.85.141.85', '10.85.141.86']
ipdict = {}
for ip in IP:
    # setdefault returns the list already stored under ip, inserting an empty
    # one the first time the address is seen.
    ipdict.setdefault(ip, []).append(ctime())

print(ipdict)

结果:
{'10.85.141.86': ['Thu Jun 07 15:52:47 2012'], '10.85.141.84': ['Thu Jun 07 15:52:47 2012', 'Thu Jun 07 15:52:47 2012'], '10.85.141.85': ['Thu Jun 07 15:52:47 2012', 'Thu Jun 07 15:52:47 2012']}
tim_spac_126 2012-06-07
  • 打赏
  • 举报
回复
#!/usr/bin/python
# encoding: utf-8

# Sample capture used as input by each(): one packet per line.  Lines that do
# not match the module-level pattern (blank lines, the trailing note) are
# skipped when the text is parsed.
content = '''
> 13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S]
> 13:39:41.990650 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:41.990652 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016317 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016322 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016324 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016774 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016878 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016886 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016888 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016891 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016894 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.018229 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.372091 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.699621 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.710105 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.710353 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [P.]
> 13:39:42.721820 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.721851 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.755950 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.756030 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:43.137791 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

最后输出ip x.x.x.x > x.x.x.x 时间差

'''
import re
import datetime
import sys

# Matches one capture line of the form
#   "> HH:MM:SS.ffffff IP a.b.c.d.port > a.b.c.d.port: Flags [X]"
# and exposes the groups dt, category ("src > dst") and flag.
# The flag group takes everything between the brackets: a single-character
# class would fail to match two-character flag sets such as "P.", and those
# lines would be dropped from the results.
patt = re.compile(r'''\>\s
(?P<dt>\d{2}:\d{2}:\d{2}\.\d+)\sIP\s
(?P<category>\d+(\.\d+){4}\s\>\s\d+(\.\d+){4})\:\sFlags\s
\[(?P<flag>[^\]]+)\]
''', re.I|re.U|re.X)

def each(lines=None):
    """Yield the named regex groups of every line that matches ``patt``.

    :param lines: iterable of text lines to parse, for example an open file;
        defaults to the lines of the module-level ``content`` sample.
    :returns: generator of dicts as returned by ``match.groupdict()``;
        non-matching lines are skipped.
    """
    if lines is None:
        lines = content.splitlines()
    for ln in lines:
        m = patt.match(ln)
        if m:
            yield m.groupdict()

def datagroup():
    """Split the parsed records into groups delimited by ``[S]`` packets.

    Every record whose flag is exactly ``'S'`` opens a new group.  Within a
    group, timestamps are collected per category (the "src > dst" text).
    NOTE: an ``[S]`` from any one category starts a new group for all
    categories, because there is a single running group.

    :returns: list of dicts, one per group, mapping category to the list of
        timestamp strings in input order.
    """
    groups = []
    for d in each():
        # Open a group on every [S]; also open one when records arrive before
        # the first [S], which would otherwise index into an empty list.
        if d['flag'] == 'S' or not groups:
            groups.append({})
        # Same category within the current group -> same list.
        groups[-1].setdefault(d['category'], []).append(d['dt'])
    return groups

def static():
    """Print the earliest and latest timestamp of every category per group.

    For each group returned by ``datagroup`` one line per category is printed
    as ``"<category>\\t<min> ~ <max>"``, followed by a 32-dash separator
    line.  The timestamps are compared as strings; no numeric difference is
    computed here.
    """
    for dl in datagroup():
        for category, timepoints in dl.items():
            # Earliest and latest time point of this category in the group.
            print('%s\t%s ~ %s' % (
                category, min(timepoints), max(timepoints)
            ))
        print('-' * 32)


if __name__ == '__main__':
    static()


>python -u "test.py"
192.168.90.20.1096 > 65.54.51.253.443 13:39:41.990623 ~ 13:39:42.372091
--------------------------------
192.168.90.20.1097 > 113.107.45.134.80 13:39:42.699621 ~ 13:39:42.721820
--------------------------------
192.168.90.20.1096 > 65.54.51.253.443 13:39:43.137791 ~ 13:39:43.137791
192.168.90.20.1097 > 113.107.45.134.80 13:39:42.721851 ~ 13:39:42.721851
--------------------------------
>Exit code: 0 Time: 0.090
ai527518 2012-06-07
  • 打赏
  • 举报
回复
呵呵 用mysql自带的函数搞定了 还是谢谢LS
bugs2k 2012-06-07
  • 打赏
  • 举报
回复
#!/usr/bin/env python

from datetime import datetime, timedelta

# For every "src => dst" pair, print the time elapsed between one [S] packet
# and the next [S] packet of the same pair.
with open('data.txt') as fd:
    adict = {}  # "src => dst" -> time of the most recent [S] packet
    for line in fd:
        data = line.split()
        # Blank or truncated lines do not have the 8 whitespace-separated
        # fields indexed below; skip them instead of raising IndexError.
        if len(data) < 8:
            continue
        ntime, sip, dip, flag = data[1], data[3], data[5], data[7]
        if flag != '[S]':
            continue
        akey = "%s => %s" % (sip, dip)
        ntime = datetime.strptime(ntime, '%H:%M:%S.%f')
        if akey in adict:
            dur = ntime - adict[akey]
            # Single pre-formatted argument: same text under Python 2 and 3.
            print('%s %s' % (akey, dur))
        adict[akey] = ntime

37,743

社区成员

发帖
与我相关
我的任务
社区描述
JavaScript,VBScript,AngelScript,ActionScript,Shell,Perl,Ruby,Lua,Tcl,Scala,MaxScript 等脚本语言交流。
社区管理员
  • 脚本语言(Perl/Python)社区
  • WuKongSecurity@BOB
加入社区
  • 近7日
  • 近30日
  • 至今

试试用AI创作助手写篇文章吧