37,743
社区成员




#!/usr/bin/python
# encoding: utf-8
import re
import datetime
import sys
# Pattern for one record line: "<YYYY-MM-DD HH:MM:SS> <src ip> <tag ip> <port>".
# Named groups: dt (timestamp), src and tag (dotted quads), port (digits).
# Compiled in verbose mode, so the line breaks inside the pattern are ignored
# and every separator has to be spelled out as \s.
patt = re.compile(r'''
(?P<dt>\d{4}\-\d{2}\-\d{2}\s\d{2}:\d{2}:\d{2})\s
(?P<src>\d+(\.\d+){3})\s
(?P<tag>\d+(\.\d+){3})\s
(?P<port>\d+)
''', re.IGNORECASE | re.UNICODE | re.VERBOSE)
def dataReader(filename):
    """Yield one dict of named groups per line of `filename` matching `patt`.

    Each line is stripped of surrounding whitespace before matching.
    Lines that do not match are skipped silently.
    """
    with open(filename, 'rt') as handle:
        for raw_line in handle:
            match = patt.match(raw_line.strip())
            if match is None:
                continue
            yield match.groupdict()
def s2dt(s, fmt='%Y-%m-%d %H:%M:%S'):
    """Parse the string `s` into a datetime.datetime using format `fmt`.

    Raises ValueError (from strptime) when `s` does not fit `fmt`.
    """
    parse = datetime.datetime.strptime
    return parse(s, fmt)
def dataCollector(filename):
    """Group the timestamps found in `filename` by (src, tag, port).

    Returns a dict mapping each (src, tag, port) tuple of strings to the
    list of parsed timestamps seen for it, in file order.
    """
    grouped = {}
    for record in dataReader(filename):
        key = (record['src'], record['tag'], record['port'])
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(s2dt(record['dt']))
    return grouped
def delta(timelist, min_gap=10):
    """Summarise the gaps between consecutive timestamps in `timelist`.

    The timestamps are ordered, the gap in seconds between each
    neighbouring pair is computed, and gaps shorter than `min_gap` seconds
    are discarded. The remaining gaps are handed to countdlist() and its
    result is returned unchanged.

    `timelist` is not modified: the work is done on a sorted copy. An empty
    or single-element list produces no gaps instead of raising.
    """
    ordered = sorted(timelist)  # copy, so the caller's list stays intact
    dlist = []
    # Pair every timestamp with its successor; empty input yields no pairs.
    for earlier, later in zip(ordered, ordered[1:]):
        gap = (later - earlier).total_seconds()
        if gap >= min_gap:
            dlist.append(gap)
    return countdlist(dlist)
def countdlist(dlist):
    """Find the most frequent value in `dlist`.

    Returns None when `dlist` is empty. Otherwise returns a tuple
    (count, value, percent), where `percent` is the share of `count` in
    the total number of items, formatted as a string with two decimals.
    When several values share the highest count, the largest wins.
    """
    tally = {}
    for value in dlist:
        tally[value] = tally.get(value, 0) + 1
    if not tally:
        return None
    total = sum(tally.values())
    # Comparing (count, value) pairs picks the highest count and breaks
    # ties in favour of the larger value.
    cnt, dur = max((count, value) for value, count in tally.items())
    return cnt, dur, '%.2f' % (100. * cnt / total)
# Driver: for every (src, tag, port) group in /home/ip.data, print the group
# key followed by the summary returned by delta(); groups for which delta()
# returns None are skipped.
for category, timelist in dataCollector(r'/home/ip.data').items():
    buff = delta(timelist)
    if buff is None:
        continue
    print('%s %s' % (category, buff))
from time import ctime

# Small demo: collect one ctime() stamp per occurrence of each address.
IP = ['10.85.141.84', '10.85.141.85', '10.85.141.84', '10.85.141.85', '10.85.141.86']
ipdict = {}
for ip in IP:
    if ip not in ipdict:
        ipdict[ip] = []
    # ctime() is called once per occurrence, not once for the whole run.
    ipdict[ip].append(ctime())
print(ipdict)
# Result:
# {'10.85.141.86': ['Thu Jun 07 15:52:47 2012'], '10.85.141.84': ['Thu Jun 07 15:52:47 2012', 'Thu Jun 07 15:52:47 2012'], '10.85.141.85': ['Thu Jun 07 15:52:47 2012', 'Thu Jun 07 15:52:47 2012']}
#!/usr/bin/python
# encoding: utf-8
# Sample input for this script: packet-capture lines (they look like tcpdump
# output), one packet per line. each() splits this text into lines and keeps
# only those matched by `patt`; every other line in the text is ignored.
content = '''
> 13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S]
> 13:39:41.990650 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:41.990652 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016317 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016322 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016324 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016774 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016878 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016886 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016888 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016891 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016894 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.018229 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.372091 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.699621 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.710105 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.710353 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [P.]
> 13:39:42.721820 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.721851 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.755950 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.756030 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:43.137791 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
最后输出ip x.x.x.x > x.x.x.x 时间差
'''
import re
import datetime
import sys
# Pattern for one capture line of the form
#   "> HH:MM:SS.ffffff IP a.b.c.d.port > a.b.c.d.port: Flags [..]"
# Named groups:
#   dt       - the timestamp text
#   category - "src.port > dst.port", used as the grouping key
#   flag     - the text inside the brackets after "Flags"
# The flag group accepts one OR MORE of S, P and "." so that combined
# flags such as "[P.]" match; with a single-character class those lines
# were dropped without notice. Other flag letters are still not matched.
# Compiled in verbose mode: line breaks inside the pattern are ignored.
patt = re.compile(r'''\>\s
(?P<dt>\d{2}:\d{2}:\d{2}\.\d+)\sIP\s
(?P<category>\d+(\.\d+){4}\s\>\s\d+(\.\d+){4})\:\sFlags\s
\[(?P<flag>[SP\.]+)\]
''', re.I|re.U|re.X)
def each():
    # Parse the data held in `content`, yielding the named groups of every
    # line that matches `patt`. Can be changed to take a file name and read
    # the lines from a file instead, as the situation requires.
    for line in content.splitlines():
        match = patt.match(line)
        if match:
            yield match.groupdict()
def datagroup():
    # Group the parsed records.
    # Returns a list of dicts, one per group; each dict maps a category
    # string to the list of timestamp strings recorded for it, in input
    # order.
    collections = []
    for d in each():
        # A record flagged [S] opens a new group. A group is also opened
        # when none exists yet, so input whose first record is not an [S]
        # is grouped instead of failing on collections[-1].
        if d['flag'] == 'S' or not collections:
            collections.append({})
        # Within the current group, gather records that share the same
        # text between "IP" and "Flags".
        collections[-1].setdefault(d['category'], []).append(d['dt'])
    return collections
def static():
    # Report: for every group from datagroup(), print one line per category
    # showing the smallest and largest timestamp string recorded for it.
    # The timestamps are compared as text; no difference is computed here.
    for group in datagroup():
        for category in group:
            stamps = group[category]
            first, last = min(stamps), max(stamps)
            print('%s\t%s ~ %s' % (category, first, last))
        # Separator after each group (placement assumed: the pasted source
        # lost its indentation).
        print('-' * 32)


if __name__ == '__main__':
    static()
#!/usr/bin/env python
from datetime import datetime, timedelta
# For every "src => dst" pair in data.txt, print the time elapsed between
# consecutive lines flagged [S]. Fields are taken by position after
# splitting on whitespace: 1 = time, 3 = source, 5 = destination, 7 = flag.
with open('data.txt') as fd:
    adict = {}  # most recent [S] time seen for each "src => dst" key
    for line in fd:
        data = line.split()
        if len(data) < 8:
            # Blank or truncated line: the flag field (index 7) is missing,
            # so skip it rather than fail with IndexError.
            continue
        ntime, sip, dip, flag = data[1], data[3], data[5], data[7]
        if flag != '[S]':
            continue
        akey = "%s => %s" % (sip, dip)
        # Only the time of day is parsed, so an interval that crosses
        # midnight comes out negative.
        ntime = datetime.strptime(ntime, '%H:%M:%S.%f')
        if akey in adict:
            print('%s %s' % (akey, ntime - adict[akey]))
        adict[akey] = ntime