python 列表操作求助

ai527518 2012-06-07 10:55:31

> 13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S]
> 13:39:41.990650 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:41.990652 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016317 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016322 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016324 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016774 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016878 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016886 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016888 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016891 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.016894 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.018229 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.372091 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]
> 13:39:42.699621 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.710105 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.710353 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [P.]
> 13:39:42.721820 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]
> 13:39:42.721851 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]
> 13:39:42.755950 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:42.756030 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]
> 13:39:43.137791 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

数据格式如上
第一步：把 IP 和 Flags 之间内容相同的行归类到一起
第二步：以 [S] 为分界将数据分隔开
第三步：对每段分隔开的数据取时间的最大值和最小值做差，即计算相邻 [S] 之间数据的时间跨度
最后输出：ip x.x.x.x > x.x.x.x 时间差

之前某位大牛帮我写过一个类似的脚本，我试着修改，不过还是没搞明白它是怎么把 IP 相同的数据归类到一起的。

下面是之前某大牛帮写的类似的

#!/usr/bin/python

# encoding: utf-8



import re

import datetime

import sys



# Pattern for one record line of the input file: a "YYYY-MM-DD HH:MM:SS"
# timestamp (group ``dt``), a dotted-quad address (``src``), a second
# dotted-quad address (``tag``) and a run of digits (``port``), each
# separated by a single whitespace character.  The pattern is compiled in
# verbose mode (re.X), so the whitespace and blank lines inside the literal
# are ignored by the regex engine.
patt = re.compile(r'''

  (?P<dt>\d{4}\-\d{2}\-\d{2}\s\d{2}:\d{2}:\d{2})\s

  (?P<src>\d+(\.\d+){3})\s

  (?P<tag>\d+(\.\d+){3})\s

  (?P<port>\d+)

  ''', re.I|re.U|re.X)



def dataReader(filename):
    """Yield the named regex groups of every matching line in *filename*.

    Each line is stripped of surrounding whitespace and matched against the
    module-level ``patt``; lines that do not match are skipped.

    :param filename: path of the text file to read.
    :return: generator of dicts as produced by ``match.groupdict()``.
    """
    with open(filename, 'rt') as source:
        for raw in source:
            found = patt.match(raw.strip())
            if not found:
                continue
            yield found.groupdict()



def s2dt(s, fmt='%Y-%m-%d %H:%M:%S'):
    """Parse the string *s* into a ``datetime.datetime`` using format *fmt*.

    :param s: text to parse.
    :param fmt: ``strptime`` format string.
    :return: the parsed ``datetime.datetime``.
    :raises ValueError: propagated from ``strptime`` when *s* does not fit *fmt*.
    """
    parse = datetime.datetime.strptime
    return parse(s, fmt)



def dataCollector(filename):
    """Group the parsed timestamps of *filename* by ``(src, tag, port)``.

    :param filename: path handed on to ``dataReader``.
    :return: dict mapping each ``(src, tag, port)`` tuple to the list of
        ``datetime`` objects parsed from its records, in file order.
    """
    grouped = {}
    for rec in dataReader(filename):
        key = (rec['src'], rec['tag'], rec['port'])
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(s2dt(rec['dt']))
    return grouped



def delta(timelist):
    """Summarise the gaps between consecutive timestamps.

    The timestamps are sorted, the difference in seconds between each
    adjacent pair is computed, and gaps shorter than 10 seconds are
    discarded.  The remaining gaps are handed to ``countdlist``.

    The caller's list is not modified: sorting happens on a copy and no
    element is popped.  An empty *timelist* produces no gaps and therefore
    returns ``None`` rather than raising ``IndexError``.

    :param timelist: iterable of ``datetime.datetime`` objects.
    :return: the result of ``countdlist`` for the retained gaps.
    """
    ordered = sorted(timelist)  # sorted() copies, leaving the input intact
    gaps = []
    for earlier, later in zip(ordered, ordered[1:]):
        seconds = (later - earlier).total_seconds()
        # Only gaps of at least 10 seconds take part in the statistics.
        if seconds >= 10:
            gaps.append(seconds)
    return countdlist(gaps)





def countdlist(dlist):
    """Return the most frequent value of *dlist* with its share of the total.

    :param dlist: iterable of hashable, mutually comparable values.
    :return: ``None`` when *dlist* is empty, otherwise a tuple
        ``(count, value, percent)`` where *count* is how often *value*
        occurs and *percent* is that share of all items, formatted with two
        decimals.  When several values share the highest count, the largest
        of them is reported.
    """
    tally = {}
    for gap in dlist:
        tally[gap] = tally.get(gap, 0) + 1
    if not tally:
        return None
    total = sum(tally.values())
    # Comparing (count, value) pairs picks the highest count first and the
    # largest value among equal counts.
    cnt, dur = max((n, gap) for gap, n in tally.items())
    return cnt, dur, '%.2f' % (100. * cnt / total)





# For every (src, tag, port) group collected from the data file, print the
# group key together with the summary returned by delta(); groups for which
# delta() returns None are skipped.
for category, timelist in dataCollector(r'/home/ip.data').items():
    buff = delta(timelist)
    if buff is not None:
        # A single pre-formatted argument prints the same text on Python 2
        # and Python 3 (str(category), a space, str(buff)).
        print('%s %s' % (category, buff))

...全文

172 4 打赏收藏转发到动态举报

写回复

用AI写文章

4 条回复

切换为时间正序

请发表友善的回复…

发表回复

Rlay_2 2012-06-07

打赏
举报

函数里面用了字典的 setdefault 方法，把 IP 相同的内容放到同一个 list 里面去。给个例子：



    from time import ctime

    IP = ['10.85.141.84', '10.85.141.85', '10.85.141.84', '10.85.141.85', '10.85.141.86']

    # Collect one ctime() string per occurrence under each distinct address.
    ipdict = {}
    for ip in IP:
        # setdefault() returns the list already stored under ip, inserting a
        # new empty list first when the key is absent.
        ipdict.setdefault(ip, []).append(ctime())

    # The parenthesised single-argument form works on Python 2 and Python 3.
    print(ipdict)



结果:

{'10.85.141.86': ['Thu Jun 07 15:52:47 2012'], '10.85.141.84': ['Thu Jun 07 15:52:47 2012', 'Thu Jun 07 15:52:47 2012'], '10.85.141.85': ['Thu Jun 07 15:52:47 2012', 'Thu Jun 07 15:52:47 2012']}

tim_spac_126 2012-06-07

打赏
举报

#!/usr/bin/python

# encoding: utf-8



# Embedded sample capture used as input by each().  The literal also holds
# blank lines and one line of plain text; each() skips every line that does
# not match ``patt``.
content = '''

> 13:39:41.990623 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [S]

> 13:39:41.990650 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:41.990652 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016317 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016322 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016324 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016774 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016878 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016886 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016888 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016891 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.016894 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.018229 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]

> 13:39:42.372091 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]

> 13:39:42.699621 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]

> 13:39:42.710105 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]

> 13:39:42.710353 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [P.]

> 13:39:42.721820 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [.]

> 13:39:42.721851 IP 192.168.90.20.1097 > 113.107.45.134.80: Flags [S]

> 13:39:42.755950 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]

> 13:39:42.756030 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [P.]

> 13:39:43.137791 IP 192.168.90.20.1096 > 65.54.51.253.443: Flags [.]



最后输出ip x.x.x.x > x.x.x.x 时间差



'''

import re

import datetime

import sys



# Pattern for one capture line of the form
#   "> HH:MM:SS.ffffff IP a.b.c.d.port > a.b.c.d.port: Flags [X]"
# Named groups:
#   dt       -- the time-of-day stamp, kept as text
#   category -- "source > destination", each five dot-separated digit runs
#   flag     -- the text between the square brackets
# The flag group accepts one or more characters up to the closing bracket,
# so multi-character values such as "P." match.  A single-character class
# rejects those lines entirely, which makes the parser drop them.
# Compiled in verbose mode (re.X): whitespace inside the literal is ignored.
patt = re.compile(r'''\>\s
    (?P<dt>\d{2}:\d{2}:\d{2}\.\d+)\sIP\s
    (?P<category>\d+(\.\d+){4}\s\>\s\d+(\.\d+){4})\:\sFlags\s
    \[(?P<flag>[^\]]+)\]
    ''', re.I|re.U|re.X)



def each():
    """Yield the named groups of every line of ``content`` that matches ``patt``.

    Parses the embedded sample data; depending on the real use case this can
    be changed to take a file name and read the lines from that file.
    """
    for line in content.splitlines():
        hit = patt.match(line)
        if hit:
            yield hit.groupdict()



def datagroup(records=None):
    """Split parsed records into segments and group each segment by category.

    A new segment starts at every record whose flag is exactly ``'S'``.
    Within a segment the ``dt`` values are collected per ``category`` in
    input order.  Records that arrive before the first ``'S'`` are placed in
    an initial segment of their own; without that guard the first such
    record would index an empty list and raise ``IndexError``.

    :param records: optional iterable of dicts with ``flag``, ``category``
        and ``dt`` keys; when omitted, the output of ``each()`` is used.
    :return: list of dicts, one per segment, each mapping a category to the
        list of its ``dt`` values.
    """
    if records is None:
        records = each()
    collections = []
    for d in records:
        # Open a segment on every [S], and also when none exists yet.
        if d['flag'] == 'S' or not collections:
            collections.append({})
        # Gather the time stamps of identical "source > destination" pairs.
        collections[-1].setdefault(d['category'], []).append(d['dt'])
    return collections



def static():
    """Print, per segment, every category with its smallest and largest stamp.

    For each segment returned by ``datagroup()`` one line per category is
    written as ``category<TAB>min ~ max``, followed by a separator line of
    32 dashes.  Each print call passes a single pre-formatted string, so the
    output is the same on Python 2 and Python 3.
    """
    for dl in datagroup():
        for category, timepoints in dl.items():
            # min()/max() compare the stamps as text.
            # NOTE(review): that equals chronological order only while all
            # stamps have the same width and fall within one day -- confirm
            # for the real input.
            print('%s\t%s ~ %s' % (
                category, min(timepoints), max(timepoints)
            ))
        print('-' * 32)



# Run the report only when this file is executed as a script.
if __name__ == '__main__':

    static()

>python -u "test.py"
192.168.90.20.1096 > 65.54.51.253.443 13:39:41.990623 ~ 13:39:42.372091
--------------------------------
192.168.90.20.1097 > 113.107.45.134.80 13:39:42.699621 ~ 13:39:42.721820
--------------------------------
192.168.90.20.1096 > 65.54.51.253.443 13:39:43.137791 ~ 13:39:43.137791
192.168.90.20.1097 > 113.107.45.134.80 13:39:42.721851 ~ 13:39:42.721851
--------------------------------
>Exit code: 0 Time: 0.090

ai527518 2012-06-07

打赏
举报

呵呵，用 MySQL 自带的函数搞定了，还是谢谢楼上。

bugs2k 2012-06-07

打赏
举报

#!/usr/bin/env python



from datetime import datetime, timedelta



# Report the elapsed time between successive [S] lines of the same
# "source => destination" pair found in data.txt.
with open('data.txt') as fd:
    adict = {}  # pair -> time of the most recent [S] line seen for that pair
    for line in fd:
        data = line.split()
        # Field layout, going by the sample lines in this thread:
        #   '>', time, 'IP', source, '>', destination + ':', 'Flags', flag
        # Blank or truncated lines are skipped so the indexing below cannot
        # raise IndexError.
        if len(data) < 8:
            continue
        ntime, sip, dip, flag = data[1], data[3], data[5], data[7]
        if flag != '[S]':
            continue
        # The destination field still carries the ':' that follows it in the
        # capture line; strip it so it does not end up in the printed key.
        akey = "%s => %s" % (sip, dip.rstrip(':'))
        ntime = datetime.strptime(ntime, '%H:%M:%S.%f')
        if akey in adict:
            # A single pre-formatted argument prints the same text on
            # Python 2 and Python 3.
            print('%s %s' % (akey, ntime - adict[akey]))
        adict[akey] = ntime