1. 程式人生 > >python-re模組(92)

python-re模組(92)


>>> import re
>>> m = re.match('f..','food')  # 匹配到返回物件,匹配開頭
>>> print(re.match('f..','seafood'))    #匹配不到返回None
None
>>> m.group()   #返回匹配的值
'foo'
>>> m = re.search('f..','seafood')  # search 可在字串任意位置匹配,不限於開頭
>>> m.group()
'foo'
>>> re.findall('f..','seafood is food') #返回所有匹配項組成的列表
['foo', 'foo']

>>> result = re.finditer('f..','seafood is food')   # 返回匹配物件組成的迭代器
>>> for m in result:    # 從迭代器中逐個取出匹配物件
...     print(m.group())
...
foo
foo

>>> re.sub('f..','abc','fish is food')  #相當於匹配後替換
'abch is abcd'
>>> re.split('\.|-','hello-word-.tar.gz')   #切割,用.和-做切割符號
['hello', 'word', '', 'tar', 'gz']

>>> patt = re.compile('f..')    # 先把要匹配的模式編譯,提升效率
>>> m = patt.search('seafood')  # 指定在哪個字串中匹配
>>> m.group()
'foo'



#匹配檔案中指定模式
import re
def count_patt(fname, patt):
    """Count how often each matched text of ``patt`` occurs in a file.

    Every line of the file is searched once, so at most one match (the
    first one found on the line) is counted per line.

    Args:
        fname: Path of the text file to scan.
        patt: Regular expression handed to ``re.compile``.

    Returns:
        A plain dict mapping each matched text to the number of lines on
        which it was the first match. Lines without a match are ignored.
    """
    find = re.compile(patt).search
    tally = {}

    with open(fname) as log:
        for hit in map(find, log):
            # ``search`` returns None for a line that does not match.
            if hit is None:
                continue
            text = hit.group()
            if text in tally:
                tally[text] += 1
            else:
                tally[text] = 1

    return tally

if __name__ == '__main__':
    # Demo: tally client IPs and browsers found in an Apache access log.
    fname = 'access_log'    # Apache log file, relative to the working directory
    # Raw string: in a normal literal '\d' and '\.' are invalid escape
    # sequences, which current Python versions warn about.
    ip = r'^(\d+\.){3}\d+'  # IP address at the start of a log line
    print(count_patt(fname, ip))
    br = 'Firefox|MSIE|Chrome'  # client browser named in the log line
    print(count_patt(fname, br))




#模式匹配進階寫法

import re
from collections import Counter     # Counter is a dict subclass for tallying; most_common() returns entries sorted by count

class CountPatt:
    """Count regular-expression matches in a single text file.

    The file path is fixed at construction time; every call to
    ``count_patt`` re-reads that file with the pattern it is given.
    """

    def __init__(self, fname):
        """Remember the path of the file to scan.

        Args:
            fname: Path of the text file. It is not opened until
                ``count_patt`` is called.
        """
        self.fname = fname

    def count_patt(self, patt):
        """Tally the first match of ``patt`` on every line of the file.

        Args:
            patt: Regular expression handed to ``re.compile``.

        Returns:
            A ``collections.Counter`` mapping each matched text to the
            number of lines on which it was the first match. Lines
            without a match are ignored.
        """
        cpatt = re.compile(patt)
        result = Counter()

        # Open the path stored on the instance. The earlier
        # ``open(self, fname)`` passed the instance itself as the path and
        # never used ``self.fname``, so the method could not run.
        with open(self.fname) as fobj:
            for line in fobj:
                m = cpatt.search(line)  # None when the line does not match
                if m:
                    result[m.group()] += 1

        return result

if __name__ == '__main__':
    # Demo: tally client IPs and browsers found in an Apache access log.
    # The class is named CountPatt; the earlier 'CounterPatt' spelling
    # raised NameError before anything was counted.
    c = CountPatt('access_log')
    # Raw string: in a normal literal '\d' and '\.' are invalid escape
    # sequences, which current Python versions warn about.
    ip = r'^(\d+\.){3}\d+'  # IP address at the start of a log line
    br = 'Firefox|MSIE|Chrome'  # client browser named in the log line
    a = c.count_patt(ip)
    print(a)
    print(a.most_common(3))     # the three most frequent entries
    print(c.count_patt(br))