1. 程式人生 > >python----使用re正則表示式刷選資料,去重,列表,取特定行資料(適用於web的html回包資料提取)

python----使用re正則表示式篩選資料,去重,列表,取特定行資料(適用於web的html回包資料提取)

python----使用re正則表示式篩選資料,去重,列表,取特定行資料(適用於web的html回包資料提取)

環境配置:對目標伺服器(192.168.4.27)的日誌檔案篩選特定資料,日誌檔案路徑如下:
/usr/local/tomcat_corp/logs/catalina.out

python指令碼必須在該伺服器上執行

1、篩選銀行卡欄位bankCode=

python程式碼:
[root@cdn tmp]# ls
findbankid_back_before.py  findbankid.py  findemail.py  findidno.py  findmobile.py  findreadlname.py
[root@cdn tmp]# 
[root@cdn tmp]# cat findbankid_back_before.py
#!/usr/nbin/python
# --*-- coding:utf-8 --*--
import re

lastlist = []
logyzm = open("/usr/local/tomcat_corp/logs/catalina.out").read()
#print logyzm
temp = logyzm.decode("utf8")
findword = r'.{75}bankCode=.{100}'#取該字串前75個字元以及其後面100個字元資料
pattern = re.compile(findword)
results = re.findall(pattern,temp)
for result in results:
    #print result
    lastlist.append(result)

list = set(lastlist)#對重複資料進行去重處理
for l in list:
    print l
[root@cdn tmp]#
指令碼執行情況:
[root@cdn tmp]# python findbankid_back_before.py
..............................
.............................
bjectDTO [t=[com.dinpay
.dpp.domain.system.config.BankGateway@*****[id=3,bankCode=CCB,bankAccount=62148502********,rate=0.0,name=建設銀行,status=1,remark=<null>,defaultFlag=0,maxLimitAmo uency=0], com.dinpay.dpp.domain.system.config.BankGateway@*****[id=1002,bankCode=SPABANK,bankAccount=01120004********,rate=0.0,name=深圳平安銀企直連代付,status=1,remark=<null>,defaultFlag=0, tDTO [t=[com.dinpay.dpp.domain.system.config.PayChannel@*****[id=<null>,bankCode=GDB,chargeType=<null>,rate=<null>,dinpayRate=<null>,name=廣東發展銀行,status=<null>,remark=<null>,remark2=

2、篩選email郵箱地址

python程式碼:
[root@cdn tmp]# cat findemail.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print the unique log excerpts that surround every ``bindEmail`` field.

The Tomcat log is scanned for the literal text ``bindEmail``; each hit is
reported together with the 100 characters before it and the 90 characters
after it.  Duplicate excerpts are printed only once, in the order in which
they first appear in the log.

The script runs unchanged on Python 2 and Python 3.
"""
import io
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"

# 100 characters of leading context, the keyword, 90 characters of trailing
# context.  "." never matches a newline, so a match cannot span two lines.
PATTERN = re.compile(r'.{100}bindEmail.{90}')


def unique_matches(lines, pattern=PATTERN):
    """Return every distinct match of *pattern* in *lines*, first seen first.

    lines   -- iterable of text lines (for example an open text file).
    pattern -- compiled regular expression applied to each line.

    Because the pattern cannot cross a newline, matching line by line finds
    the same excerpts as matching the whole file at once, without holding
    the entire log in memory.
    """
    seen = set()
    ordered = []
    for line in lines:
        for hit in pattern.findall(line):
            if hit not in seen:
                seen.add(hit)
                ordered.append(hit)
    return ordered


def main(path=LOG_PATH):
    """Scan the log file at *path* and print each unique excerpt."""
    # io.open gives the same decoding text-mode API on Python 2 and 3.
    # errors="replace" keeps one corrupt byte from aborting the whole scan;
    # newline="\n" leaves any "\r" in the data untouched, as the original
    # byte-oriented read did.
    with io.open(path, encoding="utf-8", errors="replace", newline="\n") as log:
        for hit in unique_matches(log):
            print(hit)


if __name__ == "__main__":
    main()
[root@cdn tmp]#
程式碼執行情況:
[root@cdn tmp]# python findemail.py
anageController toFindPayPwdByCard memberObjectResponse:MemberDetailResponse [memberId=137****1580, bindEmail=null, bindMobile=137*****1580, companyName=李*, certificationType=1, createDate=Tue Dec 19
ankCardController toBankCardManage memberObjectResponse:MemberDetailResponse [memberId=186****3214, bindEmail=null, bindMobile=186*****3214, companyName=聶*平, certificationType=1, createDate=Thu May 0
eController toAccountManage memberObjectResponse:MemberDetailResponse [memberId=*****@163.com, bindEmail=ssh*****.com, bindMobile=137*****4764, companyName=沈*, certificationType=1, createDate=Tu

3、篩選身份證號碼

python程式碼:
[root@cdn tmp]# cat findidno.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print the unique log excerpts that surround every ``certNum`` field.

The Tomcat log is scanned for the literal text ``certNum``; each hit is
reported together with the 100 characters before it and the 20 characters
after it.  Duplicate excerpts are printed only once, in the order in which
they first appear in the log.

The script runs unchanged on Python 2 and Python 3.
"""
import io
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"

# 100 characters of leading context, the keyword, 20 characters of trailing
# context.  "." never matches a newline, so a match cannot span two lines.
PATTERN = re.compile(r'.{100}certNum.{20}')


def unique_matches(lines, pattern=PATTERN):
    """Return every distinct match of *pattern* in *lines*, first seen first.

    lines   -- iterable of text lines (for example an open text file).
    pattern -- compiled regular expression applied to each line.

    Because the pattern cannot cross a newline, matching line by line finds
    the same excerpts as matching the whole file at once, without holding
    the entire log in memory.
    """
    seen = set()
    ordered = []
    for line in lines:
        for hit in pattern.findall(line):
            if hit not in seen:
                seen.add(hit)
                ordered.append(hit)
    return ordered


def main(path=LOG_PATH):
    """Scan the log file at *path* and print each unique excerpt."""
    # io.open gives the same decoding text-mode API on Python 2 and 3.
    # errors="replace" keeps one corrupt byte from aborting the whole scan;
    # newline="\n" leaves any "\r" in the data untouched, as the original
    # byte-oriented read did.
    with io.open(path, encoding="utf-8", errors="replace", newline="\n") as log:
        for hit in unique_matches(log):
            print(hit)


if __name__ == "__main__":
    main()
[root@cdn tmp]#
程式碼執行情況:
[root@cdn tmp]# python findidno.py 
l, address=null, supportBalance=1, bankCode=CCB, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4*****************3
l, address=null, supportBalance=1, bankCode=ABC, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4****************2]
l, address=null, supportBalance=1, bankCode=ABC, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4*****************3
, address=null, supportBalance=1, bankCode=ICBC, auditStatus=null, authStatus=null, isEnterprise=0, certNum=4******************
l, address=null, supportBalance=1, bankCode=CMB, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4****************X]
, address=null, supportBalance=1, bankCode=ICBC, auditStatus=null, authStatus=null, isEnterprise=1, certNum=4****************X]

4、篩選手機號碼

python程式碼:
[root@cdn tmp]# cat  findmobile.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print the unique log excerpts that surround every ``bindMobile`` field.

The Tomcat log is scanned for the literal text ``bindMobile``; each hit is
reported together with the 100 characters before it and the 65 characters
after it.  Duplicate excerpts are printed only once, in the order in which
they first appear in the log.

The script runs unchanged on Python 2 and Python 3.
"""
import io
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"

# 100 characters of leading context, the keyword, 65 characters of trailing
# context.  "." never matches a newline, so a match cannot span two lines.
PATTERN = re.compile(r'.{100}bindMobile.{65}')


def unique_matches(lines, pattern=PATTERN):
    """Return every distinct match of *pattern* in *lines*, first seen first.

    lines   -- iterable of text lines (for example an open text file).
    pattern -- compiled regular expression applied to each line.

    Because the pattern cannot cross a newline, matching line by line finds
    the same excerpts as matching the whole file at once, without holding
    the entire log in memory.
    """
    seen = set()
    ordered = []
    for line in lines:
        for hit in pattern.findall(line):
            if hit not in seen:
                seen.add(hit)
                ordered.append(hit)
    return ordered


def main(path=LOG_PATH):
    """Scan the log file at *path* and print each unique excerpt."""
    # io.open gives the same decoding text-mode API on Python 2 and 3.
    # errors="replace" keeps one corrupt byte from aborting the whole scan;
    # newline="\n" leaves any "\r" in the data untouched, as the original
    # byte-oriented read did.
    with io.open(path, encoding="utf-8", errors="replace", newline="\n") as log:
        for hit in unique_matches(log):
            print(hit)


if __name__ == "__main__":
    main()
[root@cdn tmp]# 
程式碼執行情況:
[root@cdn tmp]# python findmobile.py
oller setMemberExtInfo:MemberDetailResponse [memberId=*****@163.com, bindEmail=464*****.com, bindMobile=null, companyName=聶*平, certificationType=1, createDate=Thu Jun 2
er toAccountManage memberObjectResponse:MemberDetailResponse [memberId=131****8888, bindEmail=null, bindMobile=131*****8888, companyName=陳*榮2, certificationType=1, createDate=
-MemberLoginController setMemberExtInfo:MemberDetailResponse [memberId=131****8888, bindEmail=null, bindMobile=861*****1066, companyName=陳*榮, certificationType=1, createDate=S
-MemberLoginController setMemberExtInfo:MemberDetailResponse [memberId=131****8888, bindEmail=null, bindMobile=153*****6761, companyName=陳*榮, certificationType=0, createDate=S

5、篩選姓名

python程式碼:
[root@cdn tmp]# cat findreadlname.py 
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Print the unique log excerpts that surround every ``realName=`` field.

The Tomcat log is scanned for the literal text ``realName=``; each hit is
reported together with the 100 characters before it and the 90 characters
after it.  Duplicate excerpts are printed only once, in the order in which
they first appear in the log.

The script runs unchanged on Python 2 and Python 3.
"""
import io
import re

LOG_PATH = "/usr/local/tomcat_corp/logs/catalina.out"

# 100 characters of leading context, the keyword, 90 characters of trailing
# context.  "." never matches a newline, so a match cannot span two lines.
PATTERN = re.compile(r'.{100}realName=.{90}')


def unique_matches(lines, pattern=PATTERN):
    """Return every distinct match of *pattern* in *lines*, first seen first.

    lines   -- iterable of text lines (for example an open text file).
    pattern -- compiled regular expression applied to each line.

    Because the pattern cannot cross a newline, matching line by line finds
    the same excerpts as matching the whole file at once, without holding
    the entire log in memory.
    """
    seen = set()
    ordered = []
    for line in lines:
        for hit in pattern.findall(line):
            if hit not in seen:
                seen.add(hit)
                ordered.append(hit)
    return ordered


def main(path=LOG_PATH):
    """Scan the log file at *path* and print each unique excerpt."""
    # io.open gives the same decoding text-mode API on Python 2 and 3.
    # errors="replace" keeps one corrupt byte from aborting the whole scan;
    # newline="\n" leaves any "\r" in the data untouched, as the original
    # byte-oriented read did.
    with io.open(path, encoding="utf-8", errors="replace", newline="\n") as log:
        for hit in unique_matches(log):
            print(hit)


if __name__ == "__main__":
    main()
[root@cdn tmp]# 
程式碼執行情況:

[root@cdn tmp]# python findreadlname.py
,rgeRecordVO [rechargeDateStr=2017-11-20 16:35:18, dealDateStr=2017-11-20 16:35:18, transferType=充值, realName=陳*榮, memberId=q******[email protected], getSerialno()=21686, getAccountId()=35700*****, getRechar
rgeRecordVO [rechargeDateStr=2018-01-17 11:53:41, dealDateStr=2018-01-17 11:53:41, transferType=充值, realName=聶*平, memberId=j**********[email protected], getSerialno()=22012, getAccountId()=25800*****, getRec
rgeRecordVO [rechargeDateStr=2018-04-23 15:39:57, dealDateStr=2018-04-23 15:39:57, transferType=充值, realName=徐*波, memberId=b***********[email protected], getSerialno()=22191, getAccountId()=10000000*****, 
rgeRecordVO [rechargeDateStr=2017-04-26 16:54:14, dealDateStr=2017-04-26 16:54:14, transferType=充值, realName=田*君, memberId=b******[email protected], getSerialno()=19996, getAccountId()=10100*****, getRecharg
rgeRecordVO [rechargeDateStr=2017-11-17 09:39:10, dealDateStr=2017-11-17 09:39:10, transferType=充值, realName=深*店, memberId=5*******[email protected], getSerialno()=21616, getAccountId()=10000000*****, getRec
ordVO [rechargeDateStr=2017-09-19 17:15:32, dealDateStr=2017-09-19 17:15:32, transferType=Recharge, realName=聶*平, memberId=j**********[email protected], getSerialno()=21239, getAccountId()=100000000*****, g
ordVO [rechargeDateStr=2017-11-20 16:17:49, dealDateStr=2017-11-20 16:17:49, transferType=Recharge, realName=深*店, memberId=q******[email protected], getSerialno()=21683, getAccountId()=35700*****, getRechar