程式人生 > python解析Nginx訪問日誌

python解析Nginx訪問日誌

環境說明

python3+

pip install geoip2==2.9.0

nginx日誌配置成json格式,配置如下:

# Access-log format that writes one JSON object per request.
# Every quoted fragment is closed on the line where it opens: a line break
# inside a fragment would be copied into the log and split a record across
# several lines, which breaks line-by-line JSON parsing.
# NOTE: on nginx >= 1.11.8 consider `log_format json_log escape=json ...` so
# that quotes/backslashes inside values (e.g. the user agent) stay valid JSON.
log_format json_log '{ "time": "$time_local", '
                    '"remote_addr": "$remote_addr", '
                    '"remote_user": "$remote_user", '
                    '"body_bytes_sent": "$body_bytes_sent", '
                    '"request_time": "$request_time", '
                    '"status": "$status", '
                    '"request": "$request", '
                    '"request_method": "$request_method", '
                    '"http_referrer": "$http_referer", '
                    '"http_x_forwarded_for": "$http_x_forwarded_for", '
                    '"http_user_agent": "$http_user_agent" }';
配置日誌成json格式

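生成的日誌如下(每個請求一行JSON,以下為範例):

{ "time": "29/Nov/2018:10:12:33 +0800", "remote_addr": "203.0.113.7", "remote_user": "-", "body_bytes_sent": "612", "request_time": "0.003", "status": "200", "request": "GET / HTTP/1.1", "request_method": "GET", "http_referrer": "-", "http_x_forwarded_for": "-", "http_user_agent": "curl/7.61.0" }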

配置指令碼

# encoding: utf-8
"""Summarise an nginx access log written as one JSON object per line.

Usage: python logganalysis.py <access-log>

Each record is expected to carry at least the keys ``time``
(nginx ``$time_local``), ``remote_addr``, ``status`` and ``body_bytes_sent``.
"""
import json
import os
import sys
from datetime import datetime
from operator import itemgetter

try:
    from geoip2.database import Reader
except ImportError:
    # Only stat_region() needs geoip2; keep the other helpers importable.
    Reader = None

BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# $time_local looks like "29/Nov/2018:10:12:33 +0800"; only the part before
# the UTC offset is parsed, so the day is the server-local day as logged.
TIME_FORMAT = '%d/%b/%Y:%H:%M:%S'


def _parse_record(line):
    """Return ``(day, ip, status, sent_bytes)`` for one log line.

    Returns ``None`` when the line is not valid JSON or lacks a usable field,
    so the caller can skip it without touching any counters.
    """
    try:
        record = json.loads(line)
        stamp = datetime.strptime(record['time'].split()[0], TIME_FORMAT)
        ip = record['remote_addr']
        status = record['status']
        sent = record['body_bytes_sent']
        # nginx may log a non-numeric placeholder; count it as 0 bytes.
        sent_bytes = int(sent) if sent.isdigit() else 0
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        return None
    return stamp.strftime('%Y-%m-%d'), ip, status, sent_bytes


def stat_days(infile):
    """Aggregate the log per day.

    :param infile: path of the JSON-lines access log (UTF-8).
    :return: list of ``(day, stats)`` tuples sorted by day, where ``day`` is
        ``YYYY-MM-DD`` and ``stats`` is
        ``{'hits': int, 'vistors': {ip: count}, 'status': {code: count},
        'bytes': int}``.

    Blank lines are ignored. Malformed lines are skipped and their number is
    reported once on stderr.
    """
    day_data = {}
    skipped = 0
    # Iterate the file lazily instead of loading the whole log into memory.
    with open(infile, 'r', encoding="utf-8") as fhandler:
        for line in fhandler:
            line = line.strip()
            if not line:
                continue
            parsed = _parse_record(line)
            if parsed is None:
                skipped += 1
                continue
            day, ip, status, sent_bytes = parsed
            stat = day_data.setdefault(
                day, {'hits': 0, 'vistors': {}, 'status': {}, 'bytes': 0})
            stat['hits'] += 1
            stat['vistors'][ip] = stat['vistors'].get(ip, 0) + 1
            stat['status'][status] = stat['status'].get(status, 0) + 1
            stat['bytes'] += sent_bytes
    if skipped:
        print('skipped {} malformed line(s) in {}'.format(skipped, infile),
              file=sys.stderr)
    return sorted(day_data.items(), key=itemgetter(0))


def stat_total(days):
    """Merge the per-day statistics into one overall summary.

    :param days: iterable of ``(day, stats)`` as returned by ``stat_days``.
    :return: a dict with the same shape as a single day's ``stats``.
    """
    total_data = {'hits': 0, 'vistors': {}, 'status': {}, 'bytes': 0}
    for _day, _stat in days:
        total_data['hits'] += _stat['hits']
        total_data['bytes'] += _stat['bytes']
        for _ip, _cnt in _stat['vistors'].items():
            total_data['vistors'][_ip] = total_data['vistors'].get(_ip, 0) + _cnt
        for _status, _cnt in _stat['status'].items():
            total_data['status'][_status] = total_data['status'].get(_status, 0) + _cnt
    return total_data


def stat_region(total_data):
    """Count hits per "country/city" using the MaxMind GeoLite2 City database.

    :param total_data: summary from ``stat_total``; only ``'vistors'`` is read.
    :return: dict mapping ``"<country>/<city>"`` (zh-CN names, empty when the
        database has none) to the number of hits from that place.
    :raises ImportError: if the ``geoip2`` package is not installed.

    The database is read from ``<script dir>/db/GeoLite2-City.mmdb``.
    """
    if Reader is None:
        raise ImportError('geoip2 is required for region statistics '
                          '(pip install geoip2)')
    region_data = {}
    geoip2_reader = Reader(os.path.join(BASE_DIR, 'db', 'GeoLite2-City.mmdb'))
    try:
        for _ip, _cnt in total_data['vistors'].items():
            try:
                _city = geoip2_reader.city(_ip)
            except Exception as err:
                # Best effort: an address that is invalid or missing from the
                # database must not abort the whole report.
                print(err)
                continue
            _city_name = '{}/{}'.format(_city.country.names.get('zh-CN', ''),
                                        _city.city.names.get('zh-CN', ''))
            region_data[_city_name] = region_data.get(_city_name, 0) + _cnt
    finally:
        # Always release the database file, even on an unexpected error.
        geoip2_reader.close()
    return region_data


def formatSize(bytes):
    """Format a byte count as a string in K, M or G (1024-based).

    The parameter keeps its original name ``bytes`` for compatibility with
    existing callers, although it shadows the builtin inside this function.
    """
    kb = float(bytes) / 1024
    if kb < 1024:
        return "{} K".format(kb)
    mb = kb / 1024
    if mb < 1024:
        return "{} M".format(mb)
    return "{} G".format(mb / 1024)


def main(infile):
    """Print totals, the top 15 regions, the top 15 IPs and the status codes."""
    day_data = stat_days(infile)        # per-day statistics
    total_data = stat_total(day_data)   # overall statistics
    region_data = sorted(stat_region(total_data).items(),
                         key=itemgetter(1), reverse=True)
    status_data = total_data['status']
    access_num = total_data['hits']
    ip_num = len(total_data['vistors'])
    ip_detail = sorted(total_data['vistors'].items(),
                       key=itemgetter(1), reverse=True)
    traffic = formatSize(total_data['bytes'])

    print("""
總訪問量: {}
總IP數: {}
總流量: {}
""".format(access_num, ip_num, traffic))

    print('\n-------Top 15 地區訪問分佈-------')
    for region in region_data[0:15]:
        print("{}:{}".format(region[0], region[1]))

    print('\n-------Top 15 ip訪問-------')
    for ip in ip_detail[0:15]:
        print("{} {}".format(ip[0], ip[1]))

    print('\n-------狀態碼情況-------')
    for code, cnt in status_data.items():
        print("{} {}".format(code, cnt))


if __name__ == "__main__":
    # Read the log path here rather than at import time, so importing the
    # module (or running it without arguments) does not raise IndexError.
    if len(sys.argv) < 2:
        sys.exit('usage: {} <nginx-json-access-log>'.format(sys.argv[0]))
    logfile = sys.argv[1]
    main(logfile)
logganalysis.py