1. 程式人生 > >利用python找出那些被“標記”的照片

利用python找出那些被“標記”的照片

利用python找出那些被“標記”的照片

 

環境準備

下面的兩個第三方模組都可以直接通過pip快速安裝,這裡使用py36作為執行環境。

  • python3.6
  • requests
  • exifread

思路

  1. 遍歷目錄
  2. 拉取資料集合
  3. 遍歷集合取得exif
  4. exif資訊整理,並獲取實體地址
  5. 拷貝檔案到結果樣本目錄
  6. 生成json報告檔案

基礎知識

下面是現今相片中會存在的與GPS相關的關鍵字,大牛亦可一筆帶過~ [參考]

{
 "GPSVersionID": "GPS版本",
 "GPSLatitudeRef": "南北緯",
 "GPSLatitude": "緯度",
 "GPSLongitudeRef": "東西經",
 "GPSLongitude": "經度",
 "GPSAltitudeRef": "海拔參照值",
 "GPSAltitude": "海拔",
 "GPSTimeStamp": "GPS時間戳",
 "GPSSatellites": "測量的衛星",
 "GPSStatus": "接收器狀態",
 "GPSMeasureMode": "測量模式",
 "GPSDOP": "測量精度",
 "GPSSpeedRef": "速度單位",
 "GPSSpeed": "GPS接收器速度",
 "GPSTrackRef": "移動方位參照",
 "GPSTrack": "移動方位",
 "GPSImgDirectionRef": "影象方位參照",
 "GPSImgDirection": "影象方位",
 "GPSMapDatum": "地理測量資料",
 "GPSDestLatitudeRef": "目標緯度參照",
 "GPSDestLatitude": "目標緯度",
 "GPSDestLongitudeRef": "目標經度參照",
 "GPSDestLongitude": "目標經度",
 "GPSDestBearingRef": "目標方位參照",
 "GPSDestBearing": "目標方位",
 "GPSDestDistanceRef": "目標距離參照",
 "GPSDestDistance": "目標距離",
 "GPSProcessingMethod": "GPS處理方法名",
 "GPSAreaInformation": "GPS區域名",
 "GPSDateStamp": "GPS日期",
 "GPSDifferential": "GPS修正"
}

初始化

考慮到exifread模組中有大量的logging輸出,這裡將它的level級別調到最高。然後下邊的KEY是某站在呼叫高德地圖API的時候遺留下來的,我也很尷尬。。就當福利了

import json
import logging
import os
import random
import shutil
import time
from fractions import Fraction

import exifread
import requests
# Raise the root logging threshold to CRITICAL so that lower-severity
# records (the article notes exifread emits many) are not printed.
logging.basicConfig(level=logging.CRITICAL)
# AMap (Gaode) Web API key used by address(). It can be overridden through
# the AMAP_KEY environment variable; the literal below is the original
# default and is kept for backward compatibility.
# NOTE(review): a key committed to source should be treated as public.
KEY = os.environ.get("AMAP_KEY") or "169d2dd7829fe45690fabec812d05bc3"

主邏輯函式

def main():
    """Interactively scan a directory tree for geotagged photos and export them.

    Prompts for a save-folder name and a search path, collects the record
    returned by ``getEXIF`` for every image found by ``jpgwalk``, prints the
    number of hits, and, if there are any, writes them to
    ``<save>/<save>.json`` and copies each matching image into ``<save>``.
    """
    # Default list of image extensions to look for
    types = ["bmp", "jpg", "tiff", "gif", "png"]
    # Result records, one dict per geotagged image
    picex = []
    # Output folder name; it is prefixed with "$" because jpgwalk skips any
    # directory whose path contains "$", so earlier results are not rescanned
    saves = "$" + input("| SavePath: ").strip()
    # Search root: walk it and get the list of candidate image paths
    pools = jpgwalk(input("| FindPath: "), types)
    # Absolute path of the output folder, normalised to forward slashes
    savep = "%s/%s" % (os.getcwd().replace("\\", "/"), saves)
    if savep in pools:
        pools.remove(savep)
    # Read the EXIF data of every candidate; getEXIF returns a falsy value
    # for files without a usable position
    for path in pools:
        res = getEXIF(path)
        if res:
            picex.append(res)
    # Report the number of hits
    print("| Result %s" % len(picex))
    if not picex:
        return
    # Create the output folder (no error if it already exists)
    os.makedirs(saves, exist_ok=True)
    # Write the report as UTF-8 JSON with 4-space indentation
    with open("%s/%s.json" % (saves, saves), "wb") as f:
        f.write(json.dumps(picex, ensure_ascii=False, indent=4).encode("utf8"))
    # Copy every matching image into the output folder; copyfile streams the
    # data instead of loading the whole image into memory.
    # NOTE: images sharing a base name overwrite each other in the output.
    for item in picex:
        source_path = item["Filename"]
        shutil.copyfile(
            source_path, "%s/%s" % (saves, source_path.split("/")[-1]))

遍歷方法

遍歷指定及其所有下級目錄,並返回全部的圖片的路徑集合,這裡要注意的是每次掃描後的拷貝行為都會生成快取,所以通過指定 $ 來避開。

def jpgwalk(path, types):
    """Return the paths of all image files under ``path`` (recursively).

    Args:
        path: Directory to walk.
        types: Collection of lower-case file extensions (without the dot)
            that count as images.

    Returns:
        A list of file paths using "/" as separator. Directories whose path
        contains "$" are skipped, as are files without an extension.

    Prints the number of files found and the elapsed time.
    """
    _start = time.time()
    _pools = []
    for _root, _dirs, _files in os.walk(path):
        # "$" marks result folders created by earlier runs; skip them.
        # This also skips everything if the search path itself contains "$".
        if "$" in _root:
            continue
        _prefix = _root.replace("\\", "/") + "/"
        # Require a real extension so a file literally named "jpg" is not
        # mistaken for an image
        _pools.extend(
            _prefix + _item
            for _item in _files
            if "." in _item and _item.rsplit(".", 1)[-1].lower() in types
        )
    # Report count and elapsed time
    print("| Find %s \n| Time %.3fs" % (len(_pools), time.time() - _start))
    return _pools

經緯度格式化

度分秒轉浮點,方便api呼叫查詢,因為存在一些詭異的資料比如 1/0,所以預設返回0

def cg(i):
    """Convert a degrees/minutes/seconds string to decimal degrees.

    Args:
        i: Text of the form ``"[deg, min, sec]"`` where each component is a
            number or a ratio such as ``"2099/100"``. The first and last
            characters are treated as brackets and discarded.

    Returns:
        ``deg + min / 60 + sec / 3600`` as a float, or 0 when a component
        has a zero denominator (e.g. ``1/0``), cannot be parsed, or fewer
        than three components are present.
    """
    try:
        # Fraction parses "a/b" and plain numbers without evaluating the
        # text as Python code (the value comes from file metadata)
        _ii = [float(Fraction(x)) for x in i[1:-1].split(",")]
        return _ii[0] + _ii[1] / 60 + _ii[2] / 3600
    except (ZeroDivisionError, ValueError, IndexError):
        return 0
EXIF資訊整理

考慮到大部分的裝置還未開始支援朝向、速度、測量依據等關鍵字,這裡暫時只使用比較常見的,如有需要的朋友可以自行新增。畢竟得到的資訊越多對社工有更大的幫助。

def getEXIF(filepath):
    """Collect the GPS-related EXIF information of one image file.

    Args:
        filepath: Path of the image to inspect.

    Returns:
        A dict with ``Filename``, the present tags from the basic list
        (keyed by tag name without its group prefix), ``GPS`` as a
        ``(latitude, longitude)`` tuple in signed decimal degrees,
        ``address`` as returned by ``address()``, and ``Dates`` when a
        timestamp tag exists. Returns ``None`` when exifread raises
        ``KeyError``, when latitude or longitude is missing, or when the
        longitude converts to 0.
    """
    # Basic tags copied into the result as-is
    _showlist = [
        'GPS GPSDOP',
        'GPS GPSMeasureMode',
        'GPS GPSAltitudeRef',
        'GPS GPSAltitude',
        'Image Software',
        'Image Model',
        'Image Make'
    ]
    # Timestamp tags in order of preference; the first one present wins
    _TimeList = ["EXIF DateTimeOriginal", "Image DateTime", "GPS GPSDate"]
    # Result dict, seeded with the file path
    _infos = {
        'Filename': filepath
    }
    with open(filepath, "rb") as _files:
        # Try to read the EXIF data
        try:
            _tags = exifread.process_file(_files)
        except KeyError:
            return None
    # A usable position needs both coordinates
    if "GPS GPSLatitude" not in _tags or "GPS GPSLongitude" not in _tags:
        return None
    _lat = cg(str(_tags["GPS GPSLatitude"]))
    _lon = cg(str(_tags["GPS GPSLongitude"]))
    # cg returns 0 for unusable data; such a longitude is treated as no fix
    if _lon == 0.0:
        return None
    # Copy the basic tags in sorted tag-name order
    for _item in sorted(_showlist):
        if _item in _tags:
            _infos[_item.split()[-1]] = str(_tags[_item]).strip()
    # Signed coordinates: south latitude and west longitude are negative
    _lat_sign = 1.0 if str(_tags.get("GPS GPSLatitudeRef", "N")) == "N" else -1.0
    _lon_sign = 1.0 if str(_tags.get("GPS GPSLongitudeRef", "E")) == "E" else -1.0
    _infos["GPS"] = (_lat * _lat_sign, _lon * _lon_sign)
    # Resolve the street address
    _infos["address"] = address(_infos["GPS"])
    # Format the altitude; the reference tag is consumed and not reported
    if "GPSAltitudeRef" in _infos:
        _ref = _infos.pop("GPSAltitudeRef")
        if "GPSAltitude" in _infos:
            try:
                # Parse "a/b" or a plain number without evaluating code
                _alt = float(Fraction(_infos["GPSAltitude"]))
            except (ZeroDivisionError, ValueError):
                _alt = 0
            # NOTE(review): EXIF defines GPSAltitudeRef 0 as above sea level
            # and 1 as below sea level; the labels are kept as originally
            # written - confirm the intended wording.
            _infos["GPSAltitude"] = "距%s%.2f米" % (
                "地面" if int(_ref) == 1 else "海平面", _alt)
    # Pick the first available timestamp
    for _key in _TimeList:
        if _key in _tags:
            _infos["Dates"] = str(_tags[_key])
            break
    return _infos

地址轉換

一個簡單的爬蟲,呼叫高德地圖api進行座標轉換,考慮到原本是跨域,這裡新增基礎的反防爬程式碼。這裡有個小細節,海外的一律都取不到(包括臺灣),可以通過更換googlemap的api來實現全球查詢。

def address(gps):
    """Reverse-geocode a coordinate pair through the AMap REST API.

    Args:
        gps: ``(latitude, longitude)`` in decimal degrees; the request sends
            them as ``location=<longitude>,<latitude>``.

    Returns:
        The ``formatted_address`` field of the response when the service
        reports status "1" and info "OK"; otherwise ``None``. Network
        errors and unexpected responses are logged at debug level and also
        yield ``None`` (the lookup is best-effort).
    """
    # Pool of User-Agent strings; one is picked at random per request
    _ulist = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; InfoPath.2; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; 360SE)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
        "Mozilla/5.0 (X11; U; Linux i686; rv:1.7.3) Gecko/20040913 Firefox/0.10",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; ja) Presto/2.10.289 Version/12.00",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36"
    ]
    # Browser-like request headers
    _header = {
        "User-Agent": random.choice(_ulist),
        "Accept": "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01",
        "Accept-Encoding": "gzip, deflate, sdch",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Referer": "http://www.gpsspg.com",
    }
    try:
        _res = requests.get(
            "http://restapi.amap.com/v3/geocode/regeo?key={2}&s=rsv3&location={1},{0}&platform=JS&logversion=2.0&sdkversion=1.3&appname=http%3A%2F%2Fwww.gpsspg.com%2Fiframe%2Fmaps%2Famap_161128.htm%3Fmapi%3D3&csid=945C5A2C-E67F-4362-B881-9608D9BC9913".format(gps[0], gps[1], KEY), headers=_header, timeout=(5, 5))
        _json = _res.json()
        # Only trust the payload when the service reports success
        if _json and _json["status"] == "1" and _json["info"] == "OK":
            return _json.get("regeocode").get("formatted_address")
    except (requests.RequestException, ValueError, KeyError,
            AttributeError, TypeError, IndexError) as err:
        # Best-effort lookup: record the failure instead of hiding it
        logging.getLogger(__name__).debug(
            "reverse geocoding failed for %s: %s", gps, err)
    return None

例項

執行該程式碼 然後輸入儲存資料夾名和掃描位置即可

利用python找出那些被“標記”的照片

 

這邊可以看到8019張中有396張存在有效的地理位置,打碼的地方就不解釋了,各位老司機~。