1. 程式人生 > python爬取手機號段(電信199號段)

python爬取手機號段(電信199號段)

# -*- coding: GBK -*-

"""
    爬取手機號段歸屬地
"""

import time

import requests
from lxml import etree

time_start = time.time()  # 程式開始時間
url = 'http://www.ip138.com:8080/search.asp?'
param = {'action': 'mobile', 'mobile': '1990012'}

file = open("C:\\Users\\yang\\Desktop\\phoneNumber.txt", "a+", encoding='utf-8'
) for n1 in range(0, 10): for n2 in range(0, 10): for n3 in range(0, 10): for n4 in range(0, 10): print("!!!n1: "+str(n1)+" n2: "+str(n2)+" n3: "+str(n3)+" n4: " + str(n4)) param['mobile'] = '199'+str(n1)+str(n2)+str(n3)+str(n4) rq = requests.get(url, param) rq.encoding = 'GBK'
page = etree.HTML(rq.text) hs = page.xpath('/html/body/table/tr/td[@class="tdc2"]') sum = 0 while hs[1].text is None: rq = requests.get(url, param) rq.encoding = 'GBK' page = etree.HTML(rq.text) hs = page.xpath('/html/body/table/tr/td[@class="tdc2"]'
) sum += 1 if hs is not None: break if sum == 20: break if hs[1].text is not None and hs[1].text is not '未知' and hs[1].text is not '': resultStr = param['mobile'] + " " + hs[1].text.strip()+"\n" file.write(resultStr) file.close() time_end = time.time() # 程式結束時間 print('\r程式執行時間:', time_end - time_start)