1. 程式人生 > >python 爬蟲需要的庫

python 爬蟲需要的庫

pip install builtwith
該模組將URL作為引數,下載該URL並對其進行分析,然後返回該網站使用的技術。下面是使用該模組的一個例子。
import builtwith
builtwith.parse('http://example.webscraping.com')
{'web-servers': ['Nginx'], 'web-frameworks': ['Web2py', 'Twitter Bootstrap'], 'programming-languages': ['Python'], 'javascript-frameworks': ['jQuery', 'Modernizr', 'jQuery UI']}
尋找網站所有者
pip install python-whois
import whois
print (whois.whois('http://example.webscraping.com/'))
{
  "domain_name": "WEBSCRAPING.COM",
  "registrar": "GoDaddy.com, LLC",
  "whois_server": "whois.godaddy.com",
  "referral_url": null,
  "updated_date": [
    "2013-08-20 08:08:30",
    "2013-08-20 08:08:29"
  ],
  "creation_date": "2004-06-26 18:01:19",
  "expiration_date": "2020-06-26 18:01:19",
  "name_servers": [
    "NS1.WEBFACTION.COM",
    "NS2.WEBFACTION.COM",
    "NS3.WEBFACTION.COM",
    "NS4.WEBFACTION.COM"
  ],
  "status": [
    "clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited",
    "clientRenewProhibited https://icann.org/epp#clientRenewProhibited",
    "clientTransferProhibited https://icann.org/epp#clientTransferProhibited",
    "clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited",
    "clientTransferProhibited http://www.icann.org/epp#clientTransferProhibited",
    "clientUpdateProhibited http://www.icann.org/epp#clientUpdateProhibited",
    "clientRenewProhibited http://www.icann.org/epp#clientRenewProhibited",
    "clientDeleteProhibited http://www.icann.org/epp#clientDeleteProhibited"
  ],
  "emails": "[email protected]",
  "dnssec": "unsigned",
  "name": null,
  "org": null,
  "address": null,
  "city": null,
  "state": "Victoria",
  "zipcode": null,
  "country": "AU"
}