1. 程式人生 > python簡單爬蟲

python簡單爬蟲

url get out res except urlopen 5.0 html_ lse

from urllib import request,parse
from urllib.error import HTTPError,URLError

def get(url, headers=None):
    """Request ``url`` without form data and return the result.

    Args:
        url: Address to request.
        headers: Optional request headers, forwarded unchanged.

    Returns:
        Whatever ``urlrequest`` returns for this URL.
    """
    body = urlrequest(url, headers=headers)
    return body
def post(url, form, headers=None):
    """Request ``url`` with ``form`` as the submitted data and return the result.

    Args:
        url: Address to request.
        form: Form fields, forwarded to ``urlrequest`` as its ``form`` argument.
        headers: Optional request headers, forwarded unchanged.

    Returns:
        Whatever ``urlrequest`` returns for this URL and form.
    """
    body = urlrequest(url, form=form, headers=headers)
    return body
def urlrequest(url, form=None, headers=None, timeout=5):
    """Send a GET or POST request and return the raw response body.

    Args:
        url: Address to request.
        form: Optional mapping of form fields. When truthy it is URL-encoded,
            encoded as UTF-8 and attached as the request body (urllib sends
            a POST when a body is present); otherwise no body is sent.
        headers: Optional dict of request headers. When ``None``, a single
            browser-like ``User-Agent`` header is used instead.
        timeout: Seconds passed to ``urlopen`` as its timeout. Defaults to 5.

    Returns:
        The response body as ``bytes``, or ``b""`` when the request raised
        ``HTTPError`` or ``URLError`` (the error is printed, not re-raised).
    """
    if headers is None:
        user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
        )
        headers = {"User-Agent": user_agent}

    html_bytes = b""
    try:
        if form:
            # POST: dict -> query string -> bytes for the request body.
            form_str = parse.urlencode(form)
            form_bytes = form_str.encode("utf-8")
            req = request.Request(url, data=form_bytes, headers=headers)
        else:
            # GET: no body, only headers.
            req = request.Request(url, headers=headers)
        # The context manager closes the response even if read() fails.
        with request.urlopen(req, timeout=timeout) as response:
            html_bytes = response.read()
    except HTTPError as e:
        print(e)
    except URLError as e:
        print(e)
    return html_bytes
if __name__ == "__main__":
    # POST example (uncomment to try it):
    # url = 'http://fanyi.baidu.com/sug'
    # form = {
    #     'kw': '鷹',
    # }
    # html_bytes = post(url, form=form)
    # print(html_bytes)

    # GET example: fetch the page and print its body decoded as UTF-8.
    url = "http://www.baidu.com"
    html_bytes = get(url)
    print(html_bytes.decode("utf-8"))

python簡單爬蟲