第六章 讀取 PDF 檔案
阿新 • • 發佈:2018-12-19
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
from io import StringIO
from urllib.request import urlopen

from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfinterp import PDFResourceManager, process_pdf


def readPDF(pdfFile):
    """Extract the text of a PDF and return it as one string.

    Args:
        pdfFile: An open file-like object holding the PDF data. It is passed
            to ``process_pdf`` unchanged and is not closed by this function.

    Returns:
        The text that the ``TextConverter`` wrote into the in-memory buffer.
    """
    # Both of these must be *instances*; binding the bare classes would hand
    # TextConverter a class as its output stream and make getvalue() fail.
    rsrcmgr = PDFResourceManager()
    retstr = StringIO()
    try:
        device = TextConverter(rsrcmgr, retstr, laparams=LAParams())
        try:
            process_pdf(rsrcmgr, device, pdfFile)
        finally:
            # Close the converter even when parsing raises.
            device.close()
        # Read the buffer before it is closed in the outer ``finally``.
        return retstr.getvalue()
    finally:
        retstr.close()


# The HTTP response is closed by the ``with`` block even if extraction fails.
# NOTE(review): path corrected from "waranpeace" to "warandpeace" — confirm
# against the live site.
with urlopen("http://pythonscraping.com/pages/warandpeace/chapter1.pdf") as pdfFile:
    outputString = readPDF(pdfFile)
print(outputString)