iText實現URL頁面轉PDF

阿新 • • 發佈：2019-01-30

概述
軟體要求
實現過程
[一]、概述

前面已經介紹了如何實現對HTML中文字元的轉換以及HTML檔案生成PDF檔案的基本方法，本文主要演示如何把URL地址對應的內容直接轉換生成PDF檔案。這個需求有很多應用場景，最簡單的例子：把自己blog中的文章轉成PDF，一方面方便自己閱讀，另一方面也可作為一種備份。

[二]、軟體要求

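<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.1</version>
    <type>jar</type>
    <scope>compile</scope>
</dependency>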

1
2
3
4
5
6
7

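<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.1</version>
    <type>jar</type>
    <scope>compile</scope>
</dependency>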

[三]、實現過程

Java實現程式碼：Demo4URL2Pdf.java

package com.bigdata.ai.util.pdf;

import java.io.ByteArrayInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.jsoup.Jsoup;

import com.itextpdf.text.BaseColor;
import 
 com.itextpdf.text.Chapter;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Section;
import com.itextpdf.text.pdf.BaseFont;
import 
 com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.draw.LineSeparator;
import com.itextpdf.tool.xml.ElementHandler;
import com.itextpdf.tool.xml.Writable;
import com.itextpdf.tool.xml.XMLWorkerHelper;
import com.itextpdf.tool.xml.pipeline.WritableElement;

/**  
 * 依據jsoup.jar包和itextReader匯出pdf文件
 * @file Demo4URL2Pdf.java
 * @package com.bigdata.ai.util.pdf
 * @author dxh
 * @createTime 2017年8月23日08:05:31  
*/
public class Demo4URL2Pdf {

    /**
     * @param args
     */
    public static void main(String[] args) throws Exception {
        String blogURL = "http://www.micmiu.com/os/linux/shell-dev-null/";

        // 直接把網頁內容轉為PDF檔案
        String pdfFile = "d:/test/itext/demo-URL.pdf";
        Demo4URL2Pdf.parseURL2PDFFile(pdfFile, blogURL);

        // 把網頁內容轉為PDF中的Elements
        String pdfFile2 = "d:/test/itext/demo-URL2.pdf";
        Demo4URL2Pdf.parseURL2PDFElement(pdfFile2, blogURL);
    }
    /**
     * 根據URL提前blog的基本資訊，返回結果&gt;&gt;:[主題 ,分類,日期,內容]等.
     *
     * @param blogURL
     * @return
     * @throws Exception
     */
    public static String[] extractBlogInfo(String blogURL) throws Exception {
        String[] info = new String[4];
        org.jsoup.nodes.Document doc = Jsoup.connect(blogURL).get();
        org.jsoup.nodes.Element e_title = doc.select("h2.title").first();
        info[0] = e_title.text();

        org.jsoup.nodes.Element e_category = doc.select("a[rel=category tag]")
                .first();
        info[1] = e_category.attr("href").replace("http://www.micmiu.com/", "");

        org.jsoup.nodes.Element e_date = doc.select("span.post-info-date")
                .first();

        String dateStr = e_date.text().split("日期")[1].trim();
        info[2] = dateStr;
        org.jsoup.nodes.Element entry = doc.select("div.entry").first();
        info[3] = formatContentTag(entry);

        return info;
    }

    /**
     * 格式化 img標籤
     *
     * @param entry
     * @return
     */
    private static String formatContentTag(org.jsoup.nodes.Element entry) {
        try {
            entry.select("div").remove();
            // 把 &lt;a href="*.jpg" &gt;&lt;img src="*.jpg"/&gt;&lt;/a&gt; 替換為 &lt;img
            // src="*.jpg"/&gt;
            for (org.jsoup.nodes.Element imgEle : entry
                    .select("a[href~=(?i)\\.(png|jpe?g)]")) {
                imgEle.replaceWith(imgEle.select("img").first());
            }
            return entry.html();
        } catch (Exception e) {
            return "";
        }
    }

    /**
     * 把String 轉為 InputStream
     *
     * @param content
     * @return
     */
    public static InputStream parse2Stream(String content) {
        try {
            ByteArrayInputStream stream = new ByteArrayInputStream(
                    content.getBytes("utf-8"));
            return stream;
        } catch (Exception e) {

            return null;
        }
    }

    /**
     * 直接把網頁內容轉為PDF檔案
     *
     * @param fileName
     * @throws Exception
     */
    public static void parseURL2PDFFile(String pdfFile, String blogURL)
            throws Exception {

        BaseFont bfCN = BaseFont.createFont("C:/WINDOWS/Fonts/SIMYOU.TTF", BaseFont.IDENTITY_H,BaseFont.NOT_EMBEDDED);
        // 中文字型定義
        Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);
        Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0, 204,
                255));
        Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

        Document document = new Document();
        PdfWriter pdfwriter = PdfWriter.getInstance(document,
                new FileOutputStream(pdfFile));
        pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);
        document.open();

        String[] blogInfo = extractBlogInfo(blogURL);

        int chNum = 1;
        Chapter chapter = new Chapter(new Paragraph("URL轉PDF測試", chFont),
                chNum++);

        Section section = chapter
                .addSection(new Paragraph(blogInfo[0], secFont));
        section.setIndentation(10);
        section.setIndentationLeft(10);
        section.setBookmarkOpen(false);
        section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);
        section.add(new Chunk("分類：" + blogInfo[1] + " 日期：" + blogInfo[2],
                textFont));

        LineSeparator line = new LineSeparator(1, 100, new BaseColor(204, 204,
                204), Element.ALIGN_CENTER, -2);
        Paragraph p_line = new Paragraph(" ");
        p_line.add(line);
        section.add(p_line);
        section.add(Chunk.NEWLINE);

        document.add(chapter);

        // html檔案
        XMLWorkerHelper.getInstance().parseXHtml(pdfwriter, document,
                parse2Stream(blogInfo[3]));

        document.close();
    }

    /**
     * 把網頁內容轉為PDF中的Elements
     *
     * @param pdfFile
     * @param htmlFileStream
     */
    public static void parseURL2PDFElement(String pdfFile, String blogURL) {
        try {
            Document document = new Document(PageSize.A4);

            FileOutputStream outputStream = new FileOutputStream(pdfFile);
            PdfWriter pdfwriter = PdfWriter.getInstance(document, outputStream);
            // pdfwriter.setViewerPreferences(PdfWriter.HideToolbar);
            document.open();

            BaseFont bfCN = BaseFont.createFont("C:/WINDOWS/Fonts/SIMYOU.TTF", BaseFont.IDENTITY_H,BaseFont.NOT_EMBEDDED);
            // 中文字型定義
            Font chFont = new Font(bfCN, 14, Font.NORMAL, BaseColor.BLUE);
            Font secFont = new Font(bfCN, 12, Font.NORMAL, new BaseColor(0,
                    204, 255));
            Font textFont = new Font(bfCN, 12, Font.NORMAL, BaseColor.BLACK);

            int chNum = 1;
            Chapter chapter = new Chapter(new Paragraph("URL轉PDF元素，便於追加其他內容",
                    chFont), chNum++);

            String[] blogInfo = extractBlogInfo(blogURL);

            Section section = chapter.addSection(new Paragraph(blogInfo[0],
                    secFont));

            section.setIndentation(10);
            section.setIndentationLeft(10);
            section.setBookmarkOpen(false);
            section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);
            section.add(new Chunk("分類：" + blogInfo[1] + " 發表日期：" + blogInfo[2],
                    textFont));
            LineSeparator line = new LineSeparator(1, 100, new BaseColor(204,
                    204, 204), Element.ALIGN_CENTER, -2);
            Paragraph p_line = new Paragraph();
            p_line.add(line);
            section.add(p_line);
            section.add(Chunk.NEWLINE);

            final List<Element> pdfeleList = new ArrayList<Element>();
            ElementHandler elemH = new ElementHandler() {

                public void add(final Writable w) {
                    if (w instanceof WritableElement) {
                        pdfeleList.addAll(((WritableElement) w).elements());
                    }

                }
            };
            XMLWorkerHelper.getInstance().parseXHtml(elemH,
                    new InputStreamReader(parse2Stream(blogInfo[3]), "utf-8"));

            List<Element> list = new ArrayList<Element>();
            for (Element ele : pdfeleList) {
                if (ele instanceof LineSeparator
                        || ele instanceof WritableElement) {
                    continue;
                }
                list.add(ele);
            }
            section.addAll(list);

            section = chapter.addSection(new Paragraph("繼續新增章節", secFont));

            section.setIndentation(10);
            section.setIndentationLeft(10);
            section.setBookmarkOpen(false);
            section.setNumberStyle(Section.NUMBERSTYLE_DOTTED_WITHOUT_FINAL_DOT);
            section.add(new Chunk("測試URL轉為PDF元素，方便追加其他內容", textFont));

            document.add(chapter);
            document.close();
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
}

iText實現URL頁面轉PDF

概述軟體要求實現過程 [一]、概述前面已經介紹瞭如何實現對HTML中文字元的轉換以及HTML檔案生成PDF檔案的基本方法，本文主要演示下如何把URL地址對應的內容直接轉換生成PDF檔案，這個需求也有很多的應用場景，最簡單的應用場景比如：自己bl

使用iText 將html頁面轉PDF檔案（itext+freemarker）

1.匯入jar包（使用maven管理）  <dependency> <groupId>com.lowagie&l

java使用itext實現html程式碼轉pdf

1、引入jar包 <dependency> <groupId>com.lowagie</groupId> <artifactId>itext</artifactId> <version>4.

利用itext將html頁面轉成pdf(不模糊)

relative long ble wid ems map entity repl oat 1.maven項目進入依賴 <dependency> <groupId>org.xhtmlrenderer</groupId>

ASP.NET指定頁面轉PDF、JPG（插件）

isp asp type ref [] addheader 導出pdf length net //PDF文件導出 public ActionResult pdfs() { //導出頁面的路徑（死路徑）

使用OpenOffice外掛實現RTF/WORD轉PDF轉多張圖片或者一張圖片

這裡我們使用的是OpenOffice外掛，需要安裝，還有相關的jar包網盤地址： https://pan.baidu.com/s/1c6HymABx3wre-d19eB1c-w 密碼： n1cd 安裝OpenOffice完成後 W

iText —— JAVA將html轉pdf

1、index.html檔案內容如下 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.

Itext 實現 html轉換成pdf

需要的jar包: core-renderer.jar 和 iText-2.0.8.jar Html程式碼 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/

c++ 實現url的轉碼與解碼 urlencode urldecode

在網上找的程式碼各種問題，最終根據網上的版本修改實現了自己的一個版本，其中= 不做處理，- _都進行了轉碼 [cpp] view plain copy print? #include<iostream>#include<stdio.h&

Java實現各種檔案轉PDF（使用OpenOffice）

最近遇到一個學習平臺的專案，涉及到各種文件，為了站點資源的安全性，文件全部需要轉成pdf，只供使用者瀏覽。翻閱了很多資料，看了一個博主寫的使用open office實現的方式，簡單明瞭，這裡也分享一下。這裡介紹在win環境下的openoffice使用。百度搜索：open

iText把jsp/html轉pdf,並支援新增頁首頁尾

公司的相關業務需要匯出pdf,找遍了各大網站論壇,然後自己又總結融合了不少其他程式碼,吃過不少虧,特把全部原始碼扔上去供大家做個參考,鄙人研發兩年還屬於小菜階段.程式碼不嚴謹的地方還望指正; 為了給pdf直接加上頁首頁尾,直接重寫了ITextRenderer 下面直接上原始

js實現html頁面轉為pdf下載

1、簡單描述最近做了一個專案，我也是剛學js才幾個多月，對js不是很懂，但是我相信，只要肯學，總會進步的。專案裡面要實現把網頁的試題下載成pdf，所以我有個同事就實現了這個功能，然後我參考著他寫的程式碼就總結了一下。網頁渲染的過程中因伺服器效能和圖片的數量而定，圖片太多的

c++ 實現url的轉碼與解碼 urlencode urldecode

在網上找的程式碼各種問題，最終根據網上的版本修改實現了自己的一個版本，其中= 不做處理，- _都進行了轉碼 #include<iostream> #include<stdio.h> using namespace std; static unsi

nginx rewrite 實現URL跳轉

最近工作中常常要改nginx配置，學習了nginx中rewrite的用法 URL跳轉這裡說的URL跳轉就是使用者在訪問一個URL時將其跳轉到另一個URL上。常見的應用場景是讓多個域名跳轉到同一個URL上，（例如讓舊域名跳轉到新域名上）將靜態檔案請求跳轉到cdn上等

AngularJS使用Controller實現URL跳轉

參考文章：https://docs.angularjs.org/guide/$location 具體寫法舉例： 1.js定義一個controller function MyCtrl($scope, $location) { $scope.jumpToUrl = fu

Apache通過更改配置檔案實現url跳轉和域名跳轉

1，首先確認下自己伺服器的httpd的服務正常啟用，以下以centos 7.0為例 2. 編輯Apache的配置檔案，vi /etc/httpd/conf/httpd.conf 可以在尾行新增如下規則： RewriteEngine On RewriteRule ^/(.*

Java 實現HTML 頁面轉成image 圖片

前言在java 中把HTML轉化成圖檔，思路基本上是現在 AWT or Swing 的Panel上顯示網頁，在把Panel輸出為 image 檔案。 java 本身的API有提供相關的結果，但是直接產生的效果不是很好，所以有出現一些 library. Java Co

Django實現url跳轉（重定向）

編輯urls.py檔案如下： from django.urls import path, include from django.views.generic import RedirectView urlpatterns = [ path('polls/', in

html頁面轉PDF、圖片操作記錄

　　前言　　日常開發中，我們有可能會碰到從系統中匯出資料並列印的需要，列印的格式是常規的表格形式，例如：　　本文記錄使用js庫html2canvas + jspdf實現html轉PDF、圖片，並下載　　畫出頁面　　對於我們後端開發人員來說，畫這樣的表格頁面還是比較耗時的，

【JAVA】使用 iText XMLWorker實現HTML轉PDF

使用 iText XML Worker實現HTML轉PDF package com.yfli.iText; import java.io.FileInputStream; import java.i

iText實現URL頁面轉PDF

相關推薦