程式人生 > Java 爬取網易雲MV視訊

Java 爬取網易雲MV視訊

一、環境

JDK 1.8+

二、 步驟

1. 開啟網易雲音樂中的其中一首歌頁面,然後開啟開發者工具,檢視mv連結地址

3. 開啟mv連結地址後,檢視網頁框架原始碼

4. 發現視訊的地址就在 <meta property="og:video" content="..."> 的 content 屬性中,可以用 Jsoup 進行解析。這裡的 url 經過 URL 編碼,在程式中可以用 URLDecoder 進行解碼;我們也可以複製 url 到線上解碼網站去進行解碼,線上加/解碼地址: http://www.convertstring.com/zh_CN/EncodeDecode/UrlDecode

解碼後的url是這樣的:

                

複製解碼後的網址進行訪問:

視訊的源播放地址就出來了,這時候就可以進行下載

下面給出原始碼(Java)

三、 原始碼

maven配置

<!-- HTTP client used for simulated HTTP requests -->
        <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>

        <!-- HTML parser -->
        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.11.3</version>
        </dependency>

        <!-- NOTE(review): the Java code in this article does not import any json-lib class; confirm whether this dependency is actually needed -->
        <!-- https://mvnrepository.com/artifact/net.sf.json-lib/json-lib -->
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.3</version>
            <classifier>jdk15</classifier>
        </dependency>
package netease;

import org.apache.http.HttpHeaders;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.UUID;

/**
 * Demo crawler that downloads one NetEase Cloud Music MV.
 *
 * <p>{@link #main(String[])} fetches the MV page, reads the URL-encoded video address from the
 * {@code <meta property="og:video">} tag, decodes it, and hands it to
 * {@link #downloadFile(String, String)}, which streams the video to a local file.
 */
public class MVTest {
    /** Browser User-Agent strings; {@code main} currently sends only the first entry. */
    static final String[] user_Agent = {
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
            "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10",
            "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5",
            "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
            "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
            "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
            "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
            "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
            "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11",
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER",
            "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)",
    };

    /** Base URL of the site; sent as the Referer header and used as the prefix of the MV page URL. */
    static final String neteaseReferer = "http://music.163.com/";
    /** Value sent as the Host header. (Field name keeps its original spelling for compatibility.) */
    static final String neteeaseHost = "music.163.com";

    /** Size in bytes of the copy buffer used by {@link #downloadFile(String, String)}. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Fetches the MV page, extracts the video URL from its {@code og:video} meta tag and
     * downloads the video into the working directory under a random {@code <uuid>.mp4} name.
     *
     * @param args unused
     * @throws IOException if the MV page cannot be fetched or the URL cannot be decoded
     */
    public static void main(String[] args) throws IOException {
        // MV page address; the constant lives on this class (there is no separate Common class here).
        String url = neteaseReferer + "mv?id=303284";

        // Request headers sent with the page fetch.
        HashMap<String, String> config = new HashMap<>();
        config.put(HttpHeaders.USER_AGENT, user_Agent[0]);
        config.put(HttpHeaders.REFERER, neteaseReferer);
        config.put(HttpHeaders.HOST, neteeaseHost);
        Document res = Jsoup.connect(url).headers(config).get();

        // The playable video URL is stored URL-encoded in <meta property="og:video" content="...">.
        Element element = res.select("meta[property=og:video]").first();
        if (element == null) {
            // first() yields null when nothing matched; report it instead of failing with an NPE.
            System.err.println("No <meta property=\"og:video\"> tag found at " + url);
            return;
        }
        String encodedUrl = element.attr("content");
        if (encodedUrl.isEmpty()) {
            System.err.println("The og:video meta tag at " + url + " has no content attribute");
            return;
        }
        String downloadURL = URLDecoder.decode(encodedUrl, "UTF-8");
        System.err.println(downloadURL);

        downloadFile(downloadURL, UUID.randomUUID().toString() + ".mp4");
    }

    /**
     * Downloads the resource at {@code downloadURL} and saves it to {@code savePath}.
     *
     * <p>Missing parent directories of {@code savePath} are created. I/O failures during the
     * transfer are reported on standard error and are not propagated to the caller.
     *
     * @param downloadURL  link to download; surrounding whitespace is trimmed
     * @param savePath     path of the file to write; may be a bare file name
     * @throws MalformedURLException if {@code downloadURL} is not a valid URL
     */
    public static void downloadFile(String downloadURL, String savePath) throws MalformedURLException {
        URL url = new URL(downloadURL.trim());
        File file = new File(savePath);
        try {
            URLConnection conn = url.openConnection();
            // try-with-resources closes both streams even when the copy fails midway.
            try (InputStream inStream = conn.getInputStream()) {
                // Resolve against the working directory first: a bare file name has no parent
                // of its own, and getParentFile() would return null for it.
                File parentDir = file.getAbsoluteFile().getParentFile();
                if (parentDir != null && !parentDir.exists()) {
                    // mkdirs (not mkdir) so that multi-level directories are created.
                    parentDir.mkdirs();
                }
                try (OutputStream fs = new FileOutputStream(file)) {
                    byte[] buffer = new byte[BUFFER_SIZE];
                    int byteread;
                    while ((byteread = inStream.read(buffer)) != -1) {
                        fs.write(buffer, 0, byteread);
                    }
                }
            }
        } catch (IOException e) {
            // Best-effort download: report the failure with context rather than aborting the caller.
            System.err.println("Failed to download " + url + " to " + file + ": " + e.getMessage());
            e.printStackTrace();
        }
    }
}

執行後當前專案目錄下出現剛剛下載的MP4檔案