Java 爬取網易雲MV視訊
阿新 • • 發佈:2018-12-18
一、環境
JDK 1.8+
二、 步驟
1. 開啟網易雲音樂中的其中一首歌頁面,然後開啟開發者工具,檢視mv連結地址
2. 開啟mv連結地址後,檢視網頁框架原始碼
3. 發現視訊的地址就在 <meta property="og:video" content="..."> 的 content 屬性中,可以用 Jsoup 進行解析。這裡的 url 經過 URL 編碼,可以用 URLDecoder 進行解碼;我們也可以複製 url 到線上解碼網站去進行解碼, 線上加/解碼 地址: http://www.convertstring.com/zh_CN/EncodeDecode/UrlDecode
解碼後的url是這樣的:
複製解碼後的網址進行訪問:
視訊的源播放地址就出來了,這時候就可以進行下載
下面給出原始碼(Java)
三、 原始碼
maven配置
<!-- HTTP request simulation (the MVTest source in this article imports only org.apache.http.HttpHeaders) --> <!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient --> <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.2</version> </dependency> <!-- HTML parser --> <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup --> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency> <!-- NOTE(review): no net.sf.json import appears in the MVTest source in this article; confirm whether this dependency is needed --> <!-- https://mvnrepository.com/artifact/net.sf.json-lib/json-lib --> <dependency> <groupId>net.sf.json-lib</groupId> <artifactId>json-lib</artifactId> <version>2.3</version> <classifier>jdk15</classifier> </dependency>
package netease; import org.apache.http.HttpHeaders; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import java.io.*; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.net.URLDecoder; import java.util.HashMap; import java.util.UUID; public class MVTest { static final String[] user_Agent = { "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36", "Mozilla/5.0 (X11; U; Linux x86_64; zh-CN; rv:1.9.2.10) Gecko/20100922 Ubuntu/10.10 (maverick) Firefox/3.6.10", "Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; zh-cn) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0", "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", "Opera/9.80 (Macintosh; 
Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.11 TaoBrowser/2.0 Safari/536.11", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; LBBROWSER)", }; static final String neteaseReferer = "http://music.163.com/"; static final String neteeaseHost = "music.163.com"; public static void main(String[] args) throws IOException { String url = Common.neteaseReferer+"mv?id=303284"; //mv 視訊地址 //請求header 設定 HashMap<String, String> config = new HashMap<>(); config.put(HttpHeaders.USER_AGENT, user_Agent[0]); config.put(HttpHeaders.REFERER, neteaseReferer); config.put(HttpHeaders.HOST, neteeaseHost); Document res = Jsoup.connect(url).headers(config).get(); //解析框架原始碼中的 mv 播放url Element element = res.select("meta[property=og:video]").first(); String downloadURL = URLDecoder.decode( element.attr("content"), "UTF-8" ); System.err.println(downloadURL); downloadFile(downloadURL, UUID.randomUUID().toString()+".mp4"); } /** * 檔案下載儲存 * @param downloadURL 下載連結 * @param savePath 儲存路徑 * @throws MalformedURLException * @throws MalformedURLException */ public static void downloadFile(String downloadURL, String savePath) throws MalformedURLException, MalformedURLException { // 下載網路檔案 int byteread = 0; URL url = new URL(downloadURL.trim()); try { URLConnection conn = url.openConnection(); InputStream inStream = conn.getInputStream(); File file = new File(savePath); if(!file.exists()){ if(!file.getParentFile().exists()) { //若資料夾不存在,建立資料夾 file.getParentFile().mkdirs(); //注意不是 mkdir , mkdirs是建立多級目錄 } } FileOutputStream fs = new FileOutputStream(file); byte[] buffer = new byte[1204]; while ((byteread = inStream.read(buffer)) != -1) { fs.write(buffer, 
0, byteread); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
執行後當前專案目錄下出現剛剛下載的MP4檔案