1.專案需求描述

通過訂單號獲取某系統內訂單的詳細資料。該系統不需要賬號密碼登入,但查詢時必須輸入圖片驗證碼,因此程式需要動態識別驗證碼,並將獲取到的資料存入資料庫。

2.整體思路

1.通過Selenium技術,無視窗模式開啟瀏覽器

2.在輸入框中動態輸入訂單號

3.將圖片驗證碼截圖儲存到本地

4.通過Tesseract-OCR技術在本地識別驗證碼圖片,並將其轉化為文字

5.將識別出的驗證碼填入驗證碼輸入框

6.點選查詢獲取列表資料

3.功能實現

1.下載並安裝Google瀏覽器,安裝Google驅動chromedriver.exe,獲取安裝路徑,配置在專案中

2.使用Selenium進行瀏覽器操作

 1 System.setProperty(瀏覽器驅動, 瀏覽器驅動安裝位置);
2 ChromeOptions options = new ChromeOptions();
3 options.addArguments("--headless"); // 無視窗模式
4 options.addArguments("--disable-infobars"); // 禁言訊息條
5 options.addArguments("--disable-extensions"); // 禁用外掛
6 options.addArguments("--disable-gpu"); // 禁用GPU
7 options.addArguments("--no-sandbox"); // 禁用沙盒模式
8 options.addArguments("--disable-dev-shm-usage");
9 options.addArguments("--hide-scrollbars"); // 隱藏滾動條
10
11 WebDriver driver = new ChromeDriver(options);
12 driver.get(爬取網站URL);
13 driver.manage().window().setSize(new Dimension(450, 260)); // 設定遊覽器開啟後調整大小
14 try {
15 // 儲存IMG圖片到本地
16 saveImgToLocal(driver);
17 Thread.sleep(2000);
18 // OCR智慧識別驗證碼
19 String codeByOCR = getCodeByOCR();
20 if (codeByOCR != null) {
21 try {
22 WebElement input1 = driver.findElement(By.id(TEXTBOX1));
23 input1.sendKeys(code);
24 WebElement input2 = driver.findElement(By.id(TEXTBOX2));
25 input2.sendKeys(codeByOCR);
26 // 獲取table資料
27 WebElement addButton = driver.findElement(By.id(SELECT_BUTTON));
28 addButton.click();
29 List<WebElement> tRCollection = driver.findElement(By.id(TABLE_ID)).findElements(By.tagName("tr"));
30 for (int t = 1; t < tRCollection.size(); t++) {
31 List<WebElement> tDCollection = tRCollection.get(t).findElements(By.tagName("td"));
32 VipLogisticsMinHangDetailVo minHangDetailVo = new VipLogisticsMinHangDetailVo();
33 minHangDetailVo.setLogistics_number(code);
34 for (int i = 0; i < tDCollection.size(); i++) {
35 String text = tDCollection.get(i).getText();
36 switch (i) {
37 case 0:
38 minHangDetailVo.setTime(text);
39 case 1:
40 minHangDetailVo.setOutlet(text);
41 case 2:
42 minHangDetailVo.setOrganization(text);
43 case 3:
44 minHangDetailVo.setEvent(text);
45 case 4:
46 minHangDetailVo.setDetail(text);
47 }
48 }
49 list.add(minHangDetailVo);
50 }
51 log.info("驗證碼識別成功!");
52 } catch (Exception e) {
53 if (e.toString().contains("錯誤提示:驗證碼錯誤或已過期!")) {
54 log.error("驗證碼識別錯誤!" + e.toString());
55 } else if (e.toString().contains("錯誤提示:請輸入驗證碼!")) {
56 log.error("未輸入驗證碼!:" + e.toString());
57 } else {
58 log.error("其他異常:" + e.toString());
59 }
60 }
61 }
62 driver.quit();
63 } catch (Exception e) {
64 e.printStackTrace();
65 }

3.將圖片驗證碼截圖儲存到本地(截圖法)

 1 private void saveImgToLocal(WebDriver driver) {
2 WebElement element = driver.findElement(By.id(img元素ID));
3 //建立全屏截圖
4 WrapsDriver wrapsDriver = (WrapsDriver) element;
5 File screen = ((TakesScreenshot) wrapsDriver.getWrappedDriver()).getScreenshotAs(OutputType.FILE);
6 try {
7 BufferedImage image = ImageIO.read(screen);
8 //建立一個矩形使用上面的高度,和寬度
9 Point p = element.getLocation();
10 //元素座標
11 BufferedImage img = image.getSubimage(p.getX(), p.getY(), element.getSize().getWidth(), element.getSize().getHeight());
12 ImageIO.write(img, "png", screen);
13
14 FileUtils.copyFile(screen, new File(儲存本地地址 + "imgname.png"));
15 } catch (IOException e) {
16 e.printStackTrace();
17 }
18 }

4.將圖片驗證碼儲存到本地(滑鼠法)

 1 private static void saveImgToLocal1(WebDriver driver) {
2 Actions action = new Actions(driver);
3 action.contextClick(driver.findElement(By.id(img元素ID))).build().perform();
4 try {
5 Robot robot = new Robot();
6 Thread.sleep(1000);
7
8 robot.keyPress(KeyEvent.VK_DOWN);
9 Thread.sleep(1000);
10
11 robot.keyPress(KeyEvent.VK_DOWN);
12 Thread.sleep(1000);
13
14 robot.keyPress(KeyEvent.VK_ENTER);
15 Thread.sleep(1000);
16 //釋放向下鍵,不然在此之前的條目將起作用
17 robot.keyRelease(KeyEvent.VK_DOWN);
18 Thread.sleep(1000);
19 //執行儲存
20 Runtime.getRuntime().exec(SAVE_IMG_EXE);
21 Thread.sleep(10000);
22 } catch (Exception e) {
23 e.printStackTrace();
24 }
25 }

 1 private static void saveImgToLocal1(WebDriver driver) {
2 Actions action = new Actions(driver);
3 action.contextClick(driver.findElement(By.id(img元素ID))).build().perform();
4 try {
5 Robot robot = new Robot();
6 Thread.sleep(1000);
7
8 robot.keyPress(KeyEvent.VK_DOWN);
9 Thread.sleep(1000);
10
11 robot.keyPress(KeyEvent.VK_DOWN);
12 Thread.sleep(1000);
13
14 robot.keyPress(KeyEvent.VK_ENTER);
15 Thread.sleep(1000);
16 //釋放向下鍵,不然在此之前的條目將起作用
17 robot.keyRelease(KeyEvent.VK_DOWN);
18 Thread.sleep(1000);
19 //執行儲存
20 Runtime.getRuntime().exec(SAVE_IMG_EXE);
21 Thread.sleep(10000);
22 } catch (Exception e) {
23 e.printStackTrace();
24 }
25 }

5.對本地驗證碼進行OCR識別

 1 private String getCodeByOCR() {
2 String result = null;
3 File file = new File(本地圖片地址);
4 if (!file.exists()) {
5 if (systemFalg != 1) {
6 file.setWritable(true, false);
7 }
8 file.mkdirs();
9 }
10 File imageFile = new File(本地圖片地址 + "imgname.png");
11 if (imageFile.exists()) {
12 ITesseract instance = new Tesseract();
13 instance.setDatapath(tessdata存放地址);
14 try {
15 String doOCR = instance.doOCR(imageFile);
16 result = replaceBlank(doOCR);
17 log.info("解析的驗證碼為:{}", result != null ? result : "為空!");
18 } catch (Exception e) {
19 e.printStackTrace();
20 log.error("解析驗證碼異常!");
21 }
22 } else {
23 log.error("解析驗證碼的檔案不存在!");
24 }
25 return result;
26 }

綜上,該網頁的資料就可以獲取了。