用C#實現網路爬蟲(一)
阿新 • • 發佈:2019-01-24
/// <summary>
/// One stateful decoder per in-flight request. A stateful decoder carries the
/// trailing bytes of an incomplete multi-byte character over to the next read,
/// so characters split across two buffers are decoded correctly. Keys are held
/// weakly, so an abandoned request does not keep its decoder alive.
/// </summary>
private readonly System.Runtime.CompilerServices.ConditionalWeakTable<RequestState, System.Text.Decoder> _chunkDecoders =
    new System.Runtime.CompilerServices.ConditionalWeakTable<RequestState, System.Text.Decoder>();

/// <summary>
/// Callback for the asynchronous read started on a response stream.
/// Decodes the bytes that just arrived and appends them to the page text held in
/// the request state, then either issues the next read or, once the stream is
/// exhausted, saves the page, extracts its links, queues them one level deeper,
/// frees the worker slot and dispatches more work.
/// </summary>
/// <param name="ar">
/// Result of the pending read; its <c>AsyncState</c> must be the
/// <c>RequestState</c> that was passed to <c>BeginRead</c>.
/// </param>
private void ReceivedData(IAsyncResult ar)
{
    RequestState rs = (RequestState)ar.AsyncState; // state handed to BeginRead
    HttpWebRequest req = rs.Req;
    Stream resStream = rs.ResStream;
    string url = rs.Url;
    int depth = rs.Depth;
    int index = rs.Index;

    try
    {
        int read = resStream.EndRead(ar); // number of bytes delivered by this read

        if (_stop) // download was cancelled: tear the request down and bail out
        {
            _chunkDecoders.Remove(rs);
            resStream.Close();
            req.Abort();
            return;
        }

        System.Text.Decoder decoder = _chunkDecoders.GetValue(rs, key => _encoding.GetDecoder());

        if (read > 0)
        {
            AppendDecoded(rs, decoder, read, false);

            // More data may follow: queue the next asynchronous read into the same buffer.
            resStream.BeginRead(rs.Data, 0, rs.BufferSize, new AsyncCallback(ReceivedData), rs);
            return;
        }

        // End of stream: flush whatever the decoder still buffers, then release the stream.
        AppendDecoded(rs, decoder, 0, true);
        _chunkDecoders.Remove(rs);
        resStream.Close();

        string html = rs.Html.ToString();
        SaveContents(html, url);          // persist the page locally
        string[] links = GetLinks(html);  // extract the links found in the page
        AddUrls(links, depth + 1);        // filter them and queue them as not-yet-downloaded

        _reqsBusy[index] = false;         // mark this worker slot as idle again
        DispatchWork();                   // hand out new work
    }
    catch (WebException we)
    {
        MessageBox.Show("ReceivedData Web " + we.Message + url + we.Status);
        AbandonRequest(rs);
    }
    catch (IOException ioe)
    {
        // Stream.EndRead reports transport failures as IOException; without this
        // handler the exception would escape the callback.
        MessageBox.Show("ReceivedData IO " + ioe.Message + url);
        AbandonRequest(rs);
    }
}

/// <summary>
/// Decodes the first <paramref name="byteCount"/> bytes of the request's buffer
/// with the request's stateful decoder and appends the text to its page buffer.
/// </summary>
/// <param name="rs">Request whose <c>Data</c> buffer is read and whose <c>Html</c> is appended to.</param>
/// <param name="decoder">Decoder dedicated to this request.</param>
/// <param name="byteCount">Number of valid bytes at the start of <c>rs.Data</c>.</param>
/// <param name="flush">True on the final call so that pending partial input is emitted.</param>
private static void AppendDecoded(RequestState rs, System.Text.Decoder decoder, int byteCount, bool flush)
{
    int capacity = decoder.GetCharCount(rs.Data, 0, byteCount, flush);
    if (capacity == 0)
    {
        return;
    }

    char[] chars = new char[capacity];
    int produced = decoder.GetChars(rs.Data, 0, byteCount, chars, 0, flush);

    // The previous StreamReader-based decoding dropped a leading byte-order mark;
    // keep doing so for the very first characters of the page.
    int start = 0;
    if (rs.Html.Length == 0 && produced > 0 && chars[0] == '\uFEFF')
    {
        start = 1;
    }

    if (produced > start)
    {
        rs.Html.Append(new string(chars, start, produced - start));
    }
}

/// <summary>
/// Cleans up after a failed download: drops the request's decoder, closes its
/// response stream, frees its worker slot and dispatches more work so that one
/// failure does not permanently occupy the slot.
/// </summary>
/// <param name="rs">State of the request that failed.</param>
private void AbandonRequest(RequestState rs)
{
    _chunkDecoders.Remove(rs);

    try
    {
        rs.ResStream.Close();
    }
    catch (IOException)
    {
        // Best-effort cleanup: the stream already failed, nothing more to report.
    }

    _reqsBusy[rs.Index] = false;
    DispatchWork();
}