1. 程式人生 > 用C#實現網路爬蟲(一)

用C#實現網路爬蟲(一)

/// <summary>
/// Callback for the asynchronous read started on a response stream.
/// While data keeps arriving, the bytes just read are decoded with <c>_encoding</c>,
/// appended to <c>rs.Html</c>, and another asynchronous read is issued. Once a read
/// returns zero bytes, the accumulated HTML is saved, its links are queued one level
/// deeper, the worker slot is marked idle and new work is dispatched.
/// </summary>
/// <param name="ar">
/// Result of the pending read; its <c>AsyncState</c> must be the <c>RequestState</c>
/// that was passed to <c>BeginRead</c>.
/// </param>
private void ReceivedData(IAsyncResult ar)
{
    RequestState rs = (RequestState)ar.AsyncState; // per-request state handed to BeginRead
    HttpWebRequest req = rs.Req;
    Stream resStream = rs.ResStream;
    string url = rs.Url;
    int depth = rs.Depth;
    int index = rs.Index;

    // Tracks whether the success path already freed the slot, so the catch block
    // does not free it and dispatch a second time.
    bool slotReleased = false;

    try
    {
        int read = resStream.EndRead(ar); // number of bytes delivered by this read

        if (_stop) // download was cancelled: tear down and do nothing else
        {
            resStream.Close();
            req.Abort();
            return;
        }

        if (read > 0)
        {
            // Decode only the bytes that were actually read and append them.
            // NOTE(review): each chunk is decoded independently, so a multi-byte
            // character split across two reads will be decoded incorrectly. A proper
            // fix needs a per-request System.Text.Decoder kept in RequestState.
            using (MemoryStream ms = new MemoryStream(rs.Data, 0, read))
            using (StreamReader reader = new StreamReader(ms, _encoding))
            {
                rs.Html.Append(reader.ReadToEnd());
            }

            // Ask for the next chunk; this method runs again when it arrives.
            resStream.BeginRead(rs.Data, 0, rs.BufferSize,
                new AsyncCallback(ReceivedData), rs);
            return;
        }

        // Zero bytes read: end of the response. Close the stream so the underlying
        // HTTP connection is released instead of staying open.
        resStream.Close();

        string html = rs.Html.ToString();
        SaveContents(html, url);          // save the page locally
        string[] links = GetLinks(html);  // extract the links in the page
        AddUrls(links, depth + 1);        // filter links and add them to the pending set

        _reqsBusy[index] = false;         // reset this slot's working state
        slotReleased = true;
        DispatchWork();                   // assign a new task
    }
    catch (WebException we)
    {
        // The download failed: release the connection before reporting.
        resStream.Close();

        MessageBox.Show("ReceivedData Web " + we.Message + url + we.Status);

        // Without this the slot would stay marked busy forever and the crawler
        // would lose one worker for every failed download.
        if (!slotReleased)
        {
            _reqsBusy[index] = false;
            DispatchWork();
        }
    }
}