1. 程式人生 > >C語言實現抓取網頁原始碼

C語言實現抓取網頁原始碼

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
 
/* TCP port of the remote server; main() converts it with htons() when it
 * fills in the socket address it connects to. */
int port = 80;  

/* Size of the request buffer and of the chunk buffer used while receiving. */
enum { IO_BUFFER_SIZE = 512 };

/*
 * Print the official name, the aliases and every address of a resolved host
 * to stdout.
 *
 * The text buffer is INET6_ADDRSTRLEN bytes so that inet_ntop() cannot fail
 * for lack of space on an IPv6 address; a NULL result is still guarded
 * against because passing NULL to "%s" is undefined behaviour.
 */
static void print_host_info(const struct hostent *host)
{
    char text[INET6_ADDRSTRLEN];
    const char *shown;
    char **entry;

    printf("official hostname:%s\n", host->h_name);
    for (entry = host->h_aliases; entry != NULL && *entry != NULL; entry++) {
        printf(" alias:%s\n", *entry);
    }

    switch (host->h_addrtype) {
    case AF_INET:
    case AF_INET6:
        for (entry = host->h_addr_list; *entry != NULL; entry++) {
            shown = inet_ntop(host->h_addrtype, *entry, text, sizeof(text));
            printf(" address:%s\n", shown != NULL ? shown : "(unprintable)");
        }
        if (host->h_addr_list[0] != NULL) {
            shown = inet_ntop(host->h_addrtype, host->h_addr_list[0],
                              text, sizeof(text));
            printf(" first address: %s\n",
                   shown != NULL ? shown : "(unprintable)");
        }
        break;
    default:
        printf("unknown address type\n");
        break;
    }
}

/*
 * Send exactly `length` bytes, retrying after short writes.
 * Returns 0 on success and -1 if send() fails or makes no progress.
 */
static int send_all(int sock, const char *data, size_t length)
{
    size_t sent = 0;

    while (sent < length) {
        ssize_t n = send(sock, data + sent, length - sent, 0);
        if (n <= 0) {
            return -1;
        }
        sent += (size_t)n;
    }
    return 0;
}

/*
 * Consume the response status line and headers one byte at a time, stopping
 * right after the empty line that ends them ('\r' bytes are ignored, so both
 * CRLF and bare LF line endings are accepted).
 *
 * Returns 0 once the empty line was seen, or -1 if recv() reported an error,
 * a timeout or end of stream first. Treating a 0 return from recv() as
 * termination is what keeps this loop from spinning forever when the peer
 * closes the connection early.
 */
static int skip_response_headers(int sock)
{
    int line_has_text = 0;

    for (;;) {
        char c;
        ssize_t n = recv(sock, &c, 1, 0);

        if (n <= 0) {
            return -1;
        }
        if (c == '\r') {
            continue;
        }
        if (c != '\n') {
            line_has_text = 1;
            continue;
        }
        if (!line_has_text) {
            return 0;
        }
        line_has_text = 0;
    }
}

/*
 * Copy everything that is still readable from the socket to stdout.
 *
 * fwrite() with the received length is used so that NUL bytes in the body do
 * not truncate the output. A receive error or timeout simply ends the copy,
 * exactly like end of stream. Returns -1 only when writing to stdout fails.
 */
static int copy_body_to_stdout(int sock)
{
    char chunk[IO_BUFFER_SIZE];
    ssize_t n;

    while ((n = recv(sock, chunk, sizeof(chunk), 0)) > 0) {
        if (fwrite(chunk, 1, (size_t)n, stdout) != (size_t)n) {
            return -1;
        }
    }
    return 0;
}

/*
 * Fetch "/" from the host named by argv[1] over plain HTTP and print the
 * response body to stdout.
 *
 * argv[1] is handed to gethostbyname(), so it must be a host name (not a
 * full URL with scheme or path). The resolved host information is printed
 * first, then the body of the response; error messages go to stderr.
 *
 * Returns 0 on success, -1 when the argument count is wrong, and exits with
 * status 1 on any resolution, socket or protocol failure.
 */
int main(int argc,char **argv)
{
    struct sockaddr_in pin;
    struct hostent *hptr;
    char message[IO_BUFFER_SIZE];
    /* Receive timeout: 1 second, 0 microseconds. */
    struct timeval timeout = {1, 0};
    int isock;
    int len;

    if (argc != 2) {
        fprintf(stderr, "%s url\n", argc > 0 ? argv[0] : "fetch");
        return -1;
    }

    if ((hptr = gethostbyname(argv[1])) == NULL) {
        fprintf(stderr, "gethostbyname is fail\n");
        exit(1);
    }

    print_host_info(hptr);

    /* The socket below is IPv4-only; refuse anything that would not fit
     * into sin_addr instead of copying part of an IPv6 address into it. */
    if (hptr->h_addrtype != AF_INET || hptr->h_addr_list[0] == NULL
        || (size_t)hptr->h_length != sizeof(pin.sin_addr)) {
        fprintf(stderr, "no IPv4 address available for %s\n", argv[1]);
        exit(1);
    }

    memset(&pin, 0, sizeof(pin));
    pin.sin_family = AF_INET;
    pin.sin_port = htons((unsigned short)port);
    memcpy(&pin.sin_addr, hptr->h_addr_list[0], sizeof(pin.sin_addr));

    /* Build the request with a bounded formatter: argv[1] is caller
     * controlled and must not be able to overflow the buffer.
     * "Connection: close" makes the server end the stream after the
     * response, so the read loop sees end of stream instead of having to
     * wait for the receive timeout. */
    len = snprintf(message, sizeof(message),
                   "GET / HTTP/1.1\r\n"
                   "Host: %s\r\n"
                   "Accept: */*\r\n"
                   "User-Agent: Mozilla/4.0(compatible)\r\n"
                   "Connection: close\r\n"
                   "\r\n",
                   argv[1]);
    if (len < 0 || (size_t)len >= sizeof(message)) {
        fprintf(stderr, "host name is too long\n");
        exit(1);
    }

    if ((isock = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        perror("Error opening socket!");
        exit(1);
    }

    if (connect(isock, (const struct sockaddr *)&pin, sizeof(pin)) == -1) {
        perror("Error connecting to socket");
        close(isock);
        exit(1);
    }

    if (send_all(isock, message, (size_t)len) == -1) {
        perror("Error in send");
        close(isock);
        exit(1);
    }

    /* Set the receive timeout so a silent server cannot block us forever;
     * failing to set it is not fatal, the transfer just loses that guard. */
    if (setsockopt(isock, SOL_SOCKET, SO_RCVTIMEO,
                   &timeout, sizeof(timeout)) == -1) {
        perror("setsockopt(SO_RCVTIMEO)");
    }

    if (skip_response_headers(isock) == -1) {
        fprintf(stderr, "Error reading response headers\n");
        close(isock);
        exit(1);
    }

    if (copy_body_to_stdout(isock) == -1) {
        perror("Error writing to stdout");
        close(isock);
        exit(1);
    }

    close(isock);
    return 0;
}