1. 程式人生 > >boa原始碼解析(1)-接收請求,傳送html的流程

boa原始碼解析(1)-接收請求,傳送html的流程

最近接觸到boa,記錄下來以便複習

本篇記錄伺服器接受請求,向客戶端傳送指定檔案的流程

1.首先從boa.c的main函式開始

while ((c = getopt(argc, argv, "c:r:d")) != -1) {
        switch (c) {
        case 'c':
            if (server_root)
                free(server_root);
            server_root = strdup(optarg);
            if (!server_root) {
                perror("strdup (for server_root)");
                exit(1);
            }
            break;
        case 'r':
            if (chdir(optarg) == -1) {
                log_error_time();
                perror("chdir (to chroot)");
                exit(1);
            }
            if (chroot(optarg) == -1) {
                log_error_time();
                perror("chroot");
                exit(1);
            }
            if (chdir("/") == -1) {
                log_error_time();
                perror("chdir (after chroot)");
                exit(1);
            }
            break;
        case 'd':
            do_fork = 0;
            break;
        default:
            fprintf(stderr, "Usage: %s [-c serverroot] [-r chroot] [-d]\n", argv[0]);
            exit(1);
        }
剛開始會對傳入的引數進行解析。例如 -c /opt/app/,則server_root=/opt/app/;server_root用於指定配置檔案所在的目錄,即boa.conf位於/opt/app/目錄下!!!另外,-r 會先chdir再chroot到指定目錄,-d 則把do_fork設為0(之後不會fork到背景執行)。
    fixup_server_root();
    read_config_files();
    open_logs();
    server_s = create_server_socket();
    init_signals();
    drop_privs();
    create_common_env();
    build_needs_escape();

fixup_server_root():該函式中最重要的語句是chdir(server_root),即跳轉到配置檔案所在的目錄下,然後read_config_files,即讀取配置檔案!!!之後程式碼中很多引數的值都是從配置檔案中獲取的。

create_server_socket():boa伺服器建立監聽socket,步驟無非就是:建立socket、將套接字設定為非阻塞、設定埠重用(連線斷開後該埠可立即再次使用)、繫結(bind)socket、開始監聽(listen)。

init_signals:設定相應的訊號及訊號處理函式。

    /* background ourself */
    if (do_fork) {
        switch(fork()) {
        case -1:
            /* error */
            perror("fork");
            exit(1);
            break;
        case 0:
            /* child, success */
            break;
        default:
            /* parent, success */
            exit(0);
            break;
        }
    }
守護程序:當終端被關閉後,利用該終端開啟的相應程序也會被關閉。守護程序就是為了衝破這種障礙:它從被執行的時候開始運轉,直到整個系統關閉才退出(當然也可以人為地殺死相應的守護程序)。如果想讓某個程序不因為使用者、終端或其他變化而受到影響,那麼就必須把這個程序變成一個守護程序。

守護程序的步驟:

1.使用fork,建立子程序,父程序退出
2.呼叫setsid,擺脫其他程序的控制

3.改變當前目錄為根目錄

4.重設檔案許可權掩碼

這裡只做了第一步

程式總算是初始化完畢了。

接下來就是接收處理資料了。關鍵函式select_loop(server_s);boa會進入一個迴圈,不斷地接收處理資料

/* any blocked req's move from request_ready to request_block */
process_requests(server_s);

if (!sigterm_flag && total_connections < (max_connections - 10)) {
    BOA_FD_SET(server_s, &block_read_fdset); /* server always set */
}

req_timeout.tv_sec = (request_ready ? 0 :(ka_timeout ? ka_timeout : REQUEST_TIMEOUT));
req_timeout.tv_usec = 0l;   /* reset timeout */

if (select(max_fd + 1, &block_read_fdset,
           &block_write_fdset, NULL,
           (request_ready || request_block ? &req_timeout : NULL)) == -1) {
/* what is the appropriate thing to do here on EBADF */
if (errno == EINTR)
   continue;   /* while(1) */
else if (errno != EBADF) {
    DIE("select");
   }
}

time(&t_time);
if (FD_ISSET(server_s, &block_read_fdset))
     pending_requests = 1;
}

剛開始,pending_requests為0。從上面的程式碼可以看到,程式首先會執行process_requests,但因為pending_requests=0,程式只是進去逛了一圈,什麼事情也不會做。

程式會一直阻塞在select(此時request_ready與request_block都為空,傳給select的超時引數為NULL),直到有新的客戶端連線進來。當有新的客戶端連線到伺服器後,監聽套接字server_s變為可讀,select不再阻塞,pending_requests將變成1,然後就會再次進入process_requests中

 if (pending_requests) {
        get_request(server_s);
#ifdef ORIGINAL_BEHAVIOR
        pending_requests = 0;
#endif
    }
這時我們將會執行get_request來獲取連線請求。在get_request中,首先執行accept,然後新建一個request
conn = new_request();
    if (!conn) {
        close(fd);
        return;
    }
    conn->fd = fd;
    conn->status = READ_HEADER;
    conn->header_line = conn->client_stream;
    conn->time_last = current_time;
    conn->kacount = ka_max;
初始狀態為READ_HEADER。最後把conn加入request_ready佇列中:enqueue(&request_ready, conn);

執行完get_request函式後,程式繼續執行process_requests

current = request_ready;

    while (current) {
        time(&t_time);
        if (current->buffer_end && /* there is data in the buffer */
            current->status != DEAD && current->status != DONE) {
            retval = req_flush(current);
            /*
             * retval can be -2=error, -1=blocked, or bytes left
             */
            if (retval == -2) { /* error */
                current->status = DEAD;
                retval = 0;
            } else if (retval >= 0) {
                /* notice the >= which is different from below?
                   Here, we may just be flushing headers.
                   We don't want to return 0 because we are not DONE
                   or DEAD */

                retval = 1;
            }
        } else {
            switch (current->status) {
            case READ_HEADER:
            case ONE_CR:
            case ONE_LF:
            case TWO_CR:
                retval = read_header(current);
                break;
            case BODY_READ:
                retval = read_body(current);
                break;
            case BODY_WRITE:
                retval = write_body(current);
                break;
            case WRITE:
                retval = process_get(current);
                break;
            case PIPE_READ:
                retval = read_from_pipe(current);
                break;
            case PIPE_WRITE:
                retval = write_from_pipe(current);
                break;
            case DONE:
                /* a non-status that will terminate the request */
                retval = req_flush(current);
                /*
                 * retval can be -2=error, -1=blocked, or bytes left
                 */
                if (retval == -2) { /* error */
                    current->status = DEAD;
                    retval = 0;
                } else if (retval > 0) {
                    retval = 1;
                }
                break;
            case DEAD:
                retval = 0;
                current->buffer_end = 0;
                SQUASH_KA(current);
                break;
            default:
                retval = 0;
                fprintf(stderr, "Unknown status (%d), "
                        "closing!\n", current->status);
                current->status = DEAD;
                break;
            }

        }

        if (sigterm_flag)
            SQUASH_KA(current);

        /* we put this here instead of after the switch so that
         * if we are on the last request, and get_request is successful,
         * current->next is valid!
         */
        if (pending_requests)
            get_request(server_s);

        switch (retval) {
        case -1:               /* request blocked */
            trailer = current;
            current = current->next;
            block_request(trailer);
            break;
        case 0:                /* request complete */
            current->time_last = current_time;
            trailer = current;
            current = current->next;
            free_request(&request_ready, trailer);
            break;
        case 1:                /* more to do */
            current->time_last = current_time;
            current = current->next;
            break;
        default:
            log_error_time();
            fprintf(stderr, "Unknown retval in process.c - "
                    "Status: %d, retval: %d\n", current->status, retval);
            current = current->next;
            break;
        }
    }
接下來開始迴圈處理剛才接收到的請求。剛開始由於緩衝區中沒有資料(current->buffer_end為0),將會執行else中的內容;由於初始狀態為READ_HEADER,程式將執行read_header,呼叫read讀取報文,並返回1。由程式碼註釋(more to do)我們也可以看出,此時只是獲取了資料,並沒有進行處理。然後current=current->next,如果只有一個請求的話,這時候current已經是NULL了,將會退出while迴圈。但資料並沒有消失,仍然儲存在request_ready佇列中。因為request_ready不為空,req_timeout.tv_sec會被設為0,所以select(max_fd + 1, &block_read_fdset, &block_write_fdset, NULL,(request_ready || request_block ? &req_timeout : NULL))並不會一直阻塞,程式將再一次執行process_requests函式(這麼做的一個好處就是當boa和CGI互動時,如果資料量較大,boa不會一直卡住,會處理完得到的資料後再執行process_requests進行處理)。這時候current再次等於request_ready,處理剛才未處理的資料;這時候將再次執行read_header函式,不過這次不再是讀取資料,而是對已讀取的資料進行解析。

while (check < (buffer + bytes)) {
        switch (req->status) {
        case READ_HEADER:
            if (*check == '\r') {
                req->status = ONE_CR;
                req->header_end = check;
            } else if (*check == '\n') {
                req->status = ONE_LF;
                req->header_end = check;
            }
            break;

        case ONE_CR:
            if (*check == '\n')
                req->status = ONE_LF;
            else if (*check != '\r')
                req->status = READ_HEADER;
            break;

        case ONE_LF:
            /* if here, we've found the end (for sure) of a header */
            if (*check == '\r') /* could be end o headers */
                req->status = TWO_CR;
            else if (*check == '\n')
                req->status = BODY_READ;
            else
                req->status = READ_HEADER;
            break;

        case TWO_CR:
            if (*check == '\n')
                req->status = BODY_READ;
            else if (*check != '\r')
                req->status = READ_HEADER;
            break;

        default:
            break;
        }

#ifdef VERY_FASCIST_LOGGING
        log_error_time();
        fprintf(stderr, "status, check: %d, %d\n", req->status, *check);
#endif

        req->parse_pos++;       /* update parse position */
        check++;

        if (req->status == ONE_LF) {
            *req->header_end = '\0';

            /* terminate string that begins at req->header_line */

            if (req->logline) {
                if (process_option_line(req) == 0) {
                    return 0;
                }
            } else {
                if (process_logline(req) == 0)
                    return 0;
                if (req->simple)
                    return process_header_end(req);
            }
            /* set header_line to point to beginning of new header */
            req->header_line = check;
        } else if (req->status == BODY_READ) {
#ifdef VERY_FASCIST_LOGGING
            int retval;
            log_error_time();
            fprintf(stderr, "%s:%d -- got to body read.\n",
                    __FILE__, __LINE__);
            retval = process_header_end(req);
#else
            int retval = process_header_end(req);
#endif
            /* process_header_end inits non-POST cgi's */

            if (retval && req->method == M_POST) {
                /* for body_{read,write}, set header_line to start of data,
                   and header_end to end of data */
                req->header_line = check;
                req->header_end =
                    req->client_stream + req->client_stream_pos;

                req->status = BODY_WRITE;
                /* so write it */
                /* have to write first, or read will be confused
                 * because of the special case where the
                 * filesize is less than we have already read.
                 */

                /*

                   As quoted from RFC1945:

                   A valid Content-Length is required on all HTTP/1.0 POST requests. An
                   HTTP/1.0 server should respond with a 400 (bad request) message if it
                   cannot determine the length of the request message's content.

                 */

                if (req->content_length) {
                    int content_length;

                    content_length = boa_atoi(req->content_length);
                    /* Is a content-length of 0 legal? */
                    if (content_length <= 0) {
                        log_error_time();
                        fprintf(stderr, "Invalid Content-Length [%s] on POST!\n",
                                req->content_length);
                        send_r_bad_request(req);
                        return 0;
                    }
                    if (single_post_limit && content_length > single_post_limit) {
                        log_error_time();
                        fprintf(stderr, "Content-Length [%d] > SinglePostLimit [%d] on POST!\n",
                                content_length, single_post_limit);
                        send_r_bad_request(req);
                        return 0;
                    }
                    req->filesize = content_length;
                    req->filepos = 0;
                    if (req->header_end - req->header_line > req->filesize) {
                        req->header_end = req->header_line + req->filesize;
                    }
                } else {
                    log_error_time();
                    fprintf(stderr, "Unknown Content-Length POST!\n");
                    send_r_bad_request(req);
                    return 0;
                }
            }                   /* either process_header_end failed or req->method != POST */
            return retval;      /* 0 - close it done, 1 - keep on ready */
        }                       /* req->status == BODY_READ */
    }
首先,讀取http第一行資料,即請求行。因為剛開始req->logline為NULL,所以程式會執行process_logline,該函式用於解析請求行資料。之後logline不再為NULL,之後讀取的每一行請求頭部都會執行process_option_line。請求頭讀取完畢後,http報文會空一行,也就是會有兩個連續的換行符,狀態變更為BODY_READ,執行process_header_end。
/*
 * Name: process_header_end
 *
 * Description: finishes header processing for a request and decides how
 * the request will be served.  The header parser calls it when the blank
 * line that terminates the headers is seen, and also right after the
 * request line when req->simple is set.
 *
 * Steps, in order: reject a request with no req->logline, percent-decode
 * and clean req->request_uri, require it to start with '/', run
 * translate_uri(), then dispatch on the request kind (POST, CGI, or
 * plain GET/HEAD).
 *
 * Return values:
 *   0: failure, the caller should close the request
 *   1: POST accepted and a temporary file was opened for the body
 *   otherwise the return value of init_cgi() or init_get()
 */
int process_header_end(request * req)
{
    /* no req->logline recorded for this request: answer with an error */
    if (!req->logline) {
        send_r_error(req);
        return 0;
    }

    /* Percent-decode request; query_string is filled in through the
     * second argument */
    if (unescape_uri(req->request_uri, &(req->query_string)) == 0) {
        log_error_doc(req);
        fputs("Problem unescaping uri\n", stderr);
        send_r_bad_request(req);
        return 0;
    }

    /* clean pathname (modifies req->request_uri in place) */
    clean_pathname(req->request_uri);

    /* after decoding and cleaning, only URIs that begin with '/' are
     * accepted */
    if (req->request_uri[0] != '/') {
        send_r_bad_request(req);
        return 0;
    }

    if (translate_uri(req) == 0) { /* unescape, parse uri */
        /* NOTE(review): SQUASH_KA presumably disables keep-alive for
         * this connection; confirm against its definition */
        SQUASH_KA(req);
        return 0;               /* failure, close down */
    }

    /* POST: only the temporary file for the body is opened here; the
     * caller switches the request to BODY_WRITE afterwards */
    if (req->method == M_POST) {
        req->post_data_fd = create_temporary_file(1, NULL, 0);
        /* a descriptor value of 0 is treated as failure */
        if (req->post_data_fd == 0)
            return(0);
        return(1); /* success */
    }

    /* non-POST CGI requests are initialised here */
    if (req->is_cgi) {
        return init_cgi(req);
    }

    /* plain file request: mark the request as WRITE so the request loop
     * will call process_get() for it, then prepare the response */
    req->status = WRITE;
    return init_get(req);       /* get and head */
}

該函式會對請求行的URI進行解析。對於非CGI的GET/HEAD請求,狀態變更為WRITE,執行init_get,讀取要傳送的檔案資料。然後程式回到process_requests,因為狀態為WRITE,程式便會執行process_get,將檔案傳送到客戶端。