1. 程式人生 > >select模型linux核心原始碼註釋總結

select模型linux核心原始碼註釋總結

1、使用者空間的select系統呼叫最終會進入核心的sys_select函式

/*
 * sys_select - entry point reached by the user-space select() system call.
 *
 * @n:    passed through unchanged to core_sys_select()
 * @inp:  user pointer to the "read" fd_set (passed through)
 * @outp: user pointer to the "write" fd_set (passed through)
 * @exp:  user pointer to the "exception" fd_set (passed through)
 * @tvp:  optional user pointer to the timeout; NULL leaves timeout at -1
 *
 * Converts the user timeval into a tick count (units of 1/HZ second), calls
 * core_sys_select(), and then tries to write the time derived from the
 * updated timeout back to *tvp.
 *
 * Returns the result of core_sys_select(), -EFAULT if the timeval cannot be
 * copied in, or -EINVAL if either timeval field is negative.
 */
asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
            fd_set __user *exp, struct timeval __user *tvp)
{
    s64 timeout = -1;
    struct timeval tv;
    int ret;

    if (tvp) {
        /* Fetch the caller's timeout from user space. */
        if (copy_from_user(&tv, tvp, sizeof(tv)))
            return -EFAULT;

        if (tv.tv_sec < 0 || tv.tv_usec < 0)
            return -EINVAL;

        /* Cast to u64 to make GCC stop complaining */
        if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
            timeout = -1;   /* infinite */
        else {
            /* Round the microseconds up to whole ticks, then add seconds. */
            timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
            timeout += tv.tv_sec * HZ;
        }
    }

    /* The actual select work happens here. */
    ret = core_sys_select(n, inp, outp, exp, &timeout);

    if (tvp) {
        struct timeval rtv;

        if (current->personality & STICKY_TIMEOUTS)
            goto sticky;
        /*
         * do_div() leaves the quotient (whole seconds) in timeout and
         * yields the remainder in ticks, which becomes the usec part.
         */
        rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
        rtv.tv_sec = timeout;
        /* Never report more time than the caller originally asked for. */
        if (timeval_compare(&rtv, &tv) >= 0)
            rtv = tv;
        if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
sticky:
            /*
             * If an application puts its timeval in read-only
             * memory, we don't want the Linux-specific update to
             * the timeval to cause a fault after the select has
             * completed successfully. However, because we're not
             * updating the timeval, we can't restart the system
             * call.
             */
            if (ret == -ERESTARTNOHAND)
                ret = -EINTR;
        }
    }

    return ret;
}

2、繼續呼叫core_sys_select

/*
 * core_sys_select - copy the fd sets in, run do_select(), copy results out.
 *
 * @n:       number of descriptors to consider; clamped to the current
 *           fdtable's max_fds
 * @inp:     user "read" set
 * @outp:    user "write" set
 * @exp:     user "exception" set
 * @timeout: passed through to do_select()
 *
 * Returns do_select()'s result on success; -EINVAL for negative @n, -ENOMEM
 * if the bitmap buffer cannot be allocated, a non-zero get_fd_set() result,
 * -ERESTARTNOHAND when nothing is ready and a signal is pending, or -EFAULT
 * if a result set cannot be written back.
 */
static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
               fd_set __user *exp, s64 *timeout)
{
    /* Allocate small arguments on the stack to save memory and be faster */
    long stack_fds[SELECT_STACK_ALLOC/sizeof(long)];
    fd_set_bits fds;
    struct fdtable *fdt;
    unsigned int set_bytes;
    void *buf = stack_fds;
    int fd_limit;
    int ret;

    if (n < 0)
        return -EINVAL;

    /* max_fds can increase, so grab it once to avoid race */
    rcu_read_lock();
    fdt = files_fdtable(current->files);
    fd_limit = fdt->max_fds;
    rcu_read_unlock();
    if (n > fd_limit)
        n = fd_limit;

    /*
     * Six bitmaps are needed: in/out/ex as requested by the caller plus
     * the three matching result maps. Each occupies set_bytes bytes, with
     * one bit per descriptor (per the original annotation, n = 1024 gives
     * 128 bytes per map; FDS_BYTES is defined elsewhere).
     */
    set_bytes = FDS_BYTES(n);
    if (set_bytes > sizeof(stack_fds) / 6) {
        /* Not enough space in on-stack array; must use kmalloc */
        buf = kmalloc(6 * set_bytes, GFP_KERNEL);
        if (!buf)
            return -ENOMEM;
    }

    /* Carve the buffer into six consecutive, equally sized bitmaps. */
    fds.in      = buf;
    fds.out     = buf + 1 * set_bytes;
    fds.ex      = buf + 2 * set_bytes;
    fds.res_in  = buf + 3 * set_bytes;
    fds.res_out = buf + 4 * set_bytes;
    fds.res_ex  = buf + 5 * set_bytes;

    /* Copy the three requested sets in from user space; stop at first error. */
    ret = get_fd_set(n, inp, fds.in);
    if (!ret)
        ret = get_fd_set(n, outp, fds.out);
    if (!ret)
        ret = get_fd_set(n, exp, fds.ex);
    if (ret)
        goto release;

    /* Result maps start out empty. */
    zero_fd_set(n, fds.res_in);
    zero_fd_set(n, fds.res_out);
    zero_fd_set(n, fds.res_ex);

    ret = do_select(n, &fds, timeout);
    if (ret < 0)
        goto release;

    /* Nothing ready but a signal is pending: report -ERESTARTNOHAND. */
    if (ret == 0 && signal_pending(current)) {
        ret = -ERESTARTNOHAND;
        goto release;
    }

    /* Publish the result maps to user space; any failure becomes -EFAULT. */
    if (set_fd_set(n, inp, fds.res_in))
        ret = -EFAULT;
    else if (set_fd_set(n, outp, fds.res_out))
        ret = -EFAULT;
    else if (set_fd_set(n, exp, fds.res_ex))
        ret = -EFAULT;

release:
    /* Only free what kmalloc handed out, never the on-stack buffer. */
    if (buf != stack_fds)
        kfree(buf);
    return ret;
}

3、繼續呼叫do_select

/*
 * do_select - poll every requested descriptor, sleeping until one is ready.
 *
 * @n:       upper bound on the descriptors to scan; replaced by the value
 *           returned from max_select_fd()
 * @fds:     requested bitmaps (in/out/ex) and result bitmaps (res_in/
 *           res_out/res_ex), one bit per descriptor
 * @timeout: in/out tick budget: 0 means a single non-blocking pass, a
 *           negative value means wait indefinitely; on a positive budget the
 *           value returned by schedule_timeout() is added back after each
 *           sleep
 *
 * Returns the number of result bits set, 0 if the loop ended with nothing
 * ready, a negative value from max_select_fd(), or table.error.
 */
int do_select(int n, fd_set_bits *fds, s64 *timeout)
{
    struct poll_wqueues table;
    poll_table *wait;
    int retval, i;

    rcu_read_lock();
    /* New scan bound, or a negative error (max_select_fd is defined elsewhere). */
    retval = max_select_fd(n, fds);
    rcu_read_unlock();

    if (retval < 0)
        return retval;
    n = retval;

    poll_initwait(&table);
    wait = &table.pt;
    /* Zero timeout: single pass, so no wait table is ever passed to ->poll(). */
    if (!*timeout)
        wait = NULL;
    retval = 0;
    /* Loop until one of the break conditions below is met. */
    for (;;) {
        unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
        long __timeout;

        /* Mark the task interruptible before polling; it may sleep below. */
        set_current_state(TASK_INTERRUPTIBLE);

        inp = fds->in; outp = fds->out; exp = fds->ex;
        rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex;

        /* Outer loop: one unsigned long word of each bitmap per iteration. */
        for (i = 0; i < n; ++rinp, ++routp, ++rexp) {
            unsigned long in, out, ex, all_bits, bit = 1, mask, j;
            unsigned long res_in = 0, res_out = 0, res_ex = 0;
            const struct file_operations *f_op = NULL;
            struct file *file = NULL;

            in = *inp++; out = *outp++; ex = *exp++;
            /* Union of all requests in this word; zero means skip the whole word. */
            all_bits = in | out | ex;
            if (all_bits == 0) {
                i += __NFDBITS;
                continue;
            }

            /* Inner loop: walk this word bit by bit, one descriptor per bit. */
            for (j = 0; j < __NFDBITS; ++j, ++i, bit <<= 1) {
                int fput_needed;
                if (i >= n)
                    break;
                if (!(bit & all_bits))
                    continue;
                /* Look up the struct file for descriptor i; skipped if none. */
                file = fget_light(i, &fput_needed);
                if (file) {
                    f_op = file->f_op; /* the file's operations table */
                    mask = DEFAULT_POLLMASK;
                    /*
                     * If the file has a poll method, ask it for the event
                     * mask. The wait table is passed only while nothing has
                     * been found ready yet (retval == 0).
                     */
                    if (f_op && f_op->poll)
                        mask = (*f_op->poll)(file, retval ? NULL : wait);
                    fput_light(file, fput_needed);
                    /*
                     * For each condition that was both requested and
                     * reported, set the bit in the result word and count it.
                     */
                    if ((mask & POLLIN_SET) && (in & bit)) {
                        res_in |= bit;
                        retval++;
                    }
                    if ((mask & POLLOUT_SET) && (out & bit)) {
                        res_out |= bit;
                        retval++;
                    }
                    if ((mask & POLLEX_SET) && (ex & bit)) {
                        res_ex |= bit;
                        retval++;
                    }
                }
                cond_resched();
            }
            /* Store result words only when at least one bit was set. */
            if (res_in)
                *rinp = res_in;
            if (res_out)
                *routp = res_out;
            if (res_ex)
                *rexp = res_ex;
        }
        /* After the first full pass the wait table is no longer passed to ->poll(). */
        wait = NULL;
        /* Done if something is ready, the budget is spent, or a signal is pending. */
        if (retval || !*timeout || signal_pending(current))
            break;
        if(table.error) {
            retval = table.error;
            break;
        }

        if (*timeout < 0) {
            /* Wait indefinitely */
            __timeout = MAX_SCHEDULE_TIMEOUT;
        } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) {
            /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */
            __timeout = MAX_SCHEDULE_TIMEOUT - 1;
            *timeout -= __timeout;
        } else {
            __timeout = *timeout;
            *timeout = 0;
        }
        /*
         * Sleep, then add schedule_timeout()'s return value (presumably the
         * unexpired part of the sleep) back to the budget, unless waiting
         * indefinitely.
         */
        __timeout = schedule_timeout(__timeout);
        if (*timeout >= 0)
            *timeout += __timeout;
    }
    __set_current_state(TASK_RUNNING);

    poll_freewait(&table);

    return retval;
}

引用兩張圖
上面的圖說明select在進行監聽前是怎樣組織資料的。但是每個size不是4個位元組,而是根據需要監聽的最大檔案描述符,計算要用多少個long才能滿足監聽的需求,每一位代表一個檔案描述符。假如監聽1024個,則每個size需要128位元組,也就是32個long型別的資料(以long為4位元組的32位元系統為例)。如果fd為1,則每個size只需要一個long型別的資料,同樣要申請6個size的空間。
這裡寫圖片描述
這是select怎樣判斷是否存在可讀、可寫、以及異常的事件:都是通過呼叫每個檔案的poll回撥函式(f_op->poll)取得事件掩碼來判斷。
尤其要注意的是,select每次都要迴圈遍歷所有被監聽的檔案描述符,這是select模型的主要工作方式,因此空間消耗和時間消耗都比較大。
圖片的來源:https://blog.csdn.net/leaf_cold/article/details/79452371
再綜合看另外一張圖:
這裡寫圖片描述
比較清晰的畫出了select的呼叫過程
圖片來源:https://blog.csdn.net/zhougb3/article/details/79792089
參考文章:1、select用法&原理詳解(原始碼剖析):https://blog.csdn.net/zhougb3/article/details/79792089
2、select 原始碼剖析:https://blog.csdn.net/leaf_cold/article/details/79452371