1. 程式人生 > >linux核心3.4基於wakeup_source的autosleep機制分析

linux核心3.4基於wakeup_source的autosleep機制分析

一:wakeup_source簡介:

    Linux 3.4核心PM使用了wakeup_source來保持喚醒狀態,也就是keep awake。之前Android一直是基於Linux加入了wake_lock機制來阻止系統休眠,後來Linux 3.4核心加入了wakeup_source來管理,安卓4.4跟著升級核心也就摒棄了自己的臃腫的wake_lock機制,對上層的介面並沒有改變,核心中的wake_lock直接基於wakeup_source來實現。當然也會帶來debug上的一些問題,比如以前的wake_lock自身帶有強大的debug資訊,那麼我們在除錯的時候可以自己看見dmesg中預設列印active wake lock XXX,很直觀來辨別需要休眠的時候哪個wake lock有問題阻止了休眠。這個需要我們自己來完善。個人認為改進很大,現在使用了autosleep機制,只要不存在任何active wakeup_source了,系統自動休眠,當有active wakeup_source時自動block住,個人認為休眠更及時,非休眠時間在減少,同時不會消耗額外的資源。使用基於queue work與程序block來管理suspend。還有這裡的wakeup_source個人覺得應該叫keepawake_source或者stayawake_source,畢竟系統的喚醒也就是cpu的再次執行是由中斷喚醒的而不是wakeup_source。同時安卓4.4還有一個重大改變就是去除了early suspend機制改為fb event通知機制。那麼現在就只有suspend與resume,runtime suspend與runtime resume了。



/**
 * struct wakeup_source - Representation of wakeup sources
 *
 * @name: Name of the wakeup source (passed to the activate/deactivate
 *        trace points).
 * @entry: List linkage. NOTE(review): the list this node lives on is not
 *         visible in this excerpt.
 * @list: List linkage. NOTE(review): the list this node lives on is not
 *        visible in this excerpt.
 * @lock: Spinlock. NOTE(review): presumably protects the fields below;
 *        confirm against the callers.
 * @timer: Timer associated with the wakeup source; cancelled with del_timer()
 *         when the source is deactivated.
 * @timer_expires: Expiry time of @timer; reset to 0 when the source is
 *                 deactivated.
 * @total_time: Total time this wakeup source has been active.
 * @max_time: Maximum time this wakeup source has been continuously active.
 * @last_time: Monotonic clock when the wakeup source was touched last time.
 * @start_prevent_time: Copy of @last_time taken on activation while
 *                      @autosleep_enabled is set.
 * @prevent_sleep_time: Total time this source has been preventing autosleep.
 * @event_count: Number of signaled wakeup events.
 * @active_count: Number of times the wakeup source was activated.
 * @relax_count: Number of times the wakeup source was deactivated.
 * @expire_count: Number of times the wakeup source's timeout has expired.
 * @wakeup_count: Number of times the wakeup source might abort suspend.
 * @active: Status of the wakeup source.
 * @autosleep_enabled: When set, activation/deactivation also maintain the
 *                     prevent-sleep time statistics.
 */
struct wakeup_source {
    const char         *name;
    struct list_head    entry;
    struct list_head    list;
    spinlock_t        lock;
    struct timer_list    timer;
    /*
     * Timeout value, i.e. the time argument given to wake_lock_timeout();
     * per the original author's note, the deactivate function runs once it
     * expires.
     */
    unsigned long        timer_expires;
    ktime_t total_time;
    ktime_t max_time;
    ktime_t last_time;
    ktime_t start_prevent_time;
    ktime_t prevent_sleep_time;
    unsigned long        event_count; /* event counter */
    unsigned long        active_count; /* activation counter */
    unsigned long        relax_count;
    unsigned long        expire_count;
    unsigned long        wakeup_count;
    bool            active:1; /* tells whether the source is currently active */
    bool            autosleep_enabled:1; /* enables the prevent-sleep time bookkeeping */
};


/**
 * wakeup_source_activate - Mark given wakeup source as active.
 * @ws: Wakeup source to handle.
 *
 * Per the original author's note, activating any wakeup source ends up here.
 * The function flags @ws as active, bumps its activation counter, timestamps
 * the activation and notifies the PM core of the event by incrementing the
 * counter of wakeup events being processed.
 */
static void wakeup_source_activate(struct wakeup_source *ws)
{
    unsigned int combined;

    /* Timestamp the activation, then record it in the statistics. */
    ws->last_time = ktime_get();
    ws->active_count++;
    ws->active = true;

    /* The prevent-sleep interval is only tracked while autosleep is on. */
    if (ws->autosleep_enabled) {
        ws->start_prevent_time = ws->last_time;
    }

    /* Increment the counter of events in progress. */
    combined = atomic_inc_return(&combined_event_count);
    trace_wakeup_source_activate(ws->name, combined);
}


/**
 * wakeup_source_deactivate - Mark given wakeup source as inactive.
 * @ws: Wakeup source to handle.
 *
 * Per the original author's note, deactivating any wakeup source ends up
 * here and clears @ws->active.
 *
 * Update the @ws' statistics and notify the PM core that the wakeup source has
 * become inactive by decrementing the counter of wakeup events being processed
 * and incrementing the counter of registered wakeup events.  If no wakeup
 * event is left in progress afterwards, waiters on wakeup_count_wait_queue
 * are woken up.
 */
static void wakeup_source_deactivate(struct wakeup_source *ws)
{
    unsigned int cnt, inpr, cec;
    ktime_t duration;
    ktime_t now;

    ws->relax_count++;
    /*
     * __pm_relax() may be called directly or from a timer function.
     * If it is called directly right after the timer function has been
     * started, but before the timer function calls __pm_relax(), it is
     * possible that __pm_stay_awake() will be called in the meantime and
     * will set ws->active.  Then, ws->active may be cleared immediately
     * by the __pm_relax() called from the timer function, but in such a
     * case ws->relax_count will be different from ws->active_count.
     */
    if (ws->relax_count != ws->active_count) {
        /* Unbalanced relax: undo the increment and leave @ws active. */
        ws->relax_count--;
        return;
    }

    ws->active = false;

    /* Account the length of the active period that just ended. */
    now = ktime_get();
    duration = ktime_sub(now, ws->last_time);
    ws->total_time = ktime_add(ws->total_time, duration);
    if (ktime_to_ns(duration) > ktime_to_ns(ws->max_time))
        ws->max_time = duration;

    ws->last_time = now;
    /* Cancel the timer and clear its recorded expiry. */
    del_timer(&ws->timer);
    ws->timer_expires = 0;

    if (ws->autosleep_enabled)
        update_prevent_sleep_time(ws, now);

    /*
     * Increment the counter of registered wakeup events and decrement the
     * counter of wakeup events in progress simultaneously.
     */
    cec = atomic_add_return(MAX_IN_PROGRESS, &combined_event_count);
    trace_wakeup_source_deactivate(ws->name, cec);

    split_counters(&cnt, &inpr);

    /*
     * Once no wakeup source is active any more, wake up the waiter on
     * wakeup_count_wait_queue (per the original author's note: the
     * try_to_suspend() worker).
     */
    if (!inpr && waitqueue_active(&wakeup_count_wait_queue)){
        wake_up(&wakeup_count_wait_queue);
    }

}

wakeup_source的申請與釋放:
1:使用安卓的wake_lock介面:wake_lock(),wake_lock_timeout(),wake_unlock();
2: 使用wakeup_source自帶的介面:pm_stay_awake(),pm_relax();這裡的name就是device name。

二:autosleep分析:

sysfs介面:/sys/power/autosleep

亮屏時libsuspend會寫入off,滅屏寫入mem


static ssize_t autosleep_show(struct kobject *kobj,
                  struct kobj_attribute *attr,
                  char *buf)
{
    suspend_state_t state = pm_autosleep_state();

    if (state == PM_SUSPEND_ON)
        return sprintf(buf, "off\n");

#ifdef CONFIG_SUSPEND
    if (state < PM_SUSPEND_MAX)
        return sprintf(buf, "%s\n", valid_state(state) ?
                        pm_states[state] : "error");
#endif
#ifdef CONFIG_HIBERNATION
    return sprintf(buf, "disk\n");
#else
    return sprintf(buf, "error");
#endif
}

/*
 * autosleep_store - "store" handler of the autosleep power attribute.
 * @kobj: Unused.
 * @attr: Unused.
 * @buf: Text written by user space.
 * @n: Length of @buf as passed in by the caller.
 *
 * Decodes @buf with decode_state() and hands the result to
 * pm_autosleep_set_state().  A decoded PM_SUSPEND_ON is only accepted when
 * the text is literally "off" (with or without a trailing newline); any
 * other text that decodes to PM_SUSPEND_ON is rejected.
 *
 * Return: @n on success, -EINVAL for rejected input, or the error reported
 * by pm_autosleep_set_state().
 */
static ssize_t autosleep_store(struct kobject *kobj,
                   struct kobj_attribute *attr,
                   const char *buf, size_t n)
{
    suspend_state_t state = decode_state(buf, n);
    int ret;

    if (state == PM_SUSPEND_ON) {
        /* Only an explicit "off" may select the ON state. */
        if (strcmp(buf, "off") != 0 && strcmp(buf, "off\n") != 0)
            return -EINVAL;
    }

    ret = pm_autosleep_set_state(state);
    if (ret)
        return ret;

    return n;
}

/*
 * NOTE(review): power_attr() is defined outside this excerpt; presumably it
 * declares the attribute object that ties autosleep_show() and
 * autosleep_store() together - confirm against its definition.
 */
power_attr(autosleep);


/*
 * pm_autosleep_set_state - Select the autosleep target state.
 * @state: New autosleep state.
 *
 * Stores @state in autosleep_state under autosleep_lock.  For any state above
 * PM_SUSPEND_ON the autosleep bookkeeping of the wakeup sources is switched
 * on and the suspend work is queued; otherwise the bookkeeping is switched
 * off.
 *
 * Return: 0 on success, or -EINVAL when @state is PM_SUSPEND_MAX or above
 * and CONFIG_HIBERNATION is not set.
 */
int pm_autosleep_set_state(suspend_state_t state)
{
#ifndef CONFIG_HIBERNATION
    if (state >= PM_SUSPEND_MAX)
        return -EINVAL;
#endif

    /* Keep the system awake while the state is being switched. */
    __pm_stay_awake(autosleep_ws);

    mutex_lock(&autosleep_lock);

    autosleep_state = state;

    /* Drop the wakeup source taken above. */
    __pm_relax(autosleep_ws);

    if (state <= PM_SUSPEND_ON) {
        /* Clear autosleep_enabled on the wakeup sources. */
        pm_wakep_autosleep_enabled(false);
    } else {
        /*
         * Set autosleep_enabled on the wakeup sources.  Per the original
         * author's note the flag does not influence suspend itself; it
         * only makes the active time get recorded (visible via debugfs).
         */
        pm_wakep_autosleep_enabled(true);
        /* Schedule the work item that runs try_to_suspend(). */
        queue_up_suspend_work();
    }

    mutex_unlock(&autosleep_lock);
    return 0;
}


重頭戲:try_to_suspend

/*
 * try_to_suspend - Work function driving the autosleep cycle.
 * @work: Unused.
 *
 * Waits until no wakeup event is in progress, saves the wakeup count and then
 * enters the sleep state selected in autosleep_state.  Except when autosleep
 * has been switched to PM_SUSPEND_ON, the function always re-queues itself at
 * the end, so (per the original author's note) a single
 * "echo mem > /sys/power/autosleep" keeps this cycle running.
 */
static void try_to_suspend(struct work_struct *work)
{
    unsigned int initial_count, final_count;

    /*
     * Fetch initial_count.  With block == true this call sleeps for as long
     * as a wakeup source is active, until it gets deactivated.
     */
    if (!pm_get_wakeup_count(&initial_count, true))
        goto out;

    mutex_lock(&autosleep_lock);

    /*
     * Save initial_count.  Per the original author's note this does not
     * block but also checks for active wakeup sources; on failure the work
     * is queued again via the out label.
     */
    if (!pm_save_wakeup_count(initial_count)) {
        mutex_unlock(&autosleep_lock);
        goto out;
    }

    /*
     * Autosleep has been switched to ON: leave without re-queueing.  The
     * original author notes this path was never observed during tracing.
     */
    if (autosleep_state == PM_SUSPEND_ON) {
        mutex_unlock(&autosleep_lock);
        return;
    }
    if (autosleep_state >= PM_SUSPEND_MAX)
        /* Original author's note: hibernate is not supported on Qualcomm platforms at present. */
        hibernate();
    else
        /*
         * Enter pm_suspend().  Per the original author's note, dmesg
         * brackets it with "PM: suspend entry" / "PM: suspend exit", and
         * task freezing, device suspend/resume and CPU disabling all
         * happen inside; it is the central function of the kernel PM code.
         */
        pm_suspend(autosleep_state);

    mutex_unlock(&autosleep_lock);

    /*
     * Fetch final_count without blocking (block == false); a false return
     * means wakeup events are in progress, so just queue the work again.
     */
    if (!pm_get_wakeup_count(&final_count, false))
        goto out;

    /*
     * If the wakeup occurred for an unknown reason, wait to prevent the
     * system from trying to suspend and waking up in a tight loop.
     * (HZ / 2 is half a second; the original author notes this case was
     * never observed during tracing either.)
     */
    if (final_count == initial_count)
        schedule_timeout_uninterruptible(HZ / 2);

 out:
    /* Queue the work again so that this function runs once more. */
    queue_up_suspend_work();
}


pm_get_wakeup_count原型,個人感覺這個是仿__wait_event_interruptible()而寫的。

/*
 * pm_get_wakeup_count - Read the wakeup event counter.
 * @count: Receives the first counter returned by split_counters().
 * @block: When true, sleep until no wakeup event is in progress or a signal
 *         is pending for the current task.
 *
 * Return: true if no wakeup event is in progress at the time of the final
 * read, false otherwise (which can happen after an interrupted wait or with
 * @block == false).
 */
bool pm_get_wakeup_count(unsigned int *count, bool block)
{
    unsigned int cnt, inpr;

    /* With block set, the calling task may sleep here. */
    if (block) {
        DEFINE_WAIT(wait);

        for (;;) {
            prepare_to_wait(&wakeup_count_wait_queue, &wait,
                    TASK_INTERRUPTIBLE);
            /* Keep waiting while a wakeup source is active; stop otherwise. */
            split_counters(&cnt, &inpr);
            if (inpr == 0 || signal_pending(current))
                break;

            /*
             * Sleep here; wakeup_source_deactivate() wakes this queue
             * when the last active wakeup source is deactivated, after
             * which the loop re-checks and breaks out.
             */
            schedule();
        }
        finish_wait(&wakeup_count_wait_queue, &wait);
    }

    /* Re-read the counters so the result reflects the current state. */
    split_counters(&cnt, &inpr);
    *count = cnt;
    return !inpr;
}

那麼有2個問題,按power鍵喚醒系統是退出try_to_suspend了嗎?
       首先系統是如何被喚醒的?這個是由硬體中斷喚醒的,比如我們這裡的power鍵,還有其他的比如alarm等其他硬體中斷,只要我們在中斷申請時呼叫enable_irq_wake(),那麼睡眠期間,只要觸發該中斷就可以喚醒系統。那麼在try_to_suspend裡面喚醒後pm_suspend()執行完resume後會退出來,接著會獲取final_count,在這裡是存在active wakeup_source的(在out之前新增了列印active wakeup_source的程式碼,下面的dmesg證明了存在的wakeup_source),之後執行out,再次排程工作佇列,再次進入try_to_suspend(),在第一次獲取initial_count時便會遇見active wakeup_source,這裡面更多的是系統上層加的wakeup_source,那麼try_to_suspend()會一直block在pm_get_wakeup_count()裡面,直到滅屏且所有wakeup_source都deactivate時才會再次進入pm_suspend()休眠。
       這裡也就解釋了為什麼只要執行一次echo mem > /sys/power/autosleep後就可以自動休眠的原因。

看下面的dmesg:
<6>[  928.536418] CPU0: msm_cpu_pm_enter_sleep mode 000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
<6>[  928.543066] PM: noirq resume of devices complete after 6.020 msecs
<6>[  928.548512] PM: early resume of devices complete after 2.598 msecs
<6>[  928.650793] PM: resume of devices complete after 102.266 msecs
<6>[  928.660290] Restarting tasks ... done.
<6>[  928.681208] PM: suspend exit 1970-01-05 05:23:32.206389881 UTC
<6>[  928.681229] active wake lock KeyEvents
<6>[  928.681267] active wake lock qpnp_soc_wake
<6>[  928.681284] active wake lock alarm, time left 486
<6>[  928.681342] active wake lock KeyEvents
<6>[  928.681584] active wake lock qpnp_soc_wake
<6>[  928.681600] active wake lock alarm, time left 486
<6>[  928.696345] request_suspend_state: wakeup at 928691792356 (1970-01-05 05:23:32.221521704 UTC)
<6>[  928.708608] mdss_dsi_panel_power on=1



還有一個與wakeup source無關的問題,為什麼suspend後就一直停留在那裡不動了?
       這個是cpu停止運轉了,下面再分析下程式碼。
       核心函式suspend_devices_and_enter():

/**
 * suspend_devices_and_enter - Suspend devices and enter system sleep state.
 * @state: System sleep state to enter.
 *
 * Runs the platform begin() hook, suspends the console and the devices, then
 * calls suspend_enter() - repeatedly while the platform's suspend_again()
 * hook asks for it and neither an error nor a wakeup occurred.  Afterwards
 * the devices and the console are resumed and the platform end() hook runs.
 *
 * Return: -ENOSYS when no suspend_ops are installed, otherwise the last error
 * code produced along the way (0 on success).
 */
int suspend_devices_and_enter(suspend_state_t state)
{
    int error;
    bool wakeup = false;

    if (!suspend_ops)
        return -ENOSYS;

    trace_machine_suspend(state);
    if (suspend_ops->begin) {
        error = suspend_ops->begin(state);
        if (error)
            goto Close;
    }
    suspend_console();
    suspend_test_start();
    /*
     * Per the original author's note: this runs the suspend callbacks of
     * all drivers; if a wakeup source is active during that or a callback
     * returns non-zero, the suspend fails with an error.
     */
    error = dpm_suspend_start(PMSG_SUSPEND);
    if (error) {
        printk(KERN_ERR "PM: Some devices failed to suspend\n");
        goto Recover_platform;
    }
    suspend_test_finish("suspend devices");
    if (suspend_test(TEST_DEVICES))
        goto Recover_platform;

    do {
        /* Original author's note: the CPUs get disabled in here. */
        error = suspend_enter(state, &wakeup);
    } while (!error && !wakeup
        && suspend_ops->suspend_again && suspend_ops->suspend_again());

 Resume_devices:
    suspend_test_start();
    dpm_resume_end(PMSG_RESUME);
    suspend_test_finish("resume devices");
    resume_console();
 Close:
    if (suspend_ops->end)
        suspend_ops->end();
    trace_machine_suspend(PWR_EVENT_EXIT);
    return error;

 Recover_platform:
    /* Give the platform a chance to recover, then resume the devices. */
    if (suspend_ops->recover)
        suspend_ops->recover();
    goto Resume_devices;
}


/*
 * suspend_enter - Take the final steps into the sleep state and back out.
 * @state: System sleep state to enter.
 * @wakeup: Set to the result of pm_wakeup_pending() right before the platform
 *          enter() hook would be called.
 *
 * Runs the platform prepare()/prepare_late() hooks, finishes suspending the
 * devices, takes the non-boot CPUs offline, disables interrupts, suspends the
 * syscore and finally calls the platform enter() hook unless a wakeup is
 * pending.  Everything is undone in reverse order on the way out; the labels
 * mark how far the unwinding has to go after a failure.
 *
 * Return: 0 on success or the error code of the step that failed.
 */
static int suspend_enter(suspend_state_t state, bool *wakeup)
{
    int error;

    if (suspend_ops->prepare) {
        error = suspend_ops->prepare();
        if (error)
            goto Platform_finish;
    }

    error = dpm_suspend_end(PMSG_SUSPEND);
    if (error) {
        printk(KERN_ERR "PM: Some devices failed to power down\n");
        goto Platform_finish;
    }

    if (suspend_ops->prepare_late) {
        error = suspend_ops->prepare_late();
        if (error)
            goto Platform_wake;
    }

    if (suspend_test(TEST_PLATFORM))
        goto Platform_wake;

    /*
     * Disable the non-boot CPUs.  Original author's note: one CPU is still
     * running at this point and is stopped further below.
     */
    error = disable_nonboot_cpus();
    if (error || suspend_test(TEST_CPUS))
        goto Enable_cpus;

    arch_suspend_disable_irqs();
    BUG_ON(!irqs_disabled());

    error = syscore_suspend();
    if (!error) {
        *wakeup = pm_wakeup_pending();
        if (!(suspend_test(TEST_CORE) || *wakeup)) {
            /*
             * Original author's note: the CPU stops executing inside
             * this call until an interrupt wakes it up.
             */
            error = suspend_ops->enter(state);
            /* Everything from here on is the wake-up path. */
            events_check_enabled = false;
        }
        syscore_resume();
    }

    arch_suspend_enable_irqs();
    BUG_ON(irqs_disabled());

 Enable_cpus:
    enable_nonboot_cpus();

 Platform_wake:
    if (suspend_ops->wake)
        suspend_ops->wake();

    dpm_resume_start(PMSG_RESUME);

 Platform_finish:
    if (suspend_ops->finish)
        suspend_ops->finish();

    return error;
}

平臺賦值,這裡debug平臺是基於msm8974的:
/*
 * Platform suspend callbacks of the LPM driver (the article's debug platform
 * is msm8974).  The entries are listed in the order in which the suspend code
 * uses them.
 */
static const struct platform_suspend_ops lpm_suspend_ops = {
    .valid = suspend_valid_only_mem,
    .prepare_late = lpm_suspend_prepare,
    /* Original author's note: the CPU is disabled in here and stops executing. */
    .enter = lpm_suspend_enter,
    .wake = lpm_suspend_wake,
};


新增顯示wake_lock的dmesg:
<6>[   90.964850] CPU0: msm_cpu_pm_enter_sleep mode 000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
<6>[   90.965302] Enabling non-boot CPUs ...
<6>[   90.968303] CPU3 is up
<6>[   90.970699] PM: noirq resume of devices complete after 2.382 msecs
<6>[   90.973460] PM: early resume of devices complete after 1.443 msecs
<6>[   91.097369] PM: resume of devices complete after 123.888 msecs
<6>[   91.112858] Restarting tasks ... done.
<6>[   91.141699] PM: suspend exit 1970-01-05 03:59:28.158266979 UTC
<6>[   91.141776] PM: suspend entry 1970-01-05 03:59:28.158347917 UTC
<6>[   91.141801] PM: Syncing filesystems ... done.
<6>[   91.222299] Freezing user space processes ... (elapsed 0.03 seconds) done.
<6>[   91.258126] Freezing remaining freezable tasks ... (elapsed 0.02 seconds) done.
<6>[   91.278279] Suspending console(s) (use no_console_suspend to debug)
<6>[   91.384933] PM: suspend of devices complete after 95.062 msecs
<6>[   91.397910] PM: late suspend of devices complete after 12.934 msecs
<6>[   91.413019] PM: noirq suspend of devices complete after 15.064 msecs
<6>[   91.413059] Disabling non-boot CPUs ...
<6>[   91.424477] CPU0: msm_cpu_pm_enter_sleep mode 000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000//這裡cpu停止執行直到中斷喚醒
<6>[   91.425144] Enabling non-boot CPUs ...
<6>[   91.432354] CPU3 is up
<6>[   91.444416] PM: noirq resume of devices complete after 11.997 msecs
<6>[   91.460948] PM: early resume of devices complete after 10.458 msecs
<6>[   91.577213] PM: resume of devices complete after 116.231 msecs
<6>[   91.584876] Restarting tasks ... done.
<6>[   91.614571] PM: suspend exit 1970-01-05 03:59:35.550203849 UTC
<6>[   91.614639] active wake lock rmt_storage_-1220268312
<6>[   91.689912] PM: suspend entry 1970-01-05 03:59:35.625550620 UTC
<6>[   91.689921] PM: Syncing filesystems ... done.
<6>[   91.700706] Freezing user space processes ...
<3>[   91.712870] Freezing of user space  aborted
<6>[   91.712898]
<6>[   91.712903] Restarting tasks ... done.
<6>[   91.720540] PM: suspend exit 1970-01-05 03:59:35.656175256 UTC
<6>[   91.720574] PM: suspend entry 1970-01-05 03:59:35.656214579 UTC
<6>[   91.720586] PM: Syncing filesystems ... done.
<6>[   91.801097] Freezing user space processes ...
<3>[   91.815050] Freezing of user space  aborted
<6>[   91.815075]
<6>[   91.815083] Restarting tasks ... done.
<6>[   91.823603] PM: suspend exit 1970-01-05 03:59:35.759241558 UTC
<6>[   91.823633] PM: suspend entry 1970-01-05 03:59:35.759272964 UTC
<6>[   91.823643] PM: Syncing filesystems ... done.
<6>[   91.911985] Freezing user space processes ... (elapsed 0.03 seconds) done.
<6>[   91.949378] Freezing remaining freezable tasks ... (elapsed 0.01 seconds) done.
<6>[   91.969089] Suspending console(s) (use no_console_suspend to debug)
<6>[   92.085594] PM: suspend of devices complete after 98.499 msecs
<6>[   92.098615] PM: late suspend of devices complete after 12.975 msecs
<6>[   92.113909] PM: noirq suspend of devices complete after 15.249 msecs
<6>[   92.113951] Disabling non-boot CPUs ...
<6>[   92.147320] CPU0: msm_cpu_pm_enter_sleep mode 000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
<6>[   92.148265] Enabling non-boot CPUs ...
<6>[   92.155121] CPU1 is up
<6>[   92.163797] CPU2 is up
<6>[   92.174144] CPU3 is up
<6>[   92.186556] PM: noirq resume of devices complete after 12.344 msecs
<6>[   92.199945] PM: early resume of devices complete after 10.020 msecs
<6>[   92.317528] PM: resume of devices complete after 117.548 msecs
<6>[   92.326254] Restarting tasks ... done.
<6>[   92.342025] PM: suspend exit 1970-01-05 04:00:48.122436614 UTC
<6>[   92.342337] active wake lock qpnp-vadc-f611ac00
<6>[   92.342357] active wake lock alarm, time left 481

<6>[   92.411428] PM: suspend entry 1970-01-05 04:00:48.191838905 UTC
<6>[   92.411451] PM: Syncing filesystems ... done.
<6>[   92.419086] Freezing user space processes ...
<3>[   92.431173] Freezing of user space  aborted
<6>[   92.431213]
<6>[   92.431225] Restarting tasks ... done.
<6>[   92.441575] PM: suspend exit 1970-01-05 04:00:48.221987864 UTC
<6>[   92.441709] active wake lock qpnp-vadc-f611ac00
<6>[   92.445004] PM: suspend entry 1970-01-05 04:00:48.225417551 UTC
<6>[   92.445026] PM: Syncing filesystems ... done.
<6>[   92.525764] Freezing user space processes ...
<3>[   92.541465] Freezing of user space  aborted
<6>[   92.541523]
<6>[   92.541545] Restarting tasks ... done.
<6>[   92.558768] PM: suspend exit 1970-01-05 04:00:48.339166145 UTC
<6>[   92.558865] PM: suspend entry 1970-01-05 04:00:48.339268645 UTC
<6>[   92.558898] PM: Syncing filesystems ... done.
<6>[   92.656122] Freezing user space processes ... (elapsed 0.04 seconds) done.
<6>[   92.699091] Freezing remaining freezable tasks ... (elapsed 0.01 seconds) done.
<6>[   92.718891] Suspending console(s) (use no_console_suspend to debug)
<6>[   92.832084] PM: suspend of devices complete after 97.905 msecs
<6>[   92.845099] PM: late suspend of devices complete after 12.971 msecs
<6>[   92.860407] PM: noirq suspend of devices complete after 15.264 msecs
<6>[   92.860447] Disabling non-boot CPUs ...
<6>[   92.895114] CPU0: msm_cpu_pm_enter_sleep mode 000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000