1. 程式人生 > >linux 記憶體管理---bootmem(三)

linux 記憶體管理---bootmem(三)

為什麼要使用bootmem分配器,記憶體管理不是有buddy系統和slab分配器嗎?由於在系統初始化的時候需要執行一些記憶體管理,記憶體分配的任務,這個時候buddy系統,slab分配器等並沒有被初始化好,此時就引入了一種記憶體管理器bootmem分配器在系統初始化的時候進行記憶體管理與分配,當buddy系統和slab分配器初始化好後,在mem_init()中對bootmem分配器進行釋放,記憶體管理與分配由buddy系統,slab分配器等進行接管。

bootmem分配器使用一個bitmap來標記物理頁是否被佔用,分配的時候按照第一適應的原則,從bitmap中進行查詢,如果這位為1,表示已經被佔用,否則表示未被佔用。為什麼系統執行的時候不使用bootmem分配器呢?bootmem分配器每次在bitmap中進行線性搜尋,效率非常低,而且在記憶體的起始端留下許多小的空閒碎片,在需要非常大的記憶體塊的時候,檢查點陣圖這一過程就顯得代價很高。bootmem分配器是用於在啟動階段分配記憶體的,對該分配器的需求集中於簡單性方面,而不是效能和通用性。

memblock演算法是linux核心初始化階段的一個記憶體分配器,本質上是取代了原來的bootmem演算法. memblock實現比較簡單,而它的作用就是在page allocator初始化之前來管理記憶體,完成分配和釋放請求.

為了保證系統的相容性, 核心為bootmem和memblock提供了相同的API介面.

這樣在編譯Kernel的時候,可以選擇nobootmem或者bootmem,在buddy system起來之前管理memory。這兩種機制對外提供的API是一致的,因此對使用者是透明的。

# Build exactly one early-boot allocator object: nobootmem.o when
# CONFIG_NO_BOOTMEM is set (non-empty), bootmem.o otherwise.
ifdef CONFIG_NO_BOOTMEM

obj-y += nobootmem.o

else

obj-y += bootmem.o

endif

由於介面是一致的,那麼他們共同使用一份標頭檔案(include/linux/bootmem.h)。

Memblock是在早期引導過程中管理記憶體的方法之一,此時核心記憶體分配器還沒執行. Memblock以前被定義為Logical Memory Block( 邏輯記憶體塊), 但根據Yinghai Lu的補丁, 它被重新命名為memblock.

    +-------------------------------------------------------+
    |                   外部模組申請記憶體                    |
    +-------------------------------------------------------+
               |                              |
               |                              |
               ↓                              ↓
    +------------------------+      +------------------------+
    |       bootmem.c        |      |      nobootmem.c       |
    |   __alloc_bootmem()    |      |   __alloc_bootmem()    |
    +------------------------+      +------------------------+
                                               |
                                               |
                                               ↓
                                 +-------------------------------+
                                 |          memblock.c           |
                                 | memblock_find_in_range_node() |
                                 +-------------------------------+

這裡僅僅介紹bootmem。

前面一節《 linux 記憶體管理---實體記憶體探測(二)》記錄了實體記憶體的分佈,那麼之後就交由bootmem來管理了。

/*
 * bootmem_init() - hand the RAM described by boot_mem_map to the bootmem
 * allocator for node 0.
 *
 * Steps, in order:
 *   1. compute reserved_end, the first PFN past the kernel image / initrd;
 *   2. scan boot_mem_map to find min_low_pfn, max_low_pfn and mapstart
 *      (the PFN where the bootmem bitmap is placed);
 *   3. clamp low memory at HIGHMEM_START;
 *   4. create the bitmap with init_bootmem_node();
 *   5. free every usable low-memory RAM range above reserved_end;
 *   6. re-reserve the bitmap pages and finalize the initrd;
 *   7. call memory_present() on every RAM range.
 *
 * The "..." below marks code elided by the article, not valid C.
 */
static void __init bootmem_init(void)

{

    unsigned long reserved_end;

    unsigned long mapstart = ~0UL;

    unsigned long bootmap_size;

    int i;

    /*

     * Init any data related to initrd. It's a nop if INITRD is

     * not selected. Once that done we can determine the low bound

     * of usable memory.

     */

    reserved_end = max(init_initrd(),

               (unsigned long) PFN_UP(__pa_symbol(&_end)));    // first PFN past the kernel image (_end) or the value from init_initrd(), whichever is higher

    /*

     * max_low_pfn is not a number of pages. The number of pages

     * of the system is given by 'max_low_pfn - min_low_pfn'.

     */

    min_low_pfn = ~0UL;

    max_low_pfn = 0;

    /*

     * Find the highest page frame number we have available.

     */

    for (i = 0; i < boot_mem_map.nr_map; i++) {

        unsigned long start, end;

        if (boot_mem_map.map[i].type != BOOT_MEM_RAM)

            continue;

        start = PFN_UP(boot_mem_map.map[i].addr);

        end = PFN_DOWN(boot_mem_map.map[i].addr

                + boot_mem_map.map[i].size);

        if (end > max_low_pfn)

            max_low_pfn = end;

        if (start < min_low_pfn)

            min_low_pfn = start;

        if (end <= reserved_end)

            continue;

        if (start >= mapstart)

            continue;

        mapstart = max(reserved_end, start);      // mapstart: PFN where the bootmem bitmap will be stored; the lowest usable RAM PFN at or above reserved_end

    }

    if (min_low_pfn >= max_low_pfn)

        panic("Incorrect memory mapping !!!");

    if (min_low_pfn > ARCH_PFN_OFFSET) {

        pr_info("Wasting %lu bytes for tracking %lu unused pages\n",

            (min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),

            min_low_pfn - ARCH_PFN_OFFSET);

    } else if (min_low_pfn < ARCH_PFN_OFFSET) {

        pr_info("%lu free pages won't be used\n",

            ARCH_PFN_OFFSET - min_low_pfn);

    }

    min_low_pfn = ARCH_PFN_OFFSET;   //#define ARCH_PFN_OFFSET        PFN_UP(PHYS_OFFSET)

    /*

     * Determine low and high memory ranges

     */

    max_pfn = max_low_pfn;

    if (max_low_pfn > PFN_DOWN(HIGHMEM_START)) {         // low memory is capped at HIGHMEM_START (0x20000000 + 768 MB in this tree's spaces.h)

#ifdef CONFIG_HIGHMEM

        highstart_pfn = PFN_DOWN(HIGHMEM_START);

        highend_pfn = max_low_pfn;

#endif

        max_low_pfn = PFN_DOWN(HIGHMEM_START);

    }

    /*

     * Initialize the boot-time allocator with low memory only.

     */

    bootmap_size = init_bootmem_node(NODE_DATA(0), mapstart,

                     min_low_pfn, max_low_pfn);      // create the bootmem bitmap covering [min_low_pfn, max_low_pfn), holes in between included

    ...

    /*

     * Register fully available low RAM pages with the bootmem allocator.

     */

    for (i = 0; i < boot_mem_map.nr_map; i++) {

        unsigned long start, end, size;

        start = PFN_UP(boot_mem_map.map[i].addr);

        end   = PFN_DOWN(boot_mem_map.map[i].addr

                    + boot_mem_map.map[i].size);

        /*

         * Reserve usable memory.

         */

        switch (boot_mem_map.map[i].type) {

        case BOOT_MEM_RAM:

            break;

        case BOOT_MEM_INIT_RAM:

            memory_present(0, start, end);

            continue;

        default:

            /* Not usable memory */

            continue;

        }

        /*

         * We are rounding up the start address of usable memory

         * and at the end of the usable range downwards.

         */

        if (start >= max_low_pfn)

            continue;

        if (start < reserved_end)   // never free anything below reserved_end (kernel image / initrd)

            start = reserved_end;

        if (end > max_low_pfn)

            end = max_low_pfn;

        /*

         * ... finally, is the area going away?

         */

        if (end <= start)

            continue;

        size = end - start;

        /* Register lowmem ranges */

#ifdef CONFIG_BRCMSTB

        /* carve out space for bmem */

        brcm_free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);   // frees the range minus the bmem carve-out; per the article author bmem is reserved for device DMA (helper body not shown here)

#else

        free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);   // mark [start, end) as free in the bootmem bitmap

#endif

    }

    /*

     * Reserve the bootmap memory.

     */

    reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);  // the pages that hold the bitmap itself must stay reserved

    /*

     * Reserve initrd memory if needed.

     */

    finalize_initrd();   // presumably reserves the pages occupied by the initrd (body not shown here)

    /*

     * Call memory_present() on all valid ranges, for SPARSEMEM.

     * This must be done after setting up bootmem, since memory_present()

     * may allocate bootmem.

     */

    for (i = 0; i < boot_mem_map.nr_map; i++) {

        unsigned long start, end;

        if (boot_mem_map.map[i].type != BOOT_MEM_RAM)

            continue;

        start = PFN_UP(boot_mem_map.map[i].addr);

        end   = PFN_DOWN(boot_mem_map.map[i].addr

                    + boot_mem_map.map[i].size);

        memory_present(0, start, end);     // matters mainly because of physical memory holes; per the article author, on this MIPS platform the low 256 MB is DRAM, the next 256 MB is register space, followed by up to 768 MB of DRAM, so any system with more than 256 MB has a hole

    }

}

/*
 * init_bootmem_node() - register one node with the boot-time allocator.
 * @pgdat:    node whose ->bdata is initialised
 * @freepfn:  PFN at which the bootmem bitmap is placed
 * @startpfn: first PFN managed on this node
 * @endpfn:   one past the last PFN managed on this node
 *
 * Thin wrapper that forwards to init_bootmem_core() with the node's
 * bootmem data and returns whatever size it reports.
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
                unsigned long startpfn, unsigned long endpfn)
{
    unsigned long bitmap_size;

    bitmap_size = init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);

    return bitmap_size;
}

在include/asm-mips/mach-generic/spaces.h中:

/* Physical address offset; defaults to 0 unless already defined. */
#ifndef PHYS_OFFSET

#define PHYS_OFFSET  _AC(0, UL)

#endif

/* Base used below to build PAGE_OFFSET; only defined here for 32-bit builds. */
#ifdef CONFIG_32BIT

#define CAC_BASE  _AC(0x80000000, UL)

#endif

/* Size, in MB, of the upper memory region. */
#define BRCM_MAX_UPPER_MB    _AC(768, UL)

/* Physical address at which the upper memory region starts. */
#define UPPERMEM_START        _AC(0x20000000, UL)

/* 0x20000000 + (768 << 20) = 0x50000000: bootmem_init() caps low memory here. */
#define HIGHMEM_START        (UPPERMEM_START + (BRCM_MAX_UPPER_MB << 20))

/* Defaults to CAC_BASE + PHYS_OFFSET unless already defined. */
#ifndef PAGE_OFFSET

#define PAGE_OFFSET  (CAC_BASE + PHYS_OFFSET)

#endif

/*
 * init_bootmem_core() - set up the bootmem bitmap for one node.
 * @bdata:    per-node bootmem bookkeeping to fill in
 * @mapstart: PFN where the bitmap is stored
 * @start:    first PFN covered by the bitmap
 * @end:      one past the last PFN covered by the bitmap
 *
 * Every page starts out reserved (all bits set); setup_arch() has to
 * register free RAM areas explicitly afterwards.
 *
 * Returns the bitmap size in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
    unsigned long mapstart, unsigned long start, unsigned long end)
{
    unsigned long bitmap_bytes;

    /* Receives start/end by address, so run it before either is used. */
    mminit_validate_memmodel_limits(&start, &end);

    /* One bit per page in [start, end). */
    bitmap_bytes = bootmap_bytes(end - start);

    bdata->node_low_pfn = end;
    bdata->node_min_pfn = start;
    bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
    link_bootmem(bdata);

    /* 0xff sets every bit: all pages reserved until explicitly freed. */
    memset(bdata->node_bootmem_map, 0xff, bitmap_bytes);

    bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
        bdata - bootmem_node_data, start, mapstart, end, bitmap_bytes);

    return bitmap_bytes;
}

/*
 * bootmap_bytes() - bytes needed for a bitmap tracking @pages pages.
 *
 * One bit per page, rounded up to whole bytes and then up to a multiple
 * of sizeof(long).
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
    return ALIGN(DIV_ROUND_UP(pages, 8), sizeof(long));
}

一個byte有8bit,每個bit用來記錄一個頁是否已被佔用:bit為1表示該頁已被佔用(保留),bit為0表示該頁空閒可用(init_bootmem_core()一開始用0xff把所有頁都標記為佔用,之後free_bootmem()再把可用頁對應的bit清0),因此一個byte可以記錄8個頁。

/*
 * free_bootmem() - mark a physical range as free in the bootmem bitmap.
 * @addr: physical start address of the range
 * @size: length of the range in bytes
 *
 * Only pages lying entirely inside the range are freed: the start is
 * rounded up and the end rounded down to page boundaries.
 */
void __init free_bootmem(unsigned long addr, unsigned long size)
{
    unsigned long first_pfn, end_pfn;

    kmemleak_free_part(__va(addr), size);

    first_pfn = PFN_UP(addr);
    end_pfn = PFN_DOWN(addr + size);

    /* reserve == 0 selects the "free" path. */
    mark_bootmem(first_pfn, end_pfn, 0, 0);
}

/*
 * mark_bootmem() - reserve or free the PFN range [start, end), which may
 * span several entries of bdata_list.
 * @reserve: non-zero to reserve, zero to free
 * @flags:   passed through to mark_bootmem_node()
 *
 * Walks bdata_list and handles the part of the range that falls on each
 * node. If a reservation fails part-way, the pages reserved so far are
 * freed again and the error is returned. Returns 0 once the whole range
 * has been handled; reaching the end of the list without covering the
 * range is a BUG().
 */
static int __init mark_bootmem(unsigned long start, unsigned long end,
                int reserve, int flags)
{
    bootmem_data_t *bdata;
    unsigned long cursor = start;

    list_for_each_entry(bdata, &bdata_list, list) {
        unsigned long stop;
        int rc;

        if (cursor < bdata->node_min_pfn || cursor >= bdata->node_low_pfn) {
            /* Once the range has been entered, nodes may not be skipped. */
            BUG_ON(cursor != start);
            continue;
        }

        stop = min(bdata->node_low_pfn, end);
        rc = mark_bootmem_node(bdata, cursor, stop, reserve, flags);

        if (rc && reserve) {
            /* Roll back everything reserved before this node. */
            mark_bootmem(start, cursor, 0, 0);
            return rc;
        }

        if (stop == end)
            return 0;

        cursor = bdata->node_low_pfn;
    }

    BUG();
}

/*
 * mark_bootmem_node() - reserve or free [start, end) on a single node.
 *
 * Converts the absolute PFNs into bitmap indices relative to the node's
 * first PFN and dispatches to __reserve() or __free(). Returns the
 * result of __reserve() when reserving, 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
                unsigned long start, unsigned long end,
                int reserve, int flags)
{
    unsigned long first_idx = start - bdata->node_min_pfn;
    unsigned long end_idx = end - bdata->node_min_pfn;

    if (!reserve) {
        __free(bdata, first_idx, end_idx);
        return 0;
    }

    return __reserve(bdata, first_idx, end_idx, flags);
}

/*
 * __free() - clear the bitmap bits for indices [sidx, eidx), i.e. mark
 * those pages free.
 *
 * Pulls bdata->hint_idx down to sidx when it lies above it.
 * Hitting a bit that is already clear is treated as a bug.
 *
 * The "..." below marks code elided by the article, not valid C.
 */
static void __init __free(bootmem_data_t *bdata,

            unsigned long sidx, unsigned long eidx)

{

    unsigned long idx;

    ...

    if (bdata->hint_idx > sidx)

        bdata->hint_idx = sidx;

    for (idx = sidx; idx < eidx; idx++)

        /* test_and_clear_bit() returns the old bit: 0 means it was already free. */
        if (!test_and_clear_bit(idx, bdata->node_bootmem_map))

            BUG();

}

/*
 * __reserve() - set the bitmap bits for indices [sidx, eidx), i.e. mark
 * those pages reserved.
 * @flags: with BOOTMEM_EXCLUSIVE, hitting an already-set bit is an error
 *
 * Returns 0 on success. For an exclusive request that finds a bit already
 * set, calls __free() on [sidx, idx) to undo this call's work and returns
 * -EBUSY. Without the flag a double reservation is only logged.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
            unsigned long eidx, int flags)
{
    int exclusive = flags & BOOTMEM_EXCLUSIVE;
    unsigned long idx;

    for (idx = sidx; idx < eidx; idx++) {
        /* Old bit was clear: page newly reserved, nothing more to do. */
        if (!test_and_set_bit(idx, bdata->node_bootmem_map))
            continue;

        if (exclusive) {
            /* Bit was already set: release what we took and fail. */
            __free(bdata, sidx, idx);
            return -EBUSY;
        }

        bdebug("silent double reserve of PFN %lx\n",
            idx + bdata->node_min_pfn);
    }

    return 0;
}

呼叫bootmem_init()函式之後bootmem就初始化完成了,當然可能有人會問在bootmem初始化之前核心要分配記憶體怎麼辦,而且在bootmem初始化過程中要用到記憶體哪裡來?這就是一個先有雞還是先有蛋的問題,核心採取的辦法是在bootmem可用之前包括bootmem的初始化,核心的一切記憶體需要都採用靜態記憶體,即全域性變數的形式,比如bootmem的初始化過程中:

NODE_DATA(0) 巨集展開為:

/* NODE_DATA() ignores nid here and always yields the single global node. */
#define NODE_DATA(nid)        (&contig_page_data)

/* Statically defined node descriptor whose bootmem data points at entry 0 of bootmem_node_data. */
struct pglist_data __refdata contig_page_data = {

    .bdata = &bootmem_node_data[0]

};

contig_page_data就是定義為一個全域性結構體變數,其中bdata為它的成員變數指標,直接指向另外一個全域性變數:

/* Statically allocated per-node bootmem bookkeeping, one entry per possible node. */
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;

bootmem初始化完成後就可以通過下列函式分配記憶體了:

/*
 * Convenience wrappers around the __alloc_bootmem*() family. x is passed
 * through as the first size argument. All of them pass BOOTMEM_LOW_LIMIT
 * as the last argument; they differ in the alignment used
 * (SMP_CACHE_BYTES, PAGE_SIZE, or caller-supplied), in whether a specific
 * node (pgdat) is named, and in whether the _nopanic function is called.
 * NOTE(review): the _nopanic variants presumably return NULL instead of
 * panicking on failure; their bodies are not shown here.
 */

/* x bytes, SMP_CACHE_BYTES alignment. */
#define alloc_bootmem(x) \

    __alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* x bytes, caller-chosen alignment. */
#define alloc_bootmem_align(x, align) \

    __alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)

/* As alloc_bootmem(), via the _nopanic function. */
#define alloc_bootmem_nopanic(x) \

    __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* x bytes, PAGE_SIZE alignment. */
#define alloc_bootmem_pages(x) \

    __alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* As alloc_bootmem_pages(), via the _nopanic function. */
#define alloc_bootmem_pages_nopanic(x) \

    __alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* Node-specific: x bytes for pgdat, SMP_CACHE_BYTES alignment. */
#define alloc_bootmem_node(pgdat, x) \

    __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* As alloc_bootmem_node(), via the _nopanic function. */
#define alloc_bootmem_node_nopanic(pgdat, x) \

    __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)

/* Node-specific: x bytes for pgdat, PAGE_SIZE alignment. */
#define alloc_bootmem_pages_node(pgdat, x) \

    __alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

/* As alloc_bootmem_pages_node(), via the _nopanic function. */
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \

    __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)

下面簡單進行說明:

/*
 * alloc_bootmem_core() - allocate @size bytes from one node's bootmem bitmap.
 * @bdata: node to allocate from
 * @size:  number of bytes requested
 * @align: required alignment in bytes
 * @goal:  preferred lowest physical address (0 for none)
 * @limit: upper physical address bound (0 for none)
 *
 * Scans the bitmap for a run of clear bits large enough for the request,
 * starting at the goal or at the node's hint index. If the search started
 * from the hint and found nothing, it is retried once from the originally
 * computed start index. When the previous allocation ended part-way
 * through the page just before the one found, the new block starts in
 * that partially used page.
 *
 * Returns a zeroed virtual address, or NULL if nothing fits.
 *
 * The "..." below marks code elided by the article, not valid C.
 */
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,

                    unsigned long size, unsigned long align,

                    unsigned long goal, unsigned long limit)

{

    unsigned long fallback = 0;

    unsigned long min, max, start, sidx, midx, step;

    ...

    min = bdata->node_min_pfn;

    max = bdata->node_low_pfn;

    /* From here on goal and limit are page frame numbers, not addresses. */
    goal >>= PAGE_SHIFT;

    limit >>= PAGE_SHIFT;

    if (limit && max > limit)

        max = limit;

    if (max <= min)

        return NULL;

    /* Search stride in pages; at least one page. */
    step = max(align >> PAGE_SHIFT, 1UL);

    if (goal && min < goal && goal < max)

        start = ALIGN(goal, step);

    else

        start = ALIGN(min, step);

    /* Bitmap indices are relative to the node's first PFN. */
    sidx = start - bdata->node_min_pfn;

    midx = max - bdata->node_min_pfn;

    if (bdata->hint_idx > sidx) {

        /*

         * Handle the valid case of sidx being zero and still

         * catch the fallback below.

         */

        fallback = sidx + 1;

        sidx = align_idx(bdata, bdata->hint_idx, step);

    }

    while (1) {

        int merge;

        void *region;

        unsigned long eidx, i, start_off, end_off;

find_block:

        sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);      // find the next free (clear) bit: candidate first page

        sidx = align_idx(bdata, sidx, step);

        eidx = sidx + PFN_UP(size);

        if (sidx >= midx || eidx > midx)

            break;

        /* Every page in [sidx, eidx) must be free; otherwise restart past the busy one. */
        for (i = sidx; i < eidx; i++)

            if (test_bit(i, bdata->node_bootmem_map)) {

                sidx = align_idx(bdata, i, step);

                if (sidx == i)

                    sidx += step;

                goto find_block;

            }

        /* Previous allocation ended mid-page right before sidx: continue inside that page. */
        if (bdata->last_end_off & (PAGE_SIZE - 1) &&

                PFN_DOWN(bdata->last_end_off) + 1 == sidx)

            start_off = align_off(bdata, bdata->last_end_off, align);

        else

            start_off = PFN_PHYS(sidx);

        /* merge is 1 when the block starts in the previous, already reserved page. */
        merge = PFN_DOWN(start_off) < sidx;

        end_off = start_off + size;

        bdata->last_end_off = end_off;

        bdata->hint_idx = PFN_UP(end_off);

        /*

         * Reserve the area now:

         */

        if (__reserve(bdata, PFN_DOWN(start_off) + merge,

                PFN_UP(end_off), BOOTMEM_EXCLUSIVE))         // mark the newly allocated pages as reserved

            BUG();

        region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +

                start_off);

        memset(region, 0, size);

        /*

         * The min_count is set to 0 so that bootmem allocated blocks

         * are never reported as leaks.

         */

        kmemleak_alloc(region, size, 0, 0);

        return region;

    }

    /* The hinted search failed: retry once from the original start index. */
    if (fallback) {

        sidx = align_idx(bdata, fallback - 1, step);

        fallback = 0;

        goto find_block;

    }

    return NULL;

}

參考文件:

--------------------- 本文來自 淡泊的豬 的CSDN 部落格 ,全文地址請點選:https://blog.csdn.net/whuzm08/article/details/80135358?utm_source=copy