linux 記憶體管理---bootmem(三)
為什麼要使用bootmem分配器,記憶體管理不是有buddy系統和slab分配器嗎?由於在系統初始化的時候需要執行一些記憶體管理,記憶體分配的任務,這個時候buddy系統,slab分配器等並沒有被初始化好,此時就引入了一種記憶體管理器bootmem分配器在系統初始化的時候進行記憶體管理與分配,當buddy系統和slab分配器初始化好後,在mem_init()中對bootmem分配器進行釋放,記憶體管理與分配由buddy系統,slab分配器等進行接管。
bootmem分配器使用一個bitmap來標記物理頁是否被佔用,分配的時候按照第一適應的原則,從bitmap中進行查詢,如果這位為1,表示已經被佔用,否則表示未被佔用。為什麼系統執行的時候不使用bootmem分配器呢?bootmem分配器每次在bitmap中進行線性搜尋,效率非常低,而且在記憶體的起始端留下許多小的空閒碎片,在需要非常大的記憶體塊的時候,檢查點陣圖這一過程就顯得代價很高。bootmem分配器是用於在啟動階段分配記憶體的,對該分配器的需求集中於簡單性方面,而不是效能和通用性。
memblock演算法是linux核心初始化階段的一個記憶體分配器,本質上是取代了原來的bootmem演算法. memblock實現比較簡單,而它的作用就是在page allocator初始化之前來管理記憶體,完成分配和釋放請求.
為了保證系統的相容性, 核心為bootmem和memblock提供了相同的API介面.
這樣在編譯Kernel的時候可以選擇nobootmem或者bootmem 來在buddy system起來之前管理memory. 這兩種機制對提供的API是一致的,因此對使用者是透明的
# Build exactly one early allocator object: nobootmem.o when
# CONFIG_NO_BOOTMEM is defined, bootmem.o otherwise.
ifdef CONFIG_NO_BOOTMEM
obj-y += nobootmem.o
else
obj-y += bootmem.o
endif
由於介面是一致的, 那麼他們共同使用一份標頭檔案(include/linux/bootmem.h)來宣告這些介面。
Memblock是在早期引導過程中管理記憶體的方法之一,此時核心記憶體分配器還沒執行. Memblock以前被定義為Logical Memory Block( 邏輯記憶體塊), 但根據Yinghai Lu的補丁, 它被重新命名為memblock.
+-------------------------------------------------------+
| 外部模組申請記憶體 |
+-------------------------------------------------------+
| |
| |
↓ ↓
+------------------------+ +------------------------+
| bootmem.c | | nobootmem.c |
| __alloc_bootmem() | | __alloc_bootmem() |
+------------------------+ +------------------------+
|
|
↓
+-----------------------------------+
| memblock.c |
| memblock_find_in_range_node() |
+-----------------------------------+
這裡僅僅介紹bootmem。
前面一節《 linux 記憶體管理---實體記憶體探測(二)》記錄了實體記憶體的分佈,那麼之後就交由bootmem來管理了。
/*
 * bootmem_init - bring up the boot-time (bootmem) allocator.
 *
 * Steps, in the order they appear below:
 *  1. Compute reserved_end from init_initrd() and the end of the kernel
 *     image (_end).
 *  2. Walk the BOOT_MEM_RAM entries of boot_mem_map to find min_low_pfn,
 *     max_low_pfn and mapstart (where the bootmem bitmap will live).
 *  3. Force min_low_pfn to ARCH_PFN_OFFSET and cap max_low_pfn at
 *     HIGHMEM_START (recording highstart_pfn/highend_pfn under
 *     CONFIG_HIGHMEM).
 *  4. Initialise bootmem for node 0 over [min_low_pfn, max_low_pfn).
 *  5. Free every usable low-RAM range that lies above reserved_end.
 *  6. Reserve the bitmap pages again and finalise the initrd.
 *  7. Call memory_present() for every RAM range.
 *
 * NOTE: the bare "..." line in the body marks code the article's author
 * left out; it is not part of the real function.
 */
static void __init bootmem_init(void)
{
unsigned long reserved_end;
unsigned long mapstart = ~0UL;
unsigned long bootmap_size;
int i;
/*
* Init any data related to initrd. It's a nop if INITRD is
* not selected. Once that done we can determine the low bound
* of usable memory.
*/
reserved_end = max(init_initrd(),
(unsigned long) PFN_UP(__pa_symbol(&_end))); // page frame just past the kernel image (_end) or the value reported by init_initrd(), whichever is higher
/*
* max_low_pfn is not a number of pages. The number of pages
* of the system is given by 'max_low_pfn - min_low_pfn'.
*/
min_low_pfn = ~0UL;
max_low_pfn = 0;
/*
* Find the highest page frame number we have available.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
continue;
start = PFN_UP(boot_mem_map.map[i].addr);
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
if (end > max_low_pfn)
max_low_pfn = end;
if (start < min_low_pfn)
min_low_pfn = start;
if (end <= reserved_end)
continue;
if (start >= mapstart)
continue;
mapstart = max(reserved_end, start); // lowest usable PFN at or above reserved_end; the bootmem bitmap is placed here, i.e. right behind the kernel image
}
if (min_low_pfn >= max_low_pfn)
panic("Incorrect memory mapping !!!");
if (min_low_pfn > ARCH_PFN_OFFSET) {
pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
(min_low_pfn - ARCH_PFN_OFFSET) * sizeof(struct page),
min_low_pfn - ARCH_PFN_OFFSET);
} else if (min_low_pfn < ARCH_PFN_OFFSET) {
pr_info("%lu free pages won't be used\n",
ARCH_PFN_OFFSET - min_low_pfn);
}
min_low_pfn = ARCH_PFN_OFFSET; //#define ARCH_PFN_OFFSET PFN_UP(PHYS_OFFSET)
/*
* Determine low and high memory ranges
*/
max_pfn = max_low_pfn;
if (max_low_pfn > PFN_DOWN(HIGHMEM_START)) { // low memory is capped at HIGHMEM_START (0x20000000 + 768 MB in spaces.h)
#ifdef CONFIG_HIGHMEM
highstart_pfn = PFN_DOWN(HIGHMEM_START);
highend_pfn = max_low_pfn;
#endif
max_low_pfn = PFN_DOWN(HIGHMEM_START);
}
/*
* Initialize the boot-time allocator with low memory only.
*/
bootmap_size = init_bootmem_node(NODE_DATA(0), mapstart,
min_low_pfn, max_low_pfn); // set up bootmem over [min_low_pfn, max_low_pfn), holes in between included
...
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end, size;
start = PFN_UP(boot_mem_map.map[i].addr);
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
/*
* Reserve usable memory.
*/
switch (boot_mem_map.map[i].type) {
case BOOT_MEM_RAM:
break;
case BOOT_MEM_INIT_RAM:
memory_present(0, start, end);
continue;
default:
/* Not usable memory */
continue;
}
/*
* We are rounding up the start address of usable memory
* and at the end of the usable range downwards.
*/
if (start >= max_low_pfn)
continue;
if (start < reserved_end) // never free pages below reserved_end: freeing starts right after the kernel image / initrd
start = reserved_end;
if (end > max_low_pfn)
end = max_low_pfn;
/*
* ... finally, is the area going away?
*/
if (end <= start)
continue;
size = end - start;
/* Register lowmem ranges */
#ifdef CONFIG_BRCMSTB
/* carve out space for bmem */
brcm_free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT); // original author's note: frees the range minus "bmem", memory set aside for device DMA
#else
free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT); // mark the pages from the end of the kernel image to the end of this contiguous RAM range as free
#endif
}
/*
* Reserve the bootmap memory.
*/
reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT); // the pages holding the bootmem bitmap itself must stay reserved
/*
* Reserve initrd memory if needed.
*/
finalize_initrd(); // original author's note: marks the pages occupied by the initrd as reserved
/*
* Call memory_present() on all valid ranges, for SPARSEMEM.
* This must be done after setting up bootmem, since memory_present()
* may allocate bootmem.
*/
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
if (boot_mem_map.map[i].type != BOOT_MEM_RAM)
continue;
start = PFN_UP(boot_mem_map.map[i].addr);
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
memory_present(0, start, end); // original author's note: this matters because of physical memory holes; on this MIPS platform the low 256 MB is DRAM, the next 256 MB is register space and the following 768 MB is DRAM again, so any system with more than 256 MB has a hole
}
}
/*
 * init_bootmem_node - register one node with the boot-time allocator
 * @pgdat: node descriptor; its ->bdata supplies the bootmem bookkeeping
 * @freepfn: page frame at which the allocation bitmap is placed
 * @startpfn: first page frame covered by the bitmap
 * @endpfn: page frame one past the last one covered
 *
 * Thin wrapper around init_bootmem_core(); returns whatever that
 * function returns (the bitmap size in bytes).
 */
unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
				       unsigned long startpfn, unsigned long endpfn)
{
	bootmem_data_t *node_bdata = pgdat->bdata;

	return init_bootmem_core(node_bdata, freepfn, startpfn, endpfn);
}
在include/asm-mips/mach-generic/spaces.h中:
/* Physical address offset; defaults to 0 unless defined before this point. */
#ifndef PHYS_OFFSET
#define PHYS_OFFSET _AC(0, UL)
#endif
/* CAC_BASE is only defined here for 32-bit configurations. */
#ifdef CONFIG_32BIT
#define CAC_BASE _AC(0x80000000, UL)
#endif
/*
 * HIGHMEM_START = UPPERMEM_START + BRCM_MAX_UPPER_MB megabytes
 *               = 0x20000000 + (768 << 20) = 0x50000000.
 * bootmem_init() caps max_low_pfn at PFN_DOWN(HIGHMEM_START).
 */
#define BRCM_MAX_UPPER_MB _AC(768, UL)
#define UPPERMEM_START _AC(0x20000000, UL)
#define HIGHMEM_START (UPPERMEM_START + (BRCM_MAX_UPPER_MB << 20))
/* PAGE_OFFSET defaults to CAC_BASE + PHYS_OFFSET unless already defined. */
#ifndef PAGE_OFFSET
#define PAGE_OFFSET (CAC_BASE + PHYS_OFFSET)
#endif
/*
 * init_bootmem_core - set up the bookkeeping and bitmap for one node
 * @bdata: bootmem descriptor to fill in
 * @mapstart: page frame where the allocation bitmap is stored
 * @start: first page frame tracked by the bitmap
 * @end: page frame one past the last tracked one
 *
 * Every page starts out reserved (all bits set); free RAM has to be
 * released explicitly afterwards, e.g. through free_bootmem().
 *
 * Returns the size of the bitmap in bytes.
 */
static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
	unsigned long mapstart, unsigned long start, unsigned long end)
{
	unsigned long bitmap_bytes;

	mminit_validate_memmodel_limits(&start, &end);

	/* Record the tracked PFN range and where the bitmap lives. */
	bdata->node_min_pfn = start;
	bdata->node_low_pfn = end;
	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
	link_bootmem(bdata);

	/* One bit per page, rounded up by bootmap_bytes(); 0xff == all reserved. */
	bitmap_bytes = bootmap_bytes(end - start);
	memset(bdata->node_bootmem_map, 0xff, bitmap_bytes);

	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
		bdata - bootmem_node_data, start, mapstart, end, bitmap_bytes);

	return bitmap_bytes;
}
/*
 * bootmap_bytes - bitmap size needed to track @pages page frames
 *
 * One bit per page, rounded up to whole bytes and then up to a
 * multiple of sizeof(long).
 */
static unsigned long __init bootmap_bytes(unsigned long pages)
{
	return ALIGN(DIV_ROUND_UP(pages, 8), sizeof(long));
}
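一個byte有8個bit,每個bit用來記錄一個頁是否已被分配:位為1表示該頁已被佔用(保留),位為0表示該頁空閒可用,因此一個byte可以記錄8個頁。這也是init_bootmem_core()先用0xff把整個bitmap填滿(全部標記為保留),之後再由free_bootmem()把可用頁對應的位清0的原因。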
/*
 * free_bootmem - hand a physical range back to the boot-time allocator
 * @addr: physical start address of the range
 * @size: length of the range in bytes
 *
 * The start is converted with PFN_UP() and the end with PFN_DOWN(), so
 * only the page frames lying between those two values are marked free.
 */
void __init free_bootmem(unsigned long addr, unsigned long size)
{
	unsigned long first_pfn = PFN_UP(addr);
	unsigned long end_pfn = PFN_DOWN(addr + size);

	kmemleak_free_part(__va(addr), size);

	/* reserve == 0 selects the "free" path in mark_bootmem(). */
	mark_bootmem(first_pfn, end_pfn, 0, 0);
}
/*
 * mark_bootmem - reserve or free the page frames in [start, end)
 * @start: first page frame
 * @end: page frame one past the last one
 * @reserve: non-zero to reserve, zero to free
 * @flags: forwarded to the per-node reserve path
 *
 * Walks bdata_list and applies the operation to each node's share of
 * the range. If a reservation fails part-way, everything reserved so
 * far is freed again and the error is returned. Running off the end of
 * the list without having covered the whole range is a BUG().
 *
 * Returns 0 on success or the error from mark_bootmem_node().
 */
static int __init mark_bootmem(unsigned long start, unsigned long end,
			       int reserve, int flags)
{
	bootmem_data_t *bdata;
	unsigned long cursor = start;

	list_for_each_entry(bdata, &bdata_list, list) {
		unsigned long node_end;
		int rc;

		/*
		 * Skip nodes that do not contain the cursor. Once marking
		 * has begun the cursor must never fall outside a node.
		 */
		if (!(cursor >= bdata->node_min_pfn &&
		      cursor < bdata->node_low_pfn)) {
			BUG_ON(cursor != start);
			continue;
		}

		node_end = min(bdata->node_low_pfn, end);

		rc = mark_bootmem_node(bdata, cursor, node_end, reserve, flags);
		if (rc && reserve) {
			/* Roll back whatever was reserved before the failure. */
			mark_bootmem(start, cursor, 0, 0);
			return rc;
		}

		if (node_end == end)
			return 0;

		/* The range continues in the following node. */
		cursor = bdata->node_low_pfn;
	}
	BUG();
}
/*
 * mark_bootmem_node - reserve or free [start, end) within one node
 * @bdata: node whose bitmap is updated
 * @start: first page frame
 * @end: page frame one past the last one
 * @reserve: non-zero to reserve, zero to free
 * @flags: passed to __reserve() when reserving
 *
 * Converts the page frame numbers into bitmap indices relative to
 * node_min_pfn and dispatches to __reserve() or __free().
 *
 * Returns the result of __reserve() when reserving, 0 when freeing.
 */
static int __init mark_bootmem_node(bootmem_data_t *bdata,
				    unsigned long start, unsigned long end,
				    int reserve, int flags)
{
	const unsigned long first_idx = start - bdata->node_min_pfn;
	const unsigned long end_idx = end - bdata->node_min_pfn;

	if (!reserve) {
		__free(bdata, first_idx, end_idx);
		return 0;
	}

	return __reserve(bdata, first_idx, end_idx, flags);
}
/*
 * __free - clear the bitmap bits for indices [sidx, eidx) of one node.
 * @bdata: node whose bitmap is updated
 * @sidx: first bitmap index to clear
 * @eidx: index one past the last one to clear
 *
 * NOTE: the bare "..." line marks code the article's author left out.
 */
static void __init __free(bootmem_data_t *bdata,
unsigned long sidx, unsigned long eidx)
{
unsigned long idx;
...
/* Pull the search hint back so the freed range is seen by later searches. */
if (bdata->hint_idx > sidx)
bdata->hint_idx = sidx;
/* Clearing a bit that test_and_clear_bit() reports as not set is fatal. */
for (idx = sidx; idx < eidx; idx++)
if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
BUG();
}
/*
 * __reserve - set the bitmap bits for indices [sidx, eidx) of one node
 * @bdata: node whose bitmap is updated
 * @sidx: first bitmap index to set
 * @eidx: index one past the last one to set
 * @flags: BOOTMEM_EXCLUSIVE makes an already-set bit an error
 *
 * Without BOOTMEM_EXCLUSIVE a bit that was already set is only reported
 * through bdebug(). With it, the bits set by this call are cleared
 * again and -EBUSY is returned.
 *
 * Returns 0 on success, -EBUSY on an exclusive conflict.
 */
static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
			    unsigned long eidx, int flags)
{
	int must_be_exclusive = flags & BOOTMEM_EXCLUSIVE;
	unsigned long idx;

	for (idx = sidx; idx < eidx; idx++) {
		/* Bit was clear before: the page is now reserved, move on. */
		if (!test_and_set_bit(idx, bdata->node_bootmem_map))
			continue;

		if (must_be_exclusive) {
			/* Undo [sidx, idx); idx itself was already set beforehand. */
			__free(bdata, sidx, idx);
			return -EBUSY;
		}

		bdebug("silent double reserve of PFN %lx\n",
			idx + bdata->node_min_pfn);
	}

	return 0;
}
呼叫bootmem_init()函式之後bootmem就初始化完成了,當然可能有人會問在bootmem初始化之前核心要分配記憶體怎麼辦,而且在bootmem初始化過程中要用到記憶體哪裡來?這就是一個先有雞還是先有蛋的問題,核心採取的辦法是在bootmem可用之前包括bootmem的初始化,核心的一切記憶體需要都採用靜態記憶體,即全域性變數的形式,比如bootmem的初始化過程中:
NODE_DATA(0) 巨集展開為:
/* This definition ignores nid and always yields &contig_page_data. */
#define NODE_DATA(nid) (&contig_page_data)
/*
 * Statically defined node descriptor. Its bdata pointer is initialised
 * to the first element of bootmem_node_data[], so no allocation is
 * needed before bootmem itself is up.
 */
struct pglist_data __refdata contig_page_data = {
.bdata = &bootmem_node_data[0]
};
contig_page_data就是定義為一個全域性結構體變數,其中bdata為它的成員變數指標,直接指向另外一個全域性變數:
/* Static array of bootmem descriptors with MAX_NUMNODES entries, placed in __initdata. */
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
bootmem初始化完成後就可以通過下列函式分配記憶體了:
/*
 * Convenience wrappers around the __alloc_bootmem*() family.
 *
 * Naming scheme, as visible from the expansions below:
 *   - no "_pages" in the name: SMP_CACHE_BYTES is passed as the
 *     alignment (alloc_bootmem_align() takes it from the caller instead);
 *   - "_pages":   PAGE_SIZE is passed as the alignment;
 *   - "_node":    takes a pgdat and calls the __alloc_bootmem_node*()
 *     variant;
 *   - "_nopanic": calls the corresponding *_nopanic() variant.
 * All of them pass BOOTMEM_LOW_LIMIT as the final argument.
 */
#define alloc_bootmem(x) \
__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_align(x, align) \
__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_nopanic(x) \
__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages(x) \
__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_nopanic(x) \
__alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_node_nopanic(pgdat, x) \
__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
下面簡單進行說明:
/*
 * alloc_bootmem_core - first-fit allocation from one node's bitmap.
 * @bdata: node to allocate from
 * @size: number of bytes requested
 * @align: required alignment in bytes
 * @goal: preferred physical start address (0 for none)
 * @limit: physical upper bound for the allocation (0 for none)
 *
 * Searches the bitmap for PFN_UP(size) consecutive free (zero) bits,
 * starting at the goal or at the node's hint, reserves them, zeroes the
 * memory and returns its virtual address. Returns NULL when the node
 * cannot satisfy the request.
 *
 * NOTE: the bare "..." line marks code the article's author left out.
 */
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
unsigned long size, unsigned long align,
unsigned long goal, unsigned long limit)
{
unsigned long fallback = 0;
unsigned long min, max, start, sidx, midx, step;
...
min = bdata->node_min_pfn;
max = bdata->node_low_pfn;
/* From here on goal and limit are page frame numbers, not addresses. */
goal >>= PAGE_SHIFT;
limit >>= PAGE_SHIFT;
if (limit && max > limit)
max = limit;
if (max <= min)
return NULL;
/* Search stride in pages: the alignment, but never less than one page. */
step = max(align >> PAGE_SHIFT, 1UL);
/* Honour the goal only if it lies strictly inside (min, max). */
if (goal && min < goal && goal < max)
start = ALIGN(goal, step);
else
start = ALIGN(min, step);
/* sidx/midx are bitmap indices relative to node_min_pfn. */
sidx = start - bdata->node_min_pfn;
midx = max - bdata->node_min_pfn;
if (bdata->hint_idx > sidx) {
/*
* Handle the valid case of sidx being zero and still
* catch the fallback below.
*/
fallback = sidx + 1;
sidx = align_idx(bdata, bdata->hint_idx, step);
}
while (1) {
int merge;
void *region;
unsigned long eidx, i, start_off, end_off;
find_block:
sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx); // find the first candidate start page frame that is free
sidx = align_idx(bdata, sidx, step);
eidx = sidx + PFN_UP(size);
if (sidx >= midx || eidx > midx)
break;
/* Every page in [sidx, eidx) must be free; otherwise restart past the busy one. */
for (i = sidx; i < eidx; i++)
if (test_bit(i, bdata->node_bootmem_map)) {
sidx = align_idx(bdata, i, step);
if (sidx == i)
sidx += step;
goto find_block;
}
/*
 * If the previous allocation ended part-way through the page directly
 * before sidx, start in the unused tail of that page instead of at the
 * page boundary.
 */
if (bdata->last_end_off & (PAGE_SIZE - 1) &&
PFN_DOWN(bdata->last_end_off) + 1 == sidx)
start_off = align_off(bdata, bdata->last_end_off, align);
else
start_off = PFN_PHYS(sidx);
/* merge is 1 when the block begins in that already-reserved page. */
merge = PFN_DOWN(start_off) < sidx;
end_off = start_off + size;
bdata->last_end_off = end_off;
bdata->hint_idx = PFN_UP(end_off);
/*
* Reserve the area now:
*/
if (__reserve(bdata, PFN_DOWN(start_off) + merge,
PFN_UP(end_off), BOOTMEM_EXCLUSIVE)) // mark the page frames just handed out as reserved
BUG();
/* start_off is an offset from the node's first page frame. */
region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
start_off);
memset(region, 0, size);
/*
* The min_count is set to 0 so that bootmem allocated blocks
* are never reported as leaks.
*/
kmemleak_alloc(region, size, 0, 0);
return region;
}
/* Nothing found from the hint onwards: retry once from the original start index. */
if (fallback) {
sidx = align_idx(bdata, fallback - 1, step);
fallback = 0;
goto find_block;
}
return NULL;
}
參考文件:
--------------------- 本文來自 淡泊的豬 的CSDN 部落格 ,全文地址請點選:https://blog.csdn.net/whuzm08/article/details/80135358?utm_source=copy