Linux 記憶體管理之vmalloc實現
vmalloc最小分配一個page,並且分配到的物理頁面不保證是連續的(對映後的虛擬地址則是連續的),因為vmalloc內部多次呼叫alloc_page,每次只分配單個頁面.
vmalloc主要內容:
1. 從VMALLOC_START到VMALLOC_END查詢空閒的虛擬地址空間(hole)
2. 根據分配的size,呼叫alloc_page依次分配單個頁面.
3. 把分配的單個頁面,對映到第一步中找到的連續的虛擬地址。
1. 查詢空閒的虛擬地址空間(hole)
關鍵資料結構
/*
 * Descriptor of one vmalloc-style allocation: the virtual range it occupies
 * and the physical pages backing it.
 */
struct vm_struct {
	struct vm_struct *next;
	void *addr;			// starting virtual address
	unsigned long size;		// size of the allocated memory
	unsigned long flags;
	struct page **pages;		// the allocated physical pages
	unsigned int nr_pages;		// number of pages
	phys_addr_t phys_addr;		// starting physical address
	const void *caller;
};
/*
 * One range of virtual address space, [va_start, va_end). Each instance is
 * linked into both the vmap_area_root red-black tree and the vmap_area_list
 * list.
 */
struct vmap_area {
	unsigned long va_start;		// starting virtual address
	unsigned long va_end;		// ending virtual address
	unsigned long flags;
	struct rb_node rb_node; /* linked into the vmap_area_root red-black tree */
	struct list_head list; /* linked into the vmap_area_list list */
	struct list_head purge_list; /* "lazy purge" list */
	struct vm_struct *vm;
	struct rcu_head rcu_head;
};
__vmalloc->__vmalloc_node->__vmalloc_node_range->__get_vm_area_node->alloc_vmap_area
/*
 * Find a free range ("hole") of @size bytes, aligned to @align, that starts
 * at or above @vstart and ends no later than @vend, and record it in a newly
 * allocated vmap_area.
 *
 * @size:     length of the range to reserve
 * @align:    alignment applied to the candidate start address
 * @vstart:   lowest address the search may use
 * @vend:     upper bound; the range must satisfy addr + size <= vend
 * @node:     node passed to kmalloc_node() for the vmap_area itself
 * @gfp_mask: allocation flags, masked with GFP_RECLAIM_MASK before use
 *
 * Returns the new vmap_area after inserting it with __insert_vmap_area(), or
 * ERR_PTR(-ENOMEM) if the vmap_area itself cannot be allocated.
 *
 * NOTE(review): the `overflow` label targeted by the gotos below is not part
 * of this excerpt, and neither `purged` nor the `retry` label is used in the
 * portion shown - presumably the elided failure path uses them; confirm
 * against the full source.
 */
static struct vmap_area *alloc_vmap_area(unsigned long size,
				unsigned long align,
				unsigned long vstart, unsigned long vend,
				int node, gfp_t gfp_mask)
{
	struct vmap_area *va;
	struct rb_node *n;
	unsigned long addr;
	int purged = 0;
	struct vmap_area *first;

	va = kmalloc_node(sizeof(struct vmap_area),
			gfp_mask & GFP_RECLAIM_MASK, node);
	if (unlikely(!va))
		return ERR_PTR(-ENOMEM);

retry:
	/* The search and the insertion below both run under vmap_area_lock. */
	spin_lock(&vmap_area_lock);

	/* find starting point for our search */
	addr = ALIGN(vstart, align);
	/* addr + size wrapped around the top of the address space */
	if (addr + size < addr)
		goto overflow;

	n = vmap_area_root.rb_node;
	first = NULL;

	/*
	 * Walk the red-black tree for the area closest to the start address:
	 * remember every node whose va_end is >= addr, stop as soon as that
	 * node also begins at or before addr, otherwise keep descending left.
	 * Nodes that end below addr send the search to the right.
	 */
	while (n) {
		struct vmap_area *tmp;
		tmp = rb_entry(n, struct vmap_area, rb_node);
		if (tmp->va_end >= addr) {
			first = tmp;
			if (tmp->va_start <= addr)
				break;
			n = n->rb_left;
		} else
			n = n->rb_right;
	}

	/* No area ends at or above addr, so addr is used unchanged. */
	if (!first)
		goto found;

	/*
	 * Starting from the lowest candidate address, look for a usable hole:
	 * while [addr, addr + size) would run past the start of `first` (and
	 * still ends at or below vend), move addr to the aligned end of
	 * `first` and try again against the following area.
	 */
	while (addr + size > first->va_start && addr + size <= vend) {
		addr = ALIGN(first->va_end, align);
		if (addr + size < addr)
			goto overflow;

		/* `first` is the last area in the list: nothing lies above it. */
		if (list_is_last(&first->list, &vmap_area_list))
			goto found;

		/* advance to the next area in vmap_area_list */
		first = list_entry(first->list.next,
				struct vmap_area, list);
	}

found:
	if (addr + size > vend)
		goto overflow;

	/* record the start and end virtual addresses of the chosen range */
	va->va_start = addr;
	va->va_end = addr + size;
	va->flags = 0;
	/* insert this area into the red-black tree */
	__insert_vmap_area(va);
	/* remember the node just inserted in free_vmap_cache */
	free_vmap_cache = &va->rb_node;
	spin_unlock(&vmap_area_lock);

	return va;
}
2.分配頁面和頁表對映
__vmalloc->__vmalloc_node->__vmalloc_node_range->__vmalloc_area_node
/*
 * Allocate the physical pages for @area one page at a time and map them into
 * the area's virtual address range.
 *
 * @area:     the vm_struct whose pages, nr_pages and flags are filled in
 * @gfp_mask: allocation flags for the pages (__GFP_NOWARN is added)
 * @prot:     page protection passed to map_vm_area()
 * @node:     node to allocate from, or NUMA_NO_NODE for no preference
 *
 * Returns area->addr once map_vm_area() has succeeded.
 *
 * NOTE(review): the `fail` label targeted below is not part of this excerpt,
 * and neither the page-pointer array nor the individual pages are checked
 * for NULL in the portion shown - confirm against the full source.
 */
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node)
{
/* pages are requested singly (order 0) */
const int order = 0;
struct page **pages;
unsigned int nr_pages, array_size, i;
/* flags for the page-pointer array itself: zeroed memory */
const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
/* compute the number of physical pages */
nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
/* bytes needed for one struct page pointer per page */
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/*
 * Allocate memory for the page-pointer array: an array larger than one
 * page goes through __vmalloc_node() (recording VM_VPAGES in the area's
 * flags), a smaller one through kmalloc_node().
 */
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
PAGE_KERNEL, node, area->caller);
area->flags |= VM_VPAGES;
} else {
pages = kmalloc_node(array_size, nested_gfp, node);
}
area->pages = pages;
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
/* call alloc_page to allocate a single page (on @node when one is given) */
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask);
else
page = alloc_pages_node(node, alloc_mask, order);
area->pages[i] = page;
/* yield between pages when the caller's flags allow blocking */
if (gfpflags_allow_blocking(gfp_mask))
cond_resched();
}
/*
 * Build the page tables that map the allocated physical pages to the
 * corresponding virtual addresses (PGD->PMD->PTE). Page-table mapping
 * was covered previously and is not analysed again here.
 */
if (map_vm_area(area, prot, pages))
goto fail;
return area->addr;
}