1. 程式人生 > >kernel常用函式、巨集、結構體

kernel常用函式、巨集、結構體

1 __setup

  在include/linux/init.h檔案中定義

/*
 * __setup_param - register a handler for a kernel command-line option.
 * @str:       option prefix to match, e.g. "ldb="
 * @unique_id: token pasted into the generated object names
 * @fn:        handler stored next to @str
 * @early:     flag stored as the third initializer (0 for __setup())
 *
 * Expands to two definitions: a string array __setup_str_<unique_id>
 * holding @str, and a struct obs_kernel_param __setup_<unique_id>
 * initialised with { string, fn, early } and placed in the .init.setup
 * section, aligned to sizeof(long).
 */
#define __setup_param(str, unique_id, fn, early)            \
    static const char __setup_str_##unique_id[] __initconst \
        __aligned(1) = str;                                 \
    static struct obs_kernel_param __setup_##unique_id      \
        __used __section(.init.setup)                       \
        __attribute__((aligned((sizeof(long)))))            \
        = { __setup_str_##unique_id, fn, early }

/*
 * __setup - shorthand for __setup_param() that reuses @fn as the
 * unique id and passes early = 0.
 */
#define __setup(str, fn)                                    \
    __setup_param(str, fn, fn, 0)

  __setup在kernel啟動時用來讀取、解析cmdline。str可以當成一個變數,fn是用來處理str變數的函式
  ldb.c kernel_imx\drivers\video\mxc檔案使用到這個巨集

/*
 * ldb_setup - handler for the "ldb=" kernel command-line option.
 * @options: the option value (e.g. "spl0" for "ldb=spl0")
 *
 * Stores the LDB mode matching @options in g_ldb_mode.  A value that
 * matches none of the known names leaves g_ldb_mode untouched.
 * Always returns 1.
 */
static int __init ldb_setup(char *options)
{
    if (strcmp(options, "spl0") == 0) {
        g_ldb_mode = LDB_SPL_DI0;
    } else if (strcmp(options, "spl1") == 0) {
        g_ldb_mode = LDB_SPL_DI1;
    } else if (strcmp(options, "dul0") == 0) {
        g_ldb_mode = LDB_DUL_DI0;
    } else if (strcmp(options, "dul1") == 0) {
        g_ldb_mode = LDB_DUL_DI1;
    } else if (strcmp(options, "sin0") == 0) {
        g_ldb_mode = LDB_SIN0;
    } else if (strcmp(options, "sin1") == 0) {
        g_ldb_mode = LDB_SIN1;
    } else if (strcmp(options, "sep0") == 0) {
        g_ldb_mode = LDB_SEP0;
    } else if (strcmp(options, "sep1") == 0) {
        g_ldb_mode = LDB_SEP1;
    }

    return 1;
}
__setup("ldb=", ldb_setup);

  當前使用的cmdline部分內容為 

bootargs=console=ttymxc0,115200 androidboot.console=ttymxc0 vmalloc=400M init=/init video=mxcfb0:dev=ldb,LDB-1080P60,if=RGB24,bpp=32 ldb=spl0

  ldb=spl0,那麼ldb_setup的引數就是spl0。巨集的展開

__setup("ldb=", ldb_setup);
__setup_param("ldb=", ldb_setup, ldb_setup, 0) //定義兩個變數
static const char __setup_str_ldb_setup[] __initconst __aligned(1) = "ldb=";  //字串陣列
static struct obs_kernel_param __setup_ldb_setup __used __section(.init.setup) //結構體        __attribute__((aligned((sizeof(long))))) = {
     __setup_str_ldb_setup, ldb_setup, 0
 }

2 early_param

  定義如下,除了__setup_param的最後一個引數,其他的跟__setup的定義是一樣的,定義兩個變數,引數不同。

/*
 * early_param - like __setup(), but passes 1 as the last argument
 * ("early") of __setup_param().  @fn doubles as the unique id.
 */
#define early_param(str, fn)                    \
    __setup_param(str, fn, fn, 1)

  early_param和__setup定義的變數都是在main.c (kernel_imx\init) start_kernel函式中處理的。

parse_early_param()//處理early_param定義的變數,實際最後還是呼叫了parse_args函式,引數不一樣
parse_args("Booting kernel", static_command_line, __start___param,
         __stop___param - __start___param,
             &unknown_bootoption);//處理 __setup定義的變數

3 MACHINE_START

4 __attribute__編譯屬性 section

  本節內容從__attribute__編譯屬性—section轉載
  __attribute__ 是gcc編譯屬性,主要用於改變所宣告或定義的函式或資料的特性,它有很多子項,用於改變作用物件的特性。比如對函式,noinline將禁止進行內聯擴充套件、noreturn表示函式不會返回到呼叫者、pure表明函式除返回值外,不會通過其它(如全域性變數、指標)對函式外部產生任何影響。核心中出現比較多的是section,section用來指定函式或資料被放入哪個段。
  目前支援以下變數屬性

• address (addr)
• aligned (alignment)
• boot
• deprecated
• fillupper
• far
• mode (mode)
• near
• noload
• packed
• persistent
• reverse (alignment)
• section ("section-name")
• secure
• sfr (address)
• space (space)
• transparent_union
• unordered
• unused
• weak
__attribute__的section子項的使用格式為:
__attribute__((section("section_name")))
其作用是將作用的函式或資料放入指定名為"section_name"輸入段。

  輸入段和輸出段是相對於要生成最終的elf或binary時的Link過程說的,Link過程的輸入大都是由原始碼編譯生成的目標檔案.o,那麼這些.o檔案中包含的段相對link過程來說就是輸入段,而Link的輸出一般是可執行檔案elf或庫等,這些輸出檔案中也包含有段,這些輸出檔案中的段就叫做輸出段。輸入段和輸出段本來沒有什麼必然的聯絡,是互相獨立的,只是在Link過程中,Link程式會根據一定的規則(這些規則其實來源於Link Script),將不同的輸入段重新組合到不同的輸出段中,即使是段的名字,輸入段和輸出段也可以完全不同。
 int var __attribute__((section(".xdata"))) = 0;
 這樣定義的變數var將被放入名為.xdata的輸入段,(注意:attribute這種用法中的括號很嚴格,這裡的幾個括號好象一個也不能少。)__attribute__的section屬性只指定物件的輸入段,它並不能影響所指定物件最終會放在可執行檔案的什麼段。
 __init 巨集最常用的地方是驅動模組初始化函式的定義處,其目的是將驅動模組的初始化函式放入名叫.init.text的輸入段。對於__initdata來說,用於資料定義,目的是將資料放入名叫.init.data的輸入段。

4.1 initcall巨集定義

  原始碼

/*
 * __define_initcall - define a static initcall_t function pointer.
 * @level: string literal spliced into the section name
 * @fn:    function the pointer is initialised with
 * @id:    token appended to the variable name
 *
 * The variable is named __initcall_<fn><id> and is placed in the input
 * section ".initcall" level ".init" (adjacent string literals are
 * concatenated, e.g. level "1" gives ".initcall1.init").
 */
#define __define_initcall(level,fn,id) \
        static initcall_t __initcall_##fn##id __used \
        __attribute__((__section__(".initcall" level ".init"))) = fn

  其用來定義型別為initcall_t的static函式指標,函式指標的名稱由引數fn和id決定:__initcall_##fn##id,這就是函式指標的名稱,它其實是一個變數名稱。從該名稱的定義方法我們學到了巨集定義的一種高階用法,即利用巨集的引數產生名稱,這要藉助於"##"這一符號組合的作用。
  這一函式指標變數放入什麼輸入段呢,請看__attribute__((__section__(".initcall" level ".init"))),輸入段的名稱由level決定,如果level="1",則輸入段是.initcall1.init,如果level="3s",則輸入段是.initcall3s.init。這一函式指標變數就是放在用這種方法決定的輸入段中的。

5 current

  kernel中current是一個巨集,返回當前程序task_struct結構的指標。參考文件
  定義如下,

/* arch/arm/include/asm/current.h */
static inline struct task_struct *get_current(void)
{
        return current_thread_info()->task;
}
// current巨集
#define current (get_current()) 

  sp為當前程序核心棧棧頂地址

/* arch/arm/include/asm/thread_info.h */
/*
 * current_thread_info - locate the thread_info of the running task.
 *
 * Reads the stack pointer register and clears the low bits selected by
 * (THREAD_SIZE - 1); the resulting address is returned as the
 * thread_info pointer.  (Assumes THREAD_SIZE is a power of two.)
 */
static inline struct thread_info *current_thread_info(void)
{
        register unsigned long sp asm ("sp");
        unsigned long base = sp & ~(THREAD_SIZE - 1);

        return (struct thread_info *)base;
}

  每個程序在核心態下都會開闢一個核心棧,一般就是8KB,一般把thread_info這個結構體和核心棧放在一起,這樣核心就可以很方便從ESP暫存器中獲取當前CPU上正在執行的thread_info。具體的位置是thread_info結構儲存在8K起始位置,如下圖所示:

這裡寫圖片描述

  無論esp是指向哪裡,只要將其低13位遮蔽掉,總能找到8K的起始地址,也就是圖中的0x015fa000,這樣我們就找到了thread_info,而task也就是task_struct結構體是thread_info的成員,thread_info->task就是當前程序task_struct結構體指標。
  上圖是以x86架構畫的圖,arm cpu也是一樣的處理邏輯,只是 將esp改成sp
  struct thread_info結構體
/*
 * struct thread_info - low-level per-thread data (ARM).
 *
 * current_thread_info() finds this structure by masking the stack
 * pointer with ~(THREAD_SIZE - 1); get_current() then reads ->task
 * from it to obtain the running task's task_struct.
 */
struct thread_info {
        unsigned long           flags;          /* low level flags */
        int                     preempt_count;  /* 0 => preemptable, <0 => bug */
        mm_segment_t            addr_limit;     /* address limit */
        struct task_struct      *task;          /* main task structure */
        struct exec_domain      *exec_domain;   /* execution domain */
        __u32                   cpu;            /* cpu */
        __u32                   cpu_domain;     /* cpu domain */
        struct cpu_context_save cpu_context;    /* cpu context */
        __u32                   syscall;        /* syscall number */
        __u8                    used_cp[16];    /* thread used copro */
        unsigned long           tp_value;
        struct crunch_state     crunchstate;
        union fp_state          fpstate __attribute__((aligned(8)));
        union vfp_state         vfpstate;
#ifdef CONFIG_ARM_THUMBEE
        unsigned long           thumbee_state;  /* ThumbEE Handler Base register */
#endif
        struct restart_block    restart_block;
};

   核心做的大部分動作是代表一個特定程序的,可以將核心看作是一個特殊的程序,應用層的是普通程序。在一個系統呼叫執行期間,例如 open 或者 read, 當前程序是發出呼叫的程序。核心程式碼可以通過使用 current 來使用程序特定的資訊,此時的current是發出呼叫的程序的task_struct指標。

6 關於開啟裝置結點(struct inode和struct file)

  當在應用層多個終端或者檔案上同時開啟同一個裝置結點,如:/dev/stdin時,fd = open(“/dev/stdin”, O_RDWR)
返回的fd不總是同一個值,由當前終端決定,有可能相同,也有可能不同,也就是說fd是不確定的,由系統的環境決定。
  但是在kernel所有開啟的結點都指向同一個inode(struct inode),也就是說,無論應用層開啟多少次,在kernel看來都是同一個檔案。其呼叫的方法、使用的資料都是一致的。但是每次開啟結點檔案,kernel都會分配一個struct file *filp,file是與上層應用的檔案描述符相對應的;無論是否在同一個程序、執行緒中,多次開啟結點都會分配多個struct file結構體,並返回不同的fd。
  不僅是結點檔案,普通檔案同樣適用。
  總結:應用層多次開啟檔案,kernel只分配一次struct inode,多次分配struct file,返回多個不同的fd

7 struct page

  struct page 表示一個記憶體頁框,是記憶體管理的最小單位,通常一個頁框的大小是4K,kernel會為每個記憶體頁框分配一個struct page結構體。下面是struct page部分程式碼

/*
 * struct page - descriptor of one physical page frame.
 *
 * Partial excerpt (the article quotes only part of the kernel
 * definition).  Notes marked "Author's note" are translated from the
 * article and reflect its author's reading of the fields.
 */
struct page {
        /* First double word block */
        unsigned long flags;            /* Author's note: architecture-independent
                                         * flags describing the page; each bit
                                         * of flags defines one page attribute.
                                         */
        struct address_space *mapping;  /* Author's note:
                                         * a: mapping == 0: the page belongs to
                                         *    the swap cache; when an address
                                         *    space is needed, the swap address
                                         *    space swapper_space is used.
                                         * b: mapping != 0 and bit[0] == 0: the
                                         *    page belongs to the page cache or
                                         *    a file mapping; mapping points to
                                         *    the file's address_space.
                                         * c: mapping != 0 and bit[0] != 0: the
                                         *    page is an anonymous mapping;
                                         *    mapping points to a struct
                                         *    anon_vma, recovered with
                                         *    anon_vma = (struct anon_vma *)
                                         *        (mapping - PAGE_MAPPING_ANON).
                                         */
        /* Second double word */
        struct {
                union {
                        pgoff_t index;          /* Our offset within mapping.
                                                 * Author's note: offset inside
                                                 * the mapped virtual area
                                                 * (vma_area); a file may be
                                                 * mapped only partially - if
                                                 * 1M is mapped, index is the
                                                 * offset within that 1M, not
                                                 * within the whole file.
                                                 */
                        void *freelist;         /* slub first free object */
                };

                union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
        defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
                        /* Used for cmpxchg_double in slub */
                        unsigned long counters;
#else
                        /*
                         * Keep _count separate from slub cmpxchg_double data.
                         * As the rest of the double word is protected by
                         * slab_lock but _count is not.
                         */
                        unsigned counters;
#endif

                        struct {

                                union {
                                        /*
                                         * Author's note: number of times the
                                         * page is mapped by page tables, i.e.
                                         * how many processes share it.  The
                                         * initial value is -1; it is 0 when
                                         * exactly one process maps the page.
                                         * For a page held by the buddy system
                                         * the value is
                                         * PAGE_BUDDY_MAPCOUNT_VALUE (-128);
                                         * the kernel checks for that value to
                                         * tell whether the page belongs to the
                                         * buddy system.
                                         */
                                        atomic_t _mapcount;

                                        struct {
                                                unsigned inuse:16;
                                                unsigned objects:15;
                                                unsigned frozen:1;
                                        };
                                };
                                atomic_t _count;        /* Author's note: number
                                                         * of references to the
                                                         * page inside the
                                                         * kernel.  0 means the
                                                         * page has no user and
                                                         * can be freed; any
                                                         * other value means it
                                                         * is in use.  _mapcount
                                                         * counts mappings while
                                                         * _count counts users:
                                                         * a mapped page is not
                                                         * necessarily in use,
                                                         * but a page must be
                                                         * mapped before use.
                                                         */
                        };
                };
        };

        /* Third double word block */
        union {
                struct list_head lru;   /* Pageout list, eg. active_list
                                         * protected by zone->lru_lock !
                                         */
                struct {                /* slub per cpu partial pages */
                        struct page *next;      /* Next partial slab */
#ifdef CONFIG_64BIT
                        int pages;      /* Nr of partial slabs left */
                        int pobjects;   /* Approximate # of objects */
#else
                        short int pages;
                        short int pobjects;
#endif
                };
        };

        /* Remainder is not double word aligned */
        union {
                unsigned long private;          /* Mapping-private opaque data:
                                                 * usually used for buffer_heads
                                                 * if PagePrivate set; used for
                                                 * swp_entry_t if PageSwapCache;
                                                 * indicates order in the buddy
                                                 * system if PG_buddy is set.
                                                 */
        };      /* union closed here; the excerpt left it unterminated */

#if defined(WANT_PAGE_VIRTUAL)
        void *virtual;                  /* Kernel virtual address (NULL if not
                                         * kmapped, ie. highmem).
                                         * Author's note: kernel virtual address
                                         * of this page frame; only used for
                                         * high-memory pages, whose linear
                                         * address cannot simply be computed
                                         * from the physical address.  It is
                                         * NULL while the high-memory page is
                                         * not yet mapped into the kernel.
                                         */
#endif /* WANT_PAGE_VIRTUAL */

};

8 struct mm_struct

  轉載
  task_struct,叫做程序描述符,而mm_struct 叫做記憶體描述符,描述linux下程序的地址空間的所有的資訊。
  
  一個程序的虛擬地址空間主要由兩個資料結構來描述。一個是最高層次的:mm_struct,一個是較高層次的:vm_area_struct。最高層次的mm_struct結構描述了一個程序的整個虛擬地址空間。較高層次的結構vm_area_struct描述了虛擬地址空間的一個區間(簡稱虛擬區)。每個程序只有一個mm_struct結構,在每個程序的task_struct結構中,有一個指標指向該程序的mm_struct結構。下面來看下mm_struct在核心中的位置。
  

這裡寫圖片描述
圖8-1 程序的地址空間的分佈

  mm_struct儲存了一個程序程式碼段(start_code ~ end_code)、DATA段(start_data ~ end_data)、BSS段、堆(start_brk ~ brk)、棧(stack_start ~ stack_end)、mmap(mmap_base是維護共享對映區的起始地址) 地址。這些地址通過頁表轉換可以找到對應的實體地址。task_struct用mm、active_mm變數來指向當前程序的mm_struct結構體。

  每一個程序都會有自己獨立的mm_struct,這樣每一個程序都會有自己獨立的地址空間,這樣才能互不干擾。當程序之間的地址空間被共享的時候,我們可以理解為這個時候是多個程序使用一份地址空間,這就是執行緒。

這裡寫圖片描述
圖 8-2 程序虛擬地址空間

  多個程序的地址空間分佈如 圖8-2 一樣,每一個程序的使用者空間在32位的平臺上就是上面這個圖的情況,對於實體記憶體當中的核心kernel,是隻存在一份,所有的程序是用來共享的,核心當中會利用PCB(程序控制塊)來管理不同的程序。  

/*
 * struct mm_struct - memory descriptor of a process address space.
 * Notes translated from the article are its author's explanations.
 */
struct mm_struct {
    struct vm_area_struct * mmap; /* list of VMAs: linked list; each vm_area_struct is one virtual memory region, i.e. one segment of the mm_struct */
    struct rb_root mm_rb; /* red-black tree; organises the same regions as mmap, but managed with a different algorithm (a red-black tree) */
    struct vm_area_struct * mmap_cache; /* remembers the most recently used vm_area_struct so the next lookup need not walk the list */
#ifdef CONFIG_MMU
    unsigned long (*get_unmapped_area) (struct file *filp,
                unsigned long addr, unsigned long len,
                unsigned long pgoff, unsigned long flags);
    void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
#endif
    unsigned long mmap_base;        /* base of mmap area, start address of mmap */
    unsigned long task_size;        /* size of task vm space: size of the current process's virtual address space */
    unsigned long cached_hole_size;     /* if non-zero, the largest hole below free_area_cache */
    unsigned long free_area_cache;      /* first hole of size cached_hole_size or larger */
    pgd_t * pgd; /* maintains the page directory; every process has its own page directory, and note that a process's
                    page directory differs from the kernel's. When the scheduler picks the process to run, this address
                    is converted to a physical address; linux generally uses three-level page tables for translation. */
    atomic_t mm_users;          /* How many users with user space? number of processes (especially relevant with multiple threads) */
    atomic_t mm_count;          /* How many references to "struct mm_struct" (users count as 1); reference count */
    int map_count;              /* number of VMAs: number of entries in the mmap list */
    spinlock_t page_table_lock;     /* Protects page tables and some counters; page-table lock */
    struct rw_semaphore mmap_sem;

    struct list_head mmlist;        /* List of maybe swapped mm's.  These are globally strung
                         * together off init_mm.mmlist, and are protected
                         * by mmlist_lock; mmlist links this mm_struct into the system-wide list of mm_structs
                         */


    unsigned long hiwater_rss;  /* High-watermark of RSS usage */
    unsigned long hiwater_vm;   /* High-water virtual memory usage */
    //size of the process address space, number of locked (non-swappable) pages, pages in shared file mappings, pages in executable mappings
    unsigned long total_vm;     /* Total pages mapped */
    unsigned long locked_vm;    /* Pages that have PG_mlocked set */
    unsigned long pinned_vm;    /* Refcount permanently increased */
    unsigned long shared_vm;    /* Shared pages (files) */
    unsigned long exec_vm;      /* VM_EXEC & ~VM_WRITE */
    //number of pages in the user-mode stack
    unsigned long stack_vm;     /* VM_GROWSUP/DOWN */
    unsigned long reserved_vm;  /* VM_RESERVED|VM_IO pages */
    unsigned long def_flags;
    unsigned long nr_ptes;      /* Page table pages */
    //bounds of the code segment and the data segment
    unsigned long start_code, end_code, start_data, end_data;
    //heap and stack
    unsigned long start_brk, brk, start_stack;
    //command-line arguments: start and end address of the arguments, and start and end address of the environment variables
    unsigned long arg_start, arg_end, env_start, env_end;

    unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */

    /*
     * Special counters, in some configurations protected by the
     * page_table_lock, in other configurations by being atomic.
     */
    struct mm_rss_stat rss_stat;

    struct linux_binfmt *binfmt;

    cpumask_var_t cpu_vm_mask_var;

    /* Architecture-specific MM context */
    mm_context_t context;

    /* Swap token stuff */
    /*
     * Last value of global fault stamp as seen by this process.
     * In other words, this value gives an indication of how long
     * it has been since this task got the token.
     * Look at mm/thrash.c
     */
    unsigned int faultstamp;
    unsigned int token_priority;
    unsigned int last_interval;

    unsigned long flags; /* Must use atomic bitops to access the bits */

    struct core_state *core_state; /* coredumping support */
#ifdef CONFIG_AIO
    spinlock_t      ioctx_lock;
    struct hlist_head   ioctx_list;
#endif
#ifdef CONFIG_MM_OWNER
    /*
     * "owner" points to a task that is regarded as the canonical
     * user/owner of this mm. All of the following must be true in
     * order for it to be changed:
     *
     * current == mm->owner
     * current->mm != mm
     * new_owner->mm == mm
     * new_owner->alloc_lock is held
     */
    struct task_struct __rcu *owner;
#endif

    /* store ref to file /proc/<pid>/exe symlink points to */
    struct file *exe_file;
    unsigned long num_exe_file_vmas;
#ifdef CONFIG_MMU_NOTIFIER
    struct mmu_notifier_mm *mmu_notifier_mm;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
    pgtable_t pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
    struct cpumask cpumask_allocation;
#endif
};

9 struct task_struct

/*
 * struct task_struct - process descriptor.
 * Notes translated from the article are its author's explanations.
 */
struct task_struct {
    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped. volatile limits compiler optimisation: state is always read from the variable's memory rather than from a register */
    void *stack; //maintains the process's kernel stack
    atomic_t usage;
    unsigned int flags; /* per process flags, defined below */
    unsigned int ptrace;

#ifdef CONFIG_SMP
    struct llist_node wake_entry;
    int on_cpu;
#endif
    int on_rq;
//priorities, used for process scheduling
/*
static_prio holds the static priority; it can be changed through the nice system call, range 100~139
rt_priority holds the real-time priority, range 0~99
prio        holds the dynamic priority
normal_prio its value depends on the static priority and the scheduling policy
*/
    int prio, static_prio, normal_prio;
    unsigned int rt_priority;
    const struct sched_class *sched_class;
    struct sched_entity se;
    struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
    struct task_group *sched_task_group;
#endif

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* list of struct preempt_notifier: */
    struct hlist_head preempt_notifiers;
#endif

    /*
     * fpu_counter contains the number of consecutive context switches
     * that the FPU is used. If this is over a threshold, the lazy fpu
     * saving becomes unlazy to save the trap. This is an unsigned char
     * so that after 256 times the counter wraps and the behavior turns
     * lazy again; this to deal with bursty apps that only use FPU for
     * a short time
     */
    unsigned char fpu_counter;
#ifdef CONFIG_BLK_DEV_IO_TRACE
    unsigned int btrace_seq;
#endif

    unsigned int policy;
    cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
    int rcu_read_lock_nesting;
    char rcu_read_unlock_special;
    struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
    struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
    struct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    struct sched_info sched_info;
#endif

    struct list_head tasks;
#ifdef CONFIG_SMP
    struct plist_node pushable_tasks;
#endif
    //process address space: mm and active_mm are both pointers to a struct mm_struct.
    //NOTE(review): the original note described mm as the "mm_struct list" and active_mm as the
    //"mm_struct red-black tree"; the declaration does not show that distinction - confirm.
    struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
    unsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)
    struct task_rss_stat    rss_stat;
#endif
/* task state */
    int exit_state;
    int exit_code, exit_signal;
    int pdeath_signal;  /*  The signal sent when the parent dies  */
    unsigned int jobctl;    /* JOBCTL_*, siglock protected */
    /* ??? */
    unsigned int personality;
    unsigned did_exec:1;
    unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
                 * execve */
    unsigned in_iowait:1;

    /* task may not gain privileges */
    unsigned no_new_privs:1;

    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    unsigned sched_contributes_to_load:1;

#ifdef CONFIG_GENERIC_HARDIRQS
    /* IRQ handler threads */
    unsigned irq_thread:1;
#endif

    pid_t pid; //process identifier
    pid_t tgid; //thread group identifier

#ifdef CONFIG_CC_STACKPROTECTOR
    /* Canary value for the -fstack-protector gcc feature */
    unsigned long stack_canary;
#endif

    /* Family relationships between processes:
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with
     * p->real_parent->pid)
     */
    struct task_struct __rcu *real_parent; /* real parent process */
    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    /*
     * children/sibling forms the list of my natural children
     */
    struct list_head children;  /* list of my children */
    struct list_head sibling;   /* linkage in my parent's children list */
    struct task_struct *group_leader;   /* threadgroup leader */

    /*
     * ptraced is the list of tasks this task is using ptrace on.
     * This includes both natural children and PTRACE_ATTACH targets.
     * p->ptrace_entry is p's link on the p->parent->ptraced list.
     */
    struct list_head ptraced;
    struct list_head ptrace_entry;

    /* PID/PID hash table linkage. */
    struct pid_link pids[PIDTYPE_MAX];
    struct list_head thread_group;

    struct completion *vfork_done;      /* for vfork() */
    int __user *set_child_tid;      /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */

    cputime_t utime, stime, utimescaled, stimescaled;
    cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
    cputime_t prev_utime, prev_stime;
#endif
    unsigned long nvcsw, nivcsw; /* context switch counts */
    struct timespec start_time;         /* monotonic time */
    struct timespec real_start_time;    /* boot based time */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;

    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];

/* process credentials */
    const struct cred __rcu *real_cred; /* objective and real subjective task
                     * credentials (COW) */
    const struct cred __rcu *cred;  /* effective (overridable) subjective task
                     * credentials (COW) */
    struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */

    char comm[TASK_COMM_LEN]; /* executable name excluding path
                     - access with [gs]et_task_comm (which lock
                       it with task_lock())
                     - initialized normally by setup_new_exec */
/* file system info */
    int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
    struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
    unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
    struct thread_struct thread;
/* filesystem information */
    struct fs_struct *fs;
/* open file information */
    struct files_struct *files;
/* namespaces */
    struct nsproxy *nsproxy;
/* signal handlers */
    struct signal_struct *signal;
    struct sighand_struct *sighand;

    sigset_t blocked, real_blocked;
    sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
    struct sigpending pending;

    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
    uid_t loginuid;
    unsigned int sessionid;
#endif
    struct seccomp seccomp;

/* Thread group tracking */
    u32 parent_exec_id;
    u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
    spinlock_t alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task */
    struct plist_head pi_waiters;
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
    /* mutex deadlock detection */
    struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
    unsigned int irq_events;
    unsigned long hardirq_enable_ip;
    unsigned long hardirq_disable_ip;
    unsigned int hardirq_enable_event;
    unsigned int hardirq_disable_event;
    int hardirqs_enabled;
    int hardirq_context;
    unsigned long softirq_disable_ip;
    unsigned long softirq_enable_ip;
    unsigned int softirq_disable_event;
    unsigned int softirq_enable_event;
    int softirqs_enabled;
    int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
    u64 curr_chain_key;
    int lockdep_depth;
    unsigned int lockdep_recursion;
    struct held_lock held_locks[MAX_LOCK_DEPTH];
    gfp_t lockdep_reclaim_gfp;
#endif

/* journalling filesystem info */
    void *journal_info;

/* stacked block device info */
    struct bio_list *bio_list;

#ifdef CONFIG_BLOCK
/* stack plugging */
    struct blk_plug *plug;
#endif

/* VM state */
    struct reclaim_state *reclaim_state;

    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use.  */
    struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
    u64 acct_rss_mem1;  /* accumulated rss usage */
    u64 acct_vm_mem1;   /* accumulated virtual memory usage */
    cputime_t acct_timexpd; /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
    nodemask_t mems_allowed;    /* Protected by alloc_lock */
    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
    int cpuset_mem_spread_rotor;
    int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head pi_state_list;
    struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex perf_event_mutex;
    struct list_head perf_event_list;
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    short il_next;
    short pref_node_fork;
#endif
    struct rcu_head rcu;

    /*
     * cache last used pipe for splice
     */
    struct pipe_inode_info *splice_pipe;
#ifdef  CONFIG_TASK_DELAY_ACCT
    struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
    int make_it_fail;
#endif
    /*
     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
     * balance_dirty_pages() for some dirty throttling pause
     */
    int nr_dirtied;
    int nr_dirtied_pause;
    unsigned long dirty_paused_when; /* start of a write-and-pause period */

#ifdef CONFIG_LATENCYTOP
    int latency_record_count;
    struct latency_record latency_record[LT_SAVECOUNT];
#endif
    /*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;

    struct list_head    *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack */
    int curr_ret_stack;
    /* Stack of return addresses for return function tracing */
    struct ftrace_ret_stack *ret_stack;
    /* time stamp for last schedule */
    unsigned long long ftrace_timestamp;
    /*
     * Number of functions that haven't been traced
     * because of depth overrun.
     */
    atomic_t trace_overrun;
    /* Pause for the tracing */
    atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
    /* state flags for use by tracers */
    unsigned long trace;
    /* bitmask and counter of trace recursion */
    unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
    struct memcg_batch_info {
        int do_batch;   /* incremented when batch uncharge started */
        struct mem_cgroup *memcg; /* target memcg of uncharge */
        unsigned long nr_pages; /* uncharged usage */
        unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
    } memcg_batch;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
    atomic_t ptrace_bp_refcnt;
#endif
};