1. 程式人生 > >arm linux 系統呼叫過程

arm linux 系統呼叫過程

在Linux下系統呼叫是用軟中斷實現的,下面以一個簡單的open例子簡要分析一下應用層的open是如何呼叫到核心中的sys_open的。

t8.c

   1:  #include <stdio.h>
   2:  #include <sys/types.h>
   3:  #include <sys/stat.h>
   4:  #include <fcntl.h>
   5:   
   6:  int main(int argc, const char *argv[])
   7:  {
   8:      int fd;
   9:   
  10:      fd = open(".", O_RDWR);
  11:   
  12:      close(fd);
  13:      return 0;
  14:  }

這裡需要注意的是:open是C庫提供的庫函式,並不是系統呼叫,系統呼叫是在核心空間的,應用空間無法直接呼叫。在《Linux核心設計與實現》中說:要訪問系統呼叫(在Linux中常稱作syscall),通常通過C庫中定義的函式呼叫來進行。

將t8.c進行靜態編譯,然後反彙編,看一下是如何呼叫open的?

   1:  arm-linux-gcc t8.c --static
   2:  arm-linux-objdump -D a.out >a.dis

下面我們擷取a.dis中的一部分進行說明:

   1:  ......
   2:  00008228 <main>:
   3:      8228:   e92d4800    push    {fp, lr}
   4:      822c:   e28db004    add fp, sp, #4  ; 0x4
   5:      8230:   e24dd010    sub sp, sp, #16 ; 0x10
   6:      8234:   e50b0010    str r0, [fp, #-16]
   7:      8238:   e50b1014    str r1, [fp, #-20]
   8:      823c:   e59f0028    ldr r0, [pc, #40]   ; 826c <main+0x44>
   9:      8240:   e3a01002    mov r1, #2  ; 0x2   ;  #define  O_RDWR  00000002 
  10:      8244:   eb002e7d    bl  13c40 <__libc_open>
  11:      8248:   e1a03000    mov r3, r0
  12:      824c:   e50b3008    str r3, [fp, #-8]
  13:      8250:   e51b0008    ldr r0, [fp, #-8]
  14:      8254:   eb002e9d    bl  13cd0 <__libc_close>
  15:      8258:   e3a03000    mov r3, #0  ; 0x0
  16:      825c:   e1a00003    mov r0, r3
  17:      8260:   e24bd004    sub sp, fp, #4  ; 0x4
  18:      8264:   e8bd4800    pop {fp, lr}
  19:      8268:   e12fff1e    bx  lr
  20:      826c:   00064b8c    .word   0x00064b8c
  21:  ......
  22:  00013c40 <__libc_open>:
  23:     13c40:   e51fc028    ldr ip, [pc, #-40]  ; 13c20 <___fxstat64+0x50>
  24:     13c44:   e79fc00c    ldr ip, [pc, ip]
  25:     13c48:   e33c0000    teq ip, #0  ; 0x0
  26:     13c4c:   1a000006    bne 13c6c <__libc_open+0x2c>
  27:     13c50:   e1a0c007    mov ip, r7
  28:     13c54:   e3a07005    mov r7, #5  ; 0x5   

  #在arch/arm/include/asm/unistd.h中:#define __NR_open  (__NR_SYSCALL_BASE+5)
                    其中,在EABI下__NR_SYSCALL_BASE是0(而__NR_OABI_SYSCALL_BASE是0x900000,僅用於舊的OABI)

  29:     13c58:   ef000000    svc 0x00000000  #產生軟中斷

  30:     13c5c:   e1a0700c    mov r7, ip
  31:     13c60:   e3700a01    cmn r0, #4096   ; 0x1000
  32:     13c64:   312fff1e    bxcc    lr
  33:     13c68:   ea0008d4    b   15fc0 <__syscall_error>
  34:  ......

通過上面的程式碼註釋,可以看到,系統呼叫sys_open的系統呼叫號是5,將系統呼叫號存放到暫存器R7當中,然後應用程式通過svc 0x00000000產生軟中斷,陷入核心空間。

也許會好奇,ARM軟中斷不是用SWI嗎,這裡怎麼變成了SVC了,請看下面一段話,是從ARM官網copy的:

SVC

超級使用者呼叫。 
語法

SVC{cond} #immed

其中:

cond

    是一個可選的條件程式碼(請參閱條件執行)。 
immed

    是一個表示式,其取值為以下範圍內的一個整數:

        在 ARM 指令中為 0 到 $2^{24}-1$(24 位值)

        在 16 位 Thumb 指令中為 0-255(8 位值)。

用法

SVC 指令會引發一個異常。 這意味著處理器模式會更改為超級使用者模式,CPSR 會儲存到超級使用者模式 SPSR,並且執行會跳轉到 SVC 向量(請參閱《開發指南》中的第 6 章 處理處理器異常)。

處理器會忽略 immed。 但異常處理程式會獲取它,藉以確定所請求的服務。 
Note

作為 ARM 組合語言開發成果的一部分,SWI 指令已重新命名為 SVC。 在此版本的 RVCT 中,SWI 指令反彙編為 SVC,並提供註釋以指明這是以前的 SWI。 
條件標記

此指令不更改標記。 
體系結構

此 ARM 指令可用於所有版本的 ARM 體系結構。

在基於ARM的Linux中,異常向量表已經被放置在了0xFFFF0000這個位置。這個過程的完成:

start_kernel ---> setup_arch ---> early_trap_init

   1:  void __init early_trap_init(void)
   2:  {
   3:      unsigned long vectors = CONFIG_VECTORS_BASE;  // 就是0xFFFF0000
   4:      extern char __stubs_start[], __stubs_end[];
   5:      extern char __vectors_start[], __vectors_end[];
   6:      extern char __kuser_helper_start[], __kuser_helper_end[];
   7:      int kuser_sz = __kuser_helper_end - __kuser_helper_start;
   8:   
   9:      /*
  10:       * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
  11:       * into the vector page, mapped at 0xffff0000, and ensure these
  12:       * are visible to the instruction stream.
  13:       */
  14:      memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
  15:      memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
  16:      memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
  17:   
  18:      /*
  19:       * Copy signal return handlers into the vector page, and
  20:       * set sigreturn to be a pointer to these.
  21:       */
  22:      memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
  23:             sizeof(sigreturn_codes));
  24:   
  25:      flush_icache_range(vectors, vectors + PAGE_SIZE);
  26:      modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
  27:  }

關於上面這個函式的詳細解釋,參見:

把異常中斷向量表的位置設定為0xffff0000的話,需要修改協處理器CP15的暫存器C1的第13位,將其設定為1。以TQ2440提供的核心2.6.30.4為例看一下:

 arch/arm/kernel/head.S

   1:      adr    lr, __enable_mmu        @ return (PIC) address
   2:      add    pc, r10, #PROCINFO_INITFUNC
其中,PROCINFO_INITFUNC的值是16,r10的值是__arm920_proc_info的地址:

   1:  __arm920_proc_info:
   2:      .long    0x41009200
   3:      .long    0xff00fff0
   4:      .long   PMD_TYPE_SECT | \
   5:          PMD_SECT_BUFFERABLE | \
   6:          PMD_SECT_CACHEABLE | \
   7:          PMD_BIT4 | \
   8:          PMD_SECT_AP_WRITE | \
   9:          PMD_SECT_AP_READ
  10:      .long   PMD_TYPE_SECT | \
  11:          PMD_BIT4 | \
  12:          PMD_SECT_AP_WRITE | \
  13:          PMD_SECT_AP_READ
  14:      b    __arm920_setup
  15:      .long    cpu_arch_name
  16:      .long    cpu_elf_name
  17:       ......
  18:      .size    __arm920_proc_info, . - __arm920_proc_info
看一下__arm920_setup的實現(proc-arm920.S (arch\arm\mm)):

   1:      .type    __arm920_setup, #function
   2:  __arm920_setup:
   3:      mov    r0, #0
   4:      mcr    p15, 0, r0, c7, c7        @ invalidate I,D caches on v4
   5:      mcr    p15, 0, r0, c7, c10, 4        @ drain write buffer on v4
   6:  #ifdef CONFIG_MMU
   7:      mcr    p15, 0, r0, c8, c7        @ invalidate I,D TLBs on v4
   8:  #endif
   9:      adr    r5, arm920_crval
  10:      ldmia    r5, {r5, r6}            @ 參看一下下面的arm920_crval的實現,本句話執行完後r5和r6分別為:0x3f3f和0x3135
  11:      mrc    p15, 0, r0, c1, c0        @ get control register v4   獲取協處理器p15的暫存器c1
  12:      bic    r0, r0, r5
  13:      orr    r0, r0, r6            @ 我們只關注第13位,這裡將r0的第13位設定為了1
  14:      mov    pc, lr
  15:      .size    __arm920_setup, . - __arm920_setup
  16:   
  17:      /*
  18:       *  R
  19:       * .RVI ZFRS BLDP WCAM
  20:       * ..11 0001 ..11 0101
  21:       * 
  22:       */
  23:      .type    arm920_crval, #object
  24:  arm920_crval:
  25:      crval    clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130
在看一下crval的實現(proc-macros.S (arch\arm\mm)):

   1:      .macro    crval, clear, mmuset, ucset
   2:  #ifdef CONFIG_MMU
   3:      .word    \clear
   4:      .word    \mmuset
   5:  #else
   6:      .word    \clear
   7:      .word    \ucset
   8:  #endif
   9:      .endm
在__arm920_setup中執行完 mov pc, lr後,便跳入了下面的語句:

   1:  __enable_mmu:
   2:  #ifdef CONFIG_ALIGNMENT_TRAP
   3:      orr    r0, r0, #CR_A
   4:  #else
   5:      bic    r0, r0, #CR_A
   6:  #endif
   7:  #ifdef CONFIG_CPU_DCACHE_DISABLE
   8:      bic    r0, r0, #CR_C
   9:  #endif
  10:  #ifdef CONFIG_CPU_BPREDICT_DISABLE
  11:      bic    r0, r0, #CR_Z
  12:  #endif
  13:  #ifdef CONFIG_CPU_ICACHE_DISABLE
  14:      bic    r0, r0, #CR_I
  15:  #endif
  16:      mov    r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
  17:                domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
  18:                domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
  19:                domain_val(DOMAIN_IO, DOMAIN_CLIENT))
  20:      mcr    p15, 0, r5, c3, c0, 0        @ load domain access register
  21:      mcr    p15, 0, r4, c2, c0, 0        @ load page table pointer
  22:      b    __turn_mmu_on
  23:  ENDPROC(__enable_mmu)
看一下__turn_mmu_on的實現(head.S (arch\arm\kernel)):

   1:      .align    5
   2:  __turn_mmu_on:
   3:      mov    r0, r0
   4:      mcr    p15, 0, r0, c1, c0, 0        @ write control reg
   5:      mrc    p15, 0, r3, c0, c0, 0        @ read id reg
   6:      mov    r3, r3
   7:      mov    r3, r3
   8:      mov    pc, r13
   9:  ENDPROC(__turn_mmu_on)

在__turn_mmu_on中,將暫存器r0的值寫到了cp15協處理器的暫存器C1中。到這裡便完成了將異常中斷向量表的位置放到了0xffff0000.

說完異常向量表的位置,接下來看看軟中斷的實現。

ARM提供的中斷型別:

image

ARM的異常處理模型:

image

entry-armv.S (arch\arm\kernel)

   1:  .LCvswi:
   2:      .word    vector_swi
   3:   
   4:      .globl    __stubs_end
   5:  __stubs_end:
   6:   
   7:      .equ    stubs_offset, __vectors_start + 0x200 - __stubs_start
   8:   
   9:      .globl    __vectors_start
  10:  __vectors_start:
  11:      swi    SYS_ERROR0
  12:      b    vector_und + stubs_offset
  13:      ldr    pc, .LCvswi + stubs_offset     @發生軟中斷後先跳到這裡
  14:      b    vector_pabt + stubs_offset
  15:      b    vector_dabt + stubs_offset
  16:      b    vector_addrexcptn + stubs_offset
  17:      b    vector_irq + stubs_offset
  18:      b    vector_fiq + stubs_offset
  19:   
  20:      .globl    __vectors_end
  21:  __vectors_end:
  22:   
  23:      .data
  24:   
  25:      .globl    cr_alignment
  26:      .globl    cr_no_alignment
  27:  cr_alignment:
  28:      .space    4
  29:  cr_no_alignment:
  30:      .space    4
接下來看一下vector_swi的實現,根據實際的巨集定義進行了簡化

   1:  ENTRY(vector_swi)
   2:      sub    sp, sp, #S_FRAME_SIZE
   3:      stmia    sp, {r0 - r12}            @ Calling r0 - r12
   4:      add    r8, sp, #S_PC
   5:      stmdb    r8, {sp, lr}^            @ Calling sp, lr
   6:      mrs    r8, spsr            @ called from non-FIQ mode, so ok.
   7:      str    lr, [sp, #S_PC]            @ Save calling PC
   8:      str    r8, [sp, #S_PSR]        @ Save CPSR
   9:      str    r0, [sp, #S_OLD_R0]        @ Save OLD_R0
  10:      zero_fp
  11:   
  12:      /*
  13:       * Get the system call number.
  14:       */
  15:   
  16:      /*
  17:       * If we have CONFIG_OABI_COMPAT then we need to look at the swi
  18:       * value to determine if it is an EABI or an old ABI call.
  19:       */
  20:      ldr    r10, [lr, #-4]   

            @ get SWI instruction  r10中存放的就是引起軟中斷的那條指令的機器碼
              發生軟中斷的時候,系統自動將PC-4存放到了lr暫存器,由於是三級流水,
              並且是ARM狀態,還需要減4才能得到發生軟中斷的那條指令的機器碼所在的地址


  21:    A710(    and    ip, r10, #0x0f000000        @ check for SWI        )
  22:    A710(    teq    ip, #0x0f000000                        )
  23:    A710(    bne    .Larm710bug                        )
  24:   
  25:      ldr    ip, __cr_alignment
  26:      ldr    ip, [ip]
  27:      mcr    p15, 0, ip, c1, c0        @ update control register
  28:      enable_irq   @在發生中斷的時候,相應的中斷線在所有CPU上都會被遮蔽掉
  29:   
  30:      get_thread_info tsk            @ 參看下面的介紹
  31:      adr    tbl, sys_call_table        @ load syscall table pointer 此時tbl(r8)中存放的就是sys_call_table的起始地址
  32:      ldr    ip, [tsk, #TI_FLAGS]        @ check for syscall tracing
  33:   
  34:      /*
  35:       * If the swi argument is zero, this is an EABI call and we do nothing.
  36:       *
  37:       * If this is an old ABI call, get the syscall number into scno and
  38:       * get the old ABI syscall table address.
  39:       */
  40:      bics    r10, r10, #0xff000000
  41:      eorne    scno, r10, #__NR_OABI_SYSCALL_BASE
  42:      ldrne    tbl, =sys_oabi_call_table
  43:   
  44:      stmdb    sp!, {r4, r5}            @ push fifth and sixth args
  45:      tst    ip, #_TIF_SYSCALL_TRACE        @ are we tracing syscalls?
  46:      bne    __sys_trace
  47:   
  48:      cmp    scno, #NR_syscalls        @ check upper syscall limit
  49:      adr    lr, ret_fast_syscall        @ return address
  50:      ldrcc    pc, [tbl, scno, lsl #2]        @ call sys_* routine
  51:   
  52:      add    r1, sp, #S_OFF
  53:  2:    mov    why, #0                @ no longer a real syscall
  54:      cmp    scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE)
  55:      eor    r0, scno, #__NR_SYSCALL_BASE    @ put OS number back
  56:      bcs    arm_syscall
  57:      b    sys_ni_syscall            @ not private func
  58:  ENDPROC(vector_swi)
entry-common.S (arch\arm\kernel)

下面是entry-header.S (arch\arm\kernel)的部分內容:

   1:  /*
   2:   * These are the registers used in the syscall handler, and allow us to
   3:   * have in theory up to 7 arguments to a function - r0 to r6.
   4:   *
   5:   * r7 is reserved for the system call number for thumb mode.
   6:   *
   7:   * Note that tbl == why is intentional.
   8:   *
   9:   * We must set at least "tsk" and "why" when calling ret_with_reschedule.
  10:   */
  11:  scno    .req    r7        @ syscall number
  12:  tbl    .req    r8        @ syscall table pointer
  13:  why    .req    r8        @ Linux syscall (!= 0)
  14:  tsk    .req    r9        @ current thread_info

.req 是偽彙編,以 scno .req r7 為例,表示scno是暫存器r7的別名。

  • get_thread_info tsk

其中,tsk是暫存器r9的別名,get_thread_info是一個巨集定義,如下:

   1:      .macro    get_thread_info, rd
   2:      mov    \rd, sp, lsr #13
   3:      mov    \rd, \rd, lsl #13
   4:      .endm

即:將sp進行8KB對齊後的值賦給暫存器r9,什麼意思?

這個就涉及到Linux的核心棧了。Linux為每個程序都分配了一個8KB的核心棧,在核心棧的尾端存放有關於這個程序的struct thread_info結構:

   1:  struct thread_info {
   2:      unsigned long        flags;        /* low level flags */
   3:      int            preempt_count;    /* 0 => preemptable, <0 => bug */
   4:      mm_segment_t        addr_limit;    /* address limit */
   5:      struct task_struct    *task;        /* main task structure */
   6:      struct exec_domain    *exec_domain;    /* execution domain */
   7:      __u32            cpu;        /* cpu */
   8:      __u32            cpu_domain;    /* cpu domain */
   9:      struct cpu_context_save    cpu_context;    /* cpu context */
  10:      __u32            syscall;    /* syscall number */
  11:      __u8            used_cp[16];    /* thread used copro */
  12:      unsigned long        tp_value;
  13:      struct crunch_state    crunchstate;
  14:      union fp_state        fpstate __attribute__((aligned(8)));
  15:      union vfp_state        vfpstate;
  16:  #ifdef CONFIG_ARM_THUMBEE
  17:      unsigned long        thumbee_state;    /* ThumbEE Handler Base register */
  18:  #endif
  19:      struct restart_block    restart_block;
  20:  };

通過上面的操作,暫存器r9中就是這個程序的thread_info結構的起始地址。

  • sys_call_table
entry-common.S (arch\arm\kernel)

   1:      .type    sys_call_table, #object
   2:  ENTRY(sys_call_table)
   3:  #include "calls.S"
   4:  #undef ABI
   5:  #undef OBSOLETE

其中,calls.S的內容如下:

   1:  /*
   2:   *  linux/arch/arm/kernel/calls.S
   3:   *
   4:   *  Copyright (C) 1995-2005 Russell King
   5:   *
   6:   * This program is free software; you can redistribute it and/or modify
   7:   * it under the terms of the GNU General Public License version 2 as
   8:   * published by the Free Software Foundation.
   9:   *
  10:   *  This file is included thrice in entry-common.S
  11:   */
  12:  /* 0 */        CALL(sys_restart_syscall)
  13:          CALL(sys_exit)
  14:          CALL(sys_fork_wrapper)
  15:          CALL(sys_read)
  16:          CALL(sys_write)
  17:  /* 5 */        CALL(sys_open)
  18:          CALL(sys_close)
  19:          CALL(sys_ni_syscall)        /* was sys_waitpid */
  20:          CALL(sys_creat)
  21:          CALL(sys_link)
  22:  /* 10 */    CALL(sys_unlink)
  23:          CALL(sys_execve_wrapper)
  24:          CALL(sys_chdir)
  25:          CALL(OBSOLETE(sys_time))    /* used by libc4 */
  26:          CALL(sys_mknod)
  27:  ......
  28:  /* 355 */    CALL(sys_signalfd4)
  29:          CALL(sys_eventfd2)
  30:          CALL(sys_epoll_create1)
  31:          CALL(sys_dup3)
  32:          CALL(sys_pipe2)
  33:  /* 360 */    CALL(sys_inotify_init1)
  34:          CALL(sys_preadv)
  35:          CALL(sys_pwritev)
  36:  #ifndef syscalls_counted
  37:  .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
  38:  #define syscalls_counted
  39:  #endif
  40:  .rept syscalls_padding
  41:          CALL(sys_ni_syscall)
  42:  .endr

關於這個部分的更多介紹參見:

  • bics    r10, r10, #0xff000000

執行這個操作的時候,r10中存放的是SWI instruction,在我們的例子中就是(a.dis):

image

即:r10 為 0xEF000000

顯然,bics這條指令下面的兩個語句由於條件不成立,無法獲得執行。這條指令的作用是獲得系統呼叫號

可以參考這個手冊,看一下svc執行的格式:

image

可以看到,[23:0]存放的就是svc指令後面的那個立即數,也即系統呼叫號。

不過需要注意的是:我們這裡並沒有這樣做,我們的做法是(a.dis中可以看到):

image

使用的是svc 0,後面跟的並不是系統呼叫號,而是0,這裡把系統呼叫號存放在了暫存器r7中(a.dis中):

image

可以看到,由於使用的sys_open系統呼叫,所以把它的系統呼叫號5存放到了暫存器r7當中

  • ldrcc    pc, [tbl, scno, lsl #2]        @ call sys_* routine

這裡的scno就是暫存器r7的別名,它的值是sys_open的系統呼叫號5,由於在calls.S中每個系統呼叫標號佔用4個位元組,所以這裡將scno的值乘以4然後再加上tbl,tbl是系統呼叫表sys_call_table的基地址。然後就跳入開始執行sys_open了。

asmlinkage long sys_open(const char __user *filename,
                int flags, int mode);

那麼sys_open在哪呢?在核心原始碼中直接搜尋sys_open,無法搜到它的實現程式碼,實際上它是在fs/open.c中實現的:

   1:  SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
   2:  {
   3:      long ret;
   4:   
   5:      if (force_o_largefile())
   6:          flags |= O_LARGEFILE;
   7:   
   8:      ret = do_sys_open(AT_FDCWD, filename, flags, mode);
   9:      /* avoid REGPARM breakage on x86: */
  10:      asmlinkage_protect(3, ret, filename, flags, mode);
  11:      return ret;
  12:  }

其中SYSCALL_DEFINE3是一個巨集:

syscalls.h (include\linux)

#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)

SYSCALL_DEFINEx也是一個巨集:

syscalls.h (include\linux)

#define SYSCALL_DEFINEx(x, sname, ...)                \
    __SYSCALL_DEFINEx(x, sname, __VA_ARGS__)

__SYSCALL_DEFINEx仍然是個巨集:

syscalls.h (include\linux)

#define __SYSCALL_DEFINEx(x, name, ...)                    \
    asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__))

所以展開後的結果就是:

asmlinkage long sys_open(__SC_DECL3(__VA_ARGS__))

其中,__SC_DECL3定義如下:

syscalls.h (include\linux)

   1:  #define __SC_DECL1(t1, a1)    t1 a1
   2:  #define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__)
   3:  #define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__)

所以最終的結果如下:

   1:  asmlinkage long sys_open(const char __user *filename, int flags, int mode)
   2:  {
   3:      long ret;
   4:   
   5:      if (force_o_largefile())
   6:          flags |= O_LARGEFILE;
   7:   
   8:      ret = do_sys_open(AT_FDCWD, filename, flags, mode);
   9:      /* avoid REGPARM breakage on x86: */
  10:      asmlinkage_protect(3, ret, filename, flags, mode);
  11:      return ret;
  12:   
  13:  }

關於sys_open本身的實現這裡就不深入分析了。

接下來看一下返回。

  • adr    lr, ret_fast_syscall        @ return address

當sys_open中return後,便跳入ret_fast_syscall處開始執行:

   1:  /*
   2:   * This is the fast syscall return path.  We do as little as
   3:   * possible here, and this includes saving r0 back into the SVC
   4:   * stack.
   5:   */
   6:  ret_fast_syscall:
   7:   UNWIND(.fnstart    )
   8:   UNWIND(.cantunwind    )
   9:      disable_irq                @ disable interrupts
  10:      ldr    r1, [tsk, #TI_FLAGS]        @將thread_info中的flags成員存放到r1中
  11:      tst    r1, #_TIF_WORK_MASK
  12:      bne    fast_work_pending  
  13:   
  14:      /* perform architecture specific actions before user return */
  15:      arch_ret_to_user r1, lr
  16:   
  17:      @ fast_restore_user_regs
  18:      ldr    r1, [sp, #S_OFF + S_PSR]    @ get calling cpsr
  19:      ldr    lr, [sp, #S_OFF + S_PC]!    @ get pc
  20:      msr    spsr_cxsf, r1            @ save in spsr_svc
  21:      ldmdb    sp, {r1 - lr}^            @ get calling r1 - lr
  22:      mov    r0, r0
  23:      add    sp, sp, #S_FRAME_SIZE - S_PC
  24:      movs    pc, lr                @ return & move spsr_svc into cpsr
  25:   UNWIND(.fnend        )
  26:   
  27:  /*
  28:   * Ok, we need to do extra processing, enter the slow path.
  29:   */
  30:  fast_work_pending:
  31:      str    r0, [sp, #S_R0+S_OFF]!        @ returned r0
  32:  work_pending:
  33:      tst    r1, #_TIF_NEED_RESCHED       @判斷是否需要進行程序排程
  34:      bne    work_resched
  35:      tst    r1, #_TIF_SIGPENDING
  36:      beq    no_work_pending
  37:      mov    r0, sp                @ 'regs'
  38:      mov    r2, why                @ 'syscall'
  39:      bl    do_notify_resume
  40:      b    ret_slow_syscall        @ Check work again
  41:   
  42:  work_resched:
  43:      bl    schedule
  44:  /*
  45:   * "slow" syscall return path.  "why" tells us if this was a real syscall.
  46:   */
  47:  ENTRY(ret_to_user)
  48:  ret_slow_syscall:
  49:      disable_irq                @ disable interrupts
  50:      ldr    r1, [tsk, #TI_FLAGS]
  51:      tst    r1, #_TIF_WORK_MASK
  52:      bne    work_pending
  53:  no_work_pending:
  54:      /* perform architecture specific actions before user return */
  55:      arch_ret_to_user r1, lr
  56:   
  57:      @ slow_restore_user_regs
  58:      ldr    r1, [sp, #S_PSR]        @ get calling cpsr
  59:      ldr    lr, [sp, #S_PC]!        @ get pc
  60:      msr    spsr_cxsf, r1            @ save in spsr_svc
  61:      ldmdb    sp, {r0 - lr}^            @ get calling r0 - lr
  62:      mov    r0, r0
  63:      add    sp, sp, #S_FRAME_SIZE - S_PC
  64:      movs    pc, lr                @ return & move spsr_svc into cpsr
  65:  ENDPROC(ret_to_user)

在返回的時候要看是否要進行程序排程。

image

先分析到這裡。