1. 程式人生 > >Lua虛擬機器之語法分析(二)

Lua虛擬機器之語法分析(二)

      最近實在是忙,專案就快要釋出了,加班加點在所難免。繼上一篇關於lua基本資料型別的簡單分析後,我將繼續寫閱讀lua虛擬機器的原始碼筆記,這是第一次接觸虛擬機器工作原理,lua虛擬機器程式碼量不大,是個很好的學習的例子,應當堅持下去。關於語法分析,我覺得只需要弄清楚從哪分析,怎麼分析以及最終的生成結果就差不多了。

      當通過呼叫lstate.c:lua_newstate()方法生成一個新的lua_State時,其內部又會呼叫llex.c:luaX_init方法,該方法的作用是生成lua的關鍵字(保留字)到global_State的字串表中(stringtable,以雜湊表的形式儲存),關於lua的保留字,在llex.c中可看到下面這段:

/* ORDER RESERVED */
/*
** Printable spelling of every lexer token, grouped as: reserved words,
** multi-character operators, and finally the placeholder names shown
** for the end-of-stream, number, name and string tokens.
** NOTE(review): the marker above suggests the order must stay in sync
** with an enumeration declared elsewhere -- confirm before reordering.
*/
static const char *const luaX_tokens [] = {
    /* reserved words */
    "and", "break", "do", "else", "elseif", "end",
    "false", "for", "function", "goto", "if", "in",
    "local", "nil", "not", "or", "repeat", "return",
    "then", "true", "until", "while",
    /* multi-character operators */
    "..", "...", "==", ">=", "<=", "~=", "::",
    /* placeholder names for the remaining tokens */
    "<eof>", "<number>", "<name>", "<string>"
};
      而真正讓虛擬機器開始分析原始碼,則是通過lapi.c:lua_load函式,lauxlib.c中則封裝了對lua_load的呼叫,其中包括從檔案載入原始碼的lauxlib.c:luaL_loadfilex,以及從記憶體載入原始碼的lauxlib.c:luaL_loadbufferx。在lua_load中,除了必要的初始化,比如生成一個ZIO物件(流物件)負責處理檔案或字串的輸入,還呼叫了ldo.c:luaD_protectedparser函式,而在這個函式中,又最終呼叫了lparser.c:luaY_parser函式,好了,虛擬機器開始進入原始碼的分析階段了,程式碼如下:
/*
** Entry point of the parser: compile the chunk read through 'z' into a
** new main closure and return it. The closure is pushed on the Lua stack
** before any further allocation and is still there on return.
** 'name' becomes the 'source' string of the main prototype; 'buff', 'dyd'
** and 'firstchar' are handed to the lexer state / luaX_setinput.
*/
Closure *luaY_parser (lua_State *L, ZIO *z, Mbuffer *buff,
                      Dyndata *dyd, const char *name, int firstchar) {
  LexState lexstate; /* lexer state: reading tokens, advancing through the input, etc. */
  FuncState funcstate; /* function state; one exists for each input chunk (a source file or a source string) */
  Closure *cl = luaF_newLclosure(L, 1);  /* create main closure */
  /* anchor closure (to avoid being collected) */
  setclLvalue(L, L->top, cl);
  incr_top(L);
  /* the new prototype is shared by the closure and the function state */
  funcstate.f = cl->l.p = luaF_newproto(L);
  funcstate.f->source = luaS_new(L, name);  /* create and anchor TString */
  lexstate.buff = buff;
  lexstate.dyd = dyd;
  /* start with empty 'actvar', 'gt' and 'label' lists in 'dyd' */
  dyd->actvar.n = dyd->gt.n = dyd->label.n = 0;
  luaX_setinput(L, &lexstate, z, funcstate.f->source, firstchar);
  mainfunc(&lexstate, &funcstate);
  /* after parsing: no enclosing function, exactly one upvalue, no open function state */
  lua_assert(!funcstate.prev && funcstate.nups == 1 && !lexstate.fs);
  /* all scopes should be correctly finished */
  lua_assert(dyd->actvar.n == 0 && dyd->gt.n == 0 && dyd->label.n == 0);
  return cl;  /* it's on the stack too: the returned closure was stored at L->top above */
}
      關於LexState物件,其責任就是記錄當前的行號、符號、期望的下一個符號、讀取字串或者數字,當然還有很多其他的資訊,具體可以參看llex.h中的定義。而FuncState中,則儲存了函式的原型(Proto),包含的元素,所在的塊,下一個位元組碼的地址,區域性變數個數等,具體定義如下:(lparser.h)
/*
** State needed to generate code for a given function.
** Each function being compiled has its own FuncState; 'prev' links it to
** the state of the enclosing function and 'f' is the prototype being
** filled in.
*/
typedef struct FuncState {
  Proto *f;  /* current function header */
  Table *h;  /* table to find (and reuse) elements in `k' */
  struct FuncState *prev;  /* enclosing function */
  struct LexState *ls;  /* lexical state */
  struct BlockCnt *bl;  /* chain of current blocks */
  int pc;  /* next position to code (equivalent to `ncode') */
  int lasttarget;   /* 'label' of last 'jump label' */
  int jpc;  /* list of pending jumps to `pc' */
  int nk;  /* number of elements in `k' */
  int np;  /* number of elements in `p' */
  int firstlocal;  /* index of first local var (in Dyndata array) */
  short nlocvars;  /* number of elements in 'f->locvars' */
  lu_byte nactvar;  /* number of active local variables */
  lu_byte nups;  /* number of upvalues */
  lu_byte freereg;  /* first free register; statement() resets it to 'nactvar' after each statement */
} FuncState;
      還有幾個比較重要的結構,分別是閉包與原型,定義分別如下:
/*
** Function Prototypes
** Holds what the parser produces for one function: constants, bytecode,
** nested prototypes, upvalue descriptions and debug information.
*/
typedef struct Proto {
  CommonHeader;
  TValue *k;  /* constants used by the function */
  Instruction *code;  /* bytecode array */
  struct Proto **p;  /* functions defined inside the function */
  int *lineinfo;  /* map from opcodes to source lines (debug information) */
  LocVar *locvars;  /* information about local variables (debug information) */
  Upvaldesc *upvalues;  /* upvalue information */
  union Closure *cache;  /* last created closure with this prototype */
  TString  *source;  /* used for debug information */
  int sizeupvalues;  /* size of 'upvalues' */
  int sizek;  /* size of `k' */
  int sizecode;  /* size of the bytecode array */
  int sizelineinfo;  /* NOTE(review): presumably size of 'lineinfo', by analogy with the other size fields -- confirm */
  int sizep;  /* size of `p' */
  int sizelocvars;  /* number of local variables; NOTE(review): likely the allocated size of 'locvars' -- confirm */
  int linedefined;  /* NOTE(review): presumably the source line where the definition starts -- confirm */
  int lastlinedefined;  /* NOTE(review): presumably the source line where the definition ends -- confirm */
  GCObject *gclist;
  lu_byte numparams;  /* number of fixed parameters */
  lu_byte is_vararg;  /* tested by simpleexp() before accepting '...' inside the function */
  lu_byte maxstacksize;  /* maximum stack used by this function */
} Proto;

/*
** Lua Upvalues
** An upvalue is either open or closed. 'v' always gives access to the
** current value: it points to the stack or to the upvalue's own 'u.value'.
** The union 'u' stores the value itself when closed, or the links of a
** doubly linked list when open.
*/
typedef struct UpVal {
  CommonHeader;
  TValue *v;  /* points to stack or to its own value */
  union {
    TValue value;  /* the value (when closed) */
    struct {  /* double linked list (when open) */
      struct UpVal *prev;
      struct UpVal *next;
    } l;
  } u;
} UpVal;

/*
** Closures
*/

/*
** Leading fields shared by CClosure and LClosure: the common header,
** the upvalue count 'nupvalues', and a 'gclist' link.
*/
#define ClosureHeader \
	CommonHeader; lu_byte nupvalues; GCObject *gclist

/*
** Closure over a C function: 'f' is the function pointer and the
** upvalues are stored inline as TValues.
** NOTE(review): 'upvalue' is declared with one slot; presumably the
** object is allocated with room for 'nupvalues' entries -- confirm
** against the allocation code.
*/
typedef struct CClosure {
  ClosureHeader;
  lua_CFunction f;
  TValue upvalue[1];  /* list of upvalues */
} CClosure;

/*
** Closure over a Lua function: 'p' is the function prototype and the
** upvalues are held as pointers to UpVal objects.
** NOTE(review): 'upvals' is declared with one slot; presumably the
** object is allocated with room for 'nupvalues' entries -- confirm
** against the allocation code.
*/
typedef struct LClosure {
  ClosureHeader;
  struct Proto *p;
  UpVal *upvals[1];  /* list of upvalues */
} LClosure;

/*
** A closure of either kind. Both members begin with ClosureHeader;
** luaY_parser reaches the Lua variant through the 'l' member.
*/
typedef union Closure {
  CClosure c;  /* C-function closure */
  LClosure l;  /* Lua-function closure */
} Closure;
      UpValue可以簡單地理解為在外層函式中定義、卻被內層函式引用的區域性變數,如下所示,對內層的匿名函式而言,a就是upvalue:
-- f returns an anonymous function that reads f's own local variable `a`.
function f()
    local a = 0;  -- local variable of f
    return function() return a+1 end;  -- `a` is used here, outside the function that declares it
end
      還有個概念需要稍微關注,那就是block,直接翻譯成程式碼塊好了,什麼是一個程式碼塊以及怎麼表示一個程式碼塊,如下:
if   xxxx  then 
   abcd.... -- 這部分可以算程式碼塊
end

/*
** Definition of a block.
** Blocks form a chain through 'previous'; FuncState.bl points to the
** current one.
*/
typedef struct BlockCnt {
  struct BlockCnt *previous;  /* chain */
  short firstlabel;  /* index of first label in this block */
  short firstgoto;  /* index of first pending goto in this block */
  lu_byte nactvar;  /* # active locals outside the block */
  lu_byte upval;  /* true if some variable in the block is an upvalue */
  lu_byte isloop;  /* true if `block' is a loop */
} BlockCnt;
      閱讀lparser.c時,還發現有兩種東西需要去理解,一個稱為exp即表示式,一個是statement即語句。表示式可能是簡單的true或false,也可能是-a、not a這種帶一元操作符的,還有可能是a+b這種,也還有形如{a=x,b=x}被稱為構造(constructor)的表示式,複雜的表示式,也是由簡單的表示式所組成的,以下是lua中簡單表示式的定義:
/*
** Parse a simple expression and describe it in 'v'.
**
** simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
**              constructor | FUNCTION body | suffixedexp
**
** Alternatives made of a single token fill in 'v' and leave the switch,
** so the token is skipped once at the bottom. The other alternatives
** hand off to their own parsing routine and return directly, without
** the trailing luaX_next.
*/
static void simpleexp (LexState *ls, expdesc *v) {
  switch (ls->t.token) {
    case '{':  /* constructor */
      constructor(ls, v);
      return;
    case TK_FUNCTION:  /* FUNCTION body */
      luaX_next(ls);  /* skip FUNCTION */
      body(ls, v, 0, ls->linenumber);
      return;
    case TK_NIL:
      init_exp(v, VNIL, 0);
      break;
    case TK_TRUE:
      init_exp(v, VTRUE, 0);
      break;
    case TK_FALSE:
      init_exp(v, VFALSE, 0);
      break;
    case TK_NUMBER:
      init_exp(v, VKNUM, 0);
      v->u.nval = ls->t.seminfo.r;  /* numeric value read by the lexer */
      break;
    case TK_STRING:
      codestring(ls, v, ls->t.seminfo.ts);
      break;
    case TK_DOTS: {  /* vararg expression '...' (not a dot operator) */
      FuncState *func = ls->fs;
      /* '...' is accepted only when the current prototype is vararg */
      check_condition(ls, func->f->is_vararg,
                      "cannot use " LUA_QL("...") " outside a vararg function");
      init_exp(v, VVARARG, luaK_codeABC(func, OP_VARARG, 0, 1, 0));
      break;
    }
    default:  /* every other token is handed to suffixedexp */
      suffixedexp(ls, v);
      return;
  }
  luaX_next(ls);  /* skip the single token handled above */
}
      而statement則可以看lparser.c:statement()函式,通過該函式反過來又可以對lua的語法有個大概的瞭解與印象加深,如下:
/*
** Parse one statement, chosen by the current token. Keyword-introduced
** statements go to their dedicated routine; any other token is handled
** by exprstat (function call or assignment).
** After the statement, the first free register is reset to the number
** of active local variables.
*/
static void statement (LexState *ls) {
  int line = ls->linenumber;  /* line of the statement's first token; may be needed for error messages */
  enterlevel(ls);
  switch (ls->t.token) {
    /* statements whose routine receives the starting line */
    case TK_IF:  /* stat -> ifstat */
      ifstat(ls, line);
      break;
    case TK_WHILE:  /* stat -> whilestat */
      whilestat(ls, line);
      break;
    case TK_FOR:  /* stat -> forstat */
      forstat(ls, line);
      break;
    case TK_REPEAT:  /* stat -> repeatstat */
      repeatstat(ls, line);
      break;
    case TK_FUNCTION:  /* stat -> funcstat */
      funcstat(ls, line);
      break;

    /* statements whose leading token is skipped here */
    case ';':  /* stat -> ';' (empty statement) */
      luaX_next(ls);
      break;
    case TK_DO:  /* stat -> DO block END */
      luaX_next(ls);
      block(ls);
      check_match(ls, TK_END, TK_DO, line);
      break;
    case TK_LOCAL:  /* stat -> localstat */
      luaX_next(ls);
      if (!testnext(ls, TK_FUNCTION))
        localstat(ls);
      else  /* local function */
        localfunc(ls);
      break;
    case TK_DBCOLON:  /* stat -> label */
      luaX_next(ls);
      labelstat(ls, str_checkname(ls), line);
      break;
    case TK_RETURN:  /* stat -> retstat */
      luaX_next(ls);
      retstat(ls);
      break;

    case TK_BREAK:  /* stat -> breakstat; shares the goto code below */
    case TK_GOTO:  /* stat -> 'goto' NAME */
      gotostat(ls, luaK_jump(ls->fs));
      break;

    default:  /* stat -> func | assignment */
      exprstat(ls);
      break;
  }
  lua_assert(ls->fs->f->maxstacksize >= ls->fs->freereg &&
             ls->fs->freereg >= ls->fs->nactvar);
  ls->fs->freereg = ls->fs->nactvar;  /* release temporaries: only active locals keep registers */
  leavelevel(ls);
}
      exp也好,statement也好,這種程式碼的組織形式是非常容易掌握的,就不再對case裡的各種exp或者stat進一步分析了,可能還忽略了一點,那就是luaX_next是怎麼保證工作正確的呢,如下:llex.c
/*
** Advance to the next token, leaving it in 'ls->t'.
** 'lastline' first records the line number of the token being left.
** A look-ahead token is stored in 'ls->lookahead', with TK_EOS meaning
** "none": when one is stored it becomes the current token and the slot
** is cleared; otherwise a new token is read with llex.
*/
void luaX_next (LexState *ls) {
  ls->lastline = ls->linenumber;
  if (ls->lookahead.token == TK_EOS) {  /* no look-ahead token? */
    ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
    return;
  }
  ls->t = ls->lookahead;  /* use the look-ahead token */
  ls->lookahead.token = TK_EOS;  /* and discharge it */
}
      llex.c:llex()函式就不再列出了,一個大迴圈加一個大switch,閱讀沒什麼難度。還有最後一個問題,這樣分析的最後儲存的結果是什麼,其實在lparser.c:luaY_parser函式中就已經知道了:FuncState只是分析過程中使用的臨時狀態,表示式與語句被翻成常量、變數資訊與位元組碼,存入FuncState.f所指向的Proto中,而這個Proto又掛在luaY_parser最終返回的Closure上。