1. 程式人生 > >lua5.3.1 原始碼閱讀記錄(基礎)

lua5.3.1 原始碼閱讀記錄(基礎)

通用資料結構:Tvalue

/*
** Union of all Lua values: the payload shared by every kind of Lua
** object. The union itself does not record which member is in use;
** that is the job of the tag stored next to it in a TValue (below).
*/
typedef union Value Value;  /* lets 'Value' be used as a plain type name */

union Value {
  GCObject *gc;    /* collectable objects */
  void *p;         /* light userdata */
  int b;           /* booleans */
  lua_CFunction f; /* light C functions */
  lua_Integer i;   /* integer numbers */
  lua_Number n;    /* float numbers */
};

/*
** Tagged Values. This is the basic representation of values in Lua.
** The macro expands to two fields: the Value union above plus an
** integer type tag; the two together make up one Lua value.
*/
#define TValuefields	Value value_; int tt_

struct lua_TValue {
  TValuefields;
};

typedef struct lua_TValue TValue;

從上面的定義可以看出, TValue是lua中的基本資料結構;

基本型別定義:basic types

下面是最基本的型別定義巨集:

/*
** basic types
** Integer tags, one per basic Lua type; LUA_TNONE is the only
** negative one.
*/
#define LUA_TNONE		(-1)

#define LUA_TNIL		0
#define LUA_TBOOLEAN		1
#define LUA_TLIGHTUSERDATA	2
#define LUA_TNUMBER		3
#define LUA_TSTRING		4
#define LUA_TTABLE		5
#define LUA_TFUNCTION		6
#define LUA_TUSERDATA		7
#define LUA_TTHREAD		8

函式在lua中是第一類值(first-class value), 其型別變體的定義如下:
型別標籤的低4位(bit 0~3)代表基本型別, bit 4以上代表該型別的變體

/*
** LUA_TFUNCTION variants:
** 0 - Lua function
** 1 - light C function
** 2 - regular C function (closure)
*/

/*
** Variant tags for functions: the variant number is shifted left by
** four bits and OR-ed with the basic tag LUA_TFUNCTION.
*/
#define LUA_TLCL	(LUA_TFUNCTION | (0 << 4))  /* Lua closure */
#define LUA_TLCF	(LUA_TFUNCTION | (1 << 4))  /* light C function */
#define LUA_TCCL	(LUA_TFUNCTION | (2 << 4))  /* C closure */

Table資料結構

lua的雜湊表有一個高效的實現, 幾乎可以認為操作雜湊表的時間複雜度為常數;下面是lua原始碼中對table的介紹:

/*
** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep its elements in two parts: an array part and a hash part.
** Non-negative integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that
** more than half the slots between 1 and n are in use.
** Hash uses a mix of chained scatter table with Brent's variation.
** A main invariant of these tables is that, if an element is not
** in its main position (i.e. the 'original' position that its hash gives
** to it), then the colliding element is in its own main position.
** Hence even when the load factor reaches 100%, performance remains good.
*/

下面就是其論文“The Implementation of Lua 5.0”中給出的table結構示意圖:
這裡寫圖片描述

總體意思就是: 以整數為鍵的pair優先儲存在陣列部分, table會根據內容自動並且動態地對這兩部分進行適當的分配, 圖中以string為鍵的pair則儲存在hash部分;

table的資料結構如下:

/*
** Key stored in a node of the hash part. The union overlays two views
** of the same storage: 'tvk' exposes the key as a plain TValue, while
** 'nk' starts with the same TValuefields and adds the chaining field.
*/
typedef union TKey {
  struct {
    TValuefields;
    int next;  /* for chaining (offset for next node) */
  } nk;
  TValue tvk;
} TKey;

/* One entry of the hash part: a value paired with its key. */
typedef struct Node {
  TValue i_val;  /* value stored in this entry */
  TKey i_key;  /* key of this entry (a TKey, so it also holds the chain offset) */
} Node;
/*
** A table has two parts: the hash part 'node' (its size is kept as a
** base-2 logarithm in 'lsizenode') and the array part 'array'
** ('sizearray' slots).
*/
typedef struct Table {
  CommonHeader;  /* common header */
  lu_byte flags;  /* 1<<p means tagmethod(p) is not present */
  lu_byte lsizenode;  /* log2 of size of 'node' array */
  unsigned int sizearray;  /* size of 'array' array */
  TValue *array;  /* array part */
  Node *node;  /* hash part */
  Node *lastfree;  /* any free position is before this position */
  struct Table *metatable;
  GCObject *gclist;
} Table;

table讀取

論文說明:
這裡寫圖片描述
即非負整數鍵都有可能儲存在array部分, hash部分則混合使用了chained scatter table(鏈狀發散表)和Brent變體(Brent's variation); (鏈狀發散表是指衝突的節點透過next偏移串成一條鏈, 鏈上的節點本身也存放在node陣列中, 同一條鏈上的鍵具有相同的main position)
表讀取函式如下, 會根據具體不同的型別呼叫不同的雜湊查詢方法,比如int則是優先在array中查詢:

/*
** Main search function: looks 'key' up in table 't'.
** Short strings and integers are handed to their specialized searches,
** a nil key yields the nil object at once, and a float key that has an
** exact integer value is searched as that integer. Every other key is
** searched for along the collision chain that starts at its main
** position. Returns the slot holding the value, or luaO_nilobject when
** the key is absent.
*/
const TValue *luaH_get (Table *t, const TValue *key) {
  Node *node;
  switch (ttype(key)) {
    case LUA_TSHRSTR:
      return luaH_getstr(t, tsvalue(key));
    case LUA_TNUMINT:
      return luaH_getint(t, ivalue(key));
    case LUA_TNIL:
      return luaO_nilobject;
    case LUA_TNUMFLT: {
      lua_Integer asint;
      if (luaV_tointeger(key, &asint, 0))  /* float with integer value? */
        return luaH_getint(t, asint);  /* use specialized version */
      break;  /* otherwise do the generic search below */
    }
    default:
      break;
  }
  /* generic search: walk the chain starting at the main position */
  node = mainposition(t, key);
  while (!luaV_rawequalobj(gkey(node), key)) {
    int step = gnext(node);
    if (step == 0)  /* end of chain: key is not in the table */
      return luaO_nilobject;
    node += step;
  }
  return gval(node);  /* that's it */
}

這裡有分short string, int, nil, double幾種查詢, 如下面是short string的查詢(注意: 此函式在5.3.1中名為luaH_getstr, 也就是上面luaH_get所呼叫的函式; 自5.3.2起才改名為luaH_getshortstr):


/*
** Search function for short strings: starts at the node selected by
** hashstr and follows the collision chain until a short-string key
** equal to 'key' is found. Returns the slot holding its value, or
** luaO_nilobject when the chain ends without a match.
*/
const TValue *luaH_getshortstr (Table *t, TString *key) {
  Node *node = hashstr(t, key);  /* head of the chain for this key */
  lua_assert(key->tt == LUA_TSHRSTR);
  for (;;) {
    const TValue *nodekey = gkey(node);
    int step;
    if (ttisshrstring(nodekey) && eqshrstr(tsvalue(nodekey), key))
      return gval(node);  /* found */
    step = gnext(node);
    if (step == 0)  /* no more nodes in the chain */
      return luaO_nilobject;
    node += step;
  }
}

下面是int的獲取方式, 可以看出, 當超出陣列範圍時就會查詢hash表:

/*
** Search function for integers.
** Keys in the range [1, t->sizearray] are read directly from the array
** part; any other key is searched for along the collision chain that
** starts at hashint(t, key). Returns the slot holding the value, or
** luaO_nilobject when the key is absent.
**
** The range test is done in unsigned arithmetic: the key is converted
** first and 1 is subtracted afterwards, so a key <= 0 wraps to a huge
** value and fails the test. Computing 'key - 1' as a signed value
** instead would overflow (undefined behavior) when 'key' is the
** minimum lua_Integer. (Assumes l_castS2U is the plain
** signed-to-unsigned cast -- confirm against llimits.h.)
*/
const TValue *luaH_getint (Table *t, lua_Integer key) {
  /* (1 <= key && key <= t->sizearray) */
  if (l_castS2U(key) - 1 < t->sizearray)
    return &t->array[key - 1];  /* safe: here key >= 1 */
  else {
    Node *n = hashint(t, key);
    for (;;) {  /* check whether 'key' is somewhere in the chain */
      if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
        return gval(n);  /* that's it */
      else {
        int nx = gnext(n);  /* relative offset to the next node */
        if (nx == 0) break;  /* end of chain */
        n += nx;
      }
    }
    return luaO_nilobject;
  }
}

在論文中經常提到main position, 這個是指鍵經過雜湊後在node陣列中對應的位置, 也就是該鍵所在衝突鏈的頭節點;

/*
** returns the 'main' position of an element in a table (that is, the index
** of its hash value)
** NOTE(review): the body is elided in this excerpt, so only the
** signature is documented here.
*/
static Node *mainposition (const Table *t, const TValue *key) {
/*...*/
}

table寫入

/*
** Returns a writable slot for 'key' in table 't': the existing slot
** when the key is already present, otherwise a slot freshly created
** by luaH_newkey.
** beware: when using this function you probably need to check a GC
** barrier and invalidate the TM cache.
*/
TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
  const TValue *slot = luaH_get(t, key);
  if (slot == luaO_nilobject)  /* key not present yet? */
    return luaH_newkey(L, t, key);
  return cast(TValue *, slot);
}

重點在luaH_newkey函式裡,

/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
** In other words: when the main position is taken, look at the node
** that occupies it. If that node is not in its own main position, it
** is moved to a free slot and the new key takes the main position; if
** it is in its own main position, the new key goes to the free slot.
** A nil key or a NaN key raises a runtime error.
** NOTE(review): the code that performs the move/insert is elided from
** this excerpt (see the marker near the end of the body).
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
  Node *mp;
  TValue aux;
  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Integer k;
    if (luaV_tointeger(key, &k, 0)) {  /* index is int? (float with an integer value becomes an integer key) */
      setivalue(&aux, k);
      key = &aux;  /* insert it as an integer */
    }
    else if (luai_numisnan(fltvalue(key)))
      luaG_runerror(L, "table index is NaN");
  }
  mp = mainposition(t, key);
  if (!ttisnil(gval(mp)) || isdummy(mp)) {  /* main position is taken? */
    Node *othern;
    Node *f = getfreepos(t);  /* get a free place; presumably located through the table's 'lastfree' field (getfreepos is not shown here) */
    if (f == NULL) {  /* cannot find a free place? */
      rehash(L, t, key);  /* grow table (rehash recounts the keys and resizes both parts) */
      /* whatever called 'newkey' takes care of TM cache and GC barrier */
      return luaH_set(L, t, key);  /* insert key into grown table */
    }
    lua_assert(!isdummy(f));
    othern = mainposition(t, gkey(mp));  /* main position of the node currently occupying 'mp' */
    /*.......*/
}

看rehash過程:

/*
** Recomputes the sizes of both parts of table 't' so that the extra
** key 'ek' can be inserted, then resizes the table.
** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
** Integer keys are counted per power-of-two slice (array part first,
** then hash part, then the extra key). computesizes picks the array
** size from those counts and updates the number of keys that will go
** to the array; every remaining key is given to the hash part.
*/
static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned int nums[MAXABITS + 1];  /* per-slice counters */
  unsigned int intkeys;  /* number of keys that are array candidates */
  unsigned int arraysize;  /* optimal size for array part */
  int total;  /* total number of keys, including the extra one */
  int slice;
  for (slice = MAXABITS; slice >= 0; slice--)
    nums[slice] = 0;  /* reset counts */
  intkeys = numusearray(t, nums);  /* count keys in array part */
  total = intkeys;  /* all those keys are integer keys */
  total += numusehash(t, nums, &intkeys);  /* count keys in hash part */
  intkeys += countint(ek, nums);  /* count extra key */
  total += 1;
  arraysize = computesizes(nums, &intkeys);  /* new size for array part */
  luaH_resize(L, t, arraysize, total - intkeys);  /* apply the new sizes */
}

// 注: rehash會根據統計結果重新計算兩部分的大小, 所以陣列部分和hash部分都有可能增大或減小

TString

短字串(即內部化字串)是存放於全域性hash表裡的, 存放時也可能會需要將雜湊表擴大; 長字串則不做內部化;

/*
** Header for string value; string bytes follow the end of this structure
** (aligned according to 'UTString'; see next).
** This is only the header: the character data comes right after it.
*/
typedef struct TString {
  CommonHeader;
  lu_byte extra;  /* reserved words for short strings; "has hash" for longs */
  lu_byte shrlen;  /* length for short strings */
  unsigned int hash;
  union {  /* which member applies depends on short vs. long string */
    size_t lnglen;  /* length for long strings */
    struct TString *hnext;  /* linked list for hash table */
  } u;
} TString;

UserData

儲存形式上和字串相同(資料緊跟在頭部之後), 但不需要以'\0'結尾

/*
** Header for userdata; memory area follows the end of this structure
** (aligned according to 'UUdata'; see next).
** The user value is stored split in two fields: its payload in
** 'user_' and its type tag in 'ttuv_'.
*/
typedef struct Udata {
  CommonHeader;
  lu_byte ttuv_;  /* user value's tag */
  struct Table *metatable;
  size_t len;  /* number of bytes */
  union Value user_;  /* user value */
} Udata;

棧和呼叫鏈

lua執行緒資料結構如下, 每個執行緒裡都有一個指向全域性的共享lua狀態:

/*
** 'per thread' state
** (some members are elided in this excerpt)
*/
struct lua_State {
  CommonHeader;
  lu_byte status;
  StkId top;  /* first free slot in the stack */
  global_State *l_G;   /* global state shared by all threads (the actual Lua VM) */
 /**....**/
  StkId stack_last;  /* last free slot in the stack */
  StkId stack;  /* stack base */
  UpVal *openupval;  /* list of open upvalues in this stack */
  GCObject *gclist;    /* used by the garbage collector */
 /**....**/
};

所有的lua C API都是圍繞lua_State這個狀態機來改變狀態的, 並且各自獨立地在執行緒的棧裡操作;
而全域性共享的真正虛擬機器是如下說明的:

/*
** 'global state', shared by all threads of this state
** (the members are elided in this excerpt; the typedef name is what
** lets other declarations refer to the type as plain 'global_State')
*/
typedef struct global_State {
/**.....**/
} global_State;

狀態機的棧資訊資料結構StkId
看到下面的定義可以知道, StkId是指向TValue的指標(棧中的每個元素都是一個TValue); TValue是由TValuefields巨集定義的結構, 該結構包含Value value_; int tt_ 兩部分, value_是聯合值(Value型別), tt_則說明聯合物件的型別; 由上面Value的結構可知, 它是一個由{垃圾回收物件指標; void*的light userdata; booleans; light C functions; integer; number;} 這些型別組成的聯合型別, 所以需要一個tt_來說明當前的TValue到底是什麼型別;

typedef TValue *StkId;  /* index to stack elements */

#define BASIC_STACK_SIZE        (2*LUA_MINSTACK)  /* number of slots allocated for a new stack */

/* minimum Lua stack available to a C function */
 #define LUA_MINSTACK   20

棧的初始化

資料棧和呼叫棧構成了lua的執行緒, 同一個虛擬機器中不同執行緒共享了global_State;

/*
** Gives thread L1 a fresh stack, allocating through L.
** The stack gets BASIC_STACK_SIZE slots, all set to nil; 'stack_last'
** is placed EXTRA_STACK slots before the end of the allocation. The
** thread's base CallInfo (a record linked through 'next'/'previous')
** becomes the current one: its 'function' entry is the first stack
** slot, and its top lies LUA_MINSTACK slots above that entry.
** (The article's author notes that this stack code has not been
** studied in detail yet.)
*/
static void stack_init (lua_State *L1, lua_State *L) {
  CallInfo *ci = &L1->base_ci;  /* first call record of the thread */
  TValue *slot;
  /* initialize stack array */
  L1->stack = luaM_newvector(L, BASIC_STACK_SIZE, TValue);
  L1->stacksize = BASIC_STACK_SIZE;
  for (slot = L1->stack; slot < L1->stack + BASIC_STACK_SIZE; slot++)
    setnilvalue(slot);  /* erase new stack */
  L1->top = L1->stack;
  L1->stack_last = L1->stack + L1->stacksize - EXTRA_STACK;
  /* initialize first ci */
  ci->next = ci->previous = NULL;
  ci->callstatus = 0;
  ci->func = L1->top;
  setnilvalue(L1->top++);  /* 'function' entry for this 'ci' */
  ci->top = L1->top + LUA_MINSTACK;
  L1->ci = ci;
}

執行緒

資料棧和呼叫棧構成了lua的執行緒, 同一個虛擬機器中不同執行緒共享了global_State
參考lua_newthread的建立過程:

/*
** lua_newstate creates the Lua virtual machine itself (as opposed to
** lua_newthread, which only adds a thread to an existing one).
** NOTE(review): the body is elided in this excerpt.
*/
LUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {
  /***.....**/
}
/*
** lua_newthread creates a new thread inside the state that L belongs
** to. The new thread is linked on the 'allgc' list, pushed on L's
** stack, given L's hook settings and a copy of the main thread's
** extra space, and finally gets its own stack. Returns the new thread.
*/
LUA_API lua_State *lua_newthread (lua_State *L) {
  global_State *g = G(L);
  lua_State *L1;
  lua_lock(L);
  luaC_checkGC(L);
  /* create new thread */
  /* LX: thread state + extra space */
  L1 = &cast(LX *, luaM_newobject(L, LUA_TTHREAD, sizeof(LX)))->l;
  L1->marked = luaC_white(g);
  L1->tt = LUA_TTHREAD;  /* type tag */
  /* link it on list 'allgc' */  /* i.e. hand it over to the garbage collector */
  L1->next = g->allgc;
  g->allgc = obj2gco(L1);
  /* anchor it on L stack */
  setthvalue(L, L->top, L1);
  api_incr_top(L);
  preinit_thread(L1, g);
  /* the new thread starts with the same hook settings as L */
  L1->hookmask = L->hookmask;
  L1->basehookcount = L->basehookcount;
  L1->hook = L->hook;
  resethookcount(L1);
  /* initialize L1 extra space */
  memcpy(lua_getextraspace(L1), lua_getextraspace(g->mainthread),
         LUA_EXTRASPACE);
  luai_userstatethread(L, L1);
  stack_init(L1, L);  /* init stack */
  lua_unlock(L);
  return L1;
}

lua C API

一般的如lua_pushstring之類的理解不難, 現在看一個lua_pushvalue的程式碼:

/*
** Pushes onto the top of the stack a copy of the value found at
** index 'idx'.
*/
LUA_API void lua_pushvalue (lua_State *L, int idx) {
  TValue *src;
  lua_lock(L);
  src = index2addr(L, idx);  /* resolve the index to a value address */
  setobj2s(L, L->top, src);  /* copy it into the first free slot */
  api_incr_top(L);
  lua_unlock(L);
}
/*
** Converts an API index into the address of the value it denotes,
** relative to the current call 'L->ci':
**   - a positive index counts upwards from the called function's slot;
**     a slot at or above the stack top yields NONVALIDVALUE;
**   - a negative index that is not a pseudo-index counts downwards
**     from the stack top;
**   - LUA_REGISTRYINDEX denotes the registry kept in the global state;
**   - any other pseudo-index denotes an upvalue of the running C
**     closure; light C functions have none, and an upvalue number
**     beyond the closure's count yields NONVALIDVALUE.
*/
static TValue *index2addr (lua_State *L, int idx) {
  CallInfo *ci = L->ci;  /* current call */
  CClosure *closure;
  if (idx > 0) {  /* positive index */
    TValue *slot = ci->func + idx;
    api_check(L, idx <= ci->top - (ci->func + 1), "unacceptable index");
    return (slot >= L->top) ? NONVALIDVALUE : slot;
  }
  if (!ispseudo(idx)) {  /* negative index */
    api_check(L, idx != 0 && -idx <= L->top - (ci->func + 1), "invalid index");
    return L->top + idx;
  }
  if (idx == LUA_REGISTRYINDEX)  /* registry */
    return &G(L)->l_registry;
  /* upvalues */
  idx = LUA_REGISTRYINDEX - idx;
  api_check(L, idx <= MAXUPVAL + 1, "upvalue index too large");
  if (ttislcf(ci->func))  /* light C function? */
    return NONVALIDVALUE;  /* it has no upvalues */
  closure = clCvalue(ci->func);
  if (idx > closure->nupvalues)
    return NONVALIDVALUE;
  return &closure->upvalue[idx-1];
}