[swarthmore cs75] Compiler 4 – Diamondback
課程回顧
Swarthmore學院16年開的編譯系統課,總共10次大作業。本隨筆記錄了相關的課堂筆記以及第6次大作業。
- 函式宣告
增加函式宣告、函式呼叫的抽象語法;在轉換成anf之前還要檢查函式宣告和呼叫是否正確。
well_formed函式分別檢查Program中的函式宣告列表ds和main是否有錯誤。
well_formed_decl函式還需要檢查函式體是否正確(參考下圖右邊第一個遞迴的例子)。
well_formed_expr函式,檢查expr中呼叫的函式是否已經定義,接著遞迴呼叫well_formed_expr檢查引數(參考下圖右邊第二例子)。
anf_p將Program轉換為anf文法(參考下圖fact函式):首先使用anf處理main,接下來用anf_d處理函式宣告,anf_d中函式體需要遞迴處理。
acompile_p為Program生成彙編程式碼:acompile_decl為函式宣告生成彙編程式碼,acompile_aexpr為main生成彙編程式碼。
acompile_cexpr為函式呼叫生成彙編程式碼。
acompile_decl為函式體生成彙編程式碼(目前只支援一個引數),包括初始化(如提前分配空間)、遞迴處理函式體等。
下圖為函式呼叫時棧幀的情況,綠色是Caller的職責,紅色是Callee的職責(儲存old ebp,申請空間,恢復esp/ebp等)。
-
尾遞迴
普通遞迴vs.尾遞迴:普通遞迴的棧空間使用情況先是達到一個峰值,然後逐漸減小;而尾遞迴的空間複雜度始終為O(1)。
tail position vs. non tail position:為了能夠使用尾遞迴優化彙編程式碼,需要確定哪些位置是tail position(一旦該位置表示式求值完成,整個表示式就完成了求值)。
在遞迴判斷tail position的時候,需要注意如果整個表示式是另一個表示式的子表示式,而且這個子表示式的位置不是tail position的時候,這個子表示式的位置則不是tail position。
如:let ans = if ... else ... in k(ans + x),這個整個if表示式的位置就不能算是tail position。
如:let ans = (let x = 10 in x) in ans
Callee的職責:
1. 使用push ebp儲存old ebp。
2. 使用mov ebp, esp更新ebp。
3. 為local vars提前分配儲存空間。
Caller的職責:
1. 將引數的值[esp-4]移動到eax中。
2. 使用eax覆蓋原來的引數[ebp+8]。
3. 將esp指向old ebp。
4. pop ebp,恢復ebp並將esp+4(指向返回地址)。
5. 使用jmp跳轉到f函式體(使用call會將返回地址再次壓棧)。
tail call optimization & proper tail calls
兩個引數的情況:如下圖,z儲存在[ebp+12],w儲存在[ebp+8],如果尾遞迴呼叫f(z, y),需要將這兩個位置的值進行更新。
多個引數的情況:
使用c-stack convention:呼叫者負責將引數格式,引數,返回指標壓入棧中。
使用functional language convention:呼叫者壓入返回指標;被呼叫者負責管理引數。
- α-renaming
為了能夠使轉換成anf的變數名不產生混淆,需要對變數進行重新命名。
原始碼:
let x = 10, y = (let x = 5 in x) + x in y
ANF:
let x = 10 in let x = 5 in let y = x + x in y
實現α-renaming:
程式設計作業
- 具體語法
<program> :=
  | <decls> <expr>
  | <expr>
<decls> :=
  | <decl>
  | <decl> <decls>
<decl> :=
  | def <identifier>(<ids>): <expr>
  | def <identifier>(): <expr>
<ids> :=
  | <identifier>
  | <identifier> , <ids>
<expr> :=
  | let <bindings> in <expr>
  | if <expr>: <expr> else: <expr>
  | <binop-expr>
<binop-expr> :=
  | <identifier>
  | <number>
  | true
  | false
  | add1(<expr>)
  | sub1(<expr>)
  | isnum(<expr>)
  | isbool(<expr>)
  | print(<expr>)
  | <identifier>(<exprs>)
  | <identifier>()
  | <expr> + <expr>
  | <expr> - <expr>
  | <expr> * <expr>
  | <expr> < <expr>
  | <expr> > <expr>
  | <expr> == <expr>
  | ( <expr> )
<exprs> :=
  | <expr>
  | <expr> , <exprs>
<bindings> :=
  | <identifier> = <expr>
  | <identifier> = <expr>, <bindings>
- 抽象語法
(* Unary primitive operators. *)
type prim1 =
  | Add1
  | Sub1
  | Print
  | IsNum
  | IsBool

(* Binary primitive operators. *)
type prim2 =
  | Plus
  | Minus
  | Times
  | Less
  | Greater
  | Equal

(* Source-level expressions, i.e. the form that exists before conversion
   to ANF. *)
type expr =
  | ELet of (string * expr) list * expr  (* (name, value) bindings, then the body *)
  | EPrim1 of prim1 * expr
  | EPrim2 of prim2 * expr * expr
  | EApp of string * expr list           (* function name and argument expressions *)
  | EIf of expr * expr * expr            (* condition, then-branch, else-branch *)
  | ENumber of int
  | EBool of bool
  | EId of string

(* A function declaration: name, parameter names, body. *)
type decl =
  | DFun of string * string list * expr

(* A whole program: function declarations followed by the main expression. *)
type program =
  | Program of decl list * expr

(* ANF counterpart of [expr].  Immediates are constants or identifiers;
   the operands of primitives and calls are restricted to immediates. *)
type immexpr =
  | ImmNumber of int
  | ImmBool of bool
  | ImmId of string
(* Compound expressions: one operation over immediates; the branches of
   [CIf] are full ANF expressions. *)
and cexpr =
  | CPrim1 of prim1 * immexpr
  | CPrim2 of prim2 * immexpr * immexpr
  | CApp of string * immexpr list
  | CIf of immexpr * aexpr * aexpr
  | CImmExpr of immexpr
(* ANF expressions: a chain of single-name bindings ending in a compound
   expression. *)
and aexpr =
  | ALet of string * cexpr * aexpr
  | ACExpr of cexpr
(* ANF function declaration: name, parameter names, ANF body. *)
and adecl =
  | ADFun of string * string list * aexpr
(* ANF program: ANF declarations followed by the ANF main expression. *)
and aprogram =
  | AProgram of adecl list * aexpr
main.c
#include <stdio.h> #include <stdlib.h> #include <string.h> const int ERR_NOT_NUMBER = 1; const int ERR_NOT_BOOLEAN = 2; const int ERR_OVERFLOW = 3; extern int our_code_starts_here() asm("our_code_starts_here"); extern int print(int val) asm("print"); extern void error(int errCode, int val) asm("error"); int print(int val) { if(val & 0x00000001 ^ 0x00000001) { printf("%d\n", val >> 1); } else if(val == 0xFFFFFFFF) { printf("true\n"); } else if(val == 0x7FFFFFFF) { printf("false\n"); } else { printf("Unknown value: %#010x\n", val); } return val; } /* Copy over any error-detection functions here */ void error(int errCode, int val) { if (errCode == ERR_NOT_NUMBER) { fprintf(stderr, "Expected number, but got %010x\n", val); } else if (errCode == ERR_NOT_BOOLEAN) { fprintf(stderr, "Expected boolean, but got %010x\n", val); } else if (errCode == ERR_OVERFLOW) { fprintf(stderr, "Error: arithemetic overflow"); } exit(errCode); } // main should remain unchanged int main(int argc, char** argv) { int result = our_code_starts_here(); print(result); return 0; }
compile.ml
(* [well_formed_e e ds env] returns the static error messages found in the
   expression [e]; the list is empty when no error is found.
   - [ds] is the list of declarations a call may refer to.
   - [env] holds the identifiers that are in scope (the associated boolean
     is always [true] here; only presence matters).
   Reported problems: calls to undefined functions, arity mismatches,
   duplicate names inside one [let], numbers outside the representable
   range, and unbound identifiers. *)
let rec well_formed_e (e : expr) (ds : decl list) (env : bool envt) : string list =
  match e with
  | ENumber(n) ->
    (* Accepted range is -2^30 .. 2^30 - 1. *)
    if n < -1073741824 || n > 1073741823
    then [sprintf "Number precision too large %d" n]
    else []
  | EBool(_) -> []
  | EId(x) ->
    begin match find env x with
      | Some(_) -> []
      | None -> [sprintf "Unbound identifier %s" x]
    end
  | EPrim1(_, operand) -> well_formed_e operand ds env
  | EPrim2(_, left, right) ->
    let left_errors = well_formed_e left ds env in
    let right_errors = well_formed_e right ds env in
    left_errors @ right_errors
  | EIf(cond, thn, els) ->
    let cond_errors = well_formed_e cond ds env in
    let thn_errors = well_formed_e thn ds env in
    let els_errors = well_formed_e els ds env in
    cond_errors @ thn_errors @ els_errors
  | ELet(bindings, body) ->
    (* Walk the bindings left to right.  Each value is checked in the scope
       built so far, a name is reported when it appears again later in the
       same binding list, and the body sees every bound name. *)
    let rec check_bindings remaining scope =
      match remaining with
      | [] -> well_formed_e body ds scope
      | (name, value)::rest ->
        let dup_errors =
          match find rest name with
          | Some(_) -> [sprintf "Duplicate binding %s" name]
          | None -> []
        in
        let value_errors = well_formed_e value ds scope in
        dup_errors @ value_errors @ check_bindings rest ((name, true)::scope)
    in
    check_bindings bindings env
  | EApp(fname, args) ->
    let call_errors =
      match find_decl ds fname with
      | Some(DFun(_, params, _)) ->
        let expected = List.length params in
        let actual = List.length args in
        if expected <> actual
        then [sprintf "Arity mismatch on call to %s (expected %d arguments, got %d)" fname expected actual]
        else []
      | None -> [sprintf "Function %s is not defined" fname]
    in
    let arg_errors = List.concat (List.map (fun arg -> well_formed_e arg ds env) args) in
    call_errors @ arg_errors

(* [well_formed_d d ds] returns the error messages for one function
   declaration: a duplicate-parameter message when [find_dup] reports one,
   followed by the errors of the body checked with the parameters in scope. *)
let well_formed_d (d : decl) (ds : decl list) : string list =
  let DFun(fname, args, body) = d in
  let param_errors =
    match find_dup args with
    | Some(x) -> [sprintf "Function %s has duplicate parameter %s" fname x]
    | None -> []
  in
  let scope = List.map (fun arg -> (arg, true)) args in
  param_errors @ well_formed_e body ds scope

(* [well_formed_p p] returns every error message for the program, in this
   order: errors of the main expression (checked in an empty scope), errors
   of each declaration, then a duplicate-function message when [find_dup]
   reports one among the declared names. *)
let well_formed_p (p : program) : string list =
  let Program(ds, maine) = p in
  let declared_names = List.map (fun (DFun(fname, _, _)) -> fname) ds in
  let dup_errors =
    match find_dup declared_names with
    | Some(fname) -> [sprintf "Duplicate function %s" fname]
    | None -> []
  in
  let main_errors = well_formed_e maine ds [] in
  let decl_errors = List.concat (List.map (fun d -> well_formed_d d ds) ds) in
  main_errors @ decl_errors @ dup_errors
彙編程式碼格式
;; extern and global stuff
section .text
...
fun_decl1:
  ;; code for fun_decl1, including stack management
fun_decl2:
  ;; code for fun_decl2, including stack management
...
our_code_starts_here:
  ;; main entrypoint, as before, with stack management

;; errors, as before
internal_error_non_number:
...
(* [compile_to_string prog] checks [prog] with [well_formed_p] and, when no
   error is reported, converts it to ANF and returns the text of the complete
   assembly file.

   Layout of the result: the assembler preamble, the code of every function
   declaration, then [our_code_starts_here] (frame setup, compiled main
   expression, frame teardown and [ret]) followed by the three shared error
   handlers.

   Raises [Failure] when [well_formed_p] reports errors; the message is all
   of them, each one preceded by a newline. *)
let compile_to_string prog =
  match well_formed_p prog with
  | (_ :: _) as errors ->
    (* NOTE: This is where errors are reported, by concatenating them all
       together *)
    let errstr = String.concat "" (List.map (fun msg -> "\n" ^ msg) errors) in
    failwith errstr
  | [] ->
    let AProgram(decls, main) = anf_program prog in
    (* One assembler directive per line; the directives cannot share a
       line.  No trailing newline here -- this matches how the pieces are
       glued together by the final sprintf below. *)
    let preclude =
      "section .text\n\
       extern print\n\
       extern error\n\
       global our_code_starts_here" in
    (* Entry label plus frame setup: save the caller's EBP, then reserve one
       4-byte slot per local counted by [count_vars]. *)
    let stack_setup = [
      ILabel("our_code_starts_here");
      IPush(Reg(EBP));
      IMov(Reg(EBP), Reg(ESP));
      ISub(Reg(ESP), Const(4 * count_vars main));
    ] in
    (* Frame teardown and return, followed by the error handlers that the
       generated code jumps to.  Each handler pushes the offending value
       (in EAX) and an error code (1, 2 or 3), then calls [error]. *)
    let postlude = [
      IMov(Reg(ESP), Reg(EBP));
      IPop(Reg(EBP));
      IRet;
      ILabel("error_not_number");
      IPush(Reg(EAX));
      IPush(Const(1));
      ICall("error");
      ILabel("error_not_boolean");
      IPush(Reg(EAX));
      IPush(Const(2));
      ICall("error");
      ILabel("error_overflow");
      IPush(Reg(EAX));
      IPush(Const(3));
      ICall("error");
    ] in
    let compiled_decls = List.flatten (List.map acompile_decl decls) in
    let compiled_main = acompile_expr main 1 [] in
    let as_assembly_string = to_asm (stack_setup @ compiled_main @ postlude) in
    sprintf "%s%s%s\n" preclude (to_asm compiled_decls) as_assembly_string
(* [acompile_imm_arg i _ env] turns an immediate into an instruction operand.
   - numbers are shifted left by one bit (lowest bit 0 marks a number);
   - booleans become [const_true] / [const_false];
   - identifiers become an EBP-relative slot: a location [k] found in [env]
     maps to [ebp - 4*k].  Negative locations therefore address memory above
     EBP, which is how function parameters are reached (see [acompile_decl]).
   The second parameter is unused.
   Raises [Failure] when an identifier is not in [env]. *)
let acompile_imm_arg (i : immexpr) _ (env : int envt) : arg =
  match i with
  | ImmNumber(n) ->
    (* NOTE: the static overflow check should be done in well_formed, not here *)
    Const(n lsl 1)
  | ImmBool(b) -> if b then const_true else const_false
  | ImmId(name) ->
    begin match find env name with
      | Some(stackloc) -> RegOffset(-4 * stackloc, EBP)
      | None -> failwith ("Unbound identifier " ^ name)
    end

(* Loads the immediate [i] into EAX. *)
let acompile_imm (i : immexpr) (si : int) (env : int envt) : instruction list =
  [ IMov(Reg(EAX), acompile_imm_arg i si env) ]

let max n m = if n > m then n else m

(* [count_vars ae] is the number of stack slots needed for the [ALet]
   bindings of [ae].  The two branches of an [if] share slots, and so do the
   bound expression and the body of a [let], hence [max] rather than a sum. *)
let rec count_c_vars (ce : cexpr) : int =
  match ce with
  | CIf(_, thn, els) -> max (count_vars thn) (count_vars els)
  | CPrim1(_, _) | CPrim2(_, _, _) | CApp(_, _) | CImmExpr(_) -> 0

and count_vars (ae : aexpr) : int =
  match ae with
  | ALet(_, bind, body) -> 1 + (max (count_c_vars bind) (count_vars body))
  | ACExpr(ce) -> count_c_vars ce

(* [acompile_step s si env] emits the instructions for one compound
   expression; the result is left in EAX.  [si] is the first free stack slot
   and [env] maps identifiers to their slot. *)
let rec acompile_step (s : cexpr) (si : int) (env : int envt) : instruction list =
  (* Jumps to the error handler when the value in EAX has its lowest bit
     set, i.e. is not a number. *)
  let check_num = [
    ITest(Reg(EAX), Const(1));
    IJnz("error_not_number")
  ] in
  match s with
  | CPrim1(op, e) ->
    let eGen = acompile_imm e si env in
    begin match op with
      (* Numbers are stored shifted left by one, so +/-1 is +/-2. *)
      | Add1 -> eGen @ check_num @ [ IAdd(Reg(EAX), Const(2)) ]
      | Sub1 -> eGen @ check_num @ [ ISub(Reg(EAX), Const(2)) ]
      | Print ->
        (* Caller-cleanup call, like CApp below: drop the pushed argument
           once [print] has returned. *)
        eGen @ [
          IPush(Reg(EAX));
          ICall("print");
          IAdd(Reg(ESP), Const(4));
        ]
      | IsNum ->
        (* Move the tag bit into the sign bit, then flip everything. *)
        eGen @ [
          IAnd(Reg(EAX), Const(1));
          IShl(Reg(EAX), Const(31));
          IXor(Reg(EAX), const_true);
        ]
      | IsBool ->
        (* Move the tag bit into the sign bit, then fill the low bits. *)
        eGen @ [
          IAnd(Reg(EAX), Const(1));
          IShl(Reg(EAX), Const(31));
          IOr(Reg(EAX), const_false);
        ]
    end
  | CPrim2(op, left, right) ->
    (* NOTE: no tag check is emitted for the operands of binary operators;
       only signed overflow of the arithmetic ones is detected. *)
    let lGen = acompile_imm left si env in
    let imma = acompile_imm_arg right si env in
    begin match op with
      | Plus ->
        lGen @ [ IAdd(Reg(EAX), imma); IJo("error_overflow") ]
      | Minus ->
        lGen @ [ ISub(Reg(EAX), imma); IJo("error_overflow") ]
      | Times ->
        (* Un-shift one operand so the product carries the tag shift once. *)
        lGen @ [
          ISar(Reg(EAX), Const(1));
          IMul(Reg(EAX), imma);
          IJo("error_overflow");
        ]
      | Less ->
        (* left < right  <=>  left - right is negative: keep only the sign
           bit and OR in [const_false]. *)
        lGen @ [
          ISub(Reg(EAX), imma);
          IAnd(Reg(EAX), HexConst(0x80000000));
          IOr(Reg(EAX), const_false);
        ]
      | Greater ->
        (* left > right  <=>  right - left is negative, so this is [Less]
           with the operands swapped.  Testing the sign of left - right
           instead would also answer true when both operands are equal. *)
        acompile_imm right si env @ [
          ISub(Reg(EAX), acompile_imm_arg left si env);
          IAnd(Reg(EAX), HexConst(0x80000000));
          IOr(Reg(EAX), const_false);
        ]
      | Equal ->
        let end_label = gen_temp "end" in
        (* MOV leaves the flags untouched, so the jump still tests the CMP. *)
        lGen @ [
          ICmp(Reg(EAX), imma);
          IMov(Reg(EAX), const_false);
          IJne(end_label);
          IMov(Reg(EAX), const_true);
          ILabel(end_label);
        ]
    end
  | CIf(cond, thn, els) ->
    let else_label = gen_temp "else" in
    let endif_label = gen_temp "endif" in
    (* NOTE: the condition is not checked to be a boolean; anything other
       than [const_false] selects the then-branch. *)
    acompile_imm cond si env @ [
      ICmp(Reg(EAX), const_false);
      IJe(else_label)
    ] @ acompile_expr thn si env @ [
      IJmp(endif_label);
      ILabel(else_label);
    ] @ acompile_expr els si env @ [
      ILabel(endif_label);
    ]
  | CImmExpr(i) -> acompile_imm i si env
  | CApp(fname, args) ->
    (* Caller *)
    (* Push the arguments last-to-first so that, in the callee, the first
       argument is the one closest to the return address.  That is the
       layout [acompile_decl] assumes (parameter i at [ebp + 4*(i+2)]). *)
    let pushes =
      List.rev_map
        (fun arg -> IPush(Sized(DWORD_PTR, acompile_imm_arg arg si env)))
        args
    in
    pushes @ [
      ICall(fname);
      (* Pop the arguments again once the callee has returned. *)
      IAdd(Reg(ESP), Const(4 * List.length args));
    ]

(* [acompile_expr e si env] emits the instructions for an ANF expression.
   For a [let], the bound expression is evaluated first and its result is
   stored in slot [si]; the body is compiled with the name mapped to that
   slot and [si + 1] as the next free slot. *)
and acompile_expr (e : aexpr) (si : int) (env : int envt) : instruction list =
  match e with
  | ALet(id, e, body) ->
    let preclude = acompile_step e (si + 1) env in
    let postlude = acompile_expr body (si + 1) ((id, si)::env) in
    preclude @ [
      IMov(RegOffset(-4 * si, EBP), Reg(EAX))
    ] @ postlude
  | ACExpr(s) -> acompile_step s si env

(* [acompile_decl ad] emits the code of one function: its label, the frame
   setup, the body and the frame teardown followed by [ret]. *)
let acompile_decl (ad : adecl) : instruction list =
  (* Callee *)
  match ad with
  | ADFun(fname, args, body) ->
    (* Parameter i gets location -(i+2), which [acompile_imm_arg] turns into
       [ebp + 4*(i+2)]: the first parameter is at [ebp+8], just above the
       saved EBP and the return address. *)
    let env = List.mapi (fun i arg -> (arg, -i-2)) args in
    [
      ILabel(fname);
      IPush(Reg(EBP));
      IMov(Reg(EBP), Reg(ESP));
      ISub(Reg(ESP), Const(4 * count_vars body));
    ] @ acompile_expr body 1 env @ [
      IMov(Reg(ESP), Reg(EBP));
      IPop(Reg(EBP));
      IRet;
    ]