1. 程式人生 > UCT(信心上限樹演算法)解四子棋問題——蒙特卡羅法模擬人機博弈

UCT(信心上限樹演算法)解四子棋問題——蒙特卡羅法模擬人機博弈

#ifndef __UCT_H__
#define __UCT_H__

#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include "Judge.h" // provides the win/draw predicates userWin, machineWin and isTie

#define EMPTY 0 // board cell with no piece placed
#define PLAYER_CHANCE 1 // the human player's turn
#define COMPUTER_CHANCE 2 // the computer (AI)'s turn
#define TIME_LIMITATION 3000 // limit on computation time, checked against clock() in UCT::UCTSearch; presumably milliseconds - TODO confirm
#define PLAYER_WIN_PROFIT -1 // profit recorded when the player wins
#define COMPUTER_WIN_PROFIT 1 // profit recorded when the AI wins
#define TIE_PROFIT 0 // profit recorded for a draw
#define UNTERMINAL_STATE 2 // sentinel returned by UCT::Profit while the game is not over
#define VITALITY_COEFFICIENT 0.8 // proportionality coefficient c; scales the square-root term in Node::bestChild

// Pulls every std name into the global namespace for this header and for everything that includes it.
using namespace std;

// Forward declaration of UCT, which is defined after Node and named as Node's friend.
class UCT;

/**
 * A node of the UCT search tree: one board position together with the
 * statistics the search has gathered for it.
 *
 * Ownership: clear() delete[]s boardState (row by row) and topState, so the
 * arrays handed to the constructor must have been allocated with new[] and
 * belong to the node afterwards. The class has no destructor; whoever deletes
 * a node must call clear() on it first, exactly as clear() does for children.
 */
class Node
{
private:
	int **boardState; // board cells, indexed [row][column]
	int *topState; // per column: topState[c] - 1 is the row the next piece lands on; 0 means the column is full
	int row, column; // board size (M rows, N columns)
	int _noX, _noY; // row / column of the cell where no piece may be placed
	int _chessman; // side to move in this position (its piece is the one expand() places)
	int _x, _y; // row / column of the move that produced this position; -1, -1 for the root
	int visitedNum; // number of times backup() has passed through this node
	double profit; // sum of the simulation results backed up through this node
	int _depth; // depth of the node; a child gets its parent's depth + 1
	Node *father; // parent node, NULL for the root
	Node **children; // children[c] is the child reached by playing column c, or NULL
	int expandableNum; // number of columns that can still be expanded
	int *expandableNode; // the first expandableNum entries are the still-expandable columns
	friend class UCT;

	// Returns a freshly allocated copy of topState; the caller must delete[] it.
	int *TopState() const {
		int *presentTop = new int[column];
		for (int i = 0; i != column; i ++)
			presentTop[i] = topState[i];
		return presentTop;
	}
	// Returns a freshly allocated deep copy of boardState; the caller must
	// delete[] every row and then the row array.
	int **BoardState() const {
		int **presentBoardState = new int*[row];
		for (int i = 0; i < row; i ++) {
			presentBoardState[i] = new int[column];
			for (int j = 0; j < column; j ++)
				presentBoardState[i][j] = boardState[i][j];
		}
		return presentBoardState;
	}
	// Releases everything this node owns, including the whole subtree below
	// it. Pointers are reset afterwards so that a second call is a no-op
	// instead of a double free.
	void clear() {
		if (boardState != NULL) {
			for (int i = 0; i != row; i ++)
				delete [] boardState[i];
			delete [] boardState;
			boardState = NULL;
		}
		delete [] topState;
		topState = NULL;
		delete [] expandableNode;
		expandableNode = NULL;
		expandableNum = 0;
		if (children != NULL) {
			for (int i = 0; i != column; i ++)
				if (children[i]) {
					children[i] -> clear();
					delete children[i];
					children[i] = NULL;
				}
			delete [] children;
			children = NULL;
		}
	}

public:
	/**
	 * Builds a node for the given position.
	 *
	 * @param board        board cells [r][c]; ownership passes to the node (see class comment)
	 * @param top          per-column top markers, c entries; ownership passes to the node
	 * @param r, c         number of rows / columns
	 * @param noX, noY     row / column of the forbidden cell
	 * @param depth        depth of this node
	 * @param x, y         row / column of the move leading here (-1, -1 for the root)
	 * @param playingRight side to move in this position
	 * @param _father      parent node, NULL for the root
	 */
	Node(int **board, int *top, int r, int c, int noX, int noY, int depth = 0, int x = -1, int y = -1, int playingRight = COMPUTER_CHANCE, Node* _father = NULL):
		// Listed in declaration order, which is the order members are really initialised in.
		boardState(board), topState(top), row(r), column(c), _noX(noX), _noY(noY), _chessman(playingRight), _x(x), _y(y), visitedNum(0), profit(0), _depth(depth), father(_father) {
		expandableNum = 0;
		children = new Node*[column]; // one slot per column
		expandableNode = new int[column]; // columns that can still be played from here
		for (int i = 0; i != column; i ++) {
			if (topState[i] != 0) // column i still has room
				expandableNode[expandableNum ++] = i;
			children[i] = NULL;
		}
	}
	int x() const { return _x; }
	int y() const { return _y; }
	int chessman() const { return _chessman; }
	// True while at least one playable column has no child node yet.
	bool isExpandable() const { return expandableNum > 0; }
	// True when the game is over in this position. The last move was made by
	// the side opposite to _chessman, so a node where the player is to move is
	// checked for a machine win and vice versa; a draw is checked via isTie.
	bool isTerminal() {
		if (_x == -1 && _y == -1) // the root has no preceding move
			return false;
		if ((_chessman == PLAYER_CHANCE && machineWin(_x, _y, row, column, boardState)) || // the computer has won
			(_chessman == COMPUTER_CHANCE && userWin(_x, _y, row, column, boardState)) || // the player has won
			(isTie(column, topState))) // draw
			return true;
		return false;
	}
	/**
	 * Creates one new child by playing a randomly chosen, not yet expanded
	 * column with this node's side to move.
	 *
	 * @param playingRight side to move in the new child
	 * @return the new child, or NULL when no column is left to expand
	 */
	Node *expand(int playingRight) {
		if (expandableNum <= 0) // nothing to expand; also avoids rand() % 0
			return NULL;
		int index = rand() % expandableNum; // pick one of the remaining columns at random
		int **newBoardState = BoardState(); // the child gets its own copy of the board
		int *newTopState = TopState(); // and of the top markers
		int newY = expandableNode[index], newX = -- newTopState[newY]; // cell the piece lands on
		newBoardState[newX][newY] = chessman(); // place the piece
		if (newX - 1 == _noX && newY == _noY) // the cell directly above is the forbidden one
			newTopState[newY] --; // skip it
		children[newY] = new Node(newBoardState, newTopState, row, column, _noX, _noY, _depth + 1, newX, newY, playingRight, this);
		// Move the used column behind the end of the live part of the list.
		std::swap(expandableNode[index], expandableNode[-- expandableNum]);
		return children[newY];
	}
	/**
	 * Picks the child with the highest value of
	 *   sign * profit / visits + c * sqrt(2 * ln(parent visits) / visits),
	 * where sign is -1 when the player is to move here and +1 otherwise, and
	 * c is VITALITY_COEFFICIENT. Exact ties are broken by a coin flip.
	 *
	 * @return the selected child; a child that was never visited is returned
	 *         immediately (its value would divide by zero); NULL when this
	 *         node has no children at all
	 */
	Node *bestChild() {
		Node *best = NULL; // stays NULL when there is no child to choose from
		double maxProfitRatio = -RAND_MAX;
		const double sign = (_chessman == PLAYER_CHANCE) ? -1.0 : 1.0;
		for (int i = 0; i != column; i ++) {
			Node *child = children[i];
			if (child == NULL) continue;
			int childVisitedNum = child -> visitedNum;
			if (childVisitedNum == 0) // no statistics yet: try it before anything else
				return child;
			double modifiedProfit = sign * child -> profit; // profit from the point of view of the side to move
			double tempProfitRatio = modifiedProfit / childVisitedNum +
				sqrtl(2 * logl(visitedNum) / childVisitedNum) * VITALITY_COEFFICIENT;
			if (best == NULL || tempProfitRatio > maxProfitRatio || (tempProfitRatio == maxProfitRatio && rand() % 2 == 0)) {
				maxProfitRatio = tempProfitRatio;
				best = child;
			}
		}
		return best;
	}
	// Adds one visit and deltaProfit to this node and to every ancestor up to the root.
	void backup(double deltaProfit) {
		Node *temp = this;
		while (temp) {
			temp -> visitedNum ++;
			temp -> profit += deltaProfit;
			temp = temp -> father;
		}
	}
};

class UCT
{
private:
	Node *_root; //根節點
	int _row, _column; //行數、列數
	int _noX, _noY; //不可落子點的位置 
	int startTime; //計算開始時間
	
	//計算當前狀態收益
	int Profit(int **board, int *top, int chessman, int x, int y) const { 
		if (chessman == PLAYER_CHANCE && userWin(x, y, _row, _column, board))
			return PLAYER_WIN_PROFIT;
		if (chessman == COMPUTER_CHANCE && machineWin(x, y, _row, _column, board))
			return COMPUTER_WIN_PROFIT;
		if (isTie(_column, top))
			return TIE_PROFIT;
		return UNTERMINAL_STATE; //未進入終止狀態 
	}
	//隨機落子 
	void placeChessman(int **board, int *top, int chessman, int &x, int &y) {
		y = rand() % _column; //隨機選擇一列 
		while (top[y] == 0) //若此列已下滿 
			y = rand() % _column; //再隨機選擇一列 
		x = -- top[y]; //確定落子高度 
		board[x][y] = chessman; //落子 
		if (x - 1 == _noX && y == _noY) //若落子位置正上方緊鄰不可落子點 
			top[y] --;
	}
	//棋權變換 
	int rightChange(int chessman) const {
		if (chessman == PLAYER_CHANCE)
			return COMPUTER_CHANCE;
		else if (chessman == COMPUTER_CHANCE)
			return PLAYER_CHANCE;
		else
			return -1;
	} 
	
	//搜尋樹策略 
	Node *TreePolicy(Node *presentNode) {
		while (!presentNode -> isTerminal()) { //節點不是終止節點 
			if (presentNode -> isExpandable()) //且擁有未被訪問的子狀態 
				return Expand(presentNode); //擴充套件該節點 
			else
				presentNode = BestChild(presentNode); //選擇最優子節點 
		}
		return presentNode;
	}
	//對節點進行擴充套件
	Node *Expand(Node *presentNode) { return presentNode -> expand(rightChange(presentNode -> chessman())); }
	//最優子節點 
	Node *BestChild(Node *father) { return father -> bestChild(); }
	//模擬策略 
	double DefaultPolicy(Node *selectedNode) { 
		int **boardState = selectedNode -> BoardState(), *top = selectedNode -> TopState();
		int chessman = selectedNode -> chessman(), depth = selectedNode -> _depth;
		int x = selectedNode -> x(), y = selectedNode -> y();
		int profit = Profit(boardState, top, rightChange(chessman), x, y); //計算收益 
		while (profit == UNTERMINAL_STATE) { //若當前狀態未達終止狀態 
			depth ++;
			placeChessman(boardState, top, chessman, x, y); //隨機落子 
			profit = Profit(boardState, top, chessman, x, y); //計算收益 
			chessman = rightChange(chessman); //棋權變換 
		}
		for (int i = 0; i != _row; i ++)
			delete [] boardState[i];
		delete [] boardState;
		delete [] top;
		return double(profit);// / logl(depth + 1); //非線性加速
	}
	//回溯更新收益(深度越深收益越小)
	void Backup(Node *selectedNode, double deltaProfit) { selectedNode -> backup(deltaProfit); }
	
public:
	//建構函式 
	UCT(int row, int column, int noX, int noY): _row(row), _column(column), _noX(noX), _noY(noY), startTime(clock()) {}
	//信心上限樹搜尋 
	Node *UCTSearch(int **boardState, int *topState) {
		_root = new Node (boardState, topState, _row, _column, _noX, _noY); //以當前狀態建立根節點 
		while (clock() - startTime <= TIME_LIMITATION) { //尚未耗盡計算時長 
			Node *selectedNode = TreePolicy(_root); //運用搜索樹策略節點 
			double deltaProfit = DefaultPolicy(selectedNode); //運用模擬策略對選中節點進行一次隨機模擬 
			Backup(selectedNode, deltaProfit); //將模擬結果回溯反饋給各祖先 
		}
		return BestChild(_root);
	}
	//解構函式 
	~UCT() { _root -> clear(); delete _root; } 
};

#endif //__UCT_H__
這段程式碼存在一定的冗餘:我在編寫過程中下意識地把部分本應在演算法類 UCT 中實現的功能,放到了本應只作為結構體的 Node 類中實現,使程式碼的條理性有所欠缺。