TL;DR: click here for the complete code.
In the previous post we built a simple Gomoku game with boardgame.io + wgo.js + React, using the AI that ships with the engine. That AI is rather weak, so let's push it in the direction of AlphaGo and build an AI of our own.
We know that AlphaGo combines Monte Carlo tree search with a neural network, and the Debug panel shows that the AI bundled with boardgame.io is also based on Monte Carlo search. However, if we try to pair its search with an AlphaGo-style neural network, we find that its tree nodes lack some of the required information: the algorithm supports neither injecting noise, nor prior probabilities, nor selecting nodes by weight. So we have to write a Monte Carlo tree search of our own.
Based on this analysis, our Monte Carlo search needs to support prior probabilities supplied by an evaluator, Dirichlet noise at the root, weight-based child selection, batched evaluation of leaf nodes, and reuse of the tree between searches.
Create a src/MCTS.js file and add the following content:
import { dirichletK, randomPick } from "./Random";
export function Node(a) {
return {
a: a,
p: 1,
q: 0,
w: 0,
n: 0,
// null - unexpanded, [] - terminal node, [...] - intermediate node
children: null
};
}
function isLeaf(node) {
return node.n === 0 || node.children === null || node.children.length === 0;
}
// PUCT-style score: the value estimate q plus a prior-weighted exploration bonus
function ucbScore(node, c) {
  return node.q + (c * node.p) / (node.n + 1);
}
// pick the child with the best score; the exploration constant grows with the parent's visit count
function ucbSelectChild(node) {
  const c = 5 * Math.sqrt(node.n);
let best = node.children[0];
let bestScore = ucbScore(best, c);
for (let i = 1; i < node.children.length; i++) {
const child = node.children[i];
const score = ucbScore(child, c);
if (score > bestScore) {
best = child;
bestScore = score;
}
}
return best;
}
// copy the evaluator's prior probabilities onto the children,
// optionally mixing in Dirichlet noise at the root as AlphaZero does
function applyPrioProb(root, probs, useNoise) {
if (!useNoise) {
for (let i = 0; i < root.children.length; ++i) {
const child = root.children[i];
child.p = probs[child.a];
}
return;
}
const dir = dirichletK(root.children.length, 0.03);
for (let i = 0; i < root.children.length; ++i) {
const child = root.children[i];
child.p = dir[i] * 0.25 + 0.75 * probs[child.a];
}
}
// propagate the result up the path, flipping the sign at each level
function backprop(path, r) {
let i = path.length;
while (i-- > 0) {
let leaf = path[i];
leaf.n += 1;
leaf.w += r;
leaf.q = leaf.w / leaf.n;
r = -r;
}
}
// like backprop, but the visits were already counted by applyVirtualLoss
function backpropAndRevertVirtualLoss(path, r) {
let i = path.length;
while (i-- > 0) {
let leaf = path[i];
leaf.w += r;
leaf.q = leaf.w / leaf.n;
r = -r;
}
}
// count an in-flight simulation as a visit so parallel selections
// spread out instead of all following the same path
function applyVirtualLoss(path) {
let i = path.length;
while (i-- > 0) {
let leaf = path[i];
leaf.n += 1;
leaf.q = leaf.w / leaf.n;
}
}
// undo applyVirtualLoss for simulations that were never evaluated
function revertVirtualLoss(path) {
let i = path.length;
while (i-- > 0) {
let leaf = path[i];
leaf.n -= 1;
leaf.q = leaf.w / leaf.n;
}
}
export class MCTS {
constructor({ evaluator, maxIteration, maxTime, useNoise }) {
if (!maxIteration && !maxTime)
throw new Error("maxIteration and maxTime cannot be 0 at same time");
this._eval = evaluator;
this._maxIteration = maxIteration;
this._maxTime = maxTime;
this._batch = new Set();
this._batchSize = 8;
this._useNoise = useNoise;
    this._searching = false;
    this._stop = false;
    this._timer = null;
}
async exec(root, state, opts) {
if (this._searching)
throw new Error("another searching is in progress!");
let { maxIteration, maxTime, tao } = {
maxIteration: this._maxIteration,
maxTime: this._maxTime,
tao: 0.001,
...opts
};
if (maxIteration === 0 && maxTime === 0)
throw new Error(
"maxIteration and maxTime cannot be 0 at same time"
);
if (maxTime > 0) {
this._timer = setTimeout(() => {
this.stop();
}, maxTime);
}
if (!maxIteration) maxIteration = Number.MAX_SAFE_INTEGER;
this._searching = true;
for (let it = 0; it < maxIteration && !this._stop; ++it)
await this._step(root, state.clone());
await this._flush();
this._searching = false;
this._stop = false;
clearTimeout(this._timer);
let probs = getActionProbs(root, tao);
return {
bestChild: randomPick(root.children, probs),
actionProbs: probs.reduce((acc, p, i) => {
acc[root.children[i].a] = p;
return acc;
}, {})
};
}
stop() {
if (!this._searching) return;
this._stop = true;
clearTimeout(this._timer);
}
async _step(root, st) {
const path = [root];
let leaf = root;
while (!isLeaf(leaf)) {
leaf = ucbSelectChild(leaf);
path.push(leaf);
st.makeMove(leaf.a);
}
const gameover = st.gameover();
if (gameover) {
leaf.children = [];
let score = 0;
if (gameover.winner === st.currentPlayer) score = 1;
else if (gameover.draw) score = 0;
else score = -1;
return backprop(path, -score);
    } else if (leaf.children === null) {
      // expand immediately (note: parallel simulations may collide on the same node; collisions are not handled)
let actions = st.legalMoves();
leaf.children = actions.map((a) => Node(a));
}
applyVirtualLoss(path);
let job = {
state: st,
node: leaf,
path: path
};
if (this._batch.add(job).size === this._batchSize) {
await this._flush();
}
}
async _flush() {
if (this._batch.size === 0) return;
const list = Array.from(this._batch.values());
const vals = await this._eval(list.map((b) => b.state));
for (let i = 0; i < list.length; i++) {
const info = list[i];
const leaf = info.node;
applyPrioProb(leaf, vals[i].probs, this._useNoise);
backpropAndRevertVirtualLoss(info.path, -vals[i].value);
}
this._batch.clear();
}
}
function getActionProbs(root, tao) {
tao = 1 / tao;
let maxv = root.children.reduce((x, c) => Math.max(x, c.n), 0);
let sum = 0;
let probs = root.children.map((child) => {
const p = Math.pow(child.n / maxv, tao);
sum += p;
return p;
});
for (let i = 0; i < probs.length; i++) probs[i] /= sum;
return probs;
}
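The module above imports dirichletK and randomPick from src/Random.js, which is not listed in this post; the complete code linked at the top contains the real implementations. As a rough sketch of what the two helpers need to do, a symmetric Dirichlet sample and a weighted random pick could look roughly like this:
// src/Random.js - a minimal sketch; see the linked complete code for the real version.

// standard normal via Box-Muller
function normal() {
  let u = 0;
  while (u === 0) u = Math.random();
  return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * Math.random());
}

// Marsaglia-Tsang sampler for Gamma(shape, 1)
function gamma(shape) {
  if (shape < 1) {
    // boost trick for shape < 1
    return gamma(shape + 1) * Math.pow(Math.random(), 1 / shape);
  }
  const d = shape - 1 / 3;
  const c = 1 / Math.sqrt(9 * d);
  for (;;) {
    let x, v;
    do {
      x = normal();
      v = 1 + c * x;
    } while (v <= 0);
    v = v * v * v;
    const u = Math.random();
    if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) return d * v;
  }
}

// k-dimensional symmetric Dirichlet(alpha) sample
export function dirichletK(k, alpha) {
  const g = [];
  let sum = 0;
  for (let i = 0; i < k; i++) {
    g.push(gamma(alpha));
    sum += g[i];
  }
  // guard against underflow when alpha is tiny (e.g. 0.03)
  if (sum === 0) return new Array(k).fill(1 / k);
  return g.map((x) => x / sum);
}

// pick items[i] with probability weights[i] (weights assumed to sum to 1)
export function randomPick(items, weights) {
  let r = Math.random();
  for (let i = 0; i < items.length; i++) {
    r -= weights[i];
    if (r <= 0) return items[i];
  }
  return items[items.length - 1];
}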
The exec function takes two parameters so that old nodes can be reused. If every search rebuilt the tree from the current state, the information gathered by the previous search (already-expanded nodes, their visit counts, and so on) could not be reused at all.
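For example, inside an async game loop one could keep the chosen child as the root for the next turn; a rough usage sketch, where mcts and state stand for an MCTS instance and a State-like object:
let root = Node(null);
// run one search, play the chosen move, and keep the explored subtree
const { bestChild } = await mcts.exec(root, state);
state.makeMove(bestChild.a);
root = bestChild;
// when the opponent replies with opponentMove, descend (or restart) the tree:
// root = root.children.find((c) => c.a === opponentMove) || Node(opponentMove);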
To keep future training decoupled from boardgame.io, we also re-abstract State and define a small set of interfaces that every implementation must provide:
- legalMoves(): enumerates all legal moves; a move is represented as a number
- makeMove(): plays a move and switches the current player
- gameover(): returns the game result
- currentPlayer: the player to move
- clone(): clones the current state
Next, let's create the new state.
Create src/State.js and add the following content:
export const PLAYER_BLACK = 1;
export const PLAYER_WHITE = -1;
// count consecutive stones of color clr along [start, end), stepping by stride
function checkWinnerByLine(stones, clr, start, end, stride) {
let cnt = 0;
for (; cnt < 5 && start !== end; start += stride) {
if (stones[start] === clr) cnt++;
else cnt = 0;
}
return cnt >= 5;
}
// check whether the stone just played at index p completes five in a row,
// scanning the row, the column and both diagonals through p (at most 4 cells each way)
export function checkWinnerByMove(boardSize, stones, p) {
  const _min = 4;
const c = stones[p];
if (c === 0) return 0;
let x0 = p % boardSize;
let y0 = Math.floor(p / boardSize);
let x1 = boardSize - 1 - x0;
let y1 = boardSize - 1 - y0;
let start = 0,
end = 0,
stride = 1;
x0 = Math.min(x0, _min);
x1 = Math.min(x1, _min);
start = p - x0;
end = p + x1 + 1;
if (checkWinnerByLine(stones, c, start, end, 1)) return c;
stride = boardSize;
y0 = Math.min(y0, _min);
y1 = Math.min(y1, _min);
start = p - y0 * stride;
end = p + (y1 + 1) * stride;
if (checkWinnerByLine(stones, c, start, end, stride)) return c;
stride = boardSize + 1;
let ma = Math.min(x0, y0),
mb = Math.min(x1, y1);
start = p - ma * stride;
end = p + (mb + 1) * stride;
if (checkWinnerByLine(stones, c, start, end, stride)) return c;
stride = boardSize - 1;
ma = Math.min(x1, y0);
mb = Math.min(x0, y1);
start = p - ma * stride;
end = p + (mb + 1) * stride;
if (checkWinnerByLine(stones, c, start, end, stride)) return c;
return 0;
}
export class State {
constructor({ boardSize }) {
this.boardSize = boardSize;
this.stones = new Array(boardSize * boardSize).fill(0);
this.currentPlayer = PLAYER_BLACK;
this.moveHistory = [];
this._gameover = null;
}
clone() {
let newObj = new State({
boardSize: this.boardSize
});
newObj.copy(this);
return newObj;
}
copy(src) {
if (src.boardSize !== this.boardSize)
throw new Error("incompatible board");
for (let i = 0; i < src.stones.length; i++) {
this.stones[i] = src.stones[i];
}
this.currentPlayer = src.currentPlayer;
for (let i = 0; i < src.moveHistory.length; i++) {
this.moveHistory[i] = src.moveHistory[i];
}
this.moveHistory.length = src.moveHistory.length;
this._gameover = src._gameover;
}
makeMove(mov) {
if (this._gameover) return this;
this.stones[mov] = this.currentPlayer;
this.moveHistory.push(mov);
this.currentPlayer = -this.currentPlayer;
return this;
}
legalMoves() {
let moves = [];
for (let i = 0; i < this.stones.length; i++) {
if (this.stones[i] === 0) moves.push(i);
}
return moves;
}
gameover() {
if (this._gameover || this.moveHistory.length === 0)
return this._gameover;
const mov = this.moveHistory[this.moveHistory.length - 1];
const winner = checkWinnerByMove(this.boardSize, this.stones, mov);
if (winner !== 0) {
this._gameover = { winner };
} else if (this.moveHistory.length === this.stones.length) {
this._gameover = { draw: true };
}
return this._gameover;
}
}
Next, we implement the evaluator. The evaluator estimates the value of the current state and a probability for each legal move, and it should be able to evaluate several states in one batch. Since we don't have a neural network yet, we start with a Monte Carlo evaluator, that is, one that plays random moves from the current position. Combined with the search above, this Monte Carlo evaluator gives us a classic Monte Carlo tree search.
Create src/Evals.js and paste in the following content:
import { BOARD_SIZE } from "./Consts";
export function MCEvaluator() {
  return async function evaluator(ss) {
    // yield to the event loop; note that setImmediate needs a polyfill in the browser
    await new Promise((resolve) => setImmediate(resolve));
return ss.map((s) => {
const boardSize = BOARD_SIZE;
const acts = s.legalMoves();
const v = randomPlay(s.clone(), acts);
const p = new Array(boardSize * boardSize).fill(0);
for (let i = 0; i < acts.length; i++) p[acts[i]] = 1 / acts.length;
return {
value: v,
probs: p
};
});
};
}
// play random moves until the game ends, then score the result
// from the point of view of the player to move in st
function randomPlay(st, acts) {
  const p = st.currentPlayer;
let gameover = st.gameover();
for (let i = 0; i < acts.length && !gameover; i++) {
const j = i + Math.floor(Math.random() * (acts.length - i));
const x = acts[j];
acts[j] = acts[i];
acts[i] = x;
gameover = st.makeMove(x).gameover();
}
if (p === gameover.winner) return 1;
else if (gameover.draw) return 0;
return -1;
}
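Before wiring everything into boardgame.io, here is a small standalone sketch (not one of the project files) showing how the state, the Monte Carlo evaluator and the search fit together:
// standalone sketch: run one search with the Monte Carlo evaluator
import { MCTS, Node } from "./MCTS";
import { MCEvaluator } from "./Evals";
import { State } from "./State";
import { BOARD_SIZE } from "./Consts";

async function demo() {
  const state = new State({ boardSize: BOARD_SIZE });
  const mcts = new MCTS({
    evaluator: MCEvaluator(),
    maxIteration: 400,
    useNoise: false
  });
  const root = Node(null);
  const { bestChild, actionProbs } = await mcts.exec(root, state);
  console.log("best move:", bestChild.a, "visits:", bestChild.n);
  console.log("move probabilities:", actionProbs);
}

demo();
Raising maxIteration makes the search stronger but slower; the Game class below uses 3200 * 2 iterations.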
Now that we have the Monte Carlo search and the new state, let's put them together into a new game with an AI. We define a new Game class that ties together the boardgame.io Client, the MCTS and the rest; each Game instance represents one game.
Rename src/Game.js to src/GameDef.js, then create a new src/Game.js and add the following content:
import { Client } from "boardgame.io/client";
import { MCTS, Node } from "./MCTS";
import { MCEvaluator } from "./Evals";
import { State } from "./State";
import { BOARD_SIZE } from "./Consts";
import { Gomoku } from "./GameDef";
export class Game {
constructor({ playAs }) {
this._client = Client({ game: Gomoku });
this._mcts = new MCTS({
evaluator: MCEvaluator(),
useNoise: false,
maxIteration: 3200 * 2
});
this._root = Node(null);
this._state = new State({ boardSize: BOARD_SIZE });
this._playAs = playAs;
this._started = false;
this._stopped = false;
    // the AI controls whichever side the human is not playing
    this._aiPlayer = { 0: "1", 1: "0" }[this._playAs];
this._currentPlayer = this.getState().ctx.currentPlayer;
this._stateId = this.getState()._stateID - 1;
}
get currentPlayer() {
return this._currentPlayer;
}
get playAs() {
return this._playAs;
}
getState() {
return this._client.getState();
}
putStone(id) {
if (!this._started || this._stopped) return;
if (this._playAs && this._playAs !== this.currentPlayer) return;
this._client.moves.putStone(id);
}
subscribe(f) {
return this._client.subscribe(f);
}
start() {
if (this._started || this._stopped) return;
this._client.subscribe((s) => {
this._currentPlayer = s.ctx.currentPlayer;
      // replay the moves reported by boardgame.io into our own State and search tree
      let moves = (s.deltalog || [])
.filter((log) => log.action.type === "MAKE_MOVE")
.map((log) => log.action.payload.args[0]);
for (let mov of moves) {
this._advance(mov);
}
if (s.ctx.gameover) return;
if (s._stateID === this._stateId) return;
this._stateId = s._stateID;
if (this._aiPlayer === s.ctx.currentPlayer) {
this._mcts.exec(this._root, this._state).then((result) => {
if (this._stopped) return;
this._client.moves.putStone(result.bestChild.a);
});
}
});
this._client.start();
this._started = true;
}
stop() {
this._client.stop();
this._mcts.stop();
this._stopped = true;
}
  // advance our internal State and descend the tree to the child matching
  // the move, so its accumulated statistics can be reused by the next search
  _advance(mov) {
this._state.makeMove(mov);
let root = this._root;
if (!root.children) root = Node(mov);
else if (root.children.length === 0)
throw new Error("try to make move on terminal node");
else root = root.children.find((c) => c.a === mov);
this._root = root;
}
}
Next, let's rework src/App.js:
//import { Client } from "boardgame.io/react";
import React, {
useState,
useMemo,
useEffect,
useReducer
} from "react";
import { GomokuBoard } from "./Board";
import { Game } from "./Game";
//const App = Client({ game: Gomoku });
function App() {
const [gameId, newGame] = useReducer((id) => id + 1, 1);
const game = useMemo(() => {
let game = new Game({
playAs: Math.random() > 0.5 ? "0" : "1"
});
return game;
}, [gameId]);
const [state, setState] = useState(game.getState());
const moves = useMemo(
() => ({
putStone: (id) => game.putStone(id)
}),
[game]
);
useEffect(() => {
game.start();
let unsub = game.subscribe(setState);
return () => {
unsub();
game.stop();
};
}, [game]);
const currentPlayer = game.currentPlayer;
  let status = "Your move";
const gameover = state.ctx.gameover;
if (gameover) {
    if (gameover.winner === "0") status = "Black wins";
    else if (gameover.winner === "1") status = "White wins";
    else status = "Draw";
} else if (game.playAs && game.playAs !== currentPlayer) {
    status = "Thinking...";
}
return (
<>
<div>
        <button onClick={newGame}>New game</button>
</div>
<GomokuBoard {...state} moves={moves} />
<div>
<p>{status}</p>
</div>
</>
);
}
export default App;
Alright, everything is in place; let's see how it plays.
Our new Game class keeps its own copy of the state and triggers the AI on its own, which conflicts with the log rewind feature in the Debug panel. The AI features in the Debug panel should also be used with care, as they may conflict with the Game class as well.
In this post we implemented our own Monte Carlo tree search and our own State, and kept that State in sync with boardgame.io's state. Using our own search, we built a Gomoku game that supports playing against an AI. With this infrastructure in place, we can move step by step toward an AlphaGo-style Gomoku AI. Next, we'll add a little fun to the game, and that bit of fun is also an important step toward an AlphaGo-style Gomoku AI.