先决条件:博弈论中的极小极大算法,博弈论中的评估函数
让我们结合迄今所学的极小极大算法和评估函数,编写一个合格的井字棋 AI(Artificial Intelligence,人工智能),让它下出一场完美的比赛。该 AI 将考虑所有可能的场景并做出最佳移动。
寻找最佳动作:
我们将引入一个名为findBestMove()的新函数。此函数使用minimax()评估所有可用的移动,然后返回最大化器可以做出的最佳移动。伪代码如下:
function findBestMove(board):
    bestMove = NULL
    for each move in board :
        if current move is better than bestMove
            bestMove = current move
    return bestMove
极小极大:
为了检查当前走法是否比最佳走法更好,我们借助minimax()函数,该函数将考虑游戏可以进行的所有可能方式并返回该走法的最佳值,假设对手也以最佳方式进行
minimax()函数的最大化器和最小化器的代码类似于findBestMove() ,唯一的区别是,它不会返回一个移动,而是返回一个值。这是伪代码:
function minimax(board, depth, isMaximizingPlayer):
    if current board state is a terminal state :
        return value of the board
    if isMaximizingPlayer :
        bestVal = -INFINITY
        for each move in board :
            value = minimax(board, depth+1, false)
            bestVal = max( bestVal, value)
        return bestVal
    else :
        bestVal = +INFINITY
        for each move in board :
            value = minimax(board, depth+1, true)
            bestVal = min( bestVal, value)
        return bestVal
检查 GameOver 状态:
为了检查游戏是否结束并确保没有剩余的移动,我们使用isMovesLeft()函数。这是一个简单直接的函数,它检查移动是否可用并分别返回 true 或 false。伪代码如下:
function isMovesLeft(board):
    for each cell in board:
        if current cell is empty:
            return true
    return false
让我们的人工智能更智能:
最后一步是让我们的人工智能更聪明一点。即使下面的 AI 玩得很完美,它也可能会选择采取导致胜利更慢或失败更快的动作。让我们举个例子来解释一下。
假设有 2 种可能的方式让 X 从给定的棋盘状态赢得比赛。
- 移动A : X 可以在 2 移动中获胜
- 移动B : X 可以在 4 个移动中获胜
我们的评估函数将为移动A和B返回 +10 的值。即使移动A更好,因为它可以确保更快的胜利,但我们的 AI 有时可能会选择B。为了克服这个问题,我们从评估的分数中减去深度值。这意味着在胜利的情况下,它将选择移动次数最少的胜利,而在失败的情况下,它将尝试延长游戏并尽可能多地走棋。所以新的评估值将是
- 移动A的值为 +10 – 2 = 8
- 移动B的值为 +10 – 4 = 6
现在,由于移动A的分数高于移动B,我们的 AI 将选择移动A 而不是移动B。同样的做法也必须应用于最小化器:此时我们不是减去深度,而是加上深度值,因为最小化器总是试图获得尽可能小(负)的值。我们可以在评估函数内部或外部调整深度,两种做法都可以。我选择在评估函数之外进行。伪代码实现如下。
if maximizer has won:
    return WIN_SCORE - depth
else if minimizer has won:
    return LOSE_SCORE + depth
执行 :
下面是上述想法的实现。
C++
// C++ program to find the next optimal move for
// a player
#include <algorithm>
#include <cstdio>
using namespace std;
// A board coordinate: the row and the column of a single cell.
struct Move
{
    int row;
    int col;
};
// Marks placed on the board by the maximizing side and the minimizing side.
char player = 'x';
char opponent = 'o';
// Reports whether the 3x3 board still holds at least one empty cell
// (an empty cell is marked with '_'). Returns true when such a cell
// exists and false when every cell is occupied.
bool isMovesLeft(char board[3][3])
{
    // Walk the nine cells in row-major order using a single index.
    for (int cell = 0; cell < 9; ++cell)
    {
        if (board[cell / 3][cell % 3] == '_')
            return true;
    }
    return false;
}
// Static evaluation of a 3x3 board.
// Returns +10 when `player` owns a complete line, -10 when `opponent`
// does, and 0 when no row, column or diagonal is completed by either.
// Lines are examined in a fixed order (rows, then columns, then the
// two diagonals); the first completed line decides the result.
int evaluate(char b[3][3])
{
    // Each entry lists the three (row, col) cells that make up one line.
    static const int lines[8][3][2] = {
        {{0, 0}, {0, 1}, {0, 2}},   // rows
        {{1, 0}, {1, 1}, {1, 2}},
        {{2, 0}, {2, 1}, {2, 2}},
        {{0, 0}, {1, 0}, {2, 0}},   // columns
        {{0, 1}, {1, 1}, {2, 1}},
        {{0, 2}, {1, 2}, {2, 2}},
        {{0, 0}, {1, 1}, {2, 2}},   // main diagonal
        {{0, 2}, {1, 1}, {2, 0}}    // anti-diagonal
    };

    for (int k = 0; k < 8; ++k)
    {
        const char first  = b[lines[k][0][0]][lines[k][0][1]];
        const char second = b[lines[k][1][0]][lines[k][1][1]];
        const char third  = b[lines[k][2][0]][lines[k][2][1]];

        if (first != second || second != third)
            continue;

        // A uniform line only counts when it belongs to one of the two
        // sides; three empty cells also compare equal and are skipped.
        if (first == player)
            return +10;
        if (first == opponent)
            return -10;
    }

    // Nobody has completed a line.
    return 0;
}
// Minimax search over the 3x3 board.
//
// board - current position. Cells are tried in place and reset to '_'
//         after each trial, so the board is unchanged on return.
// depth - number of plies already played below the root of the search.
// isMax - true when it is `player`'s (the maximizer's) turn, false when
//         it is `opponent`'s (the minimizer's) turn.
//
// Returns the value of the position under best play by both sides:
// 10 - depth when the maximizer has won, -10 + depth when the minimizer
// has won, and 0 for a draw. Folding the depth into the terminal score
// makes a quicker win score higher than a slower one and makes a later
// loss score better than an immediate one.
int minimax(char board[3][3], int depth, bool isMax)
{
    const int score = evaluate(board);

    // Maximizer has won: prefer wins that need fewer moves.
    if (score == 10)
        return score - depth;

    // Minimizer has won: prefer (for the maximizer) losses that come later.
    if (score == -10)
        return score + depth;

    // No winner and no empty cell left: the game is a tie.
    if (isMovesLeft(board) == false)
        return 0;

    if (isMax)
    {
        // Maximizer's turn: keep the largest value over all empty cells.
        int best = -1000;
        for (int i = 0; i < 3; i++)
        {
            for (int j = 0; j < 3; j++)
            {
                if (board[i][j] == '_')
                {
                    board[i][j] = player;      // make the move
                    best = std::max(best,
                                    minimax(board, depth + 1, !isMax));
                    board[i][j] = '_';         // undo the move
                }
            }
        }
        return best;
    }
    else
    {
        // Minimizer's turn: keep the smallest value over all empty cells.
        int best = 1000;
        for (int i = 0; i < 3; i++)
        {
            for (int j = 0; j < 3; j++)
            {
                if (board[i][j] == '_')
                {
                    board[i][j] = opponent;    // make the move
                    best = std::min(best,
                                    minimax(board, depth + 1, !isMax));
                    board[i][j] = '_';         // undo the move
                }
            }
        }
        return best;
    }
}
// Picks the move for `player` on the given 3x3 board.
// Every empty cell is tried in turn (row-major order), scored with
// minimax() with the opponent to move next, and then restored to '_'.
// The first cell with the strictly highest score is returned; when the
// board has no empty cell the result is {-1, -1}. The winning score is
// also printed to standard output.
Move findBestMove(char board[3][3])
{
    Move bestMove;
    bestMove.row = -1;
    bestMove.col = -1;
    int bestVal = -1000;

    for (int cell = 0; cell < 9; ++cell)
    {
        const int r = cell / 3;
        const int c = cell % 3;

        // Only empty cells are candidate moves.
        if (board[r][c] != '_')
            continue;

        // Try the move, score the resulting position, then take it back.
        board[r][c] = player;
        const int moveVal = minimax(board, 0, false);
        board[r][c] = '_';

        // Strict comparison: on equal scores the earlier cell is kept.
        if (moveVal > bestVal)
        {
            bestVal = moveVal;
            bestMove.row = r;
            bestMove.col = c;
        }
    }

    printf("The value of the best Move is : %d\n\n", bestVal);
    return bestMove;
}
// Driver code
int main()
{
char board[3][3] =
{
{ 'x', 'o', 'x' },
{ 'o', 'o', 'x' },
{ '_', '_', '_' }
};
Move bestMove = findBestMove(board);
printf("The Optimal Move is :\n");
printf("ROW: %d COL: %d\n\n", bestMove.row,
bestMove.col );
return 0;
}
Java
// Java program to find the
// next optimal move for a player
class GFG
{
    // Coordinates of a single board cell.
    static class Move
    {
        int row, col;
    };

    // Marks placed on the board by the maximizing side and the
    // minimizing side.
    static char player = 'x', opponent = 'o';

    // Returns true when at least one cell of the 3x3 board is still
    // empty ('_'), and false when every cell is occupied.
    static Boolean isMovesLeft(char board[][])
    {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                if (board[i][j] == '_')
                    return true;
        return false;
    }

    // Static evaluation of a 3x3 board: +10 when `player` has completed
    // a row, column or diagonal, -10 when `opponent` has, 0 otherwise.
    static int evaluate(char b[][])
    {
        // Checking for Rows for X or O victory.
        for (int row = 0; row < 3; row++)
        {
            if (b[row][0] == b[row][1] &&
                b[row][1] == b[row][2])
            {
                if (b[row][0] == player)
                    return +10;
                else if (b[row][0] == opponent)
                    return -10;
            }
        }

        // Checking for Columns for X or O victory.
        for (int col = 0; col < 3; col++)
        {
            if (b[0][col] == b[1][col] &&
                b[1][col] == b[2][col])
            {
                if (b[0][col] == player)
                    return +10;
                else if (b[0][col] == opponent)
                    return -10;
            }
        }

        // Checking for Diagonals for X or O victory.
        if (b[0][0] == b[1][1] && b[1][1] == b[2][2])
        {
            if (b[0][0] == player)
                return +10;
            else if (b[0][0] == opponent)
                return -10;
        }

        if (b[0][2] == b[1][1] && b[1][1] == b[2][0])
        {
            if (b[0][2] == player)
                return +10;
            else if (b[0][2] == opponent)
                return -10;
        }

        // Neither side has completed a line.
        return 0;
    }

    // Minimax search over the 3x3 board.
    //
    // board - current position; cells are tried in place and reset to
    //         '_' after each trial, so the board is unchanged on return.
    // depth - number of plies already played below the root of the search.
    // isMax - true when it is the maximizer's (`player`) turn, false
    //         when it is the minimizer's (`opponent`) turn.
    //
    // Returns 10 - depth when the maximizer has won, -10 + depth when
    // the minimizer has won and 0 for a draw, assuming best play by both
    // sides. Folding the depth into the terminal score makes a quicker
    // win score higher than a slower one and a later loss score better
    // than an immediate one.
    static int minimax(char board[][],
                       int depth, Boolean isMax)
    {
        int score = evaluate(board);

        // Maximizer has won: prefer wins that need fewer moves.
        if (score == 10)
            return score - depth;

        // Minimizer has won: prefer (for the maximizer) later losses.
        if (score == -10)
            return score + depth;

        // No winner and no empty cell left: the game is a tie.
        if (isMovesLeft(board) == false)
            return 0;

        if (isMax)
        {
            // Maximizer's turn: keep the largest value.
            int best = -1000;

            for (int i = 0; i < 3; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    if (board[i][j] == '_')
                    {
                        // Make the move
                        board[i][j] = player;

                        best = Math.max(best, minimax(board,
                                        depth + 1, !isMax));

                        // Undo the move
                        board[i][j] = '_';
                    }
                }
            }
            return best;
        }
        else
        {
            // Minimizer's turn: keep the smallest value.
            int best = 1000;

            for (int i = 0; i < 3; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    if (board[i][j] == '_')
                    {
                        // Make the move
                        board[i][j] = opponent;

                        best = Math.min(best, minimax(board,
                                        depth + 1, !isMax));

                        // Undo the move
                        board[i][j] = '_';
                    }
                }
            }
            return best;
        }
    }

    // Picks the move for `player`. Every empty cell is tried in
    // row-major order, scored with minimax() with the opponent to move
    // next, and restored. The first cell with the strictly highest score
    // is returned; with no empty cell the result is (-1, -1). The best
    // score is also printed to standard output.
    static Move findBestMove(char board[][])
    {
        int bestVal = -1000;
        Move bestMove = new Move();
        bestMove.row = -1;
        bestMove.col = -1;

        for (int i = 0; i < 3; i++)
        {
            for (int j = 0; j < 3; j++)
            {
                if (board[i][j] == '_')
                {
                    // Make the move
                    board[i][j] = player;

                    // Score the position that results from this move.
                    int moveVal = minimax(board, 0, false);

                    // Undo the move
                    board[i][j] = '_';

                    // Strict comparison: on ties the earlier cell wins.
                    if (moveVal > bestVal)
                    {
                        bestMove.row = i;
                        bestMove.col = j;
                        bestVal = moveVal;
                    }
                }
            }
        }

        System.out.printf("The value of the best Move " +
                          "is : %d\n\n", bestVal);

        return bestMove;
    }

    // Driver code: searches a sample mid-game position and prints the
    // move chosen for 'x'.
    public static void main(String[] args)
    {
        char board[][] = {{ 'x', 'o', 'x' },
                          { 'o', 'o', 'x' },
                          { '_', '_', '_' }};

        Move bestMove = findBestMove(board);

        System.out.printf("The Optimal Move is :\n");
        System.out.printf("ROW: %d COL: %d\n\n",
                          bestMove.row, bestMove.col );
    }
}
// This code is contributed by PrinciRaj1992
Python3
# Python3 program to find the next optimal move for a player
# Marks placed on the board by the maximizing side and the minimizing side.
player = 'x'
opponent = 'o'
def isMovesLeft(board):
    """Return True if the 3x3 board still has an empty cell.

    An empty cell is marked with '_'. Returns False when every one of
    the nine cells is occupied.
    """
    for i in range(3):
        for j in range(3):
            if board[i][j] == '_':
                return True
    return False
def evaluate(b):
    """Statically evaluate a 3x3 board.

    Returns 10 when `player` has completed a row, column or diagonal,
    -10 when `opponent` has, and 0 when neither side has a full line.
    Rows are checked first, then columns, then the two diagonals.
    """
    # Checking for Rows for X or O victory.
    for row in range(3):
        if b[row][0] == b[row][1] and b[row][1] == b[row][2]:
            if b[row][0] == player:
                return 10
            elif b[row][0] == opponent:
                return -10

    # Checking for Columns for X or O victory.
    for col in range(3):
        if b[0][col] == b[1][col] and b[1][col] == b[2][col]:
            if b[0][col] == player:
                return 10
            elif b[0][col] == opponent:
                return -10

    # Checking for Diagonals for X or O victory.
    if b[0][0] == b[1][1] and b[1][1] == b[2][2]:
        if b[0][0] == player:
            return 10
        elif b[0][0] == opponent:
            return -10

    if b[0][2] == b[1][1] and b[1][1] == b[2][0]:
        if b[0][2] == player:
            return 10
        elif b[0][2] == opponent:
            return -10

    # Neither side has completed a line.
    return 0
def minimax(board, depth, isMax):
    """Minimax search over the 3x3 board.

    board: current position; cells are tried in place and reset to '_'
        after each trial, so the board is unchanged on return.
    depth: number of plies already played below the root of the search.
    isMax: True when it is the maximizer's (`player`) turn, False when
        it is the minimizer's (`opponent`) turn.

    Returns 10 - depth when the maximizer has won, -10 + depth when the
    minimizer has won and 0 for a draw, assuming best play by both
    sides. Folding the depth into the terminal score makes a quicker
    win score higher than a slower one and a later loss score better
    than an immediate one.
    """
    score = evaluate(board)

    # Maximizer has won: prefer wins that need fewer moves.
    if score == 10:
        return score - depth

    # Minimizer has won: prefer (for the maximizer) later losses.
    if score == -10:
        return score + depth

    # No winner and no empty cell left: the game is a tie.
    if not isMovesLeft(board):
        return 0

    if isMax:
        # Maximizer's turn: keep the largest value.
        best = -1000
        for i in range(3):
            for j in range(3):
                if board[i][j] == '_':
                    # Make the move
                    board[i][j] = player
                    best = max(best, minimax(board, depth + 1, not isMax))
                    # Undo the move
                    board[i][j] = '_'
        return best
    else:
        # Minimizer's turn: keep the smallest value.
        best = 1000
        for i in range(3):
            for j in range(3):
                if board[i][j] == '_':
                    # Make the move
                    board[i][j] = opponent
                    best = min(best, minimax(board, depth + 1, not isMax))
                    # Undo the move
                    board[i][j] = '_'
        return best
def findBestMove(board):
    """Pick the move for `player` on the given 3x3 board.

    Every empty cell is tried in row-major order, scored with minimax()
    with the opponent to move next, and then restored to '_'. Returns
    the (row, col) tuple of the first cell with the strictly highest
    score, or (-1, -1) when the board has no empty cell. The best score
    is also printed to standard output.
    """
    bestVal = -1000
    bestMove = (-1, -1)

    for i in range(3):
        for j in range(3):
            if board[i][j] == '_':
                # Make the move
                board[i][j] = player

                # Score the position that results from this move.
                moveVal = minimax(board, 0, False)

                # Undo the move
                board[i][j] = '_'

                # Strict comparison: on ties the earlier cell is kept.
                if moveVal > bestVal:
                    bestMove = (i, j)
                    bestVal = moveVal

    print("The value of the best Move is :", bestVal)
    print()
    return bestMove
# Driver code: search a sample mid-game position and report the move
# chosen for 'x'.
board = [
    list("xox"),
    list("oox"),
    list("___"),
]
bestMove = findBestMove(board)

print("The Optimal Move is :")
print("ROW:", bestMove[0], " COL:", bestMove[1])
# This code is contributed by divyesh072019
C#
// C# program to find the
// next optimal move for a player
using System;
using System.Collections.Generic;
class GFG
{
    // Coordinates of a single board cell.
    class Move
    {
        public int row, col;
    };

    // Marks placed on the board by the maximizing side and the
    // minimizing side.
    static char player = 'x', opponent = 'o';

    // Returns true when at least one cell of the 3x3 board is still
    // empty ('_'), and false when every cell is occupied.
    static Boolean isMovesLeft(char [,]board)
    {
        for (int i = 0; i < 3; i++)
            for (int j = 0; j < 3; j++)
                if (board[i, j] == '_')
                    return true;
        return false;
    }

    // Static evaluation of a 3x3 board: +10 when `player` has completed
    // a row, column or diagonal, -10 when `opponent` has, 0 otherwise.
    static int evaluate(char [,]b)
    {
        // Checking for Rows for X or O victory.
        for (int row = 0; row < 3; row++)
        {
            if (b[row, 0] == b[row, 1] &&
                b[row, 1] == b[row, 2])
            {
                if (b[row, 0] == player)
                    return +10;
                else if (b[row, 0] == opponent)
                    return -10;
            }
        }

        // Checking for Columns for X or O victory.
        for (int col = 0; col < 3; col++)
        {
            if (b[0, col] == b[1, col] &&
                b[1, col] == b[2, col])
            {
                if (b[0, col] == player)
                    return +10;
                else if (b[0, col] == opponent)
                    return -10;
            }
        }

        // Checking for Diagonals for X or O victory.
        if (b[0, 0] == b[1, 1] && b[1, 1] == b[2, 2])
        {
            if (b[0, 0] == player)
                return +10;
            else if (b[0, 0] == opponent)
                return -10;
        }

        if (b[0, 2] == b[1, 1] && b[1, 1] == b[2, 0])
        {
            if (b[0, 2] == player)
                return +10;
            else if (b[0, 2] == opponent)
                return -10;
        }

        // Neither side has completed a line.
        return 0;
    }

    // Minimax search over the 3x3 board.
    //
    // board - current position; cells are tried in place and reset to
    //         '_' after each trial, so the board is unchanged on return.
    // depth - number of plies already played below the root of the search.
    // isMax - true when it is the maximizer's (`player`) turn, false
    //         when it is the minimizer's (`opponent`) turn.
    //
    // Returns 10 - depth when the maximizer has won, -10 + depth when
    // the minimizer has won and 0 for a draw, assuming best play by both
    // sides. Folding the depth into the terminal score makes a quicker
    // win score higher than a slower one and a later loss score better
    // than an immediate one.
    static int minimax(char [,]board,
                       int depth, Boolean isMax)
    {
        int score = evaluate(board);

        // Maximizer has won: prefer wins that need fewer moves.
        if (score == 10)
            return score - depth;

        // Minimizer has won: prefer (for the maximizer) later losses.
        if (score == -10)
            return score + depth;

        // No winner and no empty cell left: the game is a tie.
        if (isMovesLeft(board) == false)
            return 0;

        if (isMax)
        {
            // Maximizer's turn: keep the largest value.
            int best = -1000;

            for (int i = 0; i < 3; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    if (board[i, j] == '_')
                    {
                        // Make the move
                        board[i, j] = player;

                        best = Math.Max(best, minimax(board,
                                        depth + 1, !isMax));

                        // Undo the move
                        board[i, j] = '_';
                    }
                }
            }
            return best;
        }
        else
        {
            // Minimizer's turn: keep the smallest value.
            int best = 1000;

            for (int i = 0; i < 3; i++)
            {
                for (int j = 0; j < 3; j++)
                {
                    if (board[i, j] == '_')
                    {
                        // Make the move
                        board[i, j] = opponent;

                        best = Math.Min(best, minimax(board,
                                        depth + 1, !isMax));

                        // Undo the move
                        board[i, j] = '_';
                    }
                }
            }
            return best;
        }
    }

    // Picks the move for `player`. Every empty cell is tried in
    // row-major order, scored with minimax() with the opponent to move
    // next, and restored. The first cell with the strictly highest score
    // is returned; with no empty cell the result is (-1, -1). The best
    // score is also written to the console.
    static Move findBestMove(char [,]board)
    {
        int bestVal = -1000;
        Move bestMove = new Move();
        bestMove.row = -1;
        bestMove.col = -1;

        for (int i = 0; i < 3; i++)
        {
            for (int j = 0; j < 3; j++)
            {
                if (board[i, j] == '_')
                {
                    // Make the move
                    board[i, j] = player;

                    // Score the position that results from this move.
                    int moveVal = minimax(board, 0, false);

                    // Undo the move
                    board[i, j] = '_';

                    // Strict comparison: on ties the earlier cell wins.
                    if (moveVal > bestVal)
                    {
                        bestMove.row = i;
                        bestMove.col = j;
                        bestVal = moveVal;
                    }
                }
            }
        }

        Console.Write("The value of the best Move " +
                      "is : {0}\n\n", bestVal);

        return bestMove;
    }

    // Driver code: searches a sample mid-game position and prints the
    // move chosen for 'x'.
    public static void Main(String[] args)
    {
        char [,]board = {{ 'x', 'o', 'x' },
                         { 'o', 'o', 'x' },
                         { '_', '_', '_' }};

        Move bestMove = findBestMove(board);

        Console.Write("The Optimal Move is :\n");
        Console.Write("ROW: {0} COL: {1}\n\n",
                      bestMove.row, bestMove.col );
    }
}
// This code is contributed by 29AjayKumar
Javascript
输出 :
The value of the best Move is : 10
The Optimal Move is :
ROW: 2 COL: 2
解释 :
此图像描绘了游戏从根棋盘状态出发可以走的所有可能路径。它通常被称为博弈树(game tree)。
上例中的 3 种可能情况是:
- 左移:如果 X 下在 [2,0],那么 O 将下在 [2,1] 并赢得比赛。这一步的价值是 -10
- 中间移动:如果 X 下在 [2,1],那么 O 会下在 [2,2] 来逼成平局。这一步的价值为 0
- 右移:如果 X 下在 [2,2],那么他将赢得比赛。这一步的价值是 +10
请记住,即使 X 走中间一步后仍有获胜的可能,O 也绝不会让这种情况发生,而是会选择逼成平局。
因此,X 的最佳选择是玩 [2,2],这将保证他的胜利。
我们鼓励读者尝试提供各种输入,并理解 AI 为何选择某一步棋。Minimax 可能会让程序员感到困惑,因为它会提前考虑若干步,有时很难调试。请记住,minimax 算法的这种实现可以应用于任何双人棋盘游戏,只需对棋盘结构和遍历走法的方式做一些小的更改。对于像国际象棋这样的复杂游戏,minimax 不可能计算所有可能的游戏状态,因此我们只搜索到一定深度,并使用评估函数来计算棋盘的值。
请继续关注下周的文章,我们将在其中讨论Alpha-Beta 剪枝,它可以显着改善 minimax 遍历博弈树所花费的时间。
如果您希望与专家一起参加现场课程,请参阅DSA 现场工作专业课程和学生竞争性编程现场课程。