先决条件:博弈论中的极小极大算法,博弈论中的评估函数
让我们结合我们所学到迄今约极小和评价函数编写正确的井字棋AI(A rtificial我ntelligence)播放一场完美的比赛。该AI将考虑所有可能的情况并采取最佳行动。
寻找最佳动作:
我们将引入一个名为findBestMove()的新函数。此函数使用minimax()评估所有可用的移动,然后返回最大化器可以做出的最佳移动。伪代码如下:
function findBestMove(board):
bestMove = NULL
for each move in board :
if current move is better than bestMove
bestMove = current move
return bestMove
最小最大
为了检查当前移动是否比最佳移动更好,我们借助minimax()函数,该函数将考虑游戏进行的所有可能方式并返回该移动的最佳值,并假设对手也进行了最佳玩法
minimax()函数用于最大化器和最小化器的代码类似于findBestMove() ,唯一的区别是,它不返回移动,而是返回一个值。这是伪代码:
function minimax(board, depth, isMaximizingPlayer):
if current board state is a terminal state :
return value of the board
if isMaximizingPlayer :
bestVal = -INFINITY
for each move in board :
value = minimax(board, depth+1, false)
bestVal = max( bestVal, value)
return bestVal
else :
bestVal = +INFINITY
for each move in board :
value = minimax(board, depth+1, true)
bestVal = min( bestVal, value)
return bestVal
检查GameOver状态:
为了检查游戏是否结束并确保没有剩余的动作,我们使用isMovesLeft()函数。这是一个简单明了的函数,它检查移动是否可用,并分别返回true或false。伪代码如下:
function isMovesLeft(board):
for each cell in board:
if current cell is empty:
return true
return false
使我们的AI更智能:
最后一步是使我们的AI更加智能。即使随后的AI表现出色,它也可能会选择采取行动,这将导致胜利减慢或损失加快。让我们举个例子并对其进行解释。
假定X有2种可能的方式可以从给定板状态赢得比赛。
- A手:X可以赢2步
- 动作B :X可以赢4步
我们的评估函数将为A和B都返回+10的值。即使A动作因为可以确保更快的胜利而更好,但我们的AI有时还是会选择B。为了克服这个问题,我们从评估分数中减去深度值。这意味着在获胜的情况下,它将选择需要最少移动次数的胜利,在输失的情况下,它将尝试延长游戏时间并玩尽可能多的移动。因此,新的评估值将是
- 移动A的值为+10 – 2 = 8
- 移动B的值为+10 – 4 = 6
现在,由于移动A的得分高于移动B的分数,因此我们的AI将选择移动A而不是移动B。必须对最小化器应用相同的内容。代替减去深度,我们添加深度值,因为最小化器总是尝试获得该值,并尽可能将其设为负值。我们可以减去评估函数内部或外部的深度。任何地方都很好。我选择在函数之外执行此操作。伪代码实现如下。
if maximizer has won:
return WIN_SCORE – depth
else if minimizer has won:
return LOOSE_SCORE + depth
执行 :
以下是上述想法的实现。
C++
// C++ program to find the next optimal move for
// a player
#include
using namespace std;
struct Move
{
int row, col;
};
char player = 'x', opponent = 'o';
// This function returns true if there are moves
// remaining on the board. It returns false if
// there are no moves left to play.
bool isMovesLeft(char board[3][3])
{
for (int i = 0; i<3; i++)
for (int j = 0; j<3; j++)
if (board[i][j]=='_')
return true;
return false;
}
// This is the evaluation function as discussed
// in the previous article ( http://goo.gl/sJgv68 )
int evaluate(char b[3][3])
{
// Checking for Rows for X or O victory.
for (int row = 0; row<3; row++)
{
if (b[row][0]==b[row][1] &&
b[row][1]==b[row][2])
{
if (b[row][0]==player)
return +10;
else if (b[row][0]==opponent)
return -10;
}
}
// Checking for Columns for X or O victory.
for (int col = 0; col<3; col++)
{
if (b[0][col]==b[1][col] &&
b[1][col]==b[2][col])
{
if (b[0][col]==player)
return +10;
else if (b[0][col]==opponent)
return -10;
}
}
// Checking for Diagonals for X or O victory.
if (b[0][0]==b[1][1] && b[1][1]==b[2][2])
{
if (b[0][0]==player)
return +10;
else if (b[0][0]==opponent)
return -10;
}
if (b[0][2]==b[1][1] && b[1][1]==b[2][0])
{
if (b[0][2]==player)
return +10;
else if (b[0][2]==opponent)
return -10;
}
// Else if none of them have won then return 0
return 0;
}
// This is the minimax function. It considers all
// the possible ways the game can go and returns
// the value of the board
int minimax(char board[3][3], int depth, bool isMax)
{
int score = evaluate(board);
// If Maximizer has won the game return his/her
// evaluated score
if (score == 10)
return score;
// If Minimizer has won the game return his/her
// evaluated score
if (score == -10)
return score;
// If there are no more moves and no winner then
// it is a tie
if (isMovesLeft(board)==false)
return 0;
// If this maximizer's move
if (isMax)
{
int best = -1000;
// Traverse all cells
for (int i = 0; i<3; i++)
{
for (int j = 0; j<3; j++)
{
// Check if cell is empty
if (board[i][j]=='_')
{
// Make the move
board[i][j] = player;
// Call minimax recursively and choose
// the maximum value
best = max( best,
minimax(board, depth+1, !isMax) );
// Undo the move
board[i][j] = '_';
}
}
}
return best;
}
// If this minimizer's move
else
{
int best = 1000;
// Traverse all cells
for (int i = 0; i<3; i++)
{
for (int j = 0; j<3; j++)
{
// Check if cell is empty
if (board[i][j]=='_')
{
// Make the move
board[i][j] = opponent;
// Call minimax recursively and choose
// the minimum value
best = min(best,
minimax(board, depth+1, !isMax));
// Undo the move
board[i][j] = '_';
}
}
}
return best;
}
}
// This will return the best possible move for the player
Move findBestMove(char board[3][3])
{
int bestVal = -1000;
Move bestMove;
bestMove.row = -1;
bestMove.col = -1;
// Traverse all cells, evaluate minimax function for
// all empty cells. And return the cell with optimal
// value.
for (int i = 0; i<3; i++)
{
for (int j = 0; j<3; j++)
{
// Check if cell is empty
if (board[i][j]=='_')
{
// Make the move
board[i][j] = player;
// compute evaluation function for this
// move.
int moveVal = minimax(board, 0, false);
// Undo the move
board[i][j] = '_';
// If the value of the current move is
// more than the best value, then update
// best/
if (moveVal > bestVal)
{
bestMove.row = i;
bestMove.col = j;
bestVal = moveVal;
}
}
}
}
printf("The value of the best Move is : %d\n\n",
bestVal);
return bestMove;
}
// Driver code
int main()
{
char board[3][3] =
{
{ 'x', 'o', 'x' },
{ 'o', 'o', 'x' },
{ '_', '_', '_' }
};
Move bestMove = findBestMove(board);
printf("The Optimal Move is :\n");
printf("ROW: %d COL: %d\n\n", bestMove.row,
bestMove.col );
return 0;
}
Java
// Java program to find the
// next optimal move for a player
class GFG
{
static class Move
{
int row, col;
};
static char player = 'x', opponent = 'o';
// This function returns true if there are moves
// remaining on the board. It returns false if
// there are no moves left to play.
static Boolean isMovesLeft(char board[][])
{
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
if (board[i][j] == '_')
return true;
return false;
}
// This is the evaluation function as discussed
// in the previous article ( http://goo.gl/sJgv68 )
static int evaluate(char b[][])
{
// Checking for Rows for X or O victory.
for (int row = 0; row < 3; row++)
{
if (b[row][0] == b[row][1] &&
b[row][1] == b[row][2])
{
if (b[row][0] == player)
return +10;
else if (b[row][0] == opponent)
return -10;
}
}
// Checking for Columns for X or O victory.
for (int col = 0; col < 3; col++)
{
if (b[0][col] == b[1][col] &&
b[1][col] == b[2][col])
{
if (b[0][col] == player)
return +10;
else if (b[0][col] == opponent)
return -10;
}
}
// Checking for Diagonals for X or O victory.
if (b[0][0] == b[1][1] && b[1][1] == b[2][2])
{
if (b[0][0] == player)
return +10;
else if (b[0][0] == opponent)
return -10;
}
if (b[0][2] == b[1][1] && b[1][1] == b[2][0])
{
if (b[0][2] == player)
return +10;
else if (b[0][2] == opponent)
return -10;
}
// Else if none of them have won then return 0
return 0;
}
// This is the minimax function. It considers all
// the possible ways the game can go and returns
// the value of the board
static int minimax(char board[][],
int depth, Boolean isMax)
{
int score = evaluate(board);
// If Maximizer has won the game
// return his/her evaluated score
if (score == 10)
return score;
// If Minimizer has won the game
// return his/her evaluated score
if (score == -10)
return score;
// If there are no more moves and
// no winner then it is a tie
if (isMovesLeft(board) == false)
return 0;
// If this maximizer's move
if (isMax)
{
int best = -1000;
// Traverse all cells
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
// Check if cell is empty
if (board[i][j]=='_')
{
// Make the move
board[i][j] = player;
// Call minimax recursively and choose
// the maximum value
best = Math.max(best, minimax(board,
depth + 1, !isMax));
// Undo the move
board[i][j] = '_';
}
}
}
return best;
}
// If this minimizer's move
else
{
int best = 1000;
// Traverse all cells
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
// Check if cell is empty
if (board[i][j] == '_')
{
// Make the move
board[i][j] = opponent;
// Call minimax recursively and choose
// the minimum value
best = Math.min(best, minimax(board,
depth + 1, !isMax));
// Undo the move
board[i][j] = '_';
}
}
}
return best;
}
}
// This will return the best possible
// move for the player
static Move findBestMove(char board[][])
{
int bestVal = -1000;
Move bestMove = new Move();
bestMove.row = -1;
bestMove.col = -1;
// Traverse all cells, evaluate minimax function
// for all empty cells. And return the cell
// with optimal value.
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
// Check if cell is empty
if (board[i][j] == '_')
{
// Make the move
board[i][j] = player;
// compute evaluation function for this
// move.
int moveVal = minimax(board, 0, false);
// Undo the move
board[i][j] = '_';
// If the value of the current move is
// more than the best value, then update
// best/
if (moveVal > bestVal)
{
bestMove.row = i;
bestMove.col = j;
bestVal = moveVal;
}
}
}
}
System.out.printf("The value of the best Move " +
"is : %d\n\n", bestVal);
return bestMove;
}
// Driver code
public static void main(String[] args)
{
char board[][] = {{ 'x', 'o', 'x' },
{ 'o', 'o', 'x' },
{ '_', '_', '_' }};
Move bestMove = findBestMove(board);
System.out.printf("The Optimal Move is :\n");
System.out.printf("ROW: %d COL: %d\n\n",
bestMove.row, bestMove.col );
}
}
// This code is contributed by PrinciRaj1992
Python3
# Python3 program to find the next optimal move for a player
player, opponent = 'x', 'o'
# This function returns true if there are moves
# remaining on the board. It returns false if
# there are no moves left to play.
def isMovesLeft(board) :
for i in range(3) :
for j in range(3) :
if (board[i][j] == '_') :
return True
return False
# This is the evaluation function as discussed
# in the previous article ( http://goo.gl/sJgv68 )
def evaluate(b) :
# Checking for Rows for X or O victory.
for row in range(3) :
if (b[row][0] == b[row][1] and b[row][1] == b[row][2]) :
if (b[row][0] == player) :
return 10
elif (b[row][0] == opponent) :
return -10
# Checking for Columns for X or O victory.
for col in range(3) :
if (b[0][col] == b[1][col] and b[1][col] == b[2][col]) :
if (b[0][col] == player) :
return 10
elif (b[0][col] == opponent) :
return -10
# Checking for Diagonals for X or O victory.
if (b[0][0] == b[1][1] and b[1][1] == b[2][2]) :
if (b[0][0] == player) :
return 10
elif (b[0][0] == opponent) :
return -10
if (b[0][2] == b[1][1] and b[1][1] == b[2][0]) :
if (b[0][2] == player) :
return 10
elif (b[0][2] == opponent) :
return -10
# Else if none of them have won then return 0
return 0
# This is the minimax function. It considers all
# the possible ways the game can go and returns
# the value of the board
def minimax(board, depth, isMax) :
score = evaluate(board)
# If Maximizer has won the game return his/her
# evaluated score
if (score == 10) :
return score
# If Minimizer has won the game return his/her
# evaluated score
if (score == -10) :
return score
# If there are no more moves and no winner then
# it is a tie
if (isMovesLeft(board) == False) :
return 0
# If this maximizer's move
if (isMax) :
best = -1000
# Traverse all cells
for i in range(3) :
for j in range(3) :
# Check if cell is empty
if (board[i][j]=='_') :
# Make the move
board[i][j] = player
# Call minimax recursively and choose
# the maximum value
best = max( best, minimax(board,
depth + 1,
not isMax) )
# Undo the move
board[i][j] = '_'
return best
# If this minimizer's move
else :
best = 1000
# Traverse all cells
for i in range(3) :
for j in range(3) :
# Check if cell is empty
if (board[i][j] == '_') :
# Make the move
board[i][j] = opponent
# Call minimax recursively and choose
# the minimum value
best = min(best, minimax(board, depth + 1, not isMax))
# Undo the move
board[i][j] = '_'
return best
# This will return the best possible move for the player
def findBestMove(board) :
bestVal = -1000
bestMove = (-1, -1)
# Traverse all cells, evaluate minimax function for
# all empty cells. And return the cell with optimal
# value.
for i in range(3) :
for j in range(3) :
# Check if cell is empty
if (board[i][j] == '_') :
# Make the move
board[i][j] = player
# compute evaluation function for this
# move.
moveVal = minimax(board, 0, False)
# Undo the move
board[i][j] = '_'
# If the value of the current move is
# more than the best value, then update
# best/
if (moveVal > bestVal) :
bestMove = (i, j)
bestVal = moveVal
print("The value of the best Move is :", bestVal)
print()
return bestMove
# Driver code
board = [
[ 'x', 'o', 'x' ],
[ 'o', 'o', 'x' ],
[ '_', '_', '_' ]
]
bestMove = findBestMove(board)
print("The Optimal Move is :")
print("ROW:", bestMove[0], " COL:", bestMove[1])
# This code is contributed by divyesh072019
C#
// C# program to find the
// next optimal move for a player
using System;
using System.Collections.Generic;
class GFG
{
class Move
{
public int row, col;
};
static char player = 'x', opponent = 'o';
// This function returns true if there are moves
// remaining on the board. It returns false if
// there are no moves left to play.
static Boolean isMovesLeft(char [,]board)
{
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
if (board[i, j] == '_')
return true;
return false;
}
// This is the evaluation function as discussed
// in the previous article ( http://goo.gl/sJgv68 )
static int evaluate(char [,]b)
{
// Checking for Rows for X or O victory.
for (int row = 0; row < 3; row++)
{
if (b[row, 0] == b[row, 1] &&
b[row, 1] == b[row, 2])
{
if (b[row, 0] == player)
return +10;
else if (b[row, 0] == opponent)
return -10;
}
}
// Checking for Columns for X or O victory.
for (int col = 0; col < 3; col++)
{
if (b[0, col] == b[1, col] &&
b[1, col] == b[2, col])
{
if (b[0, col] == player)
return +10;
else if (b[0, col] == opponent)
return -10;
}
}
// Checking for Diagonals for X or O victory.
if (b[0, 0] == b[1, 1] && b[1, 1] == b[2, 2])
{
if (b[0, 0] == player)
return +10;
else if (b[0, 0] == opponent)
return -10;
}
if (b[0, 2] == b[1, 1] && b[1, 1] == b[2, 0])
{
if (b[0, 2] == player)
return +10;
else if (b[0, 2] == opponent)
return -10;
}
// Else if none of them have won then return 0
return 0;
}
// This is the minimax function. It considers all
// the possible ways the game can go and returns
// the value of the board
static int minimax(char [,]board,
int depth, Boolean isMax)
{
int score = evaluate(board);
// If Maximizer has won the game
// return his/her evaluated score
if (score == 10)
return score;
// If Minimizer has won the game
// return his/her evaluated score
if (score == -10)
return score;
// If there are no more moves and
// no winner then it is a tie
if (isMovesLeft(board) == false)
return 0;
// If this maximizer's move
if (isMax)
{
int best = -1000;
// Traverse all cells
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
// Check if cell is empty
if (board[i, j] == '_')
{
// Make the move
board[i, j] = player;
// Call minimax recursively and choose
// the maximum value
best = Math.Max(best, minimax(board,
depth + 1, !isMax));
// Undo the move
board[i, j] = '_';
}
}
}
return best;
}
// If this minimizer's move
else
{
int best = 1000;
// Traverse all cells
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
// Check if cell is empty
if (board[i, j] == '_')
{
// Make the move
board[i, j] = opponent;
// Call minimax recursively and choose
// the minimum value
best = Math.Min(best, minimax(board,
depth + 1, !isMax));
// Undo the move
board[i, j] = '_';
}
}
}
return best;
}
}
// This will return the best possible
// move for the player
static Move findBestMove(char [,]board)
{
int bestVal = -1000;
Move bestMove = new Move();
bestMove.row = -1;
bestMove.col = -1;
// Traverse all cells, evaluate minimax function
// for all empty cells. And return the cell
// with optimal value.
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
// Check if cell is empty
if (board[i, j] == '_')
{
// Make the move
board[i, j] = player;
// compute evaluation function for this
// move.
int moveVal = minimax(board, 0, false);
// Undo the move
board[i, j] = '_';
// If the value of the current move is
// more than the best value, then update
// best/
if (moveVal > bestVal)
{
bestMove.row = i;
bestMove.col = j;
bestVal = moveVal;
}
}
}
}
Console.Write("The value of the best Move " +
"is : {0}\n\n", bestVal);
return bestMove;
}
// Driver code
public static void Main(String[] args)
{
char [,]board = {{ 'x', 'o', 'x' },
{ 'o', 'o', 'x' },
{ '_', '_', '_' }};
Move bestMove = findBestMove(board);
Console.Write("The Optimal Move is :\n");
Console.Write("ROW: {0} COL: {1}\n\n",
bestMove.row, bestMove.col );
}
}
// This code is contributed by 29AjayKumar
输出 :
The value of the best Move is : 10
The Optimal Move is :
ROW: 2 COL: 2
解释 :
此图描绘了游戏从根板状态可以采取的所有可能路径。它通常被称为游戏树。
上例中的3种可能的情况是:
- 向左移动:如果X播放[2,0]。然后O将打[2,1]并赢得比赛。此举的价值是-10
- 中间移动:如果X播放[2,1]。然后,O将玩[2,2],该游戏将抽奖。此举的值是0
- 右移:如果X播放[2,2]。然后他将赢得比赛。此举的值是+10;