📜  博弈论中的最大期望算法

📅  最后修改于: 2021-09-24 05:00:55             🧑  作者: Mango

Expectimax 搜索算法是一种用于最大化期望效用的博弈论算法,它是 Minimax 算法的一种变体。Minimax 假设对手(最小化者)总是以最优方式行动,而 Expectimax 不做这一假设。因此,当环境中的对手代理并非最优,或其行动带有随机性时,Expectimax 很适合用来建模。
期望极大值与极小极大值
考虑下面的极小极大树:

我们知道,如果对手代理(最小化器)以最优方式行动,那么向左走是合理的。但如果最小化器有可能出错(即没有以最优方式行动)呢?这时向右走可能更有吸引力,也可能带来更好的结果。
在下面的 Expectimax 树中,我们用机会节点替换了最小化节点。

机会节点取所有可用效用的平均值,为我们提供“期望效用”。因此,左右子树的期望效用分别是 (10+10)/2=10 和 (100+9)/2=54.5。最大化器节点选择右子树来最大化期望效用。
Expectimax 相对于 Minimax 的优势:

  • Expectimax 算法有助于利用非最优对手。
  • 与 Minimax 不同,Expectimax“可以冒险”并最终处于具有更高效用的状态,因为对手是随机的(不是最佳的)。

缺点:

  • Expectimax 不是最优的。它可能会导致代理输掉游戏(最终处于效用较低的状态)。
  • Expectimax 需要探索完整的搜索树。无法进行任何形式的剪枝,因为单个尚未探索的效用值就可能大幅改变期望最大值。因此它可能很慢。
  • 它对效用值的单调变换很敏感。
    对于 Minimax,如果有两个状态 $S_1$ 和 $S_2$,且 $S_1$ 优于 $S_2$,那么只要 $f(S_1) > f(S_2)$,评估函数值 $f(S_1)$ 和 $f(S_2)$ 的具体大小并不重要。
    对于expectimax,评估函数值的大小很重要。

算法: Expectimax 可以使用递归算法实现如下,

  1. 如果当前调用是最大化节点,则返回节点后继节点的最大状态值。
  2. 如果当前调用是一个机会节点,则返回节点后继节点的状态值的平均值(假设所有节点的概率相等)。如果不同的节点有不同的概率,那么期望效用由 $\sum_i x_i p_i$ 给出,其中 $x_i$ 是第 $i$ 个后继节点的状态值,$p_i$ 是其概率。
  3. 我们递归地调用该函数,直到到达终端节点(没有后继的状态)。然后返回该状态的效用值。

实现:

C++
// C++ program to illustrate
// Expectimax Algorithm
 
#include <algorithm>
#include <iostream>
using namespace std;
 
// Node of the binary game tree searched by expectimax.
// `value` is the utility returned when the node is a leaf; `left` and
// `right` point to the two successor states and are null when absent.
// The default member initializers make every freshly constructed Node a
// leaf with utility 0 instead of leaving its fields indeterminate.
struct Node {
    int value = 0;
    Node* left = nullptr;
    Node* right = nullptr;
};
 
// Heap-allocates a childless Node holding `v` and returns a pointer to it.
// Nothing in this function releases the allocation; the returned raw
// pointer is the only handle to it.
Node* newNode(int v)
{
    Node* const created = new Node();
    created->left = nullptr;
    created->right = nullptr;
    created->value = v;
    return created;
}
 
// Getting expectimax
float expectimax(Node* node, bool is_max)
{
    // Condition for Terminal node
    if (node->left == NULL
        && node->right == NULL) {
        return node->value;
    }
 
    // Maximizer node. Chooses the max from the
    // left and right sub-trees
    if (is_max) {
        return max(
            expectimax(
                node->left, false),
            expectimax(node->right, false));
    }
 
    // Chance node. Returns the average of
    // the left and right sub-trees
    else {
        return (
                   expectimax(node->left, true)
                   + expectimax(node->right, true))
               / 2.0;
    }
}
 
// Driver code
int main()
{
    // Non leaf nodes.
    // If search is limited
    // to a given depth,
    // their values are
    // taken as heuristic value.
    // But because the entire tree
    // is searched their
    // values don't matter
    Node* root = newNode(0);
    root->left = newNode(0);
    root->right = newNode(0);
 
    // Assigning values to Leaf nodes
    root->left->left = newNode(10);
    root->left->right = newNode(10);
    root->right->left = newNode(9);
    root->right->right = newNode(100);
 
    float res = expectimax(root, true);
    cout << "Expectimax value is "
<< res << endl;
    return 0;
}


Java
// Java program to illustrate
// Expectimax Algorithm
class GFG {

    /**
     * Node of the binary game tree. {@code value} is the utility returned
     * when the node is a leaf; {@code left} and {@code right} are the
     * successor states and are {@code null} when absent.
     */
    static class Node {
        int value;
        Node left, right;
    }

    /** Creates a childless node holding {@code v}. */
    static Node newNode(int v) {
        Node temp = new Node();
        temp.value = v;
        temp.left = null;
        temp.right = null;
        return temp;
    }

    /**
     * Computes the expectimax value of the tree rooted at {@code node}.
     *
     * <p>Levels alternate between maximizer and chance nodes; the flag is
     * flipped on every recursive call. A leaf returns its stored value, a
     * maximizer returns the larger child result, and a chance node returns
     * the mean of both child results (successors equally likely). A node
     * with exactly one child returns that child's result instead of
     * dereferencing the missing one. Values stored in non-leaf nodes are
     * never read.
     *
     * @param node   root of the (sub)tree; must not be {@code null}
     * @param is_max {@code true} if {@code node} is a maximizer node,
     *               {@code false} if it is a chance node
     * @return the expectimax value of the subtree
     */
    static float expectimax(Node node, boolean is_max) {
        Node lhs = node.left;
        Node rhs = node.right;

        // Terminal node: its stored utility is the answer.
        if (lhs == null && rhs == null) {
            return node.value;
        }

        // Only one successor exists; max and average of a single value
        // are both that value.
        if (lhs == null || rhs == null) {
            return expectimax(lhs != null ? lhs : rhs, !is_max);
        }

        float leftValue = expectimax(lhs, !is_max);
        float rightValue = expectimax(rhs, !is_max);

        if (is_max) {
            return Math.max(leftValue, rightValue);
        }
        return (float) ((leftValue + rightValue) / 2.0);
    }

    // Driver code
    public static void main(String[] args) {
        // Interior nodes. Their stored values would only serve as
        // heuristic estimates in a depth-limited search; because the
        // whole tree is searched here they are never read.
        Node root = newNode(0);
        root.left = newNode(0);
        root.right = newNode(0);

        // Leaf utilities.
        root.left.left = newNode(10);
        root.left.right = newNode(10);
        root.right.left = newNode(9);
        root.right.right = newNode(100);

        float res = expectimax(root, true);
        System.out.print("Expectimax value is " + res + "\n");
    }
}
 
// This code is contributed by PrinciRaj1992


Python3
# Python3 program to illustrate
# Expectimax Algorithm
  
# Binary game-tree node used by the expectimax search
class Node:
    """Tree node carrying a utility ``value`` and two optional successors."""

    def __init__(self, value):
        # Every node starts out childless; callers attach successors later.
        self.value = value
        self.left = self.right = None
     
# Factory for a childless node
def newNode(v):
    """Return a fresh ``Node`` whose value is ``v``."""
    return Node(v)
 
# Getting expectimax
def expectimax(node, is_max):
    """Return the expectimax value of the tree rooted at ``node``.

    Levels alternate between maximizer and chance nodes; ``is_max`` says
    which kind ``node`` is and is flipped on every recursive call.

    * Leaf (no children): returns ``node.value``.
    * Maximizer: returns the larger of the two child results.
    * Chance: returns the mean of the two child results (successors are
      treated as equally likely).
    * Exactly one child: returns that child's result for either node
      kind, so the missing child is never accessed.

    Values stored in non-leaf nodes are never read. ``node`` itself must
    not be ``None``.
    """
    left, right = node.left, node.right

    # Terminal node: its stored utility is the answer.
    if left is None and right is None:
        return node.value

    # Only one successor exists; max and average of a single value are
    # both that value.
    if left is None or right is None:
        only_child = right if left is None else left
        return expectimax(only_child, not is_max)

    left_value = expectimax(left, not is_max)
    right_value = expectimax(right, not is_max)

    if is_max:
        return max(left_value, right_value)
    return (left_value + right_value) / 2
     
# Driver code
if __name__ == '__main__':

    # Leaf utilities hang off two interior nodes. The interior values
    # would only act as heuristic estimates in a depth-limited search;
    # the whole tree is searched here, so they are never read.
    left_branch = newNode(0)
    left_branch.left = newNode(10)
    left_branch.right = newNode(10)

    right_branch = newNode(0)
    right_branch.left = newNode(9)
    right_branch.right = newNode(100)

    root = newNode(0)
    root.left = left_branch
    root.right = right_branch

    # The root is a maximizer node.
    res = expectimax(root, True)
    print("Expectimax value is " + str(res))
     
# This code is contributed by rutvik_56


C#
// C# program to illustrate
// Expectimax Algorithm
using System;
 
class GFG
{
    // Node of the binary game tree. `value` is the utility returned when
    // the node is a leaf; `left` and `right` are the successor states and
    // are null when absent.
    class Node
    {
        public int value;
        public Node left, right;
    }

    // Creates a childless node holding `v`.
    static Node newNode(int v)
    {
        Node temp = new Node();
        temp.value = v;
        temp.left = null;
        temp.right = null;
        return temp;
    }

    // Computes the expectimax value of the tree rooted at `node`.
    //
    // Levels alternate between maximizer and chance nodes; `is_max` says
    // which kind `node` is and is flipped on every recursive call.
    //   - Leaf (no children): returns the node's `value`.
    //   - Maximizer: returns the larger of the two child results.
    //   - Chance: returns the mean of the two child results (successors
    //     are treated as equally likely).
    //   - Exactly one child: returns that child's result for either node
    //     kind, so the missing child is never dereferenced.
    // Values stored in non-leaf nodes are never read.
    // `node` itself must not be null.
    static float expectimax(Node node, bool is_max)
    {
        Node lhs = node.left;
        Node rhs = node.right;

        // Terminal node: its stored utility is the answer.
        if (lhs == null && rhs == null)
        {
            return node.value;
        }

        // Only one successor exists; max and average of a single value
        // are both that value.
        if (lhs == null || rhs == null)
        {
            return expectimax(lhs ?? rhs, !is_max);
        }

        float leftValue = expectimax(lhs, !is_max);
        float rightValue = expectimax(rhs, !is_max);

        if (is_max)
        {
            return Math.Max(leftValue, rightValue);
        }
        return (float) ((leftValue + rightValue) / 2.0);
    }

    // Driver code
    public static void Main(String[] args)
    {
        // Interior nodes. Their stored values would only serve as
        // heuristic estimates in a depth-limited search; because the
        // whole tree is searched here they are never read.
        Node root = newNode(0);
        root.left = newNode(0);
        root.right = newNode(0);

        // Leaf utilities.
        root.left.left = newNode(10);
        root.left.right = newNode(10);
        root.right.left = newNode(9);
        root.right.right = newNode(100);

        float res = expectimax(root, true);
        Console.Write("Expectimax value is " + res + "\n");
    }
}
 
// This code is contributed by sapnasingh4991


输出:
Expectimax value is 54.5

时间复杂度:$O(b^m)$
空间复杂度:O(b*m),其中b是分支因子, m是树的最大深度。
应用: Expectimax 可用于其中一个代理的动作是随机的环境。以下是几个例子,

  1. Pacman 中,如果我们有随机鬼,我们可以将 Pacman 建模为最大化器,将鬼建模为机会节点。效用值将是最终状态(赢、输或平)的值或给定深度的一组可能状态的评估函数值。
  2. 我们可以通过将玩家代理建模为最大化器并将地雷建模为机会节点来创建扫雷 AI。

如果您希望与专家一起参加现场课程,请参阅DSA 现场工作专业课程学生竞争性编程现场课程