📜  最短超字符串问题

📅  最后修改于: 2021-05-06 08:09:38             🧑  作者: Mango

给定一组n个字符串arr[],找到一个最短的字符串,使给定集合中的每个字符串都是它的子字符串。我们可以假设arr[]中的任何字符串都不是另一个字符串的子字符串。
例子:

Input:  arr[] = {"geeks", "quiz", "for"}
Output: geeksquizfor

Input:  arr[] = {"catg", "ctaagt", "gcta", "ttca", "atgcatc"}
Output: gctaagttcatgcatc

最短超字符串贪婪近似算法
最短超字符串问题是NP困难(NP-Hard)问题。要保证总能找到最短超字符串,已知的解法都需要指数时间。下面是一个贪婪近似算法。

Let arr[] be given set of strings.

1) Create an auxiliary array of strings, temp[].  Copy contents
   of arr[] to temp[]

2) While temp[] contains more than one strings
     a) Find the most overlapping string pair in temp[]. Let this
        pair be 'a' and 'b'. 
     b) Replace 'a' and 'b' with the string obtained after combining
        them.

3) The only string left in temp[] is the result, return it.

如果一个字符串的前缀与另一个字符串的后缀相同,则称这两个字符串重叠。“最大重叠”是指相互匹配的前缀和后缀的长度最大。
上述算法的执行过程:

arr[] = {"catgc", "ctaagt", "gcta", "ttca", "atgcatc"}
Initialize:
temp[] = {"catgc", "ctaagt", "gcta", "ttca", "atgcatc"}

The most overlapping strings are "catgc" and "atgcatc"
(Suffix of length 4 of "catgc" is same as prefix of "atgcatc")
Replace two strings with "catgcatc", we get
temp[] = {"catgcatc", "ctaagt", "gcta", "ttca"}

The most overlapping strings are "ctaagt" and "gcta"
(Prefix of length 3 of "ctaagt" is same as suffix of "gcta")
Replace two strings with "gctaagt", we get
temp[] = {"catgcatc", "gctaagt", "ttca"}

The most overlapping strings are "catgcatc" and "ttca"
(Prefix of length 2 of "catgcatc" as suffix of "ttca")
Replace two strings with "ttcatgcatc", we get
temp[] = {"ttcatgcatc", "gctaagt"}

Now there are only two strings in temp[], after combining
the two in optimal way, we get temp[] = {"gctaagttcatgcatc"}

Since temp[] has only one string now, return it.

下面是上述算法的实现。

C++
// C++ program to find shortest
// superstring using Greedy
// Approximate Algorithm
#include <climits>
#include <iostream>
#include <string>
using namespace std;
 
// Utility function to calculate
// minimum of two numbers
int min(int a, int b)
{
    // Smaller of the two arguments; when they compare equal,
    // b is handed back (the value is the same either way).
    if (a < b)
        return a;
    return b;
}
 
// Function to calculate maximum
// overlap in two given strings
int findOverlappingPair(string str1,
                     string str2, string &str)
{
    // Returns the length of the longest overlap between str1 and
    // str2, trying both orientations (tail of str1 on head of str2,
    // and head of str1 on tail of str2). When an overlap exists, str
    // receives the two strings glued together along it. When there
    // is none, INT_MIN is returned and str is left untouched.
    const int n1 = str1.length();
    const int n2 = str2.length();

    // An overlap can never be longer than the shorter string.
    const int limit = (n1 < n2) ? n1 : n2;

    int best = INT_MIN;

    // Orientation 1: last k characters of str1 against the
    // first k characters of str2.
    for (int k = 1; k <= limit; ++k)
    {
        if (str1.compare(n1 - k, k, str2, 0, k) != 0)
            continue;

        if (best < k)
        {
            best = k;
            str = str1 + str2.substr(k);
        }
    }

    // Orientation 2: first k characters of str1 against the
    // last k characters of str2. Only a strictly longer overlap
    // replaces what orientation 1 found.
    for (int k = 1; k <= limit; ++k)
    {
        if (str1.compare(0, k, str2, n2 - k, k) != 0)
            continue;

        if (best < k)
        {
            best = k;
            str = str2 + str1.substr(k);
        }
    }

    return best;
}
 
// Function to calculate
// smallest string that contains
// each string in the given
// set as substring.
string findShortestSuperstring(string arr[],
                                    int len)
{
     
    // Run len-1 times to
    // consider every pair
    while(len != 1)
    {
         
        // To store  maximum overlap
        int max = INT_MIN;  
       
        // To store array index of strings
        int l, r;   
       
        // Involved in maximum overlap
        string resStr;   
       
        // Maximum overlap
        for (int i = 0; i < len; i++)
        {
            for (int j = i + 1; j < len; j++)
            {
                string str;
 
                // res will store maximum
                // length of the matching
                // prefix and suffix str is
                // passed by reference and
                // will store the resultant
                // string after maximum
                // overlap of arr[i] and arr[j],
                // if any.
                int res = findOverlappingPair(arr[i],
                                         arr[j], str);
 
                // check for maximum overlap
                if (max < res)
                {
                    max = res;
                    resStr.assign(str);
                    l = i, r = j;
                }
            }
        }
 
        // Ignore last element in next cycle
        len--;  
 
        // If no overlap, append arr[len] to arr[0]
        if (max == INT_MIN)
            arr[0] += arr[len];
        else
        {
           
            // Copy resultant string to index l
            arr[l] = resStr; 
           
            // Copy string at last index to index r
            arr[r] = arr[len]; 
        }
    }
    return arr[0];
}
 
// Driver program
int main()
{
    string arr[] = {"catgc", "ctaagt",
                    "gcta", "ttca", "atgcatc"};
    int len = sizeof(arr)/sizeof(arr[0]);
 
    // Function Call
    cout << "The Shortest Superstring is "
         << findShortestSuperstring(arr, len);
 
    return 0;
}
// This code is contributed by Aditya Goel


Java
// Java program to find shortest
// superstring using Greedy
// Approximate Algorithm
import java.io.*;
import java.util.*;
 
class GFG
{

    /**
     * Merged string produced by the most recent call to
     * {@link #findOverlappingPair} that found an overlap. It acts as an
     * out-parameter of that method. Because it is shared mutable state,
     * concurrent callers can overwrite each other's result.
     */
    static String str;

    /**
     * Returns the smaller of two ints.
     *
     * @param a first value
     * @param b second value
     * @return the minimum of {@code a} and {@code b}
     */
    static int min(int a, int b)
    {
        return Math.min(a, b);
    }

    /**
     * Finds the longest overlap between two strings, trying both
     * orientations: a suffix of {@code str1} matching a prefix of
     * {@code str2}, and a prefix of {@code str1} matching a suffix of
     * {@code str2}. When an overlap exists, {@link #str} is set to the two
     * strings joined along it; otherwise {@link #str} is left unchanged.
     *
     * @param str1 first string
     * @param str2 second string
     * @return length of the longest overlap, or {@code Integer.MIN_VALUE}
     *         when the strings do not overlap
     */
    static int findOverlappingPair(String str1,
                                   String str2)
    {
        int max = Integer.MIN_VALUE;
        int len1 = str1.length();
        int len2 = str2.length();

        // An overlap cannot be longer than the shorter string.
        int limit = min(len1, len2);

        // Suffix of str1 against prefix of str2. regionMatches compares
        // in place, so no temporary substrings are allocated.
        for (int i = 1; i <= limit; i++)
        {
            if (str1.regionMatches(len1 - i, str2, 0, i) && max < i)
            {
                max = i;
                str = str1 + str2.substring(i);
            }
        }

        // Prefix of str1 against suffix of str2. Only a strictly longer
        // overlap replaces what the first pass found.
        for (int i = 1; i <= limit; i++)
        {
            if (str1.regionMatches(0, str2, len2 - i, i) && max < i)
            {
                max = i;
                str = str2 + str1.substring(i);
            }
        }

        return max;
    }

    /**
     * Greedy approximation of the shortest string that contains each of the
     * first {@code len} strings of {@code arr} as a substring. The pair with
     * the largest overlap is merged repeatedly until one string remains.
     * The input array is not modified.
     *
     * @param arr input strings
     * @param len number of leading elements of {@code arr} to use
     * @return the superstring found, or {@code ""} when {@code arr} is
     *         {@code null} or {@code len <= 0}
     * @throws IllegalArgumentException if {@code len > arr.length}
     */
    static String findShortestSuperstring(
                          String arr[], int len)
    {
        // Nothing to merge: without this guard the loop below would
        // index the array at -1.
        if (arr == null || len <= 0)
        {
            return "";
        }
        if (len > arr.length)
        {
            throw new IllegalArgumentException(
                "len (" + len + ") exceeds arr.length (" + arr.length + ")");
        }

        // Work on a copy so the caller's array is left intact.
        String[] pool = Arrays.copyOf(arr, len);
        int remaining = len;

        // Each pass merges one pair, so this runs len-1 times.
        while (remaining > 1)
        {
            // Largest overlap seen in this pass (MIN_VALUE = none yet)
            int max = Integer.MIN_VALUE;

            // Indices of the pair that produced it
            int l = 0, r = 0;

            // Merged string for that pair
            String resStr = "";

            for (int i = 0; i < remaining; i++)
            {
                for (int j = i + 1; j < remaining; j++)
                {
                    // On success the merged string is left in the
                    // static field str.
                    int res = findOverlappingPair(pool[i], pool[j]);

                    // Strict '<' keeps the first pair among ties.
                    if (max < res)
                    {
                        max = res;
                        resStr = str;
                        l = i;
                        r = j;
                    }
                }
            }

            // The last slot drops out of the working set.
            remaining--;

            if (max == Integer.MIN_VALUE)
            {
                // No pair overlaps: append the last string to the first.
                pool[0] += pool[remaining];
            }
            else
            {
                // The merged string takes slot l; the former last string
                // fills the hole at r.
                pool[l] = resStr;
                pool[r] = pool[remaining];
            }
        }
        return pool[0];
    }

    // Driver Code
    public static void main(String[] args)
    {
        String[] arr = { "catgc", "ctaagt",
                      "gcta", "ttca", "atgcatc" };
        int len = arr.length;

        System.out.println("The Shortest Superstring is " +
                        findShortestSuperstring(arr, len));
    }
}
 
// This code is contributed by
// sanjeev2552


Java
// Java program for above approach
import java.io.*;
import java.util.*;
 
class Solution
{

  /**
   * Computes a shortest string that contains every string of {@code A} as
   * a substring, using a bitmask dynamic program over visiting orders
   * (the travelling-salesman formulation). The tables hold
   * {@code 2^n * n} entries, so this is only practical for small
   * {@code n}.
   *
   * @param A input strings; as the article states, no string is assumed
   *          to be a substring of another
   * @return a shortest superstring, or {@code ""} when {@code A} is
   *         {@code null} or empty
   */
  public static String shortestSuperstring(
                                   String[] A)
  {
    // With no strings there is no path to rebuild below.
    if (A == null || A.length == 0)
    {
      return "";
    }

    int n = A.length;

    // graph[i][j] = characters added when A[j] is placed right after A[i]
    int[][] graph = new int[n][n];
    for (int i = 0; i < n; i++)
    {
      for (int j = 0; j < n; j++)
      {
        graph[i][j] = calc(A[i], A[j]);
      }
    }

    int full = (1 << n) - 1;

    // dp[mask][j]: shortest length covering the strings in mask and
    // ending with A[j]; Integer.MAX_VALUE marks an unreachable state
    int[][] dp = new int[1 << n][n];

    // path[mask][j]: the string placed immediately before A[j]
    int[][] path = new int[1 << n][n];

    for (int mask = 1; mask <= full; mask++)
    {
      Arrays.fill(dp[mask], Integer.MAX_VALUE);

      for (int j = 0; j < n; j++)
      {
        if ((mask & (1 << j)) == 0)
        {
          continue;
        }

        int prev = mask ^ (1 << j);
        if (prev == 0)
        {
          // A[j] is the only string so far
          dp[mask][j] = A[j].length();
          continue;
        }

        for (int k = 0; k < n; k++)
        {
          // Skip unreachable states; this also keeps the sum below
          // from overflowing
          if (dp[prev][k] < Integer.MAX_VALUE &&
              dp[prev][k] + graph[k][j] < dp[mask][j])
          {
            dp[mask][j] = dp[prev][k] + graph[k][j];
            path[mask][j] = k;
          }
        }
      }
    }

    // Choose the best final string; strict '<' keeps the lowest index
    // among ties
    int last = -1, min = Integer.MAX_VALUE;
    for (int j = 0; j < n; j++)
    {
      if (dp[full][j] < min)
      {
        min = dp[full][j];
        last = j;
      }
    }

    // Walk the path backwards; the stack then yields it front to back
    Deque<Integer> order = new ArrayDeque<>();
    int cur = full;
    while (cur > 0)
    {
      order.push(last);
      int next = path[cur][last];
      cur ^= (1 << last);
      last = next;
    }

    // Build the result: the first string whole, then only the
    // non-overlapping tail of each following string
    StringBuilder sb = new StringBuilder();
    int i = order.pop();
    sb.append(A[i]);
    while (!order.isEmpty())
    {
      int j = order.pop();
      sb.append(A[j].substring(A[j].length() -
                                graph[i][j]));
      i = j;
    }
    return sb.toString();
  }

  /**
   * Returns how many characters of {@code b} must be appended when
   * {@code b} is written after {@code a}, i.e. {@code b.length()} minus
   * the longest proper suffix of {@code a} that is a prefix of {@code b}.
   *
   * @param a the string that comes first
   * @param b the string that follows
   * @return number of characters {@code b} adds beyond the overlap
   */
  public static int calc(String a, String b)
  {
    // Smaller i means a longer suffix of a, so the first hit is the
    // longest overlap
    for (int i = 1; i < a.length(); i++)
    {
      if (a.regionMatches(i, b, 0, a.length() - i))
      {
        return b.length() - a.length() + i;
      }
    }

    // No overlap: all of b is appended
    return b.length();
  }

  // Driver Code
  public static void main(String[] args)
  {
    String[] arr = { "catgc", "ctaagt",
                    "gcta", "ttca", "atgcatc" };

    // Function Call
    System.out.println("The Shortest Superstring is " +
                    shortestSuperstring(arr));
   }
}


输出
The Shortest Superstring is gctaagttcatgcatc

以上算法的性能:
上面的贪婪算法已被证明是4近似算法(即该算法生成的超字符串的长度不会超过最短超字符串长度的4倍)。人们猜想该算法实际上是2近似算法(目前尚未发现其结果超过最优解两倍的例子)。猜想中的最坏情况示例是 $\{ab^k,\ b^kc,\ b^{k+1}\}$。例如,对于{"abb", "bbc", "bbb"},如果先选择"abb"和"bbc"作为第一对进行合并,上述算法会生成"abbcbbb",而实际的最短超字符串是"abbbc"。这里的比值是7/5,但当k增大时,该比值趋近于2。

另一种方法:

“贪婪方法”的意思是:每次我们以最大重叠长度合并两个字符串时,将它们从字符串数组中删除,然后将合并后的字符串放入字符串数组中。

把每个字符串看作图中的一个节点,从节点i到节点j的边权是把字符串j接在字符串i之后所需追加的字符数(即字符串j的长度减去两者的最大重叠长度)。于是问题就变成了:在该图中找到恰好访问每个节点一次的最短路径。这是一个旅行商问题(TSP)。

应用Traveling Salesman Problem DP解决方案。记住要记录路径。

下面是上述方法的实现:

Java

// Java program for above approach
import java.io.*;
import java.util.*;
 
class Solution
{

  /**
   * Computes a shortest string that contains every string of {@code A} as
   * a substring, using a bitmask dynamic program over visiting orders
   * (the travelling-salesman formulation). The tables hold
   * {@code 2^n * n} entries, so this is only practical for small
   * {@code n}.
   *
   * @param A input strings; as the article states, no string is assumed
   *          to be a substring of another
   * @return a shortest superstring, or {@code ""} when {@code A} is
   *         {@code null} or empty
   */
  public static String shortestSuperstring(
                                   String[] A)
  {
    // With no strings there is no path to rebuild below.
    if (A == null || A.length == 0)
    {
      return "";
    }

    int n = A.length;

    // graph[i][j] = characters added when A[j] is placed right after A[i]
    int[][] graph = new int[n][n];
    for (int i = 0; i < n; i++)
    {
      for (int j = 0; j < n; j++)
      {
        graph[i][j] = calc(A[i], A[j]);
      }
    }

    int full = (1 << n) - 1;

    // dp[mask][j]: shortest length covering the strings in mask and
    // ending with A[j]; Integer.MAX_VALUE marks an unreachable state
    int[][] dp = new int[1 << n][n];

    // path[mask][j]: the string placed immediately before A[j]
    int[][] path = new int[1 << n][n];

    for (int mask = 1; mask <= full; mask++)
    {
      Arrays.fill(dp[mask], Integer.MAX_VALUE);

      for (int j = 0; j < n; j++)
      {
        if ((mask & (1 << j)) == 0)
        {
          continue;
        }

        int prev = mask ^ (1 << j);
        if (prev == 0)
        {
          // A[j] is the only string so far
          dp[mask][j] = A[j].length();
          continue;
        }

        for (int k = 0; k < n; k++)
        {
          // Skip unreachable states; this also keeps the sum below
          // from overflowing
          if (dp[prev][k] < Integer.MAX_VALUE &&
              dp[prev][k] + graph[k][j] < dp[mask][j])
          {
            dp[mask][j] = dp[prev][k] + graph[k][j];
            path[mask][j] = k;
          }
        }
      }
    }

    // Choose the best final string; strict '<' keeps the lowest index
    // among ties
    int last = -1, min = Integer.MAX_VALUE;
    for (int j = 0; j < n; j++)
    {
      if (dp[full][j] < min)
      {
        min = dp[full][j];
        last = j;
      }
    }

    // Walk the path backwards; the stack then yields it front to back
    Deque<Integer> order = new ArrayDeque<>();
    int cur = full;
    while (cur > 0)
    {
      order.push(last);
      int next = path[cur][last];
      cur ^= (1 << last);
      last = next;
    }

    // Build the result: the first string whole, then only the
    // non-overlapping tail of each following string
    StringBuilder sb = new StringBuilder();
    int i = order.pop();
    sb.append(A[i]);
    while (!order.isEmpty())
    {
      int j = order.pop();
      sb.append(A[j].substring(A[j].length() -
                                graph[i][j]));
      i = j;
    }
    return sb.toString();
  }

  /**
   * Returns how many characters of {@code b} must be appended when
   * {@code b} is written after {@code a}, i.e. {@code b.length()} minus
   * the longest proper suffix of {@code a} that is a prefix of {@code b}.
   *
   * @param a the string that comes first
   * @param b the string that follows
   * @return number of characters {@code b} adds beyond the overlap
   */
  public static int calc(String a, String b)
  {
    // Smaller i means a longer suffix of a, so the first hit is the
    // longest overlap
    for (int i = 1; i < a.length(); i++)
    {
      if (a.regionMatches(i, b, 0, a.length() - i))
      {
        return b.length() - a.length() + i;
      }
    }

    // No overlap: all of b is appended
    return b.length();
  }

  // Driver Code
  public static void main(String[] args)
  {
    String[] arr = { "catgc", "ctaagt",
                    "gcta", "ttca", "atgcatc" };

    // Function Call
    System.out.println("The Shortest Superstring is " +
                    shortestSuperstring(arr));
   }
}
输出
The Shortest Superstring is gctaagttcatgcatc

时间复杂度: O(n ^ 2 * 2 ^ n)