📜  打印已排序子字符串串联结果中的第 K 个字符

📅  最后修改于: 2021-04-17 12:36:33             🧑  作者: Mango

给定一个由小写字母组成的字符串,将其所有不同的子字符串按字典序排序后依次连接,求所得字符串中的第 K 个字符。

例子:

Input : str = “banana”
          K = 10
Output : n
All substring in sorted form are,
"a", "an", "ana", "anan", "anana", 
"b", "ba", "ban", "bana", "banan", 
"banana", "n", "na", "nan", "nana"
Concatenated string = “aananaanana
nanabbabanbanabananbananannanannana”
We can see a 10th character in the 
above concatenated string is ‘n’ 
which is our final answer.

一个简单的解决方案是生成给定字符串的所有子字符串并将其存储在数组中。生成全部子字符串后,对其进行排序,并按排序后的顺序依次连接。最后输出连接所得字符串中的第 K 个字符。

一个高效的解决方案基于后缀数组,其思路与利用后缀数组统计字符串中不同子字符串的个数相同。得到后缀数组和 lcp 数组后,我们按字典序遍历每个后缀,并根据对应的 lcp 值计算该后缀贡献的新子字符串的总字符数(即可以跳过的字符数)。只要 K 大于这个数,就从 K 中减去它并继续;一旦要跳过的字符数不小于 K,就停下来,在当前后缀的各个前缀上循环(长度从 lcp 值加 1 直到该后缀的长度),逐个减去前缀长度,直到定位并输出第 K 个字符。

C++
// C++ program to print Kth character
// in sorted concatenated substrings
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
 
// One suffix of the text, as tracked by the prefix-doubling sort below.
struct suffix
{
    int index;   // Start position of this suffix in the original text
    int rank[2]; // rank[0]: rank of the first half of the compared prefix
                 // rank[1]: rank of the second half, or -1 when the
                 //          suffix is too short to have one
};

// Comparison function for std::sort(): orders suffixes by the pair
// (rank[0], rank[1]). Returns 1 when `a` must come before `b`, else 0.
int cmp(struct suffix a, struct suffix b)
{
    if (a.rank[0] != b.rank[0])
        return (a.rank[0] < b.rank[0]) ? 1 : 0;
    return (a.rank[1] < b.rank[1]) ? 1 : 0;
}

// Builds the suffix array of the first `n` characters of `txt` by prefix
// doubling: suffixes are sorted by their first 2 characters, then 4, 8, ...
// until the compared prefix covers the whole text.
//
// txt : text to index; characters are ranked as (txt[i] - 'a')
// n   : number of characters to use; must not exceed txt.length(),
//       because txt[i] is read for every i < n
//
// Returns the start positions of the suffixes in sorted order, or an empty
// vector when n <= 0.
std::vector<int> buildSuffixArray(const std::string& txt, int n)
{
    // Nothing to sort; also keeps the containers below from being sized
    // with a negative value.
    if (n <= 0)
        return std::vector<int>();

    // Heap-backed storage instead of a variable-length array, which is not
    // standard C++ and can exhaust the stack for long inputs.
    std::vector<suffix> suffixes(n);

    // Seed the ranks with the first two characters of every suffix.
    for (int i = 0; i < n; i++)
    {
        suffixes[i].index = i;
        suffixes[i].rank[0] = txt[i] - 'a';
        suffixes[i].rank[1] = (i + 1 < n) ? (txt[i + 1] - 'a') : -1;
    }
    std::sort(suffixes.begin(), suffixes.end(), cmp);

    // ind[p] is the current position in suffixes[] of the suffix that
    // starts at text position p; needed to look up the rank of the suffix
    // starting k/2 characters further on.
    std::vector<int> ind(n);

    // Each round sorts by the first k characters, using the order by the
    // first k/2 characters established in the previous round.
    for (int k = 4; k < 2 * n; k *= 2)
    {
        // Re-number the ranks densely: equal (rank[0], rank[1]) pairs get
        // the same new rank. The old rank[0] of the previous entry has to
        // be remembered because it is overwritten in place.
        int newRank = 0;
        int prevOldRank = suffixes[0].rank[0];
        suffixes[0].rank[0] = newRank;
        ind[suffixes[0].index] = 0;

        for (int i = 1; i < n; i++)
        {
            const int oldRank = suffixes[i].rank[0];
            const bool samePair = oldRank == prevOldRank &&
                                  suffixes[i].rank[1] == suffixes[i - 1].rank[1];
            if (!samePair)
                ++newRank;
            prevOldRank = oldRank;
            suffixes[i].rank[0] = newRank;
            ind[suffixes[i].index] = i;
        }

        // The second key is the rank of the suffix that starts k/2
        // characters later, or -1 when that runs past the end.
        for (int i = 0; i < n; i++)
        {
            const int nextIndex = suffixes[i].index + k / 2;
            suffixes[i].rank[1] =
                (nextIndex < n) ? suffixes[ind[nextIndex]].rank[0] : -1;
        }

        std::sort(suffixes.begin(), suffixes.end(), cmp);
    }

    // Extract the start positions in sorted order.
    std::vector<int> suffixArr;
    suffixArr.reserve(n);
    for (int i = 0; i < n; i++)
        suffixArr.push_back(suffixes[i].index);
    return suffixArr;
}
 
/* Builds the LCP array of `txt` from its suffix array (Kasai's algorithm).

   txt       : the indexed text
   suffixArr : suffix start positions in sorted order

   Returns a vector of the same size as suffixArr in which entry i is the
   length of the longest common prefix of the suffixes at sorted positions
   i and i + 1. The last entry has no successor and is 0. */
std::vector<int> kasai(const std::string& txt, const std::vector<int>& suffixArr)
{
    const int n = static_cast<int>(suffixArr.size());

    // To store LCP array
    std::vector<int> lcp(n, 0);

    // Inverse of the suffix array: invSuff[p] is the sorted position of the
    // suffix starting at text position p. For example if suffixArr[0] is 5,
    // invSuff[5] is 0. Used to find the next suffix in sorted order.
    std::vector<int> invSuff(n, 0);
    for (int i = 0; i < n; i++)
        invSuff[suffixArr[i]] = i;

    // Length of the match carried over from the previous text position
    int k = 0;

    // Process the suffixes in text order, starting from the first one
    for (int i = 0; i < n; i++)
    {
        // The suffix that sorts last has no successor to compare with, so
        // its entry stays 0 and the carried match is reset.
        if (invSuff[i] == n - 1)
        {
            k = 0;
            continue;
        }

        // Start of the suffix that follows this one in sorted order
        const int j = suffixArr[invSuff[i] + 1];

        // Resume matching at offset k: the first k characters are already
        // known to agree from the previous iteration.
        while (i + k < n && j + k < n && txt[i + k] == txt[j + k])
            k++;

        lcp[invSuff[i]] = k;

        // Moving to the next text position drops the leading character,
        // so at most one matched character is lost.
        if (k > 0)
            k--;
    }

    // return the constructed lcp array
    return lcp;
}
 
// Returns 1 + 2 + ... + N, computed with the closed form N * (N + 1) / 2.
int sumOfFirstN(int N)
{
    const int product = N * (N + 1);
    return product / 2;
}
 
// Returns Kth character in sorted concatenated
// substrings of str
char printKthCharInConcatSubstring(string str, int K)
{
    int n = str.length();
    //  calculating suffix array and lcp array
    vector suffixArr = buildSuffixArray(str, n);
    vector lcp = kasai(str, suffixArr);
 
    for (int i = 0; i < lcp.size(); i++)
     {
         //    skipping characters common to substring
         //    (n - suffixArr[i]) is length of current
         //  maximum substring lcp[i] will length of
         // common substring
        int charToSkip = sumOfFirstN(n - suffixArr[i]) -
                         sumOfFirstN(lcp[i]);
 
        /*    if characters are more than K, that means
            Kth character belongs to substring
            corresponding to current lcp[i]*/
        if (K <= charToSkip)
        {
            // loop from current lcp value to current
            // string length
            for (int j = lcp[i] + 1; j <= (n-suffixArr[i]); j++)
            {
                int curSubstringLen = j;
 
                /* Again reduce K by current substring's
                   length one by one and when it becomes less,
                    print Kth character of current susbtring */
                if (K <= curSubstringLen)
                    return str[(suffixArr[i] + K - 1)];
                else
                    K -= curSubstringLen;
 
            }
            break;
        }
        else
            K -= charToSkip;
     }
}
 
// Demo entry point: prints the 10th character of the sorted, concatenated
// substrings of "banana".
int main()
{
    const std::string text = "banana";
    const int position = 10;
    const char answer = printKthCharInConcatSubstring(text, position);
    std::cout << answer;
    return 0;
}


Python3
# Python3 program to print Kth character
# in sorted concatenated substrings
 
# Structure to store information of a suffix
class suffix:
    """One suffix of the text, as tracked by the prefix-doubling sort."""

    def __init__(self):

        # Start position of this suffix in the original text
        self.index = 0

        # rank[0]: rank of the first half of the compared prefix
        # rank[1]: rank of the second half, or -1 when the suffix is
        #          too short to have one
        self.rank = [0] * 2


def buildSuffixArray(txt: str, n: int) -> list:
    """Build the suffix array of the first ``n`` characters of ``txt``.

    Uses prefix doubling: suffixes are sorted by their first 2 characters,
    then by their first 4, 8, ... until the compared prefix covers the
    whole text.

    Args:
        txt: Text to index; characters are ranked as ``ord(c) - ord('a')``.
        n: Number of characters to use. Must not exceed ``len(txt)``,
            because ``txt[i]`` is read for every ``i < n``.

    Returns:
        The start positions of the suffixes in sorted order.
    """
    suffixes = [suffix() for _ in range(n)]

    # Seed the ranks with the first two characters of every suffix.
    for i in range(n):
        suffixes[i].index = i
        suffixes[i].rank[0] = ord(txt[i]) - ord('a')
        suffixes[i].rank[1] = (ord(txt[i + 1]) - ord('a')
                               if i + 1 < n else -1)

    # Rank pairs are lists, so they compare lexicographically.
    suffixes.sort(key=lambda s: s.rank)

    # ind[p] is the current position in suffixes of the suffix that starts
    # at text position p; needed to look up the rank of the suffix that
    # starts k // 2 characters further on.
    ind = [0] * n

    # Each round sorts by the first k characters, using the order by the
    # first k // 2 characters established in the previous round.
    k = 4
    while k < 2 * n:

        # Re-number the ranks densely: equal (rank[0], rank[1]) pairs get
        # the same new rank. The old rank[0] of the previous entry has to
        # be remembered because it is overwritten in place.
        rank = 0
        prev_rank = suffixes[0].rank[0]
        suffixes[0].rank[0] = rank
        ind[suffixes[0].index] = 0

        for i in range(1, n):
            old_rank = suffixes[i].rank[0]
            same_pair = (old_rank == prev_rank and
                         suffixes[i].rank[1] == suffixes[i - 1].rank[1])
            if not same_pair:
                rank += 1
            prev_rank = old_rank
            suffixes[i].rank[0] = rank
            ind[suffixes[i].index] = i

        # The second key is the rank of the suffix that starts k // 2
        # characters later, or -1 when that runs past the end.
        for i in range(n):
            nextindex = suffixes[i].index + k // 2
            suffixes[i].rank[1] = (suffixes[ind[nextindex]].rank[0]
                                   if nextindex < n else -1)

        suffixes.sort(key=lambda s: s.rank)

        # Double only AFTER the round has used k. Doubling first makes the
        # first round look 4 characters ahead instead of 2, which leaves
        # suffixes such as "na" and "nana" in the wrong order.
        k *= 2

    # Extract the start positions in sorted order.
    return [s.index for s in suffixes]
 
# Construct and return the LCP array (Kasai's algorithm)
def kasai(txt: str, suffixArr: list) -> list:
    """Compute the LCP array of ``txt`` from its suffix array.

    Args:
        txt: The indexed text.
        suffixArr: Suffix start positions in sorted order.

    Returns:
        A list as long as ``suffixArr`` whose entry ``i`` is the length of
        the longest common prefix of the suffixes at sorted positions
        ``i`` and ``i + 1``. The last entry has no successor and is 0.
    """
    size = len(suffixArr)
    lcp = [0] * size

    # Inverse of the suffix array: position[p] is the sorted position of
    # the suffix that starts at text position p.
    position = [0] * size
    for sorted_pos, start in enumerate(suffixArr):
        position[start] = sorted_pos

    # Number of matching characters carried over from the previous start
    matched = 0

    # Visit the suffixes in text order.
    for start in range(size):
        sorted_pos = position[start]

        # The suffix that sorts last has nothing after it to compare with;
        # its entry stays 0 and the carried match is reset.
        if sorted_pos == size - 1:
            matched = 0
            continue

        # Start of the suffix that follows this one in sorted order
        neighbour = suffixArr[sorted_pos + 1]

        # The first `matched` characters are already known to agree, so
        # comparison resumes from that offset.
        while (start + matched < size and neighbour + matched < size
               and txt[start + matched] == txt[neighbour + matched]):
            matched += 1

        lcp[sorted_pos] = matched

        # Dropping the leading character costs at most one matched char.
        matched = max(matched - 1, 0)

    return lcp
 
# Sum of the first N natural numbers
def sumOfFirstN(N: int) -> int:
    """Return 1 + 2 + ... + N using the closed form N * (N + 1) // 2."""
    product = N * (N + 1)
    return product // 2
 
# Returns Kth character in sorted concatenated
# substrings of str
def printKthCharInConcatSubstring(string: str,
                               K: int) -> str:
    """Return the Kth character of the sorted, concatenated substrings.

    All distinct substrings of ``string`` are sorted and concatenated;
    the character at 1-based position ``K`` of that result is returned.

    Args:
        string: Text whose substrings are considered.
        K: 1-based position in the concatenated string.

    Returns:
        The character at position ``K``, or ``None`` when ``K`` is not a
        valid position (``K <= 0`` or beyond the concatenated length).
    """
    # A non-positive position would index before the start of a substring.
    if K <= 0:
        return None

    n = len(string)

    # Sorted suffixes, and the common-prefix length of each sorted suffix
    # with the one that follows it.
    suffixArr = buildSuffixArray(string, n)
    lcp = kasai(string, suffixArr)

    for i in range(len(lcp)):
        suffixLen = n - suffixArr[i]

        # Prefixes of this suffix that are no longer than its common prefix
        # with the PREVIOUS sorted suffix were already emitted there.
        # lcp[i - 1] is that length, because lcp pairs each suffix with its
        # successor. Subtracting lcp[i] instead would delay shared prefixes
        # until the last suffix containing them and break the sorted order
        # (e.g. "ab" would be emitted before "a" for "acab").
        alreadySeen = lcp[i - 1] if i > 0 else 0

        # Total characters of the new substrings contributed by this
        # suffix: lengths alreadySeen + 1 .. suffixLen.
        charToSkip = sumOfFirstN(suffixLen) - sumOfFirstN(alreadySeen)

        # The target lies beyond this suffix's substrings: skip them all.
        if K > charToSkip:
            K -= charToSkip
            continue

        # The target is inside one of this suffix's new prefixes; walk
        # them in increasing length, consuming K as we go.
        for curSubstringLen in range(alreadySeen + 1, suffixLen + 1):
            if K <= curSubstringLen:
                return string[suffixArr[i] + K - 1]
            K -= curSubstringLen

    # K exceeds the length of the concatenated string.
    return None
 
# Demo: print the 10th character of the sorted, concatenated
# substrings of "banana"
if __name__ == "__main__":
    text, position = "banana", 10
    answer = printKthCharInConcatSubstring(text, position)
    print(answer)
 
# This code is contributed by sanjeev2552


输出:

n