给定一个由小写字母组成的字符串,将它的所有不同子字符串按字典序排序后依次连接,求连接所得字符串中的第K个字符。
例子:
Input : str = “banana”
K = 10
Output : n
All substring in sorted form are,
"a", "an", "ana", "anan", "anana",
"b", "ba", "ban", "bana", "banan",
"banana", "n", "na", "nan", "nana"
Concatenated string = “aananaanana
nanabbabanbanabananbananannanannana”
We can see a 10th character in the
above concatenated string is ‘n’
which is our final answer.
一个简单的解决方案是生成给定字符串的所有子字符串并将其存储在数组中。子字符串生成后,对其进行排序,排序之后再依次连接。最后输出连接后字符串中的第K个字符。
一个高效的解决方案基于后缀数组,与统计字符串中不同子字符串个数的方法相同。得到后缀数组和lcp数组后,我们依次遍历所有lcp值,并为每个值计算要跳过的字符数。我们不断从K中减去要跳过的字符数;当要跳过的字符数不小于K时停止,然后遍历当前lcp[i]所对应后缀的各个子字符串(长度从lcp[i]+1直到该后缀的长度),并输出第K个字符。
C++
// C++ program to print Kth character
// in sorted concatenated substrings
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// One entry of the suffix-sorting work array: a suffix of the text
// together with the two-part key it is ordered by.
struct suffix
{
    // Original index of the suffix
    int index;

    // Rank pair: rank[0] is the rank, rank[1] is the next rank
    int rank[2];
};
// Ordering predicate handed to sort(). Each suffix is treated as the
// pair (rank[0], rank[1]); the result is 1 when a's pair is strictly
// smaller than b's pair and 0 otherwise.
int cmp(struct suffix a, struct suffix b)
{
    // Primary keys differ: they alone decide the order
    if (a.rank[0] != b.rank[0])
        return (a.rank[0] < b.rank[0]) ? 1 : 0;

    // Primary keys tie: fall back to the secondary key
    return (a.rank[1] < b.rank[1]) ? 1 : 0;
}
// This is the main function that takes a string
// 'txt' of size n as an argument, builds and return
// the suffix array for the given string
vector buildSuffixArray(string txt, int n)
{
// A structure to store suffixes and their indexes
struct suffix suffixes[n];
// Store suffixes and their indexes in an array
// of structures. The structure is needed to sort
// the suffixes alphabatically and maintain their
// old indexes while sorting
for (int i = 0; i < n; i++)
{
suffixes[i].index = i;
suffixes[i].rank[0] = txt[i] - 'a';
suffixes[i].rank[1] = ((i+1) < n)?
(txt[i + 1] - 'a'): -1;
}
// Sort the suffixes using the comparison function
// defined above.
sort(suffixes, suffixes+n, cmp);
// At his point, all suffixes are sorted according
// to first 2 characters. Let us sort suffixes
// according to first 4 characters, then first
// 8 and so on
int ind[n]; // This array is needed to get the
// index in suffixes[] from original
// index. This mapping is needed to get
// next suffix.
for (int k = 4; k < 2*n; k = k*2)
{
// Assigning rank and index values to first suffix
int rank = 0;
int prev_rank = suffixes[0].rank[0];
suffixes[0].rank[0] = rank;
ind[suffixes[0].index] = 0;
// Assigning rank to suffixes
for (int i = 1; i < n; i++)
{
// If first rank and next ranks are same as
// that of previous suffix in array, assign
// the same new rank to this suffix
if (suffixes[i].rank[0] == prev_rank &&
suffixes[i].rank[1] == suffixes[i-1].rank[1])
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = rank;
}
else // Otherwise increment rank and assign
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = ++rank;
}
ind[suffixes[i].index] = i;
}
// Assign next rank to every suffix
for (int i = 0; i < n; i++)
{
int nextindex = suffixes[i].index + k/2;
suffixes[i].rank[1] = (nextindex < n)?
suffixes[ind[nextindex]].rank[0]: -1;
}
// Sort the suffixes according to first k characters
sort(suffixes, suffixes+n, cmp);
}
// Store indexes of all sorted suffixes in the suffix
// array
vectorsuffixArr;
for (int i = 0; i < n; i++)
suffixArr.push_back(suffixes[i].index);
// Return the suffix array
return suffixArr;
}
/* Constructs and returns the LCP array using Kasai's algorithm.

   txt       : the text
   suffixArr : suffix array of txt (start indexes in sorted order)

   In the returned vector, lcp[i] is the length of the longest common
   prefix of the i-th and the (i + 1)-th suffix in sorted order; the
   entry for the last sorted suffix is 0. */
std::vector<int> kasai(const std::string& txt, const std::vector<int>& suffixArr)
{
    const int n = static_cast<int>(suffixArr.size());

    // To store LCP array
    std::vector<int> lcp(n, 0);

    // Inverse of the suffix array: if suffixArr[0] is 5 then
    // invSuff[5] is 0. Used to find the suffix that follows a given
    // suffix in sorted order.
    std::vector<int> invSuff(n, 0);
    for (int i = 0; i < n; i++)
        invSuff[suffixArr[i]] = i;

    // Length of the previous LCP
    int k = 0;

    // Process all suffixes in text order, starting from the first
    // suffix of txt
    for (int i = 0; i < n; i++)
    {
        // The last suffix in sorted order has no successor to compare
        // with, so its LCP stays 0
        if (invSuff[i] == n - 1)
        {
            k = 0;
            continue;
        }

        // Start index of the suffix that follows suffix i in sorted
        // order
        const int j = suffixArr[invSuff[i] + 1];

        // Continue matching from offset k: the characters before it
        // are already known to match
        while (i + k < n && j + k < n && txt[i + k] == txt[j + k])
            k++;

        lcp[invSuff[i]] = k;

        // The next suffix in text order drops the first character
        if (k > 0)
            k--;
    }

    // return the constructed lcp array
    return lcp;
}
// Utility method to get the sum of the first N numbers,
// 1 + 2 + ... + N = N * (N + 1) / 2.
// The product is evaluated in 64-bit arithmetic so it cannot overflow
// int while the halved result still fits in an int (N up to 65535).
int sumOfFirstN(int N)
{
    const long long wide = N;
    return static_cast<int>(wide * (wide + 1) / 2);
}
// Returns Kth character in sorted concatenated
// substrings of str
char printKthCharInConcatSubstring(string str, int K)
{
int n = str.length();
// calculating suffix array and lcp array
vector suffixArr = buildSuffixArray(str, n);
vector lcp = kasai(str, suffixArr);
for (int i = 0; i < lcp.size(); i++)
{
// skipping characters common to substring
// (n - suffixArr[i]) is length of current
// maximum substring lcp[i] will length of
// common substring
int charToSkip = sumOfFirstN(n - suffixArr[i]) -
sumOfFirstN(lcp[i]);
/* if characters are more than K, that means
Kth character belongs to substring
corresponding to current lcp[i]*/
if (K <= charToSkip)
{
// loop from current lcp value to current
// string length
for (int j = lcp[i] + 1; j <= (n-suffixArr[i]); j++)
{
int curSubstringLen = j;
/* Again reduce K by current substring's
length one by one and when it becomes less,
print Kth character of current susbtring */
if (K <= curSubstringLen)
return str[(suffixArr[i] + K - 1)];
else
K -= curSubstringLen;
}
break;
}
else
K -= charToSkip;
}
}
// Driver: runs the "banana" sample with K = 10 and prints the
// resulting character.
int main()
{
    const std::string sampleText = "banana";
    const int position = 10;

    const char answer = printKthCharInConcatSubstring(sampleText, position);
    std::cout << answer;

    return 0;
}
Python3
# Python3 program to print Kth character
# in sorted concatenated substrings
# Record describing one suffix while the suffix array is being built
class suffix:
    def __init__(self):
        # Original index of the suffix
        self.index = 0
        # Rank pair: the rank followed by the next rank
        self.rank = [0, 0]
# This is the main function that takes a string
# 'txt' of size n as an argument, builds and returns
# the suffix array for the given string
def buildSuffixArray(txt: str, n: int) -> list:
    """Return the suffix array of the first ``n`` characters of ``txt``.

    Uses prefix doubling: suffixes are sorted by their first 2
    characters, then by 4, 8, ... until the compared prefix covers the
    whole text.

    Args:
        txt: the text whose suffixes are sorted.
        n: number of characters to process (callers pass ``len(txt)``).

    Returns:
        A list whose i-th element is the start index of the i-th
        smallest suffix; an empty list when ``n <= 0``.
    """
    if n <= 0:
        return []

    # Initial key of suffix i: code points of its first and second
    # character, with -1 standing in for a missing second character.
    # Code points are never negative, so the sentinel cannot collide.
    keys = [(ord(txt[i]), ord(txt[i + 1]) if i + 1 < n else -1)
            for i in range(n)]

    # order[p] is the start index of the suffix currently at sorted
    # position p; after this sort it reflects the first 2 characters
    order = sorted(range(n), key=keys.__getitem__)

    k = 4
    while k < 2 * n:
        # Re-number the ranks: suffixes with equal keys share a rank
        ranks = [0] * n
        rank = 0
        for pos in range(1, n):
            if keys[order[pos]] != keys[order[pos - 1]]:
                rank += 1
            ranks[order[pos]] = rank

        # New key: own rank plus the rank of the suffix starting k/2
        # characters later (-1 when that start is past the end)
        half = k // 2
        keys = [(ranks[i], ranks[i + half] if i + half < n else -1)
                for i in range(n)]

        # Suffixes are now ordered by their first k characters
        order.sort(key=keys.__getitem__)

        # Double only AFTER this pass has used k; doubling first would
        # skip the k = 4 step and leave the order wrong
        k *= 2

    # Return the suffix array
    return order
# Kasai's algorithm: construct and return the LCP array
def kasai(txt: str, suffixArr: list) -> list:
    """Return the LCP array of ``txt`` for the suffix array ``suffixArr``.

    Entry ``i`` of the result is the length of the longest common prefix
    of the i-th and the (i + 1)-th suffix in sorted order; the entry for
    the last sorted suffix is 0.
    """
    size = len(suffixArr)
    lcp = [0] * size

    # Inverse of the suffix array: sorted_pos[start] is the position of
    # the suffix beginning at `start` inside suffixArr
    sorted_pos = [0] * size
    for pos, start in enumerate(suffixArr):
        sorted_pos[start] = pos

    # Length carried over from the previously computed LCP
    matched = 0

    # Visit the suffixes in text order
    for start in range(size):
        here = sorted_pos[start]

        # The last sorted suffix has no successor: its LCP stays zero
        if here == size - 1:
            matched = 0
            continue

        # Start index of the suffix that follows in sorted order
        other = suffixArr[here + 1]

        # Resume matching at offset `matched`; the earlier characters
        # are already known to agree
        while (start + matched < size and other + matched < size
               and txt[start + matched] == txt[other + matched]):
            matched += 1

        lcp[here] = matched

        # The next suffix in text order drops the first character
        matched = max(matched - 1, 0)

    return lcp
# Closed-form sum of the first N numbers: 1 + 2 + ... + N
def sumOfFirstN(N: int) -> int:
    """Return N * (N + 1) // 2."""
    doubled = N * (N + 1)
    return doubled // 2
# Returns Kth character in sorted concatenated
# substrings of str
def printKthCharInConcatSubstring(string: str,
                                  K: int) -> str:
    """Return the K-th character of the sorted, concatenated substrings.

    All distinct substrings of ``string`` are taken in sorted order and
    concatenated; ``K`` is a 1-based position in that concatenation.

    Args:
        string: the input text.
        K: 1-based position in the concatenated string.

    Returns:
        The character at position ``K``, or ``None`` when ``K`` is
        smaller than 1 or larger than the concatenated length.
    """
    if K < 1:
        return None

    n = len(string)

    # Calculating suffix array and lcp array
    suffixArr = buildSuffixArray(string, n)
    lcp = kasai(string, suffixArr)

    for i in range(len(lcp)):
        # Length of the i-th suffix in sorted order
        suffixLen = n - suffixArr[i]

        # Prefixes of this suffix no longer than its common prefix with
        # the PREVIOUS sorted suffix were already emitted by that
        # suffix. kasai() stores in lcp[i - 1] the common prefix of
        # sorted suffixes i - 1 and i, so that is the value to use.
        # Using lcp[i] would emit substrings in the wrong order.
        shared = lcp[i - 1] if i > 0 else 0

        # Total characters contributed by the new prefixes, i.e. those
        # of length shared + 1 .. suffixLen
        charToSkip = sumOfFirstN(suffixLen) - sumOfFirstN(shared)

        # K-th character lies beyond this suffix's contribution
        if K > charToSkip:
            K -= charToSkip
            continue

        # Walk the new prefixes from shortest to longest, consuming K
        # until it falls inside one of them
        for curSubstringLen in range(shared + 1, suffixLen + 1):
            if K <= curSubstringLen:
                return string[suffixArr[i] + K - 1]
            K -= curSubstringLen

    # K exceeds the length of the concatenated string
    return None
# Driver code: runs the "banana" sample with K = 10
if __name__ == "__main__":
    sample_text = "banana"
    position = 10
    answer = printKthCharInConcatSubstring(sample_text, position)
    print(answer)
# This code is contributed by sanjeev2552
输出:
n