给定 N 个数组的最长公共子数组的长度
给定一个包含N个数组的二维数组array[][] ,任务是在N个数组中找到最长的公共子数组(LCS)。
例子:
Input: N = 3,
array[][] = { { 0, 1, 2, 3, 4 },
{ 2, 3, 4 },
{ 4, 0, 1, 2, 3 } }
Output: 2
Explanation: The longest common subpath is {2, 3}.
Input: N = 2,
array[][] = {{0, 1, 2, 3, 4},
{4, 3, 2, 1, 0}}
Output: 1
Explanation: The possible longest common subpaths are [0], [1], [2], [3], and [4]. All have a length of 1.
方法:很明显LCS的长度可以二分查找。也就是说,如果有一个长度为L的公共子数组,那么总会有一个长度小于L的公共子数组。因此,二分查找框架如下:
lower = 0, upper = maxlength + 1; // LCS in [lower, upper).
while (lower + 1 < upper) {
middle = (lower + upper) / 2;
if (there is some common substring with length middle) {
lower = middle;
} else{
upper = middle;
}
}
LCS = lower;
所以,这里的重点是检查是否存在长度为 middle 的公共子数组。一种常用的方法是采用散列,即 Rabin-Karp 哈希。
Hash(S[0..n-1]) = (S[n-1] * MAGIC^0 + S[n-2] * MAGIC^1 + .. + S[n-1-i] * MAGIC^i + … ) % MOD
这里最方便的一点是:经过 O(N) 的预处理得到所有前缀哈希 Hash(S[0…i]) 之后,可以在 O(1) 时间内计算任意 Hash(S[l…r]),即:
Hash(S[l..r]) = (Hash(S[0..r]) - Hash(S[0..l-1]) * MAGIC^(r-l+1)) % MOD
因此,可以求出每个给定数组中所有长度为 middle 的子数组的哈希值,然后检查它们是否有交集。这个过程可以用哈希表在 O(|S|) 时间内完成,或者用 Set(平衡二叉搜索树)在 O(|S| log|S|) 时间内完成。因此,二分查找 + 哈希可以在 O(|S| log|S|) 时间内解决这个问题。请按照以下步骤解决此问题:
- 将变量min_len初始化为可能的最大长度,即INT_MAX 。
- 使用变量i遍历范围[0, N)并执行以下任务:
- 将min_len的值设置为min_len或array[i].size() 的最小值。
- 初始化变量start为0 , end为min_len , mid为0,以对长度执行二分查找。
- 当start小于等于end时,循环执行以下步骤:
- 将mid的值设置为start和end 的平均值。
- 调用函数check(array, mid),使用 Rabin-Karp 散列检查长度mid是否可以作为答案。
- 如果函数返回true,则将start的值设置为mid+1 ,否则将end设置为mid-1。
- 执行上述步骤后,打印end的值作为答案。
下面是上述方法的实现
C++
// C++ program for the above approach
#include <algorithm>
#include <climits>
#include <iostream>
#include <map>
#include <set>
#include <vector>
using namespace std;
const long long p = 1299827;
const long long mod = 1e11 + 7;
long long M;
// Function to implement rabin - carp
// hashing to check whether the given length
// is possible or not
bool check(vector >& array, int len)
{
if (len == 0)
return true;
map freq;
for (int i = 0; i < M; i++) {
long long curr_hash = 0, pow = 1;
set found_hashes;
for (int j = 0; j < len; j++) {
curr_hash = (curr_hash * p) % mod;
curr_hash += array[i][j];
if (j != len - 1)
pow = (pow * p) % mod;
}
found_hashes.insert(curr_hash);
for (int j = len; j < array[i].size(); j++) {
curr_hash += mod;
curr_hash -= (array[i][j - len] * pow) % mod;
curr_hash %= mod;
curr_hash = curr_hash * p;
curr_hash %= mod;
curr_hash += array[i][j];
curr_hash %= mod;
found_hashes.insert(curr_hash);
}
while (found_hashes.size()) {
long long h = *(found_hashes.begin());
found_hashes.erase(found_hashes.begin());
freq[h]++;
if (freq[h] == M)
return true;
}
}
return false;
}
// Function to find the longest common sub-array
// from the given N arrays.
//
// N     - number of arrays to consider; stored in the file-level M that
//         check() reads.
// array - the arrays themselves.
// Returns the largest length for which check() reports a common window,
// or 0 when there is nothing to compare.
int longestCommonSubpath(long long N,
                         vector<vector<int> >& array)
{
    M = N;

    // Nothing to compare: answer is 0 without running the search.
    if (N <= 0 || array.empty())
        return 0;

    // The answer cannot exceed the shortest array.
    int minlen = INT_MAX;
    for (size_t i = 0; i < array.size(); i++) {
        minlen = min(minlen, (int)array[i].size());
    }

    // Binary search on the length: if a common window of length L exists,
    // one exists for every shorter length too.
    int start = 0, end = minlen;
    while (start <= end) {
        // Written this way to avoid overflow of start + end.
        int mid = start + (end - start) / 2;

        // Function Call to check whether
        // it is possible or not
        if (check(array, mid)) {
            start = mid + 1;
        }
        else {
            end = mid - 1;
        }
    }
    // `end` is the last length for which check() succeeded.
    return end;
}
// Driver Code: runs the first example from the article and prints the
// length returned by longestCommonSubpath.
int main()
{
    vector<vector<int> > arr{ { 0, 1, 2, 3, 4 },
                              { 2, 3, 4 },
                              { 4, 0, 1, 2, 3 } };
    long long N = arr.size();
    cout << longestCommonSubpath(N, arr);
    return 0;
}
Java
// Java program for the above approach
import java.util.HashMap;
import java.util.HashSet;
/**
 * Longest common sub-array of N integer arrays, found by binary search on
 * the length combined with a Rabin-Karp rolling hash.
 */
class GFG {
    // Base and modulus of the polynomial rolling hash.
    static long p = 1299827;
    static long mod = (long) 1E11 + 7;
    // Number of arrays taking part in the search; set by longestCommonSubpath.
    static long M;

    /** Reduces {@code value} into the canonical range [0, mod). */
    private static long normalize(long value) {
        long r = value % mod;
        return r < 0 ? r + mod : r;
    }

    /**
     * Function to implement Rabin-Karp hashing to check whether the given
     * length is possible or not.
     *
     * Hash equality is treated as a match; windows are not compared element
     * by element, so a hash collision can produce a false positive.
     *
     * NOTE(review): {@code element * lead} is computed in long, so this
     * presumably relies on element magnitudes being small enough not to
     * overflow - confirm against the expected inputs.
     *
     * @param array the input arrays; the first M rows are examined
     * @param len   window length to test
     * @return true if some window of length {@code len} has the same hash in
     *         each of the first M arrays
     */
    static boolean check(int[][] array, int len) {
        // The empty window is trivially common to all arrays.
        if (len == 0)
            return true;

        // hash value -> number of arrays containing a window with that hash
        HashMap<Long, Integer> freq = new HashMap<Long, Integer>();

        for (int i = 0; i < M; i++) {
            int[] row = array[i];

            // A row shorter than the window cannot contain it.
            if (row.length < len)
                return false;

            // Hash of the first window; lead becomes p^(len-1) % mod, the
            // weight of the element leaving the window on each slide.
            long curr_hash = 0, lead = 1;
            for (int j = 0; j < len; j++) {
                // Reduce after adding the element too, so the first window
                // is stored in the same range as the rolled ones.
                curr_hash = normalize((curr_hash * p) % mod + row[j]);
                if (j != len - 1)
                    lead = (lead * p) % mod;
            }

            // Distinct hashes of this row, so a row counts once per hash.
            HashSet<Long> found_hashes = new HashSet<Long>();
            found_hashes.add(curr_hash);

            for (int j = len; j < row.length; j++) {
                // Drop the outgoing element, shift, append the incoming one.
                curr_hash = normalize(curr_hash - (row[j - len] * lead) % mod);
                curr_hash = (curr_hash * p) % mod;
                curr_hash = normalize(curr_hash + row[j]);
                found_hashes.add(curr_hash);
            }

            for (long h : found_hashes) {
                Integer seen = freq.get(h);
                int count = (seen == null) ? 1 : seen + 1;
                freq.put(h, count);
                // Seen in all M arrays -> a common window exists.
                if (count == M)
                    return true;
            }
        }
        return false;
    }

    /**
     * Function to find the longest common sub-array from the given N arrays.
     *
     * @param N     number of arrays to consider (stored in M for check)
     * @param array the input arrays
     * @return the largest length accepted by check, or 0 when there is
     *         nothing to compare
     */
    public static int longestCommonSubpath(long N, int[][] array) {
        M = N;

        // Nothing to compare: answer is 0 without running the search.
        if (N <= 0 || array.length == 0)
            return 0;

        // The answer cannot exceed the shortest array.
        int minlen = Integer.MAX_VALUE;
        for (int i = 0; i < array.length; i++) {
            minlen = Math.min(minlen, array[i].length);
        }

        // Binary search on the length
        int start = 0, end = minlen;
        while (start <= end) {
            // Written this way to avoid overflow of start + end.
            int mid = start + (end - start) / 2;

            // Function Call to check whether
            // it is possible or not
            if (check(array, mid)) {
                start = mid + 1;
            } else {
                end = mid - 1;
            }
        }
        // end is the last length for which check succeeded.
        return end;
    }

    // Driver Code
    public static void main(String args[]) {
        int[][] arr = { { 0, 1, 2, 3, 4 }, { 2, 3, 4 }, { 4, 0, 1, 2, 3 } };
        long N = arr.length;
        System.out.println(longestCommonSubpath(N, arr));
    }
}
// This code is contributed by gfgking.
Python3
# Python Program to implement
# the above approach
# Base of the polynomial rolling hash.
p = 1299827
# Modulus of the rolling hash. Written as an integer expression: the float
# literal 1e11 + 7 would push all hash arithmetic into floating point, where
# products above 2**53 are no longer exact.
mod = 10 ** 11 + 7
# Placeholder for the number of arrays.
M = None
# Function to implement Rabin-Karp
# hashing to check whether the given length
# is possible or not
def check(array, _len, M):
    """Return True if a window of length ``_len`` hashes equally in all ``M`` arrays.

    Each of the first ``M`` rows of ``array`` contributes the set of rolling
    hashes of its length-``_len`` windows; the function succeeds as soon as
    one hash value has been produced by all ``M`` rows. Hash equality is
    treated as a match (windows are not compared element by element), so a
    hash collision can yield a false positive.

    Args:
        array: Sequence of integer sequences.
        _len: Window length to test; 0 is always accepted.
        M: Number of leading rows of ``array`` to examine.

    Returns:
        True if a common hash was found in all ``M`` rows, else False.
        A row shorter than ``_len`` makes the result False.
    """
    if _len == 0:
        return True

    # Work in exact integer arithmetic even if the module constant was
    # written as a float literal.
    modulus = int(mod)
    # Weight of the element that leaves the window on each slide.
    lead_weight = pow(p, _len - 1, modulus)

    # hash value -> number of rows containing a window with that hash
    freq = {}
    for i in range(M):
        row = array[i]
        # A row shorter than the window cannot contain it.
        if len(row) < _len:
            return False

        # Hash of the first window, kept reduced so it is comparable with
        # the rolled hashes below.
        curr_hash = 0
        for value in row[:_len]:
            curr_hash = (curr_hash * p + value) % modulus

        # A set, so each row counts at most once per hash value.
        found_hashes = {curr_hash}
        for j in range(_len, len(row)):
            # Drop the outgoing element, shift, append the incoming one.
            curr_hash = (curr_hash - row[j - _len] * lead_weight) % modulus
            curr_hash = (curr_hash * p + row[j]) % modulus
            found_hashes.add(curr_hash)

        for h in found_hashes:
            freq[h] = freq.get(h, 0) + 1
            # Seen in all M rows -> a common window exists.
            if freq[h] == M:
                return True
    return False
# Function to find the longest common sub-array
# from the given N arrays
def longestCommonSubpath(N, array):
    """Return the length of the longest sub-array accepted by ``check``.

    Binary-searches the window length between 0 and the length of the
    shortest array, relying on the fact that a common window of length L
    implies a common window of every shorter length.

    Args:
        N: Number of arrays to consider; passed to ``check`` as its row count.
        array: Sequence of integer sequences.

    Returns:
        The largest length for which ``check(array, length, N)`` is true,
        or 0 when there is nothing to compare.
    """
    # Nothing to compare: answer is 0 without running the search.
    if N <= 0 or not array:
        return 0

    # The answer cannot exceed the shortest array.
    end = min(len(row) for row in array)
    start = 0

    # Binary search on the length
    while start <= end:
        mid = (start + end) // 2
        # Function Call to check whether
        # it is possible or not
        if check(array, mid, N):
            start = mid + 1
        else:
            end = mid - 1
    # ``end`` is the last length for which check() succeeded.
    return end
# Driver Code
# Sample input: the first example from the article (three arrays).
arr = [
    [0, 1, 2, 3, 4],
    [2, 3, 4],
    [4, 0, 1, 2, 3],
]
# Number of arrays handed to the search.
N = len(arr)
# Print the length reported for the sample input.
print(longestCommonSubpath(N, arr))
# This code is contributed by Saurabh Jaiswal
Javascript
2
时间复杂度: O(N*log(N))
辅助空间: O(N)