先决条件:
- Trie(字典树)
- 数据结构的持久性
Trie是一种方便的数据结构,在执行多个字符串查找时通常会发挥作用。在本文中,我们将介绍此数据结构中的持久性概念。持久性只是意味着保留更改。但是显然,保留更改会导致额外的内存消耗,从而影响时间复杂度。
我们的目标是在Trie中应用持久性,并确保其花费的时间不超过标准Trie搜索,即O(length_of_key) 。我们还将分析持久性在Trie的标准空间复杂度上引起的额外空间复杂性。
让我们考虑版本,即对于Trie中的每个更改/插入,我们都会创建一个新版本。
我们将认为我们的初始版本为Version-0。现在,当我们在Trie中进行任何插入操作时,我们将为其创建一个新版本,并以类似的方式跟踪所有版本的记录。
但是每次为每个版本创建整个Trie都会使内存增加一倍,并严重影响空间复杂性。因此,对于许多版本,此想法很容易用完内存。
让我们利用以下事实:对于Trie中的每个新插入,将精确地访问/修改X个(length_of_key)节点。因此,我们的新版本将仅包含这X个新节点,其余的trie节点将与先前版本相同。因此,很明显,对于每个新版本,我们只需要创建这X个新节点,而其余的trie节点可以与先前版本共享。
考虑下图以获得更好的可视化效果:
现在,出现了一个问题:如何跟踪所有版本?
我们只需要跟踪所有版本的第一个根节点,这将用于跟踪不同版本中的所有新创建的节点,因为根节点为我们提供了该特定版本的入口点。为此,我们可以为所有版本维护一个指向trie根节点的指针数组。
Let’s consider the scenario below and see how a Persistent Trie can be used to solve it!
Given an array of strings, we need to determine whether a given string exists in some
range [l, r] of the array. As an analogy, consider the array to be a
list of dictionary words in which the word at index i is printed on the ith page, and
we need to determine whether a given word X appears in the page range [l, r].
(The version-difference query used below assumes every word occurs at most once in the array.)
下面是上述问题的实现:
C++
// C++ implementation of the approach
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
using namespace std;
// Number of distinct characters a key may contain (lowercase 'a'..'z').
const int sz = 26;

// Node of a persistent trie.
//
// Nodes are never modified after construction: insert() builds new nodes
// along the path of the key and shares every untouched subtree with the
// version it was called on, so each returned root is a separate version.
struct PersistentTrie {
    // children[i] is the subtree for the character 'a' + i. A missing child
    // is represented by the global `dummy` sentinel rather than a null pointer.
    std::vector<PersistentTrie*> children;

    // True when a key ends at this node.
    bool keyEnd = false;

    // Constructor 1: node with no child slots (used to create the sentinel).
    PersistentTrie(bool keyEnd = false)
    {
        this->keyEnd = keyEnd;
    }

    // Constructor 2: node that copies the given child pointers.
    PersistentTrie(const std::vector<PersistentTrie*>& children, bool keyEnd = false)
    {
        this->children = children;
        this->keyEnd = keyEnd;
    }

    // Returns true when key[len..] is stored in the trie rooted at this node.
    bool findKey(const std::string& key, int len) const;

    // Inserts key[len..] below this node and returns the root of the new
    // version; the node it is called on is left untouched.
    PersistentTrie* insert(const std::string& key, int len) const;
};

// Sentinel that stands for "no child"; created by init().
PersistentTrie* dummy;

// Creates the sentinel. Must be called once before any insert/findKey.
void init()
{
    // The sentinel represents the empty trie, so it must not mark the end
    // of a key; otherwise version 0 would claim to contain the empty key.
    dummy = new PersistentTrie(false);
    // Every child of the sentinel is the sentinel itself, so inserting
    // below a missing child can start from it without special cases.
    dummy->children.assign(sz, dummy);
}

// Inserts key[len..] into the trie rooted at this node.
// Returns the root of the new version; one new node is created per remaining
// character plus one for the end of the key.
// Throws std::invalid_argument if the key contains a character outside
// 'a'..'z' (nothing has been allocated when the exception is thrown).
PersistentTrie* PersistentTrie::insert(const std::string& key, int len) const
{
    // End of the key: clone this node and mark it as a key end.
    if (static_cast<std::size_t>(len) == key.length()) {
        return new PersistentTrie(children, true);
    }

    const int slot = key[len] - 'a';
    if (slot < 0 || slot >= sz) {
        throw std::invalid_argument(
            "PersistentTrie::insert: key must contain only 'a'..'z'");
    }

    // Copy the child pointers and replace only the one on the key's path.
    std::vector<PersistentTrie*> updated = children;
    updated[slot] = updated[slot]->insert(key, len + 1);

    // Keep this node's own keyEnd flag: a shorter key that ends here must
    // still be present in the new version.
    return new PersistentTrie(updated, keyEnd);
}

// Returns true when key[len..] is stored in the trie rooted at this node.
// Keys containing characters outside 'a'..'z' are reported as absent.
bool PersistentTrie::findKey(const std::string& key, int len) const
{
    const PersistentTrie* node = this;
    for (std::size_t pos = static_cast<std::size_t>(len); pos < key.length(); ++pos) {
        const int slot = key[pos] - 'a';
        if (slot < 0 || slot >= sz)
            return false;
        const PersistentTrie* next = node->children[slot];
        // The sentinel marks a missing branch: the key cannot be present.
        if (next == dummy)
            return false;
        node = next;
    }
    return node->keyEnd;
}
// dfs traversal over the current trie
// prints all the keys present in the current trie
void printAllKeysInTrie(PersistentTrie* root, string& s)
{
int flag = 0;
for (int i = 0; i < sz; i++) {
if (root->children[i] != dummy) {
flag = 1;
s.push_back('a' + i);
printAllKeysInTrie(root->children[i], s);
s.pop_back();
}
}
if (flag == 0 and s.length() > 0)
cout << s << endl;
}
// Driver code
int main(int argc, char const* argv[])
{
// Initialize the PersistentTrie
init();
// Input keys
vector keys({ "goku", "gohan", "goten", "gogeta" });
// Cache to store trie entry roots after each insertion
PersistentTrie* root[keys.size()];
// Marking first root as dummy
root[0] = dummy;
// Inserting all keys
for (int i = 1; i <= keys.size(); i++) {
// Caching new root for ith version of trie
root[i] = root[i - 1]->insert(keys[i - 1], 0);
}
int idx = 3;
cout << "All keys in trie after version - " << idx << endl;
string key = "";
printAllKeysInTrie(root[idx], key);
string queryString = "goku";
int l = 2, r = 3;
cout << "range : "
<< "[" << l << ", " << r << "]" << endl;
if (root[r]->findKey(queryString, 0) and !root[l - 1]->findKey(queryString, 0))
cout << queryString << " - exists in above range" << endl;
else
cout << queryString << " - does not exist in above range" << endl;
queryString = "goten";
l = 2, r = 4;
cout << "range : "
<< "[" << l << ", " << r << "]" << endl;
if (root[r]->findKey(queryString, 0) and !root[l - 1]->findKey(queryString, 0))
cout << queryString << " - exists in above range" << endl;
else
cout << queryString << " - does not exist in above range" << endl;
return 0;
}
Java
// Java program for the above approach
import java.io.*;
import java.util.*;
// Persistent Trie node structure
class PersistentTrie
{
// Stores all children nodes, where
// ith children denotes ith
// alphabetical character
PersistentTrie[] children;
// Marks the ending of the key
boolean keyEnd = false;
// Constructor 1
PersistentTrie(boolean keyEnd)
{
this.keyEnd = keyEnd;
}
// Constructor 2
PersistentTrie(PersistentTrie[] children,
boolean keyEnd)
{
this.children = children;
this.keyEnd = keyEnd;
}
// Detects existence of key in trie
boolean findKey(String key, int len,
PersistentTrie dummy)
{
// If reached end of key
if (key.length() == len)
// Return if this is a keyEnd in trie
return this.keyEnd;
// If we cannot find key[len] child in trie
// we say key doesn't exist in the trie
if (this.children[key.charAt(len) - 'a'] == dummy)
return false;
// Recursively search the rest of
// key length in children[key] trie
return this.children[key.charAt(len) - 'a'].findKey(
key, len + 1, dummy);
}
// Inserts key into trie
// returns new node after insertion
PersistentTrie insert(String key, int len)
{
// If reached the end of key string
if (len == key.length())
{
// Create new trie node with current trie node
// marked as keyEnd
return new PersistentTrie(this.children.clone(),
true);
}
// Fetch current child nodes
PersistentTrie[] new_version_PersistentTrie
= this.children.clone();
// Insert at key[len] child and
// update the new child node
PersistentTrie tmpNode
= new_version_PersistentTrie[key.charAt(len)
- 'a'];
new_version_PersistentTrie[key.charAt(len) - 'a']
= tmpNode.insert(key, len + 1);
// Return a new node with modified key[len] child
// node
return new PersistentTrie(
new_version_PersistentTrie, false);
}
}
/**
 * Demo driver: builds one trie version per inserted key, prints the keys of
 * one version and answers two "does the word occur in pages [l, r]" queries.
 */
class GFG{

    // Number of distinct characters a key may contain ('a'..'z').
    static final int sz = 26;

    // Sentinel that stands for "no child"; created by init().
    static PersistentTrie dummy;

    // Creates the sentinel. Must be called before any trie operation.
    static void init()
    {
        dummy = new PersistentTrie(false);

        // Every child of the sentinel is the sentinel itself, so inserting
        // below a missing child can start from it without special cases.
        PersistentTrie[] children = new PersistentTrie[sz];
        Arrays.fill(children, dummy);
        dummy.children = children;
    }

    /**
     * Depth-first traversal that prints every non-empty key stored below
     * root, one per line, in lexicographic order.
     *
     * @param root node to start from
     * @param s    characters on the path from the version root to root
     */
    static void printAllKeysInTrie(PersistentTrie root, String s)
    {
        for (int i = 0; i < sz; i++)
        {
            PersistentTrie child = root.children[i];
            if (child == dummy)
                continue; // no key continues with this character

            String word = s + (char)('a' + i);

            // Print before descending so that a key which is a prefix of
            // another key is listed ahead of the longer one.
            if (child.keyEnd)
                System.out.println(word);
            printAllKeysInTrie(child, word);
        }
    }

    // Reports whether word was inserted in versions l..r: it was exactly
    // when version r contains it and version l - 1 does not.
    private static void reportRange(PersistentTrie[] root, String word,
                                    int l, int r)
    {
        System.out.println("range : " + "[" + l + ", " + r + "]");
        if (root[r].findKey(word, 0, dummy) &&
            !root[l - 1].findKey(word, 0, dummy))
            System.out.println(word + " - exists in above range");
        else
            System.out.println(word + " - does not exist in above range");
    }

    // Driver code
    public static void main(String[] args)
    {
        // Create the sentinel before any trie operation.
        init();

        // Input keys
        List<String> keys = Arrays.asList(
            "goku", "gohan", "goten", "gogeta");

        // root[i] is the entry point of the trie after the first i
        // insertions; version 0 is the empty trie, hence size + 1 slots.
        PersistentTrie[] root = new PersistentTrie[keys.size() + 1];
        root[0] = dummy;

        for (int i = 1; i <= keys.size(); i++)
        {
            // Each insertion yields a new version built on the previous one.
            root[i] = root[i - 1].insert(keys.get(i - 1), 0);
        }

        int idx = 3;
        System.out.println("All keys in trie " +
                           "after version - " + idx);
        String key = "";
        printAllKeysInTrie(root[idx], key);

        reportRange(root, "goku", 2, 3);
        reportRange(root, "goten", 2, 4);
    }
}
// This code is contributed by jithin
All keys in trie after version - 3
gohan
goku
goten
range : [2, 3]
goku - does not exist in above range
range : [2, 4]
goten - exists in above range
时间复杂度:如上所述,插入时我们将访问Trie中全部X(键的长度)个节点。因此,我们将访问X个状态,并且在每个状态下,都需要把先前版本的sz个子节点链接到新创建的trie节点(即当前版本)上,这需要O(sz)的工作量。因此,插入的时间复杂度为O(length_of_key * sz)。但是搜索相对于待查找键的长度仍然是线性的,因此与标准Trie一样,搜索键的时间复杂度仍然是O(length_of_key)。
空间复杂度:显然,数据结构的持久性是以空间为代价的,我们需要消耗更多的内存来维护Trie的不同版本。现在考虑最坏的情况:每次插入都会创建O(length_of_key)个节点,而每个新创建的节点都需要O(sz)的空间来存储其子节点。因此,上述实现中每次插入的空间复杂度为O(length_of_key * sz)。