给定一个很长的URL列表,找出最后一个唯一的(即只出现过一次的)URL。只允许对所有URL遍历一次。
例子:
Input:
https://www.geeksforgeeks.org
http://quiz.geeksforgeeks.org
http://qa.geeksforgeeks.org
https://practice.geeksforgeeks.org
https://ide.geeksforgeeks.org
http://www.contribute.geeksforgeeks.org
http://quiz.geeksforgeeks.org
https://practice.geeksforgeeks.org
https://ide.geeksforgeeks.org
http://quiz.geeksforgeeks.org
http://qa.geeksforgeeks.org
https://practice.geeksforgeeks.org
Output:
http://www.contribute.geeksforgeeks.org
将Trie与双向链表结合使用,我们可以只遍历一次就解决此问题(双向链表支持O(1)时间的插入和删除)。思路是把所有URL逐个插入到Trie中,并检查是否重复。为了判断某个URL之前是否出现过,我们把每个URL的最后一个节点标记为叶节点。如果是第一次遇到某个URL,就把它插入到双向链表的表头,并在Trie中对应的叶节点里保存指向该链表节点的指针。如果遇到的URL已经存在于Trie中,并且叶节点里仍保存着指向链表节点的指针,则从链表中删除该节点,并把Trie中的指针设为null。处理完所有URL后,链表中只剩下仅出现过一次的URL;由于新节点总是插入到表头,表头节点就是最后一个唯一的URL。
// C++ program to find the last unique URL in a list using a Trie
// and a Doubly Linked List
#include <cstddef>
#include <iostream>
#include <string>
using namespace std;
// Alphabet size (# of symbols): the number of child slots in every trie
// node. insert() picks a slot using the integer value of each key character.
const int ALPHABET_SIZE = 256;
// Node of the doubly linked list of URLs.
struct DLLNode
{
    string data;      // URL stored in this node
    DLLNode* next;    // node after this one, or NULL
    DLLNode* prev;    // node before this one, or NULL
};
// Trie node. A URL corresponds to the path of child links followed from
// the root, one link per character of the URL.
struct TrieNode
{
// One child slot per possible symbol; NULL when there is no such child.
TrieNode* children[ALPHABET_SIZE];
// isLeaf is true once an inserted URL ends at this node.
bool isLeaf;
// List node holding the URL that ends here while that URL has been seen
// only once; NULL before the first sighting and after a duplicate.
DLLNode* LLptr;
};
/* Prepends a new node holding new_data to the doubly linked list.
   head_ref --> reference to the head pointer; on return it points at
                the new node, which is now the first element.
   new_data --> string stored in the new node. */
void push(DLLNode*& head_ref, string new_data)
{
    DLLNode* const old_head = head_ref;

    // Build the node so that it sits in front of the current head.
    DLLNode* fresh = new DLLNode;
    fresh->data = new_data;
    fresh->prev = NULL;
    fresh->next = old_head;

    // A non-empty list must link its former head back to the new node.
    if (old_head)
        old_head->prev = fresh;

    head_ref = fresh;
}
/* Unlinks the node del from a doubly linked list and frees it.
   head_ref --> reference to the head pointer; moved to del->next when
                del is the current head.
   del      --> node to remove (assumed to be a node of this list).
   Does nothing when the list is empty or del is NULL. */
void deleteNode(DLLNode*& head_ref, DLLNode* del)
{
    if (!head_ref || !del)
        return;

    DLLNode* const before = del->prev;
    DLLNode* const after = del->next;

    // Removing the head moves the head to its successor.
    if (del == head_ref)
        head_ref = after;

    // Bridge the neighbours over the removed node, where they exist.
    if (after)
        after->prev = before;
    if (before)
        before->next = after;

    delete del;
}
// Allocates a trie node with no children, not marked as a leaf and with
// no linked-list node attached, and returns it. The caller owns the node.
// Plain `new` reports allocation failure by throwing std::bad_alloc rather
// than returning NULL, so the result is never NULL and needs no check.
TrieNode* getNewTrieNode(void)
{
    TrieNode* node = new TrieNode;
    node->isLeaf = false;
    node->LLptr = NULL;
    for (int i = 0; i < ALPHABET_SIZE; i++)
        node->children[i] = NULL;
    return node;
}
// Records one occurrence of key in the trie rooted at root and keeps the
// linked list of URLs seen exactly once up to date.
//   root --> root node of the trie; must not be NULL (it is dereferenced).
//   key  --> URL to record.
//   head --> head pointer of the doubly linked list; may be modified.
// First occurrence: the final trie node is marked as a leaf, key is pushed
// to the front of the list and the trie node remembers that list node.
// Later occurrences: the remembered list node, if still present, is
// removed and the trie pointer is cleared, so further repeats do nothing.
void insert(TrieNode* root, string key, DLLNode*& head)
{
    TrieNode* pCrawl = root;
    for (string::size_type level = 0; level < key.length(); level++)
    {
        // Go through unsigned char: plain char may be signed, and a byte
        // above 127 would otherwise become a negative array index.
        const int index = static_cast<unsigned char>(key[level]);
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNewTrieNode();
        pCrawl = pCrawl->children[index];
    }
    if (pCrawl->isLeaf)
    {
        // Duplicate: drop the URL from the list if it is still there.
        if (pCrawl->LLptr)
        {
            deleteNode(head, pCrawl->LLptr);
            pCrawl->LLptr = NULL;
        }
    }
    else
    {
        // First sighting: mark the end of the URL in the trie.
        pCrawl->isLeaf = true;
        // push() makes the new node the head, so head is the node to keep.
        push(head, key);
        pCrawl->LLptr = head;
    }
}
// Driver function
int main()
{
string urls[] = {
"https://www.geeksforgeeks.org",
"http://www.contribute.geeksforgeeks.org",
"http://quiz.geeksforgeeks.org",
"http://qa.geeksforgeeks.org",
"https://practice.geeksforgeeks.org",
"https://ide.geeksforgeeks.org",
"http://quiz.geeksforgeeks.org",
"https://practice.geeksforgeeks.org",
"https://ide.geeksforgeeks.org",
"http://quiz.geeksforgeeks.org",
"http://qa.geeksforgeeks.org",
"https://practice.geeksforgeeks.org"
};
TrieNode* root = getNewTrieNode();
// Start with the empty list
DLLNode* head = NULL;
int n = sizeof(urls)/sizeof(urls[0]);
// Construct Trie from given URLs
for (int i = 0; i < n; i++)
insert(root, urls[i], head);
// head of linked list will point to last
// distinct URL
cout << head->data << endl;
return 0;
}
输出:
http://www.contribute.geeksforgeeks.org