我们建议阅读以下文章作为前提条件。
贪心算法 | 第3部分(霍夫曼编码)
上述文章中讨论的算法的时间复杂度为O(nLogn)。如果已知给定的数组已经排序(按频率的非降序排列),则可以在O(n)的时间内生成霍夫曼编码。以下是针对已排序输入的O(n)算法。
1.创建两个空队列。
2.为每个唯一字符创建一个叶节点,并按频率的非降序将它们依次入队到第一个队列中。最初,第二个队列为空。
3.通过检查两个队列的队首,将频率最小的两个节点出队。为此,将以下步骤重复两次:
1.如果第二个队列为空,则从第一个队列中出队。
2.如果第一个队列为空,则从第二个队列出队。
3.否则,比较两个队列的队首,并将频率较小的那个节点出队。
4.创建一个新的内部节点,其频率等于这两个节点的频率之和。将第一个出队的节点作为左子节点,第二个出队的节点作为右子节点。将此新节点入队到第二个队列。
5.只要两个队列中总共还有多于一个节点,就重复步骤3和4。最后剩下的节点即为根节点,此时树构建完成。
C++
// C++ Program for Efficient Huffman Coding for Sorted input
#include <cstddef>
#include <iostream>
using namespace std;
// Number of int slots in the scratch array that HuffmanCodes hands to
// printCodes; printCodes stores one 0/1 entry per tree level in it.
// This constant can be avoided by explicitly
// calculating height of Huffman Tree
#define MAX_TREE_HT 100
// One node of the Huffman tree. The same node type is what the queues
// store while the tree is being assembled.
class QueueNode {
public:
    // Symbol carried by the node (internal nodes are created with '$').
    char data;
    // Frequency of the symbol, or the sum of the children's frequencies.
    unsigned freq;
    // Left child; NULL for a leaf.
    QueueNode* left;
    // Right child; NULL for a leaf.
    QueueNode* right;
};
// Array-backed FIFO of Huffman tree node pointers.
// front and rear are both -1 while the queue is empty; otherwise they are
// the indices of the oldest and the newest stored element.
class Queue {
public:
    // Index of the element that will be dequeued next (-1 when empty).
    int front;
    // Index of the most recently enqueued element (-1 when empty).
    int rear;
    // Number of slots in array.
    int capacity;
    // Storage for the queued node pointers.
    QueueNode** array;
};
// Allocates a single tree node holding the given symbol and frequency.
// Both child links start out as NULL, so the new node is a leaf.
//
// data: symbol stored in the node
// freq: frequency stored in the node
// Returns a pointer to the newly allocated node.
QueueNode* newNode(char data, unsigned freq)
{
    // Allocate exactly one node. The array form
    // `new QueueNode[sizeof(QueueNode)]` would create sizeof(QueueNode)
    // nodes and waste all but the first.
    QueueNode* temp = new QueueNode;
    temp->left = temp->right = NULL;
    temp->data = data;
    temp->freq = freq;
    return temp;
}
// Allocates an empty queue with room for `capacity` node pointers.
//
// capacity: number of slots to reserve; a negative value is treated as 0
// Returns a pointer to the new queue with front == rear == -1.
Queue* createQueue(int capacity)
{
    // A single Queue object is needed, not an array of them.
    Queue* queue = new Queue;
    queue->front = queue->rear = -1;
    // Clamp so that isFull() and the allocation below agree on the number
    // of usable slots even for a negative request.
    queue->capacity = capacity > 0 ? capacity : 0;
    // new[] already counts in elements, so no multiplication by
    // sizeof(QueueNode*) is required.
    queue->array = new QueueNode*[queue->capacity];
    return queue;
}
// Reports whether the queue currently holds exactly one element.
// Returns 1 when front and rear refer to the same occupied slot, else 0.
int isSizeOne(Queue* queue)
{
    // An empty queue also has front == rear (both -1), so rule it out first.
    if (queue->front == -1) {
        return 0;
    }
    return queue->front == queue->rear;
}
// Reports whether the queue holds no elements.
// Returns 1 when front is -1 (the empty marker), otherwise 0.
int isEmpty(Queue* queue)
{
    return (queue->front == -1) ? 1 : 0;
}
// Reports whether the last slot of the backing array has been used.
// Returns 1 when rear is at index capacity - 1, otherwise 0.
int isFull(Queue* queue)
{
    const int lastSlot = queue->capacity - 1;
    return queue->rear == lastSlot;
}
// Appends item at the rear of the queue.
// If the queue is full the call does nothing and item is not stored.
void enQueue(Queue* queue, QueueNode* item)
{
    if (!isFull(queue)) {
        queue->rear += 1;
        queue->array[queue->rear] = item;
        // First insertion into an empty queue: front moves from -1 to 0.
        if (queue->front == -1) {
            queue->front = 0;
        }
    }
}
// Removes and returns the element at the front of the queue.
// Returns NULL when the queue is empty.
QueueNode* deQueue(Queue* queue)
{
    if (isEmpty(queue)) {
        return NULL;
    }

    const int head = queue->front;
    QueueNode* removed = queue->array[head];

    if (head != queue->rear) {
        // More elements remain: advance to the next one.
        queue->front = head + 1;
    } else {
        // That was the only element: reset to the empty state.
        queue->rear = -1;
        queue->front = -1;
    }
    return removed;
}
// Returns the element at the front of the queue without removing it,
// or NULL when the queue is empty.
QueueNode* getFront(Queue* queue)
{
    QueueNode* head = NULL;
    if (!isEmpty(queue)) {
        head = queue->array[queue->front];
    }
    return head;
}
// Dequeues and returns the node with the smaller frequency among the
// fronts of the two queues. Returns NULL when both queues are empty.
QueueNode* findMin(Queue* firstQueue, Queue* secondQueue)
{
    Queue* source;

    if (isEmpty(firstQueue)) {
        // Nothing in the first queue: the second one is the only candidate.
        source = secondQueue;
    } else if (isEmpty(secondQueue)) {
        // Nothing in the second queue: take from the first one.
        source = firstQueue;
    } else {
        // Both have a front element; on equal frequencies the second
        // queue is chosen.
        const bool firstIsSmaller
            = getFront(firstQueue)->freq < getFront(secondQueue)->freq;
        source = firstIsSmaller ? firstQueue : secondQueue;
    }
    return deQueue(source);
}
// Reports whether the node has neither a left nor a right child.
// Returns 1 for a leaf, 0 otherwise. root must not be NULL.
int isLeaf(QueueNode* root)
{
    return root->left == NULL && root->right == NULL;
}
// Writes the first n entries of arr to standard output back to back
// (no separators), followed by a line break.
void printArr(int arr[], int n)
{
    int idx = 0;
    while (idx < n) {
        cout << arr[idx];
        ++idx;
    }
    cout << endl;
}
// Builds a Huffman tree from symbols whose frequencies are already in
// non-decreasing order, using two queues instead of a heap.
//
// data: the symbols, one per leaf
// freq: frequency of each symbol, in non-decreasing order
// size: number of entries in data and freq
// Returns the root of the tree. For size == 1 the root is the single leaf;
// for size <= 0 there is nothing to build and NULL is returned.
QueueNode* buildHuffmanTree(char data[], int freq[],
                            int size)
{
    // With no symbols, findMin() would return NULL and the merge step
    // below would dereference it.
    if (size <= 0)
        return NULL;

    QueueNode *left, *right, *top;

    // Step 1: Create two empty queues
    Queue* firstQueue = createQueue(size);
    Queue* secondQueue = createQueue(size);

    // Step 2: Create a leaf node for each unique character and enqueue it
    // to the first queue in non-decreasing order of frequency. Initially
    // the second queue is empty.
    for (int i = 0; i < size; ++i)
        enQueue(firstQueue, newNode(data[i], freq[i]));

    // A single symbol needs no merging: the leaf itself is the tree.
    // Entering the loop would dequeue it and then read a NULL second node.
    if (size == 1)
        return deQueue(firstQueue);

    // Run while the queues together hold more than one node. At the end
    // the first queue is empty and the second holds only the root.
    while (!(isEmpty(firstQueue) && isSizeOne(secondQueue))) {
        // Step 3: Dequeue the two nodes with the minimum frequency by
        // examining the front of both queues.
        left = findMin(firstQueue, secondQueue);
        right = findMin(firstQueue, secondQueue);

        // Step 4: Create a new internal node whose frequency is the sum
        // of the two and enqueue it to the second queue.
        top = newNode('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        enQueue(secondQueue, top);
    }

    // NOTE(review): the two helper queues are not released here; the
    // correct delete form depends on how createQueue allocates them.
    return deQueue(secondQueue);
}
// Prints the Huffman code of every leaf below root, one "symbol: bits"
// line per leaf.
//
// root: subtree to print; a NULL root prints nothing
// arr:  scratch buffer holding the 0/1 choices made on the path from the
//       tree root to the current node
// top:  number of valid entries in arr, i.e. the depth of root
void printCodes(QueueNode* root, int arr[], int top)
{
    // An empty tree has no codes; without this check the member accesses
    // below would dereference NULL.
    if (root == NULL)
        return;

    // Assign 0 to left edge and recur
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }

    // Assign 1 to right edge and recur
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }

    // A leaf carries one of the input characters: print the character
    // followed by the code collected in arr[0..top-1].
    if (isLeaf(root)) {
        cout << root->data << ": ";
        printArr(arr, top);
    }
}
// The main function that builds a Huffman Tree and print
// codes by traversing the built Huffman Tree
void HuffmanCodes(char data[], int freq[], int size)
{
// Construct Huffman Tree
QueueNode* root = buildHuffmanTree(data, freq, size);
// Print Huffman codes using the Huffman tree built
// above
int arr[MAX_TREE_HT], top = 0;
printCodes(root, arr, top);
}
// Driver: prints the Huffman codes for a fixed six-symbol example whose
// frequencies are listed in non-decreasing order.
int main()
{
    char symbols[] = { 'a', 'b', 'c', 'd', 'e', 'f' };
    int weights[] = { 5, 9, 12, 13, 16, 45 };
    const int count = sizeof(symbols) / sizeof(symbols[0]);

    HuffmanCodes(symbols, weights, count);
    return 0;
}
// This code is contributed by rathbhupendra
C
// C Program for Efficient Huffman Coding for Sorted input
#include <stdio.h>
#include <stdlib.h>
// Number of int slots in the scratch array that HuffmanCodes hands to
// printCodes; printCodes stores one 0/1 entry per tree level in it.
// This constant can be avoided by explicitly calculating
// height of Huffman Tree
#define MAX_TREE_HT 100
// One node of the Huffman tree. The same node type is what the queues
// store while the tree is being assembled.
struct QueueNode {
    // Symbol carried by the node (internal nodes are created with '$').
    char data;
    // Frequency of the symbol, or the sum of the children's frequencies.
    unsigned freq;
    // Left child; NULL for a leaf.
    struct QueueNode* left;
    // Right child; NULL for a leaf.
    struct QueueNode* right;
};
// Array-backed FIFO of Huffman tree node pointers.
// front and rear are both -1 while the queue is empty; otherwise they are
// the indices of the oldest and the newest stored element.
struct Queue {
    // Index of the element that will be dequeued next (-1 when empty).
    int front;
    // Index of the most recently enqueued element (-1 when empty).
    int rear;
    // Number of slots in array.
    int capacity;
    // Storage for the queued node pointers.
    struct QueueNode** array;
};
// Allocates a single tree node holding the given symbol and frequency.
// Both child links start out as NULL, so the new node is a leaf.
//
// data: symbol stored in the node
// freq: frequency stored in the node
// Returns the new node, or NULL if the allocation fails. The caller owns
// the node and releases it with free().
struct QueueNode* newNode(char data, unsigned freq)
{
    struct QueueNode* temp = malloc(sizeof *temp);
    // Report allocation failure instead of writing through a NULL pointer.
    if (temp == NULL)
        return NULL;
    temp->left = temp->right = NULL;
    temp->data = data;
    temp->freq = freq;
    return temp;
}
// Allocates an empty queue with room for `capacity` node pointers.
//
// capacity: number of slots to reserve; a negative value is treated as 0
// Returns the new queue with front == rear == -1, or NULL if an allocation
// fails. The caller releases queue->array and then the queue with free().
struct Queue* createQueue(int capacity)
{
    struct Queue* queue = malloc(sizeof *queue);
    if (queue == NULL)
        return NULL;

    // A negative count would turn into a huge size_t in the multiplication
    // below, so clamp it first.
    if (capacity < 0)
        capacity = 0;

    queue->front = queue->rear = -1;
    queue->capacity = capacity;
    queue->array = malloc((size_t)capacity * sizeof *queue->array);

    // malloc(0) may legitimately return NULL, so only a failed non-empty
    // request counts as an error. Do not leak the queue record then.
    if (queue->array == NULL && capacity > 0) {
        free(queue);
        return NULL;
    }
    return queue;
}
// Reports whether the queue currently holds exactly one element.
// Returns 1 when front and rear refer to the same occupied slot, else 0.
int isSizeOne(struct Queue* queue)
{
    // An empty queue also has front == rear (both -1), so rule it out first.
    if (queue->front == -1) {
        return 0;
    }
    return queue->front == queue->rear;
}
// Reports whether the queue holds no elements.
// Returns 1 when front is -1 (the empty marker), otherwise 0.
int isEmpty(struct Queue* queue)
{
    return (queue->front == -1) ? 1 : 0;
}
// Reports whether the last slot of the backing array has been used.
// Returns 1 when rear is at index capacity - 1, otherwise 0.
int isFull(struct Queue* queue)
{
    const int lastSlot = queue->capacity - 1;
    return queue->rear == lastSlot;
}
// Appends item at the rear of the queue.
// If the queue is full the call does nothing and item is not stored.
void enQueue(struct Queue* queue, struct QueueNode* item)
{
    if (!isFull(queue)) {
        queue->rear += 1;
        queue->array[queue->rear] = item;
        // First insertion into an empty queue: front moves from -1 to 0.
        if (queue->front == -1) {
            queue->front = 0;
        }
    }
}
// Removes and returns the element at the front of the queue.
// Returns NULL when the queue is empty.
struct QueueNode* deQueue(struct Queue* queue)
{
    if (isEmpty(queue)) {
        return NULL;
    }

    const int head = queue->front;
    struct QueueNode* removed = queue->array[head];

    if (head != queue->rear) {
        // More elements remain: advance to the next one.
        queue->front = head + 1;
    } else {
        // That was the only element: reset to the empty state.
        queue->rear = -1;
        queue->front = -1;
    }
    return removed;
}
// Returns the element at the front of the queue without removing it,
// or NULL when the queue is empty.
struct QueueNode* getFront(struct Queue* queue)
{
    struct QueueNode* head = NULL;
    if (!isEmpty(queue)) {
        head = queue->array[queue->front];
    }
    return head;
}
// Dequeues and returns the node with the smaller frequency among the
// fronts of the two queues. Returns NULL when both queues are empty.
struct QueueNode* findMin(struct Queue* firstQueue,
                          struct Queue* secondQueue)
{
    struct Queue* source;

    if (isEmpty(firstQueue)) {
        // Nothing in the first queue: the second one is the only candidate.
        source = secondQueue;
    } else if (isEmpty(secondQueue)) {
        // Nothing in the second queue: take from the first one.
        source = firstQueue;
    } else {
        // Both have a front element; on equal frequencies the second
        // queue is chosen.
        const int firstIsSmaller
            = getFront(firstQueue)->freq < getFront(secondQueue)->freq;
        source = firstIsSmaller ? firstQueue : secondQueue;
    }
    return deQueue(source);
}
// Reports whether the node has neither a left nor a right child.
// Returns 1 for a leaf, 0 otherwise. root must not be NULL.
int isLeaf(struct QueueNode* root)
{
    return root->left == NULL && root->right == NULL;
}
// Writes the first n entries of arr to standard output back to back
// (no separators), followed by a newline.
void printArr(int arr[], int n)
{
    int idx = 0;
    while (idx < n) {
        printf("%d", arr[idx]);
        ++idx;
    }
    printf("\n");
}
// The main function that builds Huffman tree
struct QueueNode* buildHuffmanTree(char data[], int freq[],
int size)
{
struct QueueNode *left, *right, *top;
// Step 1: Create two empty queues
struct Queue* firstQueue = createQueue(size);
struct Queue* secondQueue = createQueue(size);
// Step 2:Create a leaf node for each unique character
// and Enqueue it to the first queue in non-decreasing
// order of frequency. Initially second queue is empty
for (int i = 0; i < size; ++i)
enQueue(firstQueue, newNode(data[i], freq[i]));
// Run while Queues contain more than one node. Finally,
// first queue will be empty and second queue will
// contain only one node
while (
!(isEmpty(firstQueue) && isSizeOne(secondQueue))) {
// Step 3: Dequeue two nodes with the minimum
// frequency by examining the front of both queues
left = findMin(firstQueue, secondQueue);
right = findMin(firstQueue, secondQueue);
// Step 4: Create a new internal node with frequency
// equal to the sum of the two nodes frequencies.
// Enqueue this node to second queue.
top = newNode('$', left->freq + right->freq);
top->left = left;
top->right = right;
enQueue(secondQueue, top);
}
return deQueue(secondQueue);
}
// Prints the Huffman code of every leaf below root, one "symbol: bits"
// line per leaf.
//
// root: subtree to print; a NULL root prints nothing
// arr:  scratch buffer holding the 0/1 choices made on the path from the
//       tree root to the current node
// top:  number of valid entries in arr, i.e. the depth of root
void printCodes(struct QueueNode* root, int arr[], int top)
{
    // An empty tree has no codes; without this check the member accesses
    // below would dereference NULL.
    if (root == NULL)
        return;

    // Assign 0 to left edge and recur
    if (root->left) {
        arr[top] = 0;
        printCodes(root->left, arr, top + 1);
    }

    // Assign 1 to right edge and recur
    if (root->right) {
        arr[top] = 1;
        printCodes(root->right, arr, top + 1);
    }

    // A leaf carries one of the input characters: print the character
    // followed by the code collected in arr[0..top-1].
    if (isLeaf(root)) {
        printf("%c: ", root->data);
        printArr(arr, top);
    }
}
// The main function that builds a Huffman Tree and print
// codes by traversing the built Huffman Tree
void HuffmanCodes(char data[], int freq[], int size)
{
// Construct Huffman Tree
struct QueueNode* root
= buildHuffmanTree(data, freq, size);
// Print Huffman codes using the Huffman tree built
// above
int arr[MAX_TREE_HT], top = 0;
printCodes(root, arr, top);
}
// Driver: prints the Huffman codes for a fixed six-symbol example whose
// frequencies are listed in non-decreasing order.
int main()
{
    char symbols[] = { 'a', 'b', 'c', 'd', 'e', 'f' };
    int weights[] = { 5, 9, 12, 13, 16, 45 };
    const int count = sizeof(symbols) / sizeof(symbols[0]);

    HuffmanCodes(symbols, weights, count);
    return 0;
}
输出:
f: 0
c: 100
d: 101
a: 1100
b: 1101
e: 111
时间复杂度: O(n)
如果输入未排序,则需要先对其进行排序,然后才能用上述算法处理。排序可以使用堆排序或归并排序完成,两者的运行时间均为Θ(nlogn)。因此,对于未排序的输入,总的时间复杂度为O(nlogn)。