📅  最后修改于: 2023-12-03 14:52:11.012000             🧑  作者: Mango
在 C 编程语言中,我们可以使用多种方法来压缩字符串,以减少其所占用的空间。以下是一些常用的方法。
Run-Length Encoding(RLE)是一种常见的压缩算法,在许多不同的应用程序中都得到了广泛的应用。它的基本思想是将连续的相同字符替换为一个字符并计算它们的数量。下面是一个示例程序,展示了如何使用 RLE 压缩字符串。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Run-length encode a NUL-terminated string.
 *
 * A run of two or more identical characters is written as the decimal run
 * length followed by the character; a character that is not repeated is
 * copied unchanged, e.g. "aaabccdddd" -> "3ab2c4d".
 *
 * NOTE: the output format carries no escaping, so input that itself
 * contains digit characters cannot be decoded unambiguously.
 *
 * @param str  Input string. NULL is accepted and yields NULL.
 * @return     Newly allocated, NUL-terminated encoded string that the
 *             caller must free(), or NULL on NULL input / allocation
 *             failure.
 */
char *rle_compress(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    size_t len = strlen(str);

    /*
     * A single character costs 1 byte and a run of n >= 2 characters costs
     * digits(n) + 1 <= n bytes, so the encoding is never longer than the
     * input. One extra byte holds the terminator.
     */
    size_t capacity = len + 1;
    char *compressed = malloc(capacity);
    if (compressed == NULL) {
        return NULL;
    }

    size_t i = 0;
    size_t j = 0;
    while (i < len) {
        char c = str[i];
        size_t count = 1;
        while (i + count < len && str[i + count] == c) {
            count++;
        }

        if (count > 1) {
            /* Decimal formatting keeps runs of 10 or more correct. */
            int written = snprintf(compressed + j, capacity - j, "%zu", count);
            if (written < 0 || (size_t)written >= capacity - j) {
                free(compressed);
                return NULL;
            }
            j += (size_t)written;
        }
        compressed[j++] = c;
        i += count;
    }

    compressed[j] = '\0';
    return compressed;
}
/*
 * Demo driver: run-length encodes a fixed sample string, prints the result
 * and releases the buffer returned by rle_compress().
 */
int main(void) {
    char *str = "aaabbbbcccccdddd";
    char *compressed = rle_compress(str);

    /* Passing NULL to "%s" is undefined behaviour, so bail out first. */
    if (compressed == NULL) {
        fprintf(stderr, "rle_compress failed\n");
        return EXIT_FAILURE;
    }

    printf("Compressed string: %s\n", compressed);
    free(compressed);
    return 0;
}
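在上面的示例程序中,我们定义了一个名为 rle_compress 的函数,它接受一个字符串,并返回压缩后的字符串。该函数执行了以下操作:
1. 从左到右遍历输入字符串,统计每一段连续相同字符的长度。
2. 如果某个字符只出现一次,则直接将其写入结果;否则先写入重复次数,再写入该字符。例如 "aaabbbbcccccdddd" 会被压缩为 "3a4b5c4d"。
3. 返回新分配的结果字符串,调用者在使用完毕后需要调用 free 释放它。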
Huffman 编码是一种基于字符频率的压缩算法,它将出现频率较高的字符用较短的编码表示,而将出现频率较低的字符用较长的编码表示。以下是一个示例程序,展示了如何使用 Huffman 编码压缩字符串。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_TREE_HT 100
/*
 * Node of the Huffman tree. The same node type is what the min-heap stores
 * while the tree is being assembled.
 */
struct MinHeapNode {
    char data;                  /* symbol held by the node */
    unsigned int freq;          /* weight the heap orders nodes by */
    struct MinHeapNode *left;   /* left child, NULL when absent */
    struct MinHeapNode *right;  /* right child, NULL when absent */
};
/* Binary min-heap of node pointers kept in a flat array. */
struct MinHeap {
    unsigned int size;           /* number of slots currently in use */
    unsigned int capacity;       /* number of slots allocated in array */
    struct MinHeapNode **array;  /* heap storage, one node pointer per slot */
};
/**
 * Allocate a childless tree node.
 *
 * @param data  Symbol stored in the node.
 * @param freq  Weight stored in the node.
 * @return      Pointer to a heap-allocated node with both children NULL.
 *              Never returns NULL: on allocation failure a message is
 *              written to stderr and the process exits, because the callers
 *              in this file dereference the result unconditionally.
 */
struct MinHeapNode *new_node(char data, unsigned freq) {
    struct MinHeapNode *temp = malloc(sizeof *temp);
    if (temp == NULL) {
        fprintf(stderr, "new_node: out of memory\n");
        exit(EXIT_FAILURE);
    }
    temp->left = NULL;
    temp->right = NULL;
    temp->data = data;
    temp->freq = freq;
    return temp;
}
/**
 * Allocate an empty min-heap with room for `capacity` node pointers.
 *
 * @param capacity  Number of slots to reserve. Zero is allowed; one slot is
 *                  still allocated so `array` is always a valid pointer.
 * @return          Heap with size 0 and all slots set to NULL. Never returns
 *                  NULL: on allocation failure a message is written to
 *                  stderr and the process exits.
 */
struct MinHeap *create_min_heap(unsigned capacity) {
    struct MinHeap *min_heap = malloc(sizeof *min_heap);
    if (min_heap == NULL) {
        fprintf(stderr, "create_min_heap: out of memory\n");
        exit(EXIT_FAILURE);
    }

    min_heap->size = 0;
    min_heap->capacity = capacity;

    /* calloc checks the count * size multiplication for overflow. */
    min_heap->array = calloc(capacity ? capacity : 1u, sizeof *min_heap->array);
    if (min_heap->array == NULL) {
        free(min_heap);
        fprintf(stderr, "create_min_heap: out of memory\n");
        exit(EXIT_FAILURE);
    }
    return min_heap;
}
/* Exchange the two node pointers stored at *a and *b. */
void swap_min_heap_node(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode *held_a = *a;
    struct MinHeapNode *held_b = *b;

    *a = held_b;
    *b = held_a;
}
/*
 * Sift the node at index idx down the heap: while one of its children has a
 * smaller freq, swap it with the smallest child and continue from there.
 */
void min_heapify(struct MinHeap *min_heap, int idx) {
    int current = idx;

    for (;;) {
        int candidate = current;
        int lchild = 2 * current + 1;
        int rchild = 2 * current + 2;

        if (lchild < min_heap->size &&
            min_heap->array[lchild]->freq < min_heap->array[candidate]->freq) {
            candidate = lchild;
        }
        if (rchild < min_heap->size &&
            min_heap->array[rchild]->freq < min_heap->array[candidate]->freq) {
            candidate = rchild;
        }

        /* Neither child is smaller: the node has reached its place. */
        if (candidate == current) {
            return;
        }

        swap_min_heap_node(&min_heap->array[candidate], &min_heap->array[current]);
        current = candidate;
    }
}
/* Return 1 when the heap holds exactly one node, 0 otherwise. */
int is_size_one(struct MinHeap *min_heap) {
    const unsigned stored = min_heap->size;

    return stored == 1u;
}
/**
 * Remove and return the node with the smallest freq.
 *
 * The last element is moved to the root and sifted down to restore heap
 * order.
 *
 * @param min_heap  Heap to pop from.
 * @return          The removed node, or NULL when min_heap is NULL or empty
 *                  (previously an empty heap made `size - 1` wrap around and
 *                  read outside the array).
 */
struct MinHeapNode *extract_min(struct MinHeap *min_heap) {
    if (min_heap == NULL || min_heap->size == 0) {
        return NULL;
    }

    struct MinHeapNode *temp = min_heap->array[0];
    min_heap->array[0] = min_heap->array[min_heap->size - 1];
    --min_heap->size;
    min_heapify(min_heap, 0);
    return temp;
}
/**
 * Insert a node into the heap, keeping the smallest freq at the root.
 *
 * The new node starts at the first free slot and parents with a larger freq
 * are shifted down until its position is found.
 *
 * If the heap is already full the backing array is doubled first instead of
 * writing past its end. On allocation failure a message is written to
 * stderr and the process exits, since this function has no way to report an
 * error to its caller.
 *
 * @param min_heap       Heap to insert into.
 * @param min_heap_node  Node to insert.
 */
void insert_min_heap(struct MinHeap *min_heap, struct MinHeapNode *min_heap_node) {
    if (min_heap->size >= min_heap->capacity) {
        unsigned new_capacity = min_heap->capacity ? min_heap->capacity * 2u : 1u;
        struct MinHeapNode **grown = NULL;

        /* Skip realloc if doubling wrapped or the byte count would overflow. */
        if (new_capacity > min_heap->capacity &&
            new_capacity <= (size_t)-1 / sizeof *grown) {
            grown = realloc(min_heap->array, (size_t)new_capacity * sizeof *grown);
        }
        if (grown == NULL) {
            fprintf(stderr, "insert_min_heap: out of memory\n");
            exit(EXIT_FAILURE);
        }
        min_heap->array = grown;
        min_heap->capacity = new_capacity;
    }

    unsigned i = min_heap->size++;
    while (i > 0 && min_heap_node->freq < min_heap->array[(i - 1) / 2]->freq) {
        min_heap->array[i] = min_heap->array[(i - 1) / 2];
        i = (i - 1) / 2;
    }
    min_heap->array[i] = min_heap_node;
}
/*
 * Establish heap order over the whole array by sifting down every index
 * from the parent of the last element back to the root.
 */
void build_min_heap(struct MinHeap *min_heap) {
    int last = min_heap->size - 1;
    int pos = (last - 1) / 2;

    while (pos >= 0) {
        min_heapify(min_heap, pos);
        --pos;
    }
}
/* Print the first n ints of arr back to back, followed by a newline. */
void print_arr(int arr[], int n) {
    const int *cursor = arr;
    int remaining = n;

    while (remaining > 0) {
        printf("%d", *cursor);
        ++cursor;
        --remaining;
    }
    printf("\n");
}
/* Return 1 when the node has neither a left nor a right child, else 0. */
int is_leaf(struct MinHeapNode *root) {
    if (root->left) {
        return 0;
    }
    if (root->right) {
        return 0;
    }
    return 1;
}
/*
 * Create a heap of `size` slots, fill slot k with a fresh node built from
 * data[k] and freq[k], then arrange the slots into min-heap order.
 */
struct MinHeap *create_and_build_min_heap(char data[], int freq[], int size) {
    struct MinHeap *heap = create_min_heap(size);
    int k = 0;

    while (k < size) {
        heap->array[k] = new_node(data[k], freq[k]);
        ++k;
    }

    heap->size = size;
    build_min_heap(heap);
    return heap;
}
struct MinHeapNode *build_huffman_tree(char data[], int freq[], int size) {
struct MinHeapNode *left, *right, *top;
struct MinHeap *min_heap = create_and_build_min_heap(data, freq, size);
while (!is_size_one(min_heap)) {
left = extract_min(min_heap);
right = extract_min(min_heap);
top = new_node('$', left->freq + right->freq);
top->left = left;
top->right = right;
insert_min_heap(min_heap, top);
}
return extract_min(min_heap);
}
/*
 * Walk the tree and print "<symbol>: <bits>" for every leaf. arr records the
 * path from the root (0 for a left step, 1 for a right step) and top is the
 * number of steps taken so far.
 */
void print_codes(struct MinHeapNode *root, int arr[], int top) {
    /* A leaf has no children to descend into, so it can be handled first. */
    if (is_leaf(root)) {
        printf("%c: ", root->data);
        print_arr(arr, top);
        return;
    }

    if (root->left) {
        arr[top] = 0;
        print_codes(root->left, arr, top + 1);
    }
    if (root->right) {
        arr[top] = 1;
        print_codes(root->right, arr, top + 1);
    }
}
void huffman_compress(char *str) {
int len = strlen(str);
int freq[256] = {0};
for (int i = 0; i < len; ++i) {
freq[str[i]]++;
}
char data[256];
int j = 0;
for (int i = 0; i < 256; ++i) {
if (freq[i] > 0) {
data[j++] = i;
}
}
struct MinHeapNode *root = build_huffman_tree(data, freq, j);
int arr[MAX_TREE_HT], top = 0;
print_codes(root, arr, top);
}
/* Demo driver: prints the Huffman code table for a fixed sample string. */
int main() {
    huffman_compress("aaabbbbcccccdddd");

    return 0;
}
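在上面的示例程序中,我们定义了一个名为 huffman_compress 的函数,它接受一个字符串,并为其中出现的每个字符生成 Huffman 编码。该函数执行了以下操作:
1. 统计字符串中每个字符出现的次数。
2. 以这些字符及其频率构建最小堆,反复取出频率最小的两个结点并合并为一个新结点,直到堆中只剩下一棵 Huffman 树。
3. 遍历 Huffman 树(左分支记为 0,右分支记为 1),打印每个字符对应的编码。
注意:该示例只打印编码表,并不输出压缩后的比特流。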