📜  如何压缩字符串 - C 编程语言(1)

📅  最后修改于: 2023-12-03 14:52:11.012000             🧑  作者: Mango

如何压缩字符串 - C 编程语言

在 C 编程语言中,我们可以使用多种方法来压缩字符串,以减少其所占用的空间。以下是一些常用的方法。

方法一:使用 Run-Length Encoding (RLE)

Run-Length Encoding(RLE)是一种常见的压缩算法,在许多不同的应用程序中都得到了广泛的应用。它的基本思想是将连续重复出现的相同字符替换为"重复次数 + 该字符"的形式,例如 "aaabbbb" 会被压缩为 "3a4b"。下面是一个示例程序,展示了如何使用 RLE 压缩字符串。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Run-length encode a NUL-terminated string.
 *
 * Every run of two or more identical characters is written as the decimal
 * run length followed by the character (e.g. "aaab" -> "3ab"); a character
 * that is not repeated is copied unchanged.
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if str is NULL or memory cannot be allocated.
 *
 * Note: the output format is ambiguous when the input itself contains
 * digits (the run "3a" and the literal text "3a" encode identically).
 */
char *rle_compress(char *str) {
    if (str == NULL) {
        return NULL;
    }

    size_t len = strlen(str);

    /*
     * A run of n >= 2 characters encodes to digits(n) + 1 <= n characters and
     * a single character encodes to itself, so the output is never longer
     * than the input. One extra byte holds the terminating NUL.
     */
    size_t capacity = len + 1;
    char *compressed = malloc(capacity);
    if (compressed == NULL) {
        return NULL;
    }

    size_t i = 0;
    size_t j = 0;
    while (i < len) {
        char c = str[i];

        /* Measure the run starting at i. */
        size_t count = 1;
        while (i + count < len && str[i + count] == c) {
            count++;
        }

        if (count > 1) {
            /* Decimal formatting keeps runs longer than 9 readable. */
            int written = snprintf(compressed + j, capacity - j, "%zu", count);
            if (written < 0) {
                free(compressed);
                return NULL;
            }
            j += (size_t)written;
        }
        compressed[j++] = c;

        i += count;
    }

    compressed[j] = '\0';
    return compressed;
}

/*
 * Demo driver: run-length encodes a fixed sample string, prints the result
 * and releases the buffer returned by rle_compress.
 */
int main() {
    char *result = rle_compress("aaabbbbcccccdddd");

    printf("Compressed string: %s\n", result);
    free(result);

    return 0;
}

在上面的示例程序中,我们定义了一个名为 rle_compress 的函数,它接受一个字符串,并返回压缩后的字符串。该函数执行了以下操作:

  1. 遍历字符串,并记录当前字符以及连续相同字符的数量
  2. 如果当前字符只出现了一次,则将其添加到压缩后的字符串中
  3. 如果当前字符出现了多次,则将其出现次数和字符本身都添加到压缩后的字符串中
方法二:使用 Huffman 编码

Huffman 编码是一种基于字符频率的压缩算法,它将出现频率较高的字符用较短的编码表示,而将出现频率较低的字符用较长的编码表示。以下是一个示例程序,展示了如何使用 Huffman 编码压缩字符串。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of ints in the scratch array that huffman_compress passes to
 * print_codes to hold the bits of the code currently being traced. */
#define MAX_TREE_HT 100

/*
 * A node of the Huffman tree. The same nodes are held in the min-heap while
 * the tree is being assembled.
 */
struct MinHeapNode {
    char data;                  /* symbol carried by the node */
    unsigned int freq;          /* weight used for heap ordering */
    struct MinHeapNode *left;   /* left child, or NULL */
    struct MinHeapNode *right;  /* right child, or NULL */
};

/* Array-backed binary min-heap of tree-node pointers, ordered by node freq. */
struct MinHeap {
    unsigned int size;           /* number of slots currently in use */
    unsigned int capacity;       /* number of slots requested at creation */
    struct MinHeapNode **array;  /* heap storage: array of node pointers */
};

/*
 * Allocate a tree node carrying `data` with weight `freq`. Both child links
 * start out as NULL.
 *
 * The node is heap-allocated and owned by the caller (release with free()).
 * Allocation failure is treated as fatal: an error is reported on stderr and
 * the program exits, so the returned pointer is never NULL.
 */
struct MinHeapNode *new_node(char data, unsigned freq) {
    struct MinHeapNode *node = malloc(sizeof *node);
    if (node == NULL) {
        perror("new_node: malloc");
        exit(EXIT_FAILURE);
    }

    node->left = NULL;
    node->right = NULL;
    node->data = data;
    node->freq = freq;
    return node;
}

/*
 * Create an empty min-heap with room for `capacity` node pointers.
 *
 * The heap and its slot array are heap-allocated and owned by the caller
 * (free the `array` member, then the heap itself). Allocation failure is
 * treated as fatal: an error is reported on stderr and the program exits,
 * so the returned pointer is never NULL.
 */
struct MinHeap *create_min_heap(unsigned capacity) {
    struct MinHeap *heap = malloc(sizeof *heap);
    if (heap == NULL) {
        perror("create_min_heap: malloc");
        exit(EXIT_FAILURE);
    }

    heap->size = 0;
    heap->capacity = capacity;

    /*
     * calloc rejects a count * size product that would overflow. At least one
     * slot is requested so that a NULL result always means failure, even when
     * capacity is 0.
     */
    heap->array = calloc(capacity ? capacity : 1, sizeof *heap->array);
    if (heap->array == NULL) {
        perror("create_min_heap: calloc");
        free(heap);
        exit(EXIT_FAILURE);
    }

    return heap;
}

/* Exchange the two node pointers that `a` and `b` point to. */
void swap_min_heap_node(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode *first = *a;
    struct MinHeapNode *second = *b;

    *a = second;
    *b = first;
}

/*
 * Sift the node at index `idx` down until neither of its children has a
 * smaller freq, assuming both subtrees below `idx` already satisfy the
 * min-heap property.
 */
void min_heapify(struct MinHeap *min_heap, int idx) {
    for (;;) {
        int left = 2 * idx + 1;
        int right = 2 * idx + 2;
        int smallest = idx;

        if (left < min_heap->size &&
            min_heap->array[left]->freq < min_heap->array[smallest]->freq) {
            smallest = left;
        }

        if (right < min_heap->size &&
            min_heap->array[right]->freq < min_heap->array[smallest]->freq) {
            smallest = right;
        }

        /* The node is already no larger than its children: done. */
        if (smallest == idx) {
            return;
        }

        swap_min_heap_node(&min_heap->array[smallest], &min_heap->array[idx]);
        idx = smallest;
    }
}

/* Return 1 when the heap holds exactly one node, 0 otherwise. */
int is_size_one(struct MinHeap *min_heap) {
    if (min_heap->size != 1) {
        return 0;
    }
    return 1;
}

/*
 * Remove and return the node at the top of the heap (smallest freq).
 * No emptiness check is performed; the heap must hold at least one node.
 */
struct MinHeapNode *extract_min(struct MinHeap *min_heap) {
    struct MinHeapNode *root = min_heap->array[0];
    unsigned last = min_heap->size - 1;

    /* Move the last node to the top, shrink the heap, then restore order. */
    min_heap->array[0] = min_heap->array[last];
    min_heap->size = last;
    min_heapify(min_heap, 0);

    return root;
}

/*
 * Add `min_heap_node` to the heap, shifting larger ancestors down until the
 * right slot for the new node is found. No capacity check is performed.
 */
void insert_min_heap(struct MinHeap *min_heap, struct MinHeapNode *min_heap_node) {
    min_heap->size += 1;

    /* Start with a hole at the new last slot and bubble it upwards. */
    int hole = min_heap->size - 1;
    while (hole != 0) {
        int parent = (hole - 1) / 2;

        if (!(min_heap_node->freq < min_heap->array[parent]->freq)) {
            break;
        }

        min_heap->array[hole] = min_heap->array[parent];
        hole = parent;
    }

    min_heap->array[hole] = min_heap_node;
}

/*
 * Turn the heap's array into a valid min-heap by sifting down every node
 * from the parent of the last element back to the root.
 */
void build_min_heap(struct MinHeap *min_heap) {
    int last = min_heap->size - 1;
    int i = (last - 1) / 2;

    while (i >= 0) {
        min_heapify(min_heap, i);
        --i;
    }
}

/* Print the first `n` ints of `arr` back to back, followed by a newline. */
void print_arr(int arr[], int n) {
    int k = 0;

    while (k < n) {
        printf("%d", arr[k]);
        k++;
    }
    printf("\n");
}

/* Return 1 when `root` has neither a left nor a right child, 0 otherwise. */
int is_leaf(struct MinHeapNode *root) {
    if (root->left) {
        return 0;
    }
    if (root->right) {
        return 0;
    }
    return 1;
}

/*
 * Create a heap of capacity `size`, fill it with one new node per
 * (data[i], freq[i]) pair, and arrange the nodes into min-heap order.
 */
struct MinHeap *create_and_build_min_heap(char data[], int freq[], int size) {
    struct MinHeap *heap = create_min_heap(size);
    int k = 0;

    while (k < size) {
        heap->array[k] = new_node(data[k], freq[k]);
        k++;
    }

    heap->size = size;
    build_min_heap(heap);

    return heap;
}

struct MinHeapNode *build_huffman_tree(char data[], int freq[], int size) {
    struct MinHeapNode *left, *right, *top;
    struct MinHeap *min_heap = create_and_build_min_heap(data, freq, size);

    while (!is_size_one(min_heap)) {
        left = extract_min(min_heap);
        right = extract_min(min_heap);
        top = new_node('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        insert_min_heap(min_heap, top);
    }

    return extract_min(min_heap);
}

/*
 * Walk the tree below `root` and print "<symbol>: <bits>" for every leaf.
 * `arr` holds the bits of the path taken so far (0 = left, 1 = right) and
 * `top` is the number of bits currently stored in it.
 */
void print_codes(struct MinHeapNode *root, int arr[], int top) {
    /* A leaf has no children to descend into: emit its code and stop. */
    if (is_leaf(root)) {
        printf("%c: ", root->data);
        print_arr(arr, top);
        return;
    }

    struct MinHeapNode *children[2] = { root->left, root->right };

    for (int bit = 0; bit < 2; ++bit) {
        if (children[bit]) {
            arr[top] = bit;
            print_codes(children[bit], arr, top + 1);
        }
    }
}

void huffman_compress(char *str) {
    int len = strlen(str);
    int freq[256] = {0};
    for (int i = 0; i < len; ++i) {
        freq[str[i]]++;
    }

    char data[256];
    int j = 0;
    for (int i = 0; i < 256; ++i) {
        if (freq[i] > 0) {
            data[j++] = i;
        }
    }

    struct MinHeapNode *root = build_huffman_tree(data, freq, j);
    int arr[MAX_TREE_HT], top = 0;
    print_codes(root, arr, top);
}

/* Demo driver: prints the Huffman codes for a fixed sample string. */
int main() {
    huffman_compress("aaabbbbcccccdddd");

    return 0;
}

在上面的示例程序中,我们定义了一个名为 huffman_compress 的函数,它接受一个字符串,为其中出现的每个字符生成 Huffman 编码并打印出来(该示例只输出编码表,并不生成压缩后的比特流)。该函数执行了以下操作:

  1. 计算输入字符串中每个字符出现的频率
  2. 根据字符频率构建 Huffman 树
  3. 在 Huffman 树中找到每个字符的编码
  4. 将每个字符及其对应的 Huffman 编码输出