// Huffman and run-length (RLE) file compression and decompression routines.
|
#include <assert.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include "compressor.h"
|
||
|
|
||
|
#define BUFSIZE 1024
|
||
|
|
||
|
// Huffman tree node.
// A leaf carries one input symbol in `data`; a node created by merging two
// subtrees carries a placeholder there, and only its children and combined
// weight are meaningful.
struct MinHeapNode {
    char data;                  // symbol byte (meaningful on leaves)
    unsigned freq;              // occurrence count, or summed weight of the subtree
    struct MinHeapNode *left;   // subtree selected by code bit '0'
    struct MinHeapNode *right;  // subtree selected by code bit '1'
};
|
||
|
|
||
|
// Array-backed binary min-heap of tree nodes, ordered by node frequency.
// The children of slot i live in slots 2*i + 1 and 2*i + 2.
struct MinHeap {
    unsigned size;               // number of slots currently occupied
    unsigned capacity;           // number of slots allocated in `array`
    struct MinHeapNode **array;  // heap storage (pointers to nodes)
};
|
||
|
|
||
|
// Create a new node
|
||
|
struct MinHeapNode* newNode(char data, unsigned freq) {
|
||
|
struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode));
|
||
|
temp->data = data;
|
||
|
temp->freq = freq;
|
||
|
temp->left = temp->right = NULL;
|
||
|
return temp;
|
||
|
}
|
||
|
|
||
|
// Create a MinHeap
|
||
|
struct MinHeap* createMinHeap(unsigned capacity) {
|
||
|
struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap));
|
||
|
minHeap->size = 0;
|
||
|
minHeap->capacity = capacity;
|
||
|
minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*));
|
||
|
return minHeap;
|
||
|
}
|
||
|
|
||
|
// Exchange the node pointers stored in two heap slots.
// `a` and `b` may refer to the same slot, in which case nothing changes.
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode *held = *b;
    *b = *a;
    *a = held;
}
|
||
|
|
||
|
// MinHeapify a node
|
||
|
void minHeapify(struct MinHeap* minHeap, int idx) {
|
||
|
int smallest = idx;
|
||
|
int left = 2 * idx + 1;
|
||
|
int right = 2 * idx + 2;
|
||
|
|
||
|
if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq)
|
||
|
smallest = left;
|
||
|
|
||
|
if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq)
|
||
|
smallest = right;
|
||
|
|
||
|
if (smallest != idx) {
|
||
|
swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]);
|
||
|
minHeapify(minHeap, smallest);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Extract the minimum value node
|
||
|
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
|
||
|
struct MinHeapNode* temp = minHeap->array[0];
|
||
|
minHeap->array[0] = minHeap->array[minHeap->size - 1];
|
||
|
--minHeap->size;
|
||
|
minHeapify(minHeap, 0);
|
||
|
return temp;
|
||
|
}
|
||
|
|
||
|
// Insert a node into the MinHeap
|
||
|
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
|
||
|
++minHeap->size;
|
||
|
int i = minHeap->size - 1;
|
||
|
|
||
|
while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) {
|
||
|
minHeap->array[i] = minHeap->array[(i - 1) / 2];
|
||
|
i = (i - 1) / 2;
|
||
|
}
|
||
|
minHeap->array[i] = minHeapNode;
|
||
|
}
|
||
|
|
||
|
// Build a MinHeap
|
||
|
struct MinHeap* buildMinHeap(char data[], int freq[], int size) {
|
||
|
struct MinHeap* minHeap = createMinHeap(size);
|
||
|
for (int i = 0; i < size; ++i)
|
||
|
minHeap->array[i] = newNode(data[i], freq[i]);
|
||
|
minHeap->size = size;
|
||
|
for (int i = (minHeap->size - 2) / 2; i >= 0; --i)
|
||
|
minHeapify(minHeap, i);
|
||
|
return minHeap;
|
||
|
}
|
||
|
|
||
|
// Build Huffman Tree
|
||
|
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
|
||
|
struct MinHeapNode *left, *right, *top;
|
||
|
struct MinHeap* minHeap = buildMinHeap(data, freq, size);
|
||
|
|
||
|
while (minHeap->size != 1) {
|
||
|
left = extractMin(minHeap);
|
||
|
right = extractMin(minHeap);
|
||
|
|
||
|
top = newNode('$', left->freq + right->freq);
|
||
|
top->left = left;
|
||
|
top->right = right;
|
||
|
|
||
|
insertMinHeap(minHeap, top);
|
||
|
}
|
||
|
|
||
|
return extractMin(minHeap);
|
||
|
}
|
||
|
|
||
|
// Generate Huffman Codes
|
||
|
void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) {
|
||
|
if (root->left) {
|
||
|
buffer[top] = '0';
|
||
|
generateCodes(root->left, codes, buffer, top + 1);
|
||
|
}
|
||
|
if (root->right) {
|
||
|
buffer[top] = '1';
|
||
|
generateCodes(root->right, codes, buffer, top + 1);
|
||
|
}
|
||
|
if (!root->left && !root->right) {
|
||
|
buffer[top] = '\0';
|
||
|
codes[(unsigned char)root->data] = strdup(buffer);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Compress using Huffman encoding
|
||
|
void compress_1(const char* input_file_name, const char* output_file_name) {
|
||
|
FILE* input = fopen(input_file_name, "rb");
|
||
|
FILE* output = fopen(output_file_name, "wb");
|
||
|
if (!input || !output) {
|
||
|
perror("File error");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
int freq[256] = {0};
|
||
|
char buffer[BUFSIZE];
|
||
|
size_t bytes_read;
|
||
|
|
||
|
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||
|
for (size_t i = 0; i < bytes_read; i++) {
|
||
|
freq[(unsigned char)buffer[i]]++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
char data[256];
|
||
|
int freq_array[256];
|
||
|
int size = 0;
|
||
|
|
||
|
for (int i = 0; i < 256; i++) {
|
||
|
if (freq[i] > 0) {
|
||
|
data[size] = (char)i;
|
||
|
freq_array[size] = freq[i];
|
||
|
size++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size);
|
||
|
char* codes[256] = {NULL};
|
||
|
char code_buffer[256];
|
||
|
generateCodes(root, codes, code_buffer, 0);
|
||
|
|
||
|
rewind(input);
|
||
|
|
||
|
fwrite(&size, sizeof(int), 1, output);
|
||
|
for (int i = 0; i < size; i++) {
|
||
|
fputc(data[i], output);
|
||
|
fwrite(&freq_array[i], sizeof(int), 1, output);
|
||
|
}
|
||
|
|
||
|
unsigned char bit_buffer = 0;
|
||
|
int bit_count = 0;
|
||
|
|
||
|
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
|
||
|
for (size_t i = 0; i < bytes_read; i++) {
|
||
|
char* code = codes[(unsigned char)buffer[i]];
|
||
|
for (char* p = code; *p; p++) {
|
||
|
bit_buffer = (bit_buffer << 1) | (*p - '0');
|
||
|
bit_count++;
|
||
|
if (bit_count == 8) {
|
||
|
fputc(bit_buffer, output);
|
||
|
bit_buffer = 0;
|
||
|
bit_count = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (bit_count > 0) {
|
||
|
bit_buffer <<= (8 - bit_count);
|
||
|
fputc(bit_buffer, output);
|
||
|
}
|
||
|
|
||
|
fclose(input);
|
||
|
fclose(output);
|
||
|
for (int i = 0; i < 256; i++) {
|
||
|
free(codes[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Decompress using Huffman encoding
|
||
|
void decompress_1(const char* input_file_name, const char* output_file_name) {
|
||
|
FILE* input = fopen(input_file_name, "rb");
|
||
|
FILE* output = fopen(output_file_name, "wb");
|
||
|
if (!input || !output) {
|
||
|
perror("File error");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
int size;
|
||
|
fread(&size, sizeof(int), 1, input);
|
||
|
char data[256];
|
||
|
int freq[256];
|
||
|
|
||
|
for (int i = 0; i < size; i++) {
|
||
|
data[i] = fgetc(input);
|
||
|
fread(&freq[i], sizeof(int), 1, input);
|
||
|
}
|
||
|
|
||
|
struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
|
||
|
struct MinHeapNode* current = root;
|
||
|
|
||
|
int bit_buffer;
|
||
|
int bit_count = 0;
|
||
|
int byte;
|
||
|
|
||
|
while ((byte = fgetc(input)) != EOF) {
|
||
|
for (int i = 7; i >= 0; i--) {
|
||
|
int bit = (byte >> i) & 1;
|
||
|
if (bit == 0) {
|
||
|
current = current->left;
|
||
|
} else {
|
||
|
current = current->right;
|
||
|
}
|
||
|
|
||
|
if (!current->left && !current->right) {
|
||
|
fputc(current->data, output);
|
||
|
current = root;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fclose(input);
|
||
|
fclose(output);
|
||
|
}
|
||
|
|
||
|
// Compress using RLE.
// Writes the input as a sequence of (byte, run length) pairs, each one byte
// wide. A run length is therefore limited to 255: longer runs are emitted as
// several consecutive pairs for the same byte, so the length never wraps
// when stored. The input is scanned as one continuous stream, so a run is
// split only by that limit.
// Any file error is reported and terminates the process.
void compress_2(const char* input_file_name, const char* output_file_name) {
    enum { MAX_RUN = 255 };  // largest count a single output byte can hold

    // Open the input first so a bad input path does not truncate the output.
    FILE *input = fopen(input_file_name, "rb");
    if (input == NULL) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
    FILE *output = fopen(output_file_name, "wb");
    if (output == NULL) {
        perror("File error");
        fclose(input);
        exit(EXIT_FAILURE);
    }

    int current = fgetc(input);
    while (current != EOF) {
        int count = 1;
        int next;

        // Extend the run while the same byte follows and the count still
        // fits. When the loop stops, `next` holds the first byte that is not
        // part of this pair (or EOF) and starts the next one.
        while ((next = fgetc(input)) == current && count < MAX_RUN) {
            count++;
        }

        fputc(current, output);
        fputc(count, output);
        current = next;
    }

    int failed = ferror(input) || ferror(output);
    fclose(input);
    // fclose flushes buffered data, so a failure here means lost output.
    if (fclose(output) != 0) {
        failed = 1;
    }
    if (failed) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
}
|
||
|
|
||
|
// Expand a run-length encoded file.
// The input is read as (byte, count) pairs and each byte is written `count`
// times to the output. A trailing byte without a count produces no output.
// If either file cannot be opened, the error is reported and the process
// exits.
void decompress_2(const char* input_file_name, const char* output_file_name) {
    FILE *src = fopen(input_file_name, "rb");
    FILE *dst = fopen(output_file_name, "wb");
    if (src == NULL || dst == NULL) {
        perror("File error");
        exit(EXIT_FAILURE);
    }

    for (;;) {
        int symbol = fgetc(src);
        if (symbol == EOF) {
            break;
        }

        // At end of file the count reads as EOF (negative): nothing is written.
        int repeats = fgetc(src);
        while (repeats > 0) {
            fputc(symbol, dst);
            repeats--;
        }
    }

    fclose(src);
    fclose(dst);
}
|