usaa24/sk1/compressor.c

305 lines
7.9 KiB
C

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "compressor.h"
#define BUFSIZE 1024
// Node of the Huffman tree. The same nodes are also the elements stored in
// the min-heap while the tree is being assembled.
struct MinHeapNode {
    char data;                 // byte value carried by the node ('$' is used for merged nodes)
    unsigned int freq;         // occurrence count; the heap is ordered by this field
    struct MinHeapNode *left;  // child reached by a '0' bit, NULL on a leaf
    struct MinHeapNode *right; // child reached by a '1' bit, NULL on a leaf
};
// Array-backed binary min-heap of Huffman tree nodes, ordered by node frequency.
struct MinHeap {
    unsigned int size;          // number of slots of `array` currently in use
    unsigned int capacity;      // number of slots allocated for `array`
    struct MinHeapNode **array; // heap storage; the children of slot i are 2i+1 and 2i+2
};
// Allocate a childless tree node holding `data` with weight `freq`.
//
// The caller owns the returned node and must eventually free() it.
// On allocation failure the process is terminated (the same policy this file
// uses for file errors), so the function never returns NULL.
struct MinHeapNode* newNode(char data, unsigned freq) {
    struct MinHeapNode* node = malloc(sizeof *node);
    if (node == NULL) {
        perror("Memory allocation error");
        exit(EXIT_FAILURE);
    }
    node->data = data;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}
// Allocate an empty min-heap with room for `capacity` node pointers.
//
// The caller owns both the returned struct and its `array` member.
// On allocation failure the process is terminated, so the function never
// returns NULL.
struct MinHeap* createMinHeap(unsigned capacity) {
    struct MinHeap* minHeap = malloc(sizeof *minHeap);
    if (minHeap == NULL) {
        perror("Memory allocation error");
        exit(EXIT_FAILURE);
    }
    minHeap->size = 0;
    minHeap->capacity = capacity;
    // calloc guards the capacity * element-size multiplication against overflow.
    minHeap->array = calloc(capacity, sizeof *minHeap->array);
    // A zero-sized request may legitimately yield NULL; only a non-empty
    // request that fails is an error.
    if (minHeap->array == NULL && capacity != 0) {
        perror("Memory allocation error");
        exit(EXIT_FAILURE);
    }
    return minHeap;
}
// Exchange the node pointers stored in the two slots `a` and `b`.
void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) {
    struct MinHeapNode* const first = *a;
    struct MinHeapNode* const second = *b;
    *a = second;
    *b = first;
}
// Sift the node at slot `idx` down until neither child has a smaller
// frequency than it, restoring the min-heap order below `idx`.
void minHeapify(struct MinHeap* minHeap, int idx) {
    int node = idx;
    for (;;) {
        int lchild = 2 * node + 1;
        int rchild = 2 * node + 2;
        int best = node;

        // Pick the lowest-frequency entry among the node and its children;
        // the left child wins a tie against the right one.
        if (lchild < minHeap->size &&
            minHeap->array[lchild]->freq < minHeap->array[best]->freq) {
            best = lchild;
        }
        if (rchild < minHeap->size &&
            minHeap->array[rchild]->freq < minHeap->array[best]->freq) {
            best = rchild;
        }

        if (best == node) {
            return; // heap order already holds at this position
        }
        swapMinHeapNode(&minHeap->array[best], &minHeap->array[node]);
        node = best; // keep sifting from the slot the node moved into
    }
}
// Remove and return the node with the smallest frequency.
//
// Returns NULL when the heap is empty. Ownership of the returned node passes
// to the caller.
struct MinHeapNode* extractMin(struct MinHeap* minHeap) {
    // Guard the empty heap: `size - 1` would otherwise wrap around (size is
    // unsigned) and index far outside the array.
    if (minHeap->size == 0) {
        return NULL;
    }
    struct MinHeapNode* smallest = minHeap->array[0];
    // Move the last element to the root, shrink, then sift it down.
    minHeap->size--;
    minHeap->array[0] = minHeap->array[minHeap->size];
    minHeapify(minHeap, 0);
    return smallest;
}
// Add `minHeapNode` to the heap, keeping the min-heap order.
// The array is not grown here: the caller must make sure a free slot exists.
void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) {
    // Open a hole in the first free slot at the bottom of the heap.
    int hole = minHeap->size;
    minHeap->size += 1;

    // Pull parents with a larger frequency down into the hole until the new
    // node's position is found.
    for (;;) {
        if (hole == 0) {
            break;
        }
        int parent = (hole - 1) / 2;
        if (!(minHeapNode->freq < minHeap->array[parent]->freq)) {
            break;
        }
        minHeap->array[hole] = minHeap->array[parent];
        hole = parent;
    }
    minHeap->array[hole] = minHeapNode;
}
// Build a min-heap holding one leaf node per (data[i], freq[i]) pair.
//
// `size` is the number of valid entries in both arrays; a non-positive value
// yields an empty heap. The caller owns the returned heap and its nodes.
struct MinHeap* buildMinHeap(char data[], int freq[], int size) {
    int count = size > 0 ? size : 0;
    struct MinHeap* minHeap = createMinHeap((unsigned)count);
    for (int i = 0; i < count; ++i) {
        minHeap->array[i] = newNode(data[i], freq[i]);
    }
    minHeap->size = (unsigned)count;

    // Heapify from the last parent up to the root. The start index is
    // computed in signed arithmetic on purpose: with the unsigned `size`
    // field, (size - 2) wraps for 0 or 1 elements and produces a huge
    // out-of-range index.
    for (int i = (count - 2) / 2; i >= 0; --i) {
        minHeapify(minHeap, i);
    }
    return minHeap;
}
// Build the Huffman tree for `size` symbols given as parallel arrays.
//
// Returns the root of the tree, which the caller owns and must free node by
// node. Returns NULL when `size` is not positive. With exactly one symbol the
// root is itself a leaf. Merged (internal) nodes carry '$' as data and the
// sum of their children's frequencies.
struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) {
    if (size <= 0) {
        return NULL;
    }
    if (size == 1) {
        // One symbol needs no merging: the tree is a single leaf.
        return newNode(data[0], freq[0]);
    }

    struct MinHeap* minHeap = buildMinHeap(data, freq, size);
    // Repeatedly merge the two lightest subtrees until one tree remains.
    while (minHeap->size > 1) {
        struct MinHeapNode* left = extractMin(minHeap);
        struct MinHeapNode* right = extractMin(minHeap);
        struct MinHeapNode* top = newNode('$', left->freq + right->freq);
        top->left = left;
        top->right = right;
        insertMinHeap(minHeap, top);
    }
    struct MinHeapNode* root = extractMin(minHeap);

    // The heap was only scaffolding; release it so that only the tree
    // survives this call.
    free(minHeap->array);
    free(minHeap);
    return root;
}
// Walk the Huffman tree and store, for every leaf, its code as a
// NUL-terminated string of '0'/'1' characters.
//
//   root   - subtree to walk; NULL is accepted and produces nothing
//   codes  - table indexed by the leaf's byte value (as unsigned char); each
//            filled entry is heap-allocated and must be free()d by the caller
//   buffer - scratch space holding the path from the tree root; it needs room
//            for the deepest path plus the terminator
//   top    - number of path characters already in `buffer`
//
// A tree consisting of a single leaf gets the empty string as its code.
// The process is terminated if a code string cannot be allocated.
void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) {
    if (root == NULL) {
        return;
    }
    if (root->left) {
        buffer[top] = '0';
        generateCodes(root->left, codes, buffer, top + 1);
    }
    if (root->right) {
        buffer[top] = '1';
        generateCodes(root->right, codes, buffer, top + 1);
    }
    if (!root->left && !root->right) {
        buffer[top] = '\0';
        // Copy by hand instead of strdup(), which is not part of ISO C11.
        // The path is `top` characters long, plus the terminator.
        size_t length = (size_t)top + 1;
        char* copy = malloc(length);
        if (copy == NULL) {
            perror("Memory allocation error");
            exit(EXIT_FAILURE);
        }
        memcpy(copy, buffer, length);
        codes[(unsigned char)root->data] = copy;
    }
}
// Compress using Huffman encoding
void compress_1(const char* input_file_name, const char* output_file_name) {
FILE* input = fopen(input_file_name, "rb");
FILE* output = fopen(output_file_name, "wb");
if (!input || !output) {
perror("File error");
exit(EXIT_FAILURE);
}
int freq[256] = {0};
char buffer[BUFSIZE];
size_t bytes_read;
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
for (size_t i = 0; i < bytes_read; i++) {
freq[(unsigned char)buffer[i]]++;
}
}
char data[256];
int freq_array[256];
int size = 0;
for (int i = 0; i < 256; i++) {
if (freq[i] > 0) {
data[size] = (char)i;
freq_array[size] = freq[i];
size++;
}
}
struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size);
char* codes[256] = {NULL};
char code_buffer[256];
generateCodes(root, codes, code_buffer, 0);
rewind(input);
fwrite(&size, sizeof(int), 1, output);
for (int i = 0; i < size; i++) {
fputc(data[i], output);
fwrite(&freq_array[i], sizeof(int), 1, output);
}
unsigned char bit_buffer = 0;
int bit_count = 0;
while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) {
for (size_t i = 0; i < bytes_read; i++) {
char* code = codes[(unsigned char)buffer[i]];
for (char* p = code; *p; p++) {
bit_buffer = (bit_buffer << 1) | (*p - '0');
bit_count++;
if (bit_count == 8) {
fputc(bit_buffer, output);
bit_buffer = 0;
bit_count = 0;
}
}
}
}
if (bit_count > 0) {
bit_buffer <<= (8 - bit_count);
fputc(bit_buffer, output);
}
fclose(input);
fclose(output);
for (int i = 0; i < 256; i++) {
free(codes[i]);
}
}
// Release every node of a Huffman tree; a NULL node is ignored.
static void freeDecodeTree(struct MinHeapNode* node) {
    if (node == NULL) {
        return;
    }
    freeDecodeTree(node->left);
    freeDecodeTree(node->right);
    free(node);
}

// Decompress a file written by compress_1.
//
// Expected layout (integers in the host's native `int` representation):
//   int size                      - number of distinct byte values (0..256)
//   size x { byte, int count }    - symbol table
//   payload                       - Huffman codes packed MSB-first
//
// The number of bytes to produce is the sum of the counts in the table, so
// the zero padding in the last payload byte is never decoded as data. A table
// with a single entry carries no payload: that byte is written `count` times.
// If the payload ends early, decoding stops at the end of the input.
//
// The process is terminated with an error message if a file cannot be opened,
// the header is malformed, or the output cannot be written.
void decompress_1(const char* input_file_name, const char* output_file_name) {
    FILE* input = fopen(input_file_name, "rb");
    FILE* output = fopen(output_file_name, "wb");
    if (!input || !output) {
        perror("File error");
        exit(EXIT_FAILURE);
    }

    // The header comes from the file, so it is validated before it is used
    // to index the fixed-size tables below.
    int size = 0;
    if (fread(&size, sizeof size, 1, input) != 1 || size < 0 || size > 256) {
        fputs("File error: invalid Huffman header\n", stderr);
        exit(EXIT_FAILURE);
    }

    char data[256];
    int freq[256];
    unsigned long long remaining = 0; // bytes still to be written
    for (int i = 0; i < size; i++) {
        int symbol = fgetc(input);
        if (symbol == EOF ||
            fread(&freq[i], sizeof freq[i], 1, input) != 1 ||
            freq[i] <= 0) {
            fputs("File error: invalid Huffman symbol table\n", stderr);
            exit(EXIT_FAILURE);
        }
        data[i] = (char)symbol;
        remaining += (unsigned long long)freq[i];
    }

    if (size == 1) {
        // A one-symbol tree is a lone leaf with an empty code, so there are
        // no bits to follow; the count alone describes the data.
        for (; remaining > 0; remaining--) {
            fputc(data[0], output);
        }
    } else if (size > 1) {
        // Same table, same construction: this reproduces the encoder's tree.
        struct MinHeapNode* root = buildHuffmanTree(data, freq, size);
        struct MinHeapNode* current = root;
        int byte;
        while (remaining > 0 && (byte = fgetc(input)) != EOF) {
            for (int i = 7; i >= 0 && remaining > 0; i--) {
                int bit = (byte >> i) & 1;
                current = bit ? current->right : current->left;
                if (!current->left && !current->right) {
                    fputc(current->data, output);
                    current = root;
                    remaining--;
                }
            }
        }
        freeDecodeTree(root);
    }

    fclose(input);
    // fclose flushes buffered data, so a failure here means lost output.
    if (ferror(output) || fclose(output) != 0) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
}
// Compress a file with run-length encoding.
//
// The output is a sequence of two-byte pairs { value, count }, where `count`
// (1..255) is the number of consecutive occurrences of `value`. Runs longer
// than 255 are split into several pairs, because the count is stored in a
// single byte. An empty input produces an empty output.
//
// The process is terminated with an error message if a file cannot be opened
// or an I/O error is detected.
void compress_2(const char* input_file_name, const char* output_file_name) {
    FILE* input = fopen(input_file_name, "rb");
    FILE* output = fopen(output_file_name, "wb");
    if (!input || !output) {
        perror("File error");
        exit(EXIT_FAILURE);
    }

    // stdio already buffers the stream, so reading byte by byte is cheap and
    // lets a run continue for as long as the count byte allows.
    int current = fgetc(input);
    while (current != EOF) {
        int count = 1;
        int next;
        // Extend the run while the same byte keeps coming and the count
        // still fits into one byte.
        while ((next = fgetc(input)) == current && count < 255) {
            count++;
        }
        fputc(current, output);
        fputc(count, output);
        // `next` was already consumed: it starts the following run, or is
        // EOF and ends the loop.
        current = next;
    }

    if (ferror(input) || ferror(output)) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
    fclose(input);
    // fclose flushes buffered data, so a failure here means lost output.
    if (fclose(output) != 0) {
        perror("File error");
        exit(EXIT_FAILURE);
    }
}
// Expand a run-length encoded file.
//
// The input is read as pairs { value, count }; `value` is written `count`
// times. A pair with a zero count, or a trailing value whose count byte is
// missing, produces no output. The process is terminated if either file
// cannot be opened.
void decompress_2(const char* input_file_name, const char* output_file_name) {
    FILE* src = fopen(input_file_name, "rb");
    FILE* dst = fopen(output_file_name, "wb");
    if (src == NULL || dst == NULL) {
        perror("File error");
        exit(EXIT_FAILURE);
    }

    for (;;) {
        int symbol = fgetc(src);
        if (symbol == EOF) {
            break;
        }
        // EOF in place of the count is negative, so the loop below is skipped.
        int remaining = fgetc(src);
        while (remaining > 0) {
            fputc(symbol, dst);
            remaining--;
        }
    }

    fclose(src);
    fclose(dst);
}