/*
 * usaa24/sk1/compressor.c
 *
 * 289 lines
 * 7.9 KiB
 * C
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
// Not referenced by any code visible in this part of the file.
// NOTE(review): presumably the number of distinct byte values (0..255). A Huffman
// tree over 256 symbols can contain up to 511 nodes, so confirm the intent before
// using this constant to size a node array.
#define MAX_TREE_NODES 256
/*
 * One node of the Huffman tree.
 * A node whose two child pointers are both NULL is a leaf and carries a byte
 * value in `data`; nodes created while merging two subtrees carry a
 * placeholder byte and the summed frequency of their children.
 */
typedef struct HuffmanNode {
    unsigned char data;         /* byte value stored in the node */
    unsigned int frequency;     /* occurrence count, or the sum of the children's counts */
    struct HuffmanNode *left;   /* subtree selected by a '0' bit */
    struct HuffmanNode *right;  /* subtree selected by a '1' bit */
} HuffmanNode;
/*
 * Binary min-heap of Huffman node pointers, ordered by node frequency.
 * It serves as the priority queue while the Huffman tree is assembled.
 */
typedef struct MinHeap {
    unsigned int size;      /* number of slots of `array` currently in use */
    unsigned int capacity;  /* number of slots allocated for `array` */
    HuffmanNode **array;    /* heap-ordered node pointers */
} MinHeap;
/*
 * Allocate a childless Huffman node holding `data` and `frequency`.
 *
 * The process is terminated with EXIT_FAILURE if the allocation fails, so the
 * returned pointer is never NULL. The caller owns the node and releases it
 * with free().
 */
HuffmanNode* newNode(unsigned char data, unsigned frequency) {
    HuffmanNode* created = malloc(sizeof *created);
    if (created == NULL) {
        perror("Failed to allocate memory for new node");
        exit(EXIT_FAILURE);
    }

    created->left = NULL;
    created->right = NULL;
    created->frequency = frequency;
    created->data = data;
    return created;
}
// Function to create a MinHeap
MinHeap* createMinHeap(unsigned capacity) {
MinHeap* minHeap = (MinHeap*)malloc(sizeof(MinHeap));
if (!minHeap) {
perror("Failed to allocate memory for MinHeap");
exit(EXIT_FAILURE);
}
minHeap->size = 0;
minHeap->capacity = capacity;
minHeap->array = (HuffmanNode**)malloc(capacity * sizeof(HuffmanNode*));
if (!minHeap->array) {
perror("Failed to allocate memory for MinHeap array");
exit(EXIT_FAILURE);
}
return minHeap;
}
/* Exchange the node pointers stored in the two heap slots `a` and `b`. */
void swapMinHeapNode(HuffmanNode** a, HuffmanNode** b) {
    HuffmanNode* held = *b;
    *b = *a;
    *a = held;
}
/*
 * Restore the min-heap property for the subtree rooted at slot `idx`.
 *
 * The node at `idx` is sifted down: while one of its children has a strictly
 * smaller frequency, it is swapped with the smallest child (the left child
 * wins ties between children) and the walk continues from that slot.
 */
void minHeapify(MinHeap* minHeap, int idx) {
    int parent = idx;

    for (;;) {
        int lowest = parent;
        int childL = 2 * parent + 1;
        int childR = 2 * parent + 2;

        if (childL < minHeap->size &&
            minHeap->array[childL]->frequency < minHeap->array[lowest]->frequency) {
            lowest = childL;
        }
        if (childR < minHeap->size &&
            minHeap->array[childR]->frequency < minHeap->array[lowest]->frequency) {
            lowest = childR;
        }

        /* Neither child is smaller: the subtree is in heap order. */
        if (lowest == parent) {
            return;
        }

        swapMinHeapNode(&minHeap->array[lowest], &minHeap->array[parent]);
        parent = lowest;
    }
}
/* Return 1 when the heap holds exactly one node, 0 otherwise. */
int isSizeOne(MinHeap* minHeap) {
    return minHeap->size == 1u ? 1 : 0;
}
/*
 * Remove and return the node with the smallest frequency.
 *
 * Returns NULL when `minHeap` is NULL or holds no nodes. The unguarded
 * version read slot 0 and slot `size - 1` of an empty array and wrapped
 * `size` around to UINT_MAX.
 * Ownership of the returned node passes to the caller.
 */
HuffmanNode* extractMin(MinHeap* minHeap) {
    if (minHeap == NULL || minHeap->size == 0) {
        return NULL;
    }

    HuffmanNode* smallest = minHeap->array[0];

    minHeap->size--;
    if (minHeap->size > 0) {
        /* Move the last node to the root and sift it down into place. */
        minHeap->array[0] = minHeap->array[minHeap->size];
        minHeapify(minHeap, 0);
    }
    return smallest;
}
/*
 * Insert `node` into the heap, keeping it ordered by frequency.
 *
 * When the heap is already full the slot array is doubled with realloc()
 * before the insert; previously the write went past the end of the array.
 * Following the file's allocation policy, a failed or impossible growth
 * terminates the process with EXIT_FAILURE.
 */
void insertMinHeap(MinHeap* minHeap, HuffmanNode* node) {
    if (minHeap->size >= minHeap->capacity) {
        unsigned grown = minHeap->capacity ? minHeap->capacity * 2u : 1u;

        /* Reject a wrapped capacity or a byte count that does not fit size_t. */
        if (grown <= minHeap->capacity ||
            grown > (size_t)-1 / sizeof(HuffmanNode*)) {
            fprintf(stderr, "MinHeap capacity overflow\n");
            exit(EXIT_FAILURE);
        }

        HuffmanNode** resized = realloc(minHeap->array, grown * sizeof *resized);
        if (resized == NULL) {
            perror("Failed to grow MinHeap array");
            exit(EXIT_FAILURE);
        }
        minHeap->array = resized;
        minHeap->capacity = grown;
    }

    /* Sift up: shift larger parents down until the node's slot is found. */
    unsigned slot = minHeap->size++;
    while (slot > 0) {
        unsigned parent = (slot - 1) / 2;
        if (node->frequency >= minHeap->array[parent]->frequency) {
            break;
        }
        minHeap->array[slot] = minHeap->array[parent];
        slot = parent;
    }
    minHeap->array[slot] = node;
}
/*
 * Put the whole slot array into heap order by sifting down every slot from
 * the parent of the last slot back to the root.
 */
void buildMinHeap(MinHeap* minHeap) {
    int last = (int)minHeap->size - 1;
    int slot = (last - 1) / 2;

    while (slot >= 0) {
        minHeapify(minHeap, slot);
        slot--;
    }
}
// Function to build the Huffman tree
HuffmanNode* buildHuffmanTree(unsigned char* data, unsigned* freq, int size) {
HuffmanNode *left, *right, *top;
MinHeap* minHeap = createMinHeap(size);
for (int i = 0; i < size; ++i)
insertMinHeap(minHeap, newNode(data[i], freq[data[i]]));
buildMinHeap(minHeap);
while (!isSizeOne(minHeap)) {
left = extractMin(minHeap);
right = extractMin(minHeap);
top = newNode('$', left->frequency + right->frequency);
top->left = left;
top->right = right;
insertMinHeap(minHeap, top);
}
return extractMin(minHeap);
}
/*
 * Walk the tree and store the bit string ('0'/'1' characters) of every leaf.
 *
 * root   subtree to walk; NULL is accepted and produces no codes
 * arr    scratch buffer holding the path so far; it must have room for the
 *        tree depth plus a terminating NUL
 * top    number of path characters already in `arr`
 * codes  table indexed by byte value; each leaf's entry receives a newly
 *        allocated NUL-terminated string that the caller releases with free()
 *
 * A left edge contributes '0' and a right edge '1'. A tree that consists of
 * a single leaf yields the empty string for its symbol.
 * The copy is made with malloc()/memcpy() instead of strdup(), which is not
 * part of ISO C before C23, and allocation failure terminates the process
 * like the other allocators in this file.
 */
void generateCodes(HuffmanNode* root, char* arr, int top, char** codes) {
    if (root == NULL) {
        return;
    }

    if (root->left) {
        arr[top] = '0';
        generateCodes(root->left, arr, top + 1, codes);
    }
    if (root->right) {
        arr[top] = '1';
        generateCodes(root->right, arr, top + 1, codes);
    }
    if (!root->left && !root->right) {
        arr[top] = '\0'; /* terminate the path before copying it */

        size_t bytes = (size_t)top + 1;
        char* copy = malloc(bytes);
        if (copy == NULL) {
            perror("Failed to allocate memory for Huffman code");
            exit(EXIT_FAILURE);
        }
        memcpy(copy, arr, bytes);
        codes[root->data] = copy;
    }
}
/* Release every node of a Huffman tree; a NULL tree is a no-op. */
static void compress1FreeTree(HuffmanNode* node) {
    if (node == NULL) {
        return;
    }
    compress1FreeTree(node->left);
    compress1FreeTree(node->right);
    free(node);
}

/*
 * Huffman-compress `input_file` into `output_file`.
 *
 * Output layout (native byte order and type sizes):
 *   int            number of distinct byte values
 *   per value:     unsigned char symbol, unsigned frequency
 *   payload:       code bits packed MSB-first, last byte zero-padded
 *   size_t         number of meaningful payload bits
 *
 * Returns 0 on success and -1 on any open, read, seek or write failure.
 *
 * Notes:
 * - The input is opened before the output, so a missing input no longer
 *   truncates the output file, and neither stream is leaked on failure.
 * - An empty input produces a header with zero symbols and no tree is built.
 * - An input with a single distinct byte value has an empty code, so no
 *   payload bits are written; only the frequency table describes the data.
 * - The code strings and the tree are released before returning.
 */
int compress_1(const char* input_file, const char* output_file) {
    int rc = -1;
    HuffmanNode* root = NULL;
    char* codes[256] = {0};
    char arr[256];
    unsigned freq[256] = {0};
    unsigned char unique_data[256];
    int unique_count = 0;
    unsigned char data;
    unsigned char buffer = 0;
    int bit_count = 0;
    size_t total_bits = 0;
    FILE* output = NULL;

    FILE* input = fopen(input_file, "rb");
    if (!input) {
        perror("Error opening file");
        return -1;
    }
    output = fopen(output_file, "wb");
    if (!output) {
        perror("Error opening file");
        fclose(input);
        return -1;
    }

    /* Pass 1: count how often each byte value occurs. */
    while (fread(&data, sizeof(data), 1, input) == 1) {
        freq[data]++;
    }
    if (ferror(input)) {
        perror("Error reading from input file");
        goto cleanup;
    }

    for (int i = 0; i < 256; i++) {
        if (freq[i] > 0) {
            unique_data[unique_count++] = (unsigned char)i;
        }
    }

    if (unique_count > 0) {
        root = buildHuffmanTree(unique_data, freq, unique_count);
        if (root == NULL) {
            fprintf(stderr, "Failed to build Huffman tree\n");
            goto cleanup;
        }
        generateCodes(root, arr, 0, codes);
    }

    /* Header: symbol count followed by the (symbol, frequency) table. */
    if (fwrite(&unique_count, sizeof(int), 1, output) != 1) {
        goto write_error;
    }
    for (int i = 0; i < unique_count; i++) {
        unsigned char symbol = unique_data[i];
        if (fwrite(&symbol, sizeof(unsigned char), 1, output) != 1 ||
            fwrite(&freq[symbol], sizeof(unsigned), 1, output) != 1) {
            goto write_error;
        }
    }

    /* Pass 2: re-read the input and emit the code of every byte. */
    if (fseek(input, 0, SEEK_SET) != 0) {
        perror("Error rewinding input file");
        goto cleanup;
    }
    while (fread(&data, sizeof(data), 1, input) == 1) {
        const char* code = codes[data];
        if (code == NULL) {
            /* Byte value not seen in pass 1: the input changed underneath us. */
            fprintf(stderr, "Input file changed while compressing\n");
            goto cleanup;
        }
        for (int i = 0; code[i] != '\0'; i++) {
            unsigned char bit = (unsigned char)(code[i] - '0');
            buffer = (unsigned char)((buffer << 1) | bit);
            bit_count++;
            total_bits++;
            if (bit_count == 8) {
                if (fwrite(&buffer, sizeof(unsigned char), 1, output) != 1) {
                    goto write_error;
                }
                bit_count = 0;
                buffer = 0;
            }
        }
    }
    if (ferror(input)) {
        perror("Error reading from input file");
        goto cleanup;
    }

    /* Flush a partial final byte, left-aligned and zero-padded. */
    if (bit_count > 0) {
        buffer = (unsigned char)(buffer << (8 - bit_count));
        if (fwrite(&buffer, sizeof(unsigned char), 1, output) != 1) {
            goto write_error;
        }
    }

    /* Trailer: number of meaningful payload bits. */
    if (fwrite(&total_bits, sizeof(size_t), 1, output) != 1) {
        goto write_error;
    }

    rc = 0;
    goto cleanup;

write_error:
    perror("Error writing to output file");

cleanup:
    for (int i = 0; i < 256; i++) {
        free(codes[i]);
    }
    compress1FreeTree(root);
    fclose(input);
    /* fclose flushes buffered data, so a failure here means lost output. */
    if (fclose(output) != 0 && rc == 0) {
        perror("Error closing output file");
        rc = -1;
    }
    return rc;
}
// Function to decompress the compressed file
int decompress_1(const char* input_file_name, const char* output_file_name) {
FILE* input = fopen(input_file_name, "rb");
if (!input) {
perror("Error opening input file");
return -1;
}
FILE* output = fopen(output_file_name, "wb");
if (!output) {
perror("Error opening output file");
fclose(input);
return -1;
}
int unique_count;
if (fread(&unique_count, sizeof(int), 1, input) != 1) {
perror("Error reading from input file");
fclose(input);
fclose(output);
return -1;
}
unsigned char unique_data[256];
unsigned freq[256] = {0};
for (int i = 0; i < unique_count; i++) {
if (fread(&unique_data[i], sizeof(unsigned char), 1, input) != 1 ||
fread(&freq[unique_data[i]], sizeof(unsigned), 1, input) != 1) {
perror("Error reading from input file");
fclose(input);
fclose(output);
return -1;
}
}
HuffmanNode* root = buildHuffmanTree(unique_data, freq, unique_count);
size_t total_bits;
fseek(input, -sizeof(size_t), SEEK_END);
fread(&total_bits, sizeof(size_t), 1, input);
fseek(input, sizeof(int) + unique_count * (sizeof(unsigned char) + sizeof(unsigned)), SEEK_SET);
HuffmanNode* current = root;
unsigned char byte;
size_t bits_read = 0;
while (bits_read < total_bits && fread(&byte, sizeof(byte), 1, input) == 1) {
for (int i = 7; i >= 0 && bits_read < total_bits; i--, bits_read++) {
if (byte & (1 << i)) {
current = current->right;
} else {
current = current->left;
}
if (!current->left && !current->right) {
fwrite(&current->data, sizeof(current->data), 1, output);
current = root;
}
}
}
fclose(input);
fclose(output);
return 0;
}
// Declarations only: neither function is defined in the part of the file visible here.
// NOTE(review): presumably a second compression scheme taking an input path and an
// output path and returning a status like compress_1/decompress_1 — confirm against
// the definitions.
int compress_2(const char* input_file_name, const char* output_file_name);
int decompress_2(const char* input_file_name, const char* output_file_name);