286 lines
7.1 KiB
C
286 lines
7.1 KiB
C
#include "compressor.h"
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/**
 * Read an entire file into a newly allocated buffer.
 *
 * @param file_name  Path of the file to read (opened in binary mode).
 * @param size       Out: number of bytes read. Written only on success.
 * @return Heap buffer with the file contents, owned by the caller (release
 *         with free()), or NULL on failure after printing a diagnostic to
 *         stderr. An empty file yields a non-NULL buffer with *size == 0.
 */
unsigned char* read_file(const char* file_name, size_t* size) {
    FILE* file = fopen(file_name, "rb");
    if (!file) {
        perror("Error opening file");
        return NULL;
    }

    /* The length is found by seeking to the end. Each step can fail (for
       example on a non-seekable stream) and ftell() reports failure as -1,
       which must not be converted to size_t. */
    long length = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        length = ftell(file);
    }
    if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
        perror("Error determining file size");
        fclose(file);
        return NULL;
    }

    /* Request at least one byte: malloc(0) is allowed to return NULL, which
       would make an empty file look like an allocation failure. */
    size_t byte_count = (size_t)length;
    unsigned char* buffer = malloc(byte_count > 0 ? byte_count : 1);
    if (!buffer) {
        perror("Memory allocation error");
        fclose(file);
        return NULL;
    }

    /* A short read would leave the tail of the buffer uninitialised. */
    if (fread(buffer, 1, byte_count, file) != byte_count) {
        fprintf(stderr, "Error reading file\n");
        free(buffer);
        fclose(file);
        return NULL;
    }

    fclose(file);
    *size = byte_count;
    return buffer;
}
|
|
|
|
/**
 * Write a buffer to a file, replacing any existing contents.
 *
 * @param file_name  Path of the file to create or truncate (binary mode).
 * @param buffer     Bytes to write; not accessed when size is 0.
 * @param size       Number of bytes to write.
 * @return 0 when every byte was written and the file closed cleanly,
 *         -1 otherwise (a diagnostic is printed to stderr).
 */
int write_file(const char* file_name, const unsigned char* buffer, size_t size) {
    FILE* file = fopen(file_name, "wb");
    if (!file) {
        perror("Error opening file");
        return -1;
    }

    int status = 0;
    if (size > 0 && fwrite(buffer, 1, size, file) != size) {
        perror("Error writing file");
        status = -1;
    }

    /* fclose() flushes buffered data, so its failure also means lost bytes.
       It is always called; only the first error is reported. */
    if (fclose(file) != 0 && status == 0) {
        perror("Error closing file");
        status = -1;
    }

    return status;
}
|
|
|
|
HuffmanNode* build_huffman_tree(const unsigned char* data, size_t size) {
|
|
size_t frequencies[256] = {0};
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
frequencies[data[i]]++;
|
|
}
|
|
|
|
HuffmanNode* nodes[256];
|
|
int node_count = 0;
|
|
|
|
for (int i = 0; i < 256; i++) {
|
|
if (frequencies[i] > 0) {
|
|
HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode));
|
|
node->symbol = (unsigned char)i;
|
|
node->frequency = frequencies[i];
|
|
node->left = NULL;
|
|
node->right = NULL;
|
|
nodes[node_count++] = node;
|
|
}
|
|
}
|
|
|
|
while (node_count > 1) {
|
|
|
|
int min1 = 0, min2 = 1;
|
|
if (nodes[min2]->frequency < nodes[min1]->frequency) {
|
|
min1 = 1;
|
|
min2 = 0;
|
|
}
|
|
|
|
for (int i = 2; i < node_count; i++) {
|
|
if (nodes[i]->frequency < nodes[min1]->frequency) {
|
|
min2 = min1;
|
|
min1 = i;
|
|
} else if (nodes[i]->frequency < nodes[min2]->frequency) {
|
|
min2 = i;
|
|
}
|
|
}
|
|
|
|
HuffmanNode* new_node = (HuffmanNode*)malloc(sizeof(HuffmanNode));
|
|
new_node->symbol = 0;
|
|
new_node->frequency = nodes[min1]->frequency + nodes[min2]->frequency;
|
|
new_node->left = nodes[min1];
|
|
new_node->right = nodes[min2];
|
|
|
|
nodes[min1] = new_node;
|
|
nodes[min2] = nodes[node_count - 1];
|
|
node_count--;
|
|
}
|
|
|
|
return nodes[0];
|
|
}
|
|
|
|
/**
 * Walk the tree and store a '0'/'1' string for every leaf symbol.
 *
 * A left edge appends '0' and a right edge appends '1'. For each leaf, a
 * heap-allocated copy of the path is stored in codes[symbol]; the caller
 * frees these strings. If the copy cannot be allocated, codes[symbol] is
 * set to NULL. A root that is itself a leaf gets the empty string.
 *
 * @param root    Subtree to walk; NULL is ignored.
 * @param codes   Table indexed by symbol value that receives the strings.
 * @param buffer  Scratch space for the current path; must hold depth + 1
 *                characters for the deepest leaf.
 * @param depth   Number of path characters already in buffer (0 at the root).
 */
void generate_huffman_codes(HuffmanNode* root, char** codes, char* buffer, int depth) {
    if (!root) {
        return;
    }

    if (!root->left && !root->right) {
        buffer[depth] = '\0';
        /* Copy by hand rather than with strdup(), which is POSIX and not
           part of ISO C before C23. */
        size_t length = (size_t)depth + 1; /* path plus terminator */
        char* copy = malloc(length);
        if (copy) {
            memcpy(copy, buffer, length);
        }
        codes[root->symbol] = copy;
        return;
    }

    if (root->left) {
        buffer[depth] = '0';
        generate_huffman_codes(root->left, codes, buffer, depth + 1);
    }

    if (root->right) {
        buffer[depth] = '1';
        generate_huffman_codes(root->right, codes, buffer, depth + 1);
    }
}
|
|
|
|
/**
 * Release every node of a Huffman tree.
 *
 * Both subtrees are freed before the node itself. Passing NULL is a no-op.
 *
 * @param root  Root of the tree (or subtree) to free; may be NULL.
 */
void free_huffman_tree(HuffmanNode* root) {
    if (root != NULL) {
        HuffmanNode* left_child = root->left;
        HuffmanNode* right_child = root->right;

        free_huffman_tree(left_child);
        free_huffman_tree(right_child);
        free(root);
    }
}
|
|
|
|
/**
 * Append a string of '0'/'1' characters to a bit stream, most significant
 * bit first, writing each completed byte to file.
 *
 * Only buffer[0] is used: it accumulates the current partial byte and is
 * cleared after each byte is written. *bit_pos is a running count of bits
 * written, so *bit_pos % 8 is the number of pending bits in buffer[0]; the
 * caller must write that byte out itself when the count is not a multiple
 * of 8.
 *
 * @param file     Output stream.
 * @param bits     NUL-terminated string; '1' sets a bit, any other character
 *                 leaves it clear. NULL is treated as an empty string.
 * @param buffer   One-byte accumulator; must be zero before the first call.
 * @param bit_pos  In/out running bit count.
 */
void write_bits(FILE* file, const char* bits, unsigned char* buffer, int* bit_pos) {
    if (!bits) {
        return;
    }

    for (int i = 0; bits[i] != '\0'; i++) {
        /* Index by position within the current byte. Indexing by
           *bit_pos / 8 would step past a one-byte accumulator as soon as
           the first byte has been emitted. */
        int offset = *bit_pos % 8;
        if (bits[i] == '1') {
            buffer[0] |= (unsigned char)(1u << (7 - offset));
        }

        (*bit_pos)++;

        if (*bit_pos % 8 == 0) {
            fwrite(buffer, 1, 1, file);
            buffer[0] = 0; /* start the next byte from a clean slate */
        }
    }
}
|
|
|
|
/**
 * Read the next bit from a stream, most significant bit of each byte first.
 *
 * A new byte is fetched into buffer[0] whenever *bit_pos is a multiple of 8.
 * *bit_pos is incremented for every bit returned and left unchanged when
 * no byte can be read.
 *
 * @param file     Input stream.
 * @param buffer   One-byte holder for the byte currently being consumed.
 * @param bit_pos  In/out count of bits returned so far.
 * @return 0 or 1 for the bit read, or -1 when a new byte was needed and
 *         could not be read.
 */
int read_bit(FILE* file, unsigned char* buffer, int* bit_pos) {
    const int offset = *bit_pos % 8;

    if (offset == 0 && fread(buffer, 1, 1, file) != 1) {
        return -1;
    }

    *bit_pos += 1;
    return (buffer[0] >> (7 - offset)) & 1;
}
|
|
|
|
HuffmanNode* deserialize_tree(FILE* file) {
|
|
int marker = fgetc(file);
|
|
if (marker == EOF) return NULL;
|
|
|
|
if (marker == '1') {
|
|
// Leaf node: Read the symbol
|
|
HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode));
|
|
node->symbol = fgetc(file);
|
|
node->left = node->right = NULL;
|
|
return node;
|
|
}
|
|
|
|
// Internal node: Create a new node and deserialize children
|
|
HuffmanNode* node = (HuffmanNode*)malloc(sizeof(HuffmanNode));
|
|
node->left = deserialize_tree(file);
|
|
node->right = deserialize_tree(file);
|
|
return node;
|
|
}
|
|
|
|
|
|
int compress_1(const char* input_file_name, const char* output_file_name) {
|
|
size_t size;
|
|
unsigned char* data = read_file(input_file_name, &size);
|
|
if (!data) return -1;
|
|
|
|
// Build Huffman tree
|
|
HuffmanNode* root = build_huffman_tree(data, size);
|
|
if (!root) {
|
|
free(data);
|
|
return -1;
|
|
}
|
|
|
|
// Generate Huffman codes
|
|
char* codes[256] = {0};
|
|
char buffer[256];
|
|
generate_huffman_codes(root, codes, buffer, 0);
|
|
|
|
// Open output file
|
|
FILE* output_file = fopen(output_file_name, "wb");
|
|
if (!output_file) {
|
|
perror("Error opening output file");
|
|
free(data);
|
|
free_huffman_tree(root);
|
|
return -1;
|
|
}
|
|
|
|
// Compress data
|
|
unsigned char bit_buffer[1] = {0};
|
|
int bit_pos = 0;
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
write_bits(output_file, codes[data[i]], bit_buffer, &bit_pos);
|
|
}
|
|
|
|
// Flush remaining bits
|
|
if (bit_pos % 8 != 0) {
|
|
fwrite(bit_buffer, 1, 1, output_file);
|
|
}
|
|
|
|
fclose(output_file);
|
|
|
|
printf("Compressing using Huffman coding...\n");
|
|
|
|
// Clean up
|
|
for (int i = 0; i < 256; i++) {
|
|
if (codes[i]) free(codes[i]);
|
|
}
|
|
free_huffman_tree(root);
|
|
free(data);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
int decompress_1(const char* input_file_name, const char* output_file_name) {
|
|
FILE* input_file = fopen(input_file_name, "rb");
|
|
if (!input_file) {
|
|
perror("Error opening input file");
|
|
return -1;
|
|
}
|
|
|
|
FILE* output_file = fopen(output_file_name, "wb");
|
|
if (!output_file) {
|
|
perror("Error opening output file");
|
|
fclose(input_file);
|
|
return -1;
|
|
}
|
|
|
|
|
|
HuffmanNode* root = deserialize_tree(input_file);
|
|
if (!root) {
|
|
fclose(input_file);
|
|
fclose(output_file);
|
|
return -1;
|
|
}
|
|
|
|
|
|
unsigned char bit_buffer[1];
|
|
int bit_pos = 0;
|
|
HuffmanNode* current = root;
|
|
|
|
while (1) {
|
|
int bit = read_bit(input_file, bit_buffer, &bit_pos);
|
|
if (bit == -1) break;
|
|
|
|
current = (bit == 0) ? current->left : current->right;
|
|
|
|
if (!current->left && !current->right) {
|
|
fputc(current->symbol, output_file);
|
|
current = root;
|
|
}
|
|
}
|
|
|
|
fclose(input_file);
|
|
fclose(output_file);
|
|
free_huffman_tree(root);
|
|
|
|
printf("Decompressing using Huffman coding...\n");
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
/**
 * "Algorithm 2" compression entry point.
 *
 * Despite the LZ77 label in the progress message, the input is written to
 * the output unchanged; no encoding is performed here.
 *
 * @param input_file_name   File to read.
 * @param output_file_name  File to create or overwrite.
 * @return The result of write_file(), or -1 when the input cannot be read.
 */
int compress_2(const char* input_file_name, const char* output_file_name) {
    size_t length;
    int status = -1;

    unsigned char* contents = read_file(input_file_name, &length);
    if (contents != NULL) {
        printf("Compressing using Algorithm 2 (LZ77)...\n");

        status = write_file(output_file_name, contents, length);
        free(contents);
    }

    return status;
}
|
|
|
|
/**
 * "Algorithm 2" decompression entry point.
 *
 * Despite the LZ77 label in the progress message, the input is written to
 * the output unchanged; no decoding is performed here.
 *
 * @param input_file_name   File to read.
 * @param output_file_name  File to create or overwrite.
 * @return The result of write_file(), or -1 when the input cannot be read.
 */
int decompress_2(const char* input_file_name, const char* output_file_name) {
    size_t length;
    int status = -1;

    unsigned char* contents = read_file(input_file_name, &length);
    if (contents != NULL) {
        printf("Decompressing using Algorithm 2 (LZ77)...\n");

        status = write_file(output_file_name, contents, length);
        free(contents);
    }

    return status;
}