210 lines
4.7 KiB
C
210 lines
4.7 KiB
C
#include "compressor.h"
|
|
#include <string.h>
|
|
|
|
static HuffmanNode *create_node(unsigned char symbol, uint32_t freq) {
|
|
HuffmanNode *node = malloc(sizeof(HuffmanNode));
|
|
if (!node) return NULL;
|
|
node->symbol = symbol;
|
|
node->freq = freq;
|
|
node->left = NULL;
|
|
node->right = NULL;
|
|
return node;
|
|
}
|
|
|
|
/*
 * Release every node of the tree rooted at `root`.
 *
 * Both subtrees are freed before the node itself. Passing NULL is a no-op.
 */
static void free_tree(HuffmanNode *root) {
    if (root != NULL) {
        HuffmanNode *lhs = root->left;
        HuffmanNode *rhs = root->right;

        free_tree(lhs);
        free_tree(rhs);
        free(root);
    }
}
|
|
|
|
static HuffmanNode *build_tree(uint32_t freq[]) {
|
|
HuffmanNode *nodes[SYMBOLS * 2];
|
|
int count = 0;
|
|
|
|
for (int i = 0; i < SYMBOLS; i++) {
|
|
if (freq[i] > 0) {
|
|
nodes[count++] = create_node((unsigned char)i, freq[i]);
|
|
}
|
|
}
|
|
|
|
if (count == 0) return NULL;
|
|
|
|
while (count > 1) {
|
|
int min1 = 0, min2 = 1;
|
|
|
|
if (nodes[min2]->freq < nodes[min1]->freq) {
|
|
int tmp = min1;
|
|
min1 = min2;
|
|
min2 = tmp;
|
|
}
|
|
|
|
for (int i = 2; i < count; i++) {
|
|
if (nodes[i]->freq < nodes[min1]->freq) {
|
|
min2 = min1;
|
|
min1 = i;
|
|
} else if (nodes[i]->freq < nodes[min2]->freq) {
|
|
min2 = i;
|
|
}
|
|
}
|
|
|
|
HuffmanNode *parent =
|
|
create_node(0, nodes[min1]->freq + nodes[min2]->freq);
|
|
parent->left = nodes[min1];
|
|
parent->right = nodes[min2];
|
|
|
|
nodes[min1] = parent;
|
|
nodes[min2] = nodes[count - 1];
|
|
count--;
|
|
}
|
|
|
|
return nodes[0];
|
|
}
|
|
|
|
static void build_codes(HuffmanNode *root, char *buffer, int depth,
|
|
char codes[SYMBOLS][256]) {
|
|
if (!root) return;
|
|
|
|
if (!root->left && !root->right) {
|
|
if (depth == 0) {
|
|
buffer[0] = '0';
|
|
depth = 1;
|
|
}
|
|
|
|
buffer[depth] = '\0';
|
|
strcpy(codes[root->symbol], buffer);
|
|
return;
|
|
}
|
|
|
|
if (root->left) {
|
|
buffer[depth] = '0';
|
|
build_codes(root->left, buffer, depth + 1, codes);
|
|
}
|
|
|
|
if (root->right) {
|
|
buffer[depth] = '1';
|
|
build_codes(root->right, buffer, depth + 1, codes);
|
|
}
|
|
}
|
|
|
|
int compress_file(const char *infile, const char *outfile) {
|
|
FILE *in = fopen(infile, "rb");
|
|
FILE *out = fopen(outfile, "wb");
|
|
if (!in || !out) return 1;
|
|
|
|
uint32_t freq[SYMBOLS] = {0};
|
|
int c;
|
|
|
|
while ((c = fgetc(in)) != EOF) {
|
|
freq[(unsigned char)c]++;
|
|
}
|
|
uint64_t original_size = 0;
|
|
for( int i = 0; i < SYMBOLS; i++) original_size += freq[i];
|
|
|
|
fwrite("HUF1",1,4,out);
|
|
fwrite(&original_size, sizeof(uint64_t), 1, out);
|
|
fwrite(freq, sizeof(uint32_t), SYMBOLS, out);
|
|
|
|
HuffmanNode *root = build_tree(freq);
|
|
if (!root) {
|
|
fclose(in);
|
|
fclose(out);
|
|
return 1;
|
|
}
|
|
|
|
char codes[SYMBOLS][256] = {{0}};
|
|
char buffer[256];
|
|
build_codes(root, buffer, 0, codes);
|
|
|
|
rewind(in);
|
|
|
|
unsigned char out_byte = 0;
|
|
int bit_count = 0;
|
|
|
|
while ((c = fgetc(in)) != EOF) {
|
|
char *code = codes[(unsigned char)c];
|
|
for (int i = 0; code[i] != '\0'; i++) {
|
|
out_byte <<= 1;
|
|
if (code[i] == '1') out_byte |= 1;
|
|
bit_count++;
|
|
|
|
if (bit_count == 8) {
|
|
fputc(out_byte, out);
|
|
out_byte = 0;
|
|
bit_count = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (bit_count > 0) {
|
|
out_byte <<= (8 - bit_count);
|
|
fputc(out_byte, out);
|
|
}
|
|
|
|
free_tree(root);
|
|
fclose(in);
|
|
fclose(out);
|
|
return 0;
|
|
}
|
|
|
|
int decompress_file(const char *infile, const char *outfile) {
|
|
FILE *in = fopen(infile, "rb");
|
|
FILE *out = fopen(outfile, "wb");
|
|
if (!in || !out) return 1;
|
|
|
|
char magic[4];
|
|
if (fread(magic, 1, 4, in) != 4 || memcmp(magic, "HUF1", 4) != 0) {
|
|
fclose(in);
|
|
fclose(out);
|
|
return 1;
|
|
}
|
|
|
|
uint64_t original_size = 0;
|
|
if (fread(&original_size, sizeof(uint64_t), 1, in) != 1) {
|
|
fclose(in);
|
|
fclose(out);
|
|
return 1;
|
|
}
|
|
|
|
uint32_t freq[SYMBOLS];
|
|
if (fread(freq, sizeof(uint32_t), SYMBOLS, in) != SYMBOLS) {
|
|
fclose(in);
|
|
fclose(out);
|
|
return 1;
|
|
}
|
|
|
|
HuffmanNode *root = build_tree(freq);
|
|
if (!root) {
|
|
fclose(in);
|
|
fclose(out);
|
|
return 1;
|
|
}
|
|
|
|
HuffmanNode *current = root;
|
|
int byte;
|
|
uint64_t written = 0;
|
|
while ((byte = fgetc(in)) != EOF) {
|
|
for (int i = 7; i >= 0; i--) {
|
|
int bit = (byte >> i) & 1;
|
|
current = bit ? current->right : current->left;
|
|
|
|
if (!current->left && !current->right) {
|
|
fputc(current->symbol, out);
|
|
written++;
|
|
if ( written == original_size) {
|
|
free_tree(root);
|
|
fclose(in);
|
|
fclose(out);
|
|
return 0;
|
|
}
|
|
current = root;
|
|
}
|
|
}
|
|
}
|
|
|
|
free_tree(root);
|
|
fclose(in);
|
|
fclose(out);
|
|
return 0;
|
|
}
|