usaa25/sk1/compressor.c
2026-02-07 02:10:39 +01:00

210 lines
4.7 KiB
C

#include "compressor.h"
#include <string.h>
/*
 * Allocate a single Huffman tree node.
 *
 * symbol: byte value stored in the node.
 * freq:   weight stored in the node.
 *
 * Both child links of the new node are NULL. Returns NULL if malloc fails;
 * otherwise the caller owns the returned node.
 */
static HuffmanNode *create_node(unsigned char symbol, uint32_t freq) {
    HuffmanNode *fresh = malloc(sizeof(HuffmanNode));

    if (fresh == NULL) {
        return NULL;
    }

    fresh->left = NULL;
    fresh->right = NULL;
    fresh->freq = freq;
    fresh->symbol = symbol;
    return fresh;
}
/*
 * Release a whole tree: left subtree, then right subtree, then the node
 * itself. Passing NULL is allowed and does nothing.
 */
static void free_tree(HuffmanNode *root) {
    if (root != NULL) {
        HuffmanNode *lhs = root->left;
        HuffmanNode *rhs = root->right;

        free_tree(lhs);
        free_tree(rhs);
        free(root);
    }
}
/*
 * Build a Huffman tree from a frequency table with SYMBOLS entries.
 *
 * freq: freq[i] is the weight of byte value i; entries equal to 0 get no leaf.
 *
 * Returns the root of the tree (release it with free_tree), or NULL when
 * every entry is 0 or when a node allocation fails. On allocation failure
 * every node built so far is freed before returning.
 *
 * The selection and merge order below determines the shape of the tree, and
 * the decompressor rebuilds the tree from the same table, so this order must
 * stay stable for existing compressed files to remain decodable.
 */
static HuffmanNode *build_tree(uint32_t freq[]) {
    HuffmanNode *nodes[SYMBOLS * 2];
    int count = 0;

    /* One leaf per byte value that actually occurs. */
    for (int i = 0; i < SYMBOLS; i++) {
        if (freq[i] == 0) {
            continue;
        }
        HuffmanNode *leaf = create_node((unsigned char)i, freq[i]);
        if (!leaf) {
            goto fail;
        }
        nodes[count++] = leaf;
    }
    if (count == 0) {
        return NULL;
    }

    /* Repeatedly merge the two lightest roots until one tree remains. */
    while (count > 1) {
        int min1 = 0, min2 = 1;
        if (nodes[min2]->freq < nodes[min1]->freq) {
            int tmp = min1;
            min1 = min2;
            min2 = tmp;
        }
        for (int i = 2; i < count; i++) {
            if (nodes[i]->freq < nodes[min1]->freq) {
                min2 = min1;
                min1 = i;
            } else if (nodes[i]->freq < nodes[min2]->freq) {
                min2 = i;
            }
        }

        /* The parent's weight is passed as uint32_t, so very large sums wrap. */
        HuffmanNode *parent =
            create_node(0, nodes[min1]->freq + nodes[min2]->freq);
        if (!parent) {
            goto fail;
        }
        parent->left = nodes[min1];
        parent->right = nodes[min2];

        /* Parent takes min1's slot; the last root fills min2's slot. */
        nodes[min1] = parent;
        nodes[min2] = nodes[count - 1];
        count--;
    }
    return nodes[0];

fail:
    /* nodes[0..count) are the roots of disjoint subtrees; free each one. */
    for (int i = 0; i < count; i++) {
        free_tree(nodes[i]);
    }
    return NULL;
}
/*
 * Walk the tree and record, for every leaf, the path from the root as a
 * NUL-terminated string of '0' (left) and '1' (right) characters.
 *
 * root:   subtree to walk; NULL is ignored.
 * buffer: scratch space holding the path so far in buffer[0..depth).
 * depth:  number of path characters already in buffer.
 * codes:  output table, indexed by the leaf's symbol.
 *
 * A leaf reached at depth 0 (the tree is a single node) gets the code "0".
 */
static void build_codes(HuffmanNode *root, char *buffer, int depth,
                        char codes[SYMBOLS][256]) {
    if (root == NULL) {
        return;
    }

    int is_leaf = (root->left == NULL) && (root->right == NULL);
    if (is_leaf) {
        int len = depth;

        if (len == 0) {
            buffer[0] = '0';
            len = 1;
        }
        buffer[len] = '\0';
        strcpy(codes[root->symbol], buffer);
        return;
    }

    if (root->left != NULL) {
        buffer[depth] = '0';
        build_codes(root->left, buffer, depth + 1, codes);
    }
    if (root->right != NULL) {
        buffer[depth] = '1';
        build_codes(root->right, buffer, depth + 1, codes);
    }
}
/*
 * Huffman-compress infile into outfile.
 *
 * Output layout: the 4 bytes "HUF1", the input length as a uint64_t, the
 * SYMBOLS-entry uint32_t frequency table, then the code bits packed
 * most-significant-bit first, with the final byte zero-padded. The length and
 * table are written as raw in-memory bytes, so they follow the host's byte
 * order.
 *
 * Returns 0 on success and 1 on any failure: a file cannot be opened, the
 * input is empty, a per-byte count would exceed UINT32_MAX, the input cannot
 * be re-read, or a read/write error occurs. The input is opened first, so a
 * missing input no longer truncates an existing output file. On failure after
 * the output was opened, a partial output file may be left behind.
 */
int compress_file(const char *infile, const char *outfile) {
    uint32_t freq[SYMBOLS] = {0};
    char codes[SYMBOLS][256] = {{0}};
    /* assumes no code is longer than 255 bits (true if SYMBOLS is 256) -- TODO confirm */
    char buffer[256];
    uint64_t original_size = 0;
    unsigned char out_byte = 0;
    int bit_count = 0;
    HuffmanNode *root = NULL;
    FILE *out = NULL;
    int status = 1;
    int c;

    FILE *in = fopen(infile, "rb");
    if (!in) {
        return 1;
    }
    out = fopen(outfile, "wb");
    if (!out) {
        goto cleanup;
    }

    /* Pass 1: count how often each byte value occurs. */
    while ((c = fgetc(in)) != EOF) {
        if (freq[(unsigned char)c] == UINT32_MAX) {
            /* The on-disk counters are 32 bits wide; refuse to wrap them. */
            goto cleanup;
        }
        freq[(unsigned char)c]++;
        original_size++;
    }
    if (ferror(in)) {
        goto cleanup;
    }

    if (fwrite("HUF1", 1, 4, out) != 4 ||
        fwrite(&original_size, sizeof original_size, 1, out) != 1 ||
        fwrite(freq, sizeof freq[0], SYMBOLS, out) != (size_t)SYMBOLS) {
        goto cleanup;
    }

    /* NULL means the input was empty or a node allocation failed. */
    root = build_tree(freq);
    if (!root) {
        goto cleanup;
    }
    build_codes(root, buffer, 0, codes);

    /* Pass 2 needs the input again; this fails on non-seekable streams. */
    if (fseek(in, 0L, SEEK_SET) != 0) {
        goto cleanup;
    }

    while ((c = fgetc(in)) != EOF) {
        const char *code = codes[(unsigned char)c];
        if (code[0] == '\0') {
            /* Byte not seen in pass 1: the input changed underneath us. */
            goto cleanup;
        }
        for (int i = 0; code[i] != '\0'; i++) {
            out_byte = (unsigned char)((out_byte << 1) | (code[i] == '1'));
            bit_count++;
            if (bit_count == 8) {
                if (fputc(out_byte, out) == EOF) {
                    goto cleanup;
                }
                out_byte = 0;
                bit_count = 0;
            }
        }
    }
    if (ferror(in)) {
        goto cleanup;
    }

    /* Flush a partial final byte, padding the low bits with zeros. */
    if (bit_count > 0) {
        out_byte = (unsigned char)(out_byte << (8 - bit_count));
        if (fputc(out_byte, out) == EOF) {
            goto cleanup;
        }
    }
    status = 0;

cleanup:
    free_tree(root);
    /* fclose flushes buffered data, so its failure means the output is bad. */
    if (out && fclose(out) != 0) {
        status = 1;
    }
    fclose(in);
    return status;
}
/*
 * Decompress a "HUF1" file into outfile.
 *
 * Reads the 4-byte magic, the uint64_t original length and the SYMBOLS-entry
 * uint32_t frequency table, rebuilds the Huffman tree from that table, then
 * decodes the payload bits (most-significant-bit first) until the original
 * length has been written.
 *
 * Returns 0 on success and 1 on any failure: a file cannot be opened, the
 * header is missing or malformed, the stored length does not equal the sum of
 * the frequency table, the table is all zeros, the payload ends before the
 * stored length is reached, or a write error occurs.
 */
int decompress_file(const char *infile, const char *outfile) {
    char magic[4];
    uint32_t freq[SYMBOLS];
    uint64_t original_size = 0;
    uint64_t table_total = 0;
    uint64_t written = 0;
    HuffmanNode *root = NULL;
    FILE *out = NULL;
    int status = 1;

    FILE *in = fopen(infile, "rb");
    if (!in) {
        return 1;
    }
    out = fopen(outfile, "wb");
    if (!out) {
        goto cleanup;
    }

    if (fread(magic, 1, 4, in) != 4 || memcmp(magic, "HUF1", 4) != 0) {
        goto cleanup;
    }
    if (fread(&original_size, sizeof original_size, 1, in) != 1) {
        goto cleanup;
    }
    if (fread(freq, sizeof freq[0], SYMBOLS, in) != (size_t)SYMBOLS) {
        goto cleanup;
    }

    /*
     * The compressor stores the length as the sum of the table, so any
     * mismatch means a corrupt header. This also bounds the output size.
     */
    for (int i = 0; i < SYMBOLS; i++) {
        table_total += freq[i];
    }
    if (table_total != original_size) {
        goto cleanup;
    }

    /* NULL means the table was all zeros or a node allocation failed. */
    root = build_tree(freq);
    if (!root) {
        goto cleanup;
    }

    if (!root->left && !root->right) {
        /*
         * Only one distinct byte value: the root is a leaf with no children
         * to follow, so the payload bits carry no information. Emit the
         * symbol directly instead of walking the tree.
         */
        while (written < original_size) {
            if (fputc(root->symbol, out) == EOF) {
                goto cleanup;
            }
            written++;
        }
    } else {
        HuffmanNode *current = root;
        int byte;

        while (written < original_size && (byte = fgetc(in)) != EOF) {
            for (int i = 7; i >= 0 && written < original_size; i--) {
                int bit = (byte >> i) & 1;
                /* Every internal node built by build_tree has two children. */
                current = bit ? current->right : current->left;
                if (!current->left && !current->right) {
                    if (fputc(current->symbol, out) == EOF) {
                        goto cleanup;
                    }
                    written++;
                    current = root;
                }
            }
        }
    }

    /* Falling short of the stored length means the payload was truncated. */
    if (written == original_size) {
        status = 0;
    }

cleanup:
    free_tree(root);
    /* fclose flushes buffered data, so its failure means the output is bad. */
    if (out && fclose(out) != 0) {
        status = 1;
    }
    fclose(in);
    return status;
}