/*
 * Huffman file compressor / decompressor.
 *
 * Archive layout written by compress_file() and read by decompress_file():
 *   4 bytes            magic "HUF1"
 *   uint64_t           number of bytes in the original file
 *   uint32_t[SYMBOLS]  per-symbol occurrence counts
 *   ...                Huffman-coded payload, MSB first, last byte zero-padded
 *
 * The integers are written with fwrite() straight from memory, so they use
 * the byte order and layout of the machine that produced the archive.
 *
 * HuffmanNode and SYMBOLS come from compressor.h.  The code indexes
 * SYMBOLS-sized tables with byte values, so it assumes SYMBOLS >= 256
 * (NOTE(review): confirm against compressor.h).
 */
#include "compressor.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Allocate a childless tree node.
 *
 * Returns the new node, or NULL if allocation fails.  The caller owns the
 * node and releases it (and anything later attached to it) with free_tree().
 */
static HuffmanNode *create_node(unsigned char symbol, uint32_t freq)
{
    HuffmanNode *node = malloc(sizeof *node);
    if (!node) {
        return NULL;
    }
    node->symbol = symbol;
    node->freq = freq;
    node->left = NULL;
    node->right = NULL;
    return node;
}

/* Recursively free a tree; a NULL root is accepted and ignored. */
static void free_tree(HuffmanNode *root)
{
    if (!root) {
        return;
    }
    free_tree(root->left);
    free_tree(root->right);
    free(root);
}

/*
 * Build a Huffman tree from a table of SYMBOLS occurrence counts.
 *
 * The selection order, tie-breaking and (wrapping) uint32_t weight sums are
 * deliberately fixed: the decoder rebuilds the tree from the stored counts,
 * so any change in construction order would make existing archives
 * undecodable.
 *
 * Returns the root (owned by the caller, release with free_tree()), or NULL
 * when every count is zero or when an allocation fails.  On failure all
 * partially built nodes are freed.
 */
static HuffmanNode *build_tree(const uint32_t freq[])
{
    /* One leaf per symbol at most; merging never grows the live set. */
    HuffmanNode *nodes[SYMBOLS];
    int count = 0;

    for (int i = 0; i < SYMBOLS; i++) {
        if (freq[i] == 0) {
            continue;
        }
        HuffmanNode *leaf = create_node((unsigned char)i, freq[i]);
        if (!leaf) {
            goto fail;
        }
        nodes[count++] = leaf;
    }

    if (count == 0) {
        return NULL;
    }

    while (count > 1) {
        /* Find the two lightest subtrees (min1 lightest, min2 runner-up). */
        int min1 = 0;
        int min2 = 1;
        if (nodes[min2]->freq < nodes[min1]->freq) {
            min1 = 1;
            min2 = 0;
        }
        for (int i = 2; i < count; i++) {
            if (nodes[i]->freq < nodes[min1]->freq) {
                min2 = min1;
                min1 = i;
            } else if (nodes[i]->freq < nodes[min2]->freq) {
                min2 = i;
            }
        }

        HuffmanNode *parent =
            create_node(0, nodes[min1]->freq + nodes[min2]->freq);
        if (!parent) {
            goto fail;
        }
        parent->left = nodes[min1];
        parent->right = nodes[min2];

        /*
         * Replace the two merged entries by the parent and shrink the set.
         * The order of these two stores matters when min1 is the last
         * element: the parent is then carried into slot min2.
         */
        nodes[min1] = parent;
        nodes[min2] = nodes[count - 1];
        count--;
    }

    return nodes[0];

fail:
    for (int i = 0; i < count; i++) {
        free_tree(nodes[i]);
    }
    return NULL;
}

/*
 * Fill `codes` with the '0'/'1' string for every leaf below `root`.
 *
 * `buffer` is scratch space holding the path walked so far and `depth` is
 * its current length; call with depth 0.  A tree consisting of one leaf
 * gets the one-bit code "0" so that every symbol still emits a bit.
 *
 * Both `buffer` and each row of `codes` hold 256 chars: with at most 256
 * leaves the deepest path is 255 bits plus the terminator.
 */
static void build_codes(const HuffmanNode *root, char *buffer, int depth,
                        char codes[SYMBOLS][256])
{
    if (!root) {
        return;
    }

    if (!root->left && !root->right) {
        if (depth == 0) {
            buffer[0] = '0';
            depth = 1;
        }
        buffer[depth] = '\0';
        memcpy(codes[root->symbol], buffer, (size_t)depth + 1);
        return;
    }

    if (root->left) {
        buffer[depth] = '0';
        build_codes(root->left, buffer, depth + 1, codes);
    }
    if (root->right) {
        buffer[depth] = '1';
        build_codes(root->right, buffer, depth + 1, codes);
    }
}

/*
 * Compress `infile` into `outfile`.
 *
 * The input is read twice (count pass, then encode pass), so it must be
 * seekable.  An empty input produces a valid header-only archive.
 *
 * Returns 0 on success and 1 on any failure: a file cannot be opened, a
 * read/write/seek error occurs, memory runs out, or a single byte value
 * occurs more often than a uint32_t counter can record.
 */
int compress_file(const char *infile, const char *outfile)
{
    int rc = 1;
    HuffmanNode *root = NULL;
    uint32_t freq[SYMBOLS] = {0};
    uint64_t original_size = 0;
    char codes[SYMBOLS][256] = {{0}};
    char buffer[256];
    unsigned char out_byte = 0;
    int bit_count = 0;
    int c;

    FILE *in = fopen(infile, "rb");
    if (!in) {
        return 1;
    }
    FILE *out = fopen(outfile, "wb");
    if (!out) {
        fclose(in);
        return 1;
    }

    /* Pass 1: count symbols. */
    while ((c = fgetc(in)) != EOF) {
        unsigned char sym = (unsigned char)c;
        if (freq[sym] == UINT32_MAX) {
            /* The stored counter would wrap and corrupt the archive. */
            goto cleanup;
        }
        freq[sym]++;
        original_size++;
    }
    if (ferror(in)) {
        goto cleanup;
    }

    if (fwrite("HUF1", 1, 4, out) != 4 ||
        fwrite(&original_size, sizeof original_size, 1, out) != 1 ||
        fwrite(freq, sizeof freq[0], (size_t)SYMBOLS, out) != (size_t)SYMBOLS) {
        goto cleanup;
    }

    if (original_size == 0) {
        /* Nothing to encode: the header alone describes an empty file. */
        rc = 0;
        goto cleanup;
    }

    root = build_tree(freq);
    if (!root) {
        goto cleanup;
    }
    build_codes(root, buffer, 0, codes);

    /* Pass 2: emit the code of every input byte, packed MSB first. */
    if (fseek(in, 0L, SEEK_SET) != 0) {
        goto cleanup;
    }
    while ((c = fgetc(in)) != EOF) {
        const char *code = codes[(unsigned char)c];
        for (int i = 0; code[i] != '\0'; i++) {
            out_byte = (unsigned char)(out_byte << 1);
            if (code[i] == '1') {
                out_byte |= 1;
            }
            bit_count++;
            if (bit_count == 8) {
                if (fputc(out_byte, out) == EOF) {
                    goto cleanup;
                }
                out_byte = 0;
                bit_count = 0;
            }
        }
    }
    if (ferror(in)) {
        goto cleanup;
    }

    /* Flush a partial final byte, padding the low bits with zeros. */
    if (bit_count > 0) {
        out_byte = (unsigned char)(out_byte << (8 - bit_count));
        if (fputc(out_byte, out) == EOF) {
            goto cleanup;
        }
    }

    rc = 0;

cleanup:
    free_tree(root);
    fclose(in);
    /* fclose flushes buffered output, so its failure means data was lost. */
    if (fclose(out) != 0) {
        rc = 1;
    }
    return rc;
}

/*
 * Decompress an archive produced by compress_file() into `outfile`.
 *
 * Returns 0 when exactly the number of bytes announced in the header has
 * been written, and 1 otherwise: a file cannot be opened, the magic or
 * header is missing, the payload ends early, memory runs out, or an I/O
 * error occurs.
 */
int decompress_file(const char *infile, const char *outfile)
{
    int rc = 1;
    HuffmanNode *root = NULL;
    const HuffmanNode *current = NULL;
    int single_leaf = 0;
    char magic[4];
    uint64_t original_size = 0;
    uint64_t written = 0;
    uint32_t freq[SYMBOLS];
    int byte;

    FILE *in = fopen(infile, "rb");
    if (!in) {
        return 1;
    }
    FILE *out = fopen(outfile, "wb");
    if (!out) {
        fclose(in);
        return 1;
    }

    if (fread(magic, 1, 4, in) != 4 || memcmp(magic, "HUF1", 4) != 0) {
        goto cleanup;
    }
    if (fread(&original_size, sizeof original_size, 1, in) != 1) {
        goto cleanup;
    }
    if (fread(freq, sizeof freq[0], (size_t)SYMBOLS, in) != (size_t)SYMBOLS) {
        goto cleanup;
    }

    if (original_size == 0) {
        /* Empty original: there is no payload to decode. */
        rc = 0;
        goto cleanup;
    }

    root = build_tree(freq);
    if (!root) {
        goto cleanup;
    }

    /*
     * With a single distinct symbol the root itself is the leaf and has no
     * children to descend into; the encoder wrote one '0' bit per symbol,
     * so each bit maps directly to that symbol.
     */
    single_leaf = !root->left && !root->right;
    current = root;

    while (written < original_size && (byte = fgetc(in)) != EOF) {
        for (int i = 7; i >= 0 && written < original_size; i--) {
            if (!single_leaf) {
                int bit = (byte >> i) & 1;
                current = bit ? current->right : current->left;
                if (!current) {
                    /* Defensive: a well-formed tree never gets here. */
                    goto cleanup;
                }
            }
            if (!current->left && !current->right) {
                if (fputc(current->symbol, out) == EOF) {
                    goto cleanup;
                }
                written++;
                current = root;
            }
        }
    }

    /* Payload ending before the announced size means a truncated archive. */
    if (written == original_size && !ferror(in)) {
        rc = 0;
    }

cleanup:
    free_tree(root);
    fclose(in);
    if (fclose(out) != 0) {
        rc = 1;
    }
    return rc;
}