#include #include #include #include #include "compressor.h" #define BUFSIZE 1024 // Huffman Tree Node struct MinHeapNode { char data; unsigned freq; struct MinHeapNode *left, *right; }; // MinHeap struct MinHeap { unsigned size; unsigned capacity; struct MinHeapNode** array; }; // Create a new node struct MinHeapNode* newNode(char data, unsigned freq) { struct MinHeapNode* temp = (struct MinHeapNode*)malloc(sizeof(struct MinHeapNode)); temp->data = data; temp->freq = freq; temp->left = temp->right = NULL; return temp; } // Create a MinHeap struct MinHeap* createMinHeap(unsigned capacity) { struct MinHeap* minHeap = (struct MinHeap*)malloc(sizeof(struct MinHeap)); minHeap->size = 0; minHeap->capacity = capacity; minHeap->array = (struct MinHeapNode**)malloc(minHeap->capacity * sizeof(struct MinHeapNode*)); return minHeap; } // Swap two min heap nodes void swapMinHeapNode(struct MinHeapNode** a, struct MinHeapNode** b) { struct MinHeapNode* t = *a; *a = *b; *b = t; } // MinHeapify a node void minHeapify(struct MinHeap* minHeap, int idx) { int smallest = idx; int left = 2 * idx + 1; int right = 2 * idx + 2; if (left < minHeap->size && minHeap->array[left]->freq < minHeap->array[smallest]->freq) smallest = left; if (right < minHeap->size && minHeap->array[right]->freq < minHeap->array[smallest]->freq) smallest = right; if (smallest != idx) { swapMinHeapNode(&minHeap->array[smallest], &minHeap->array[idx]); minHeapify(minHeap, smallest); } } // Extract the minimum value node struct MinHeapNode* extractMin(struct MinHeap* minHeap) { struct MinHeapNode* temp = minHeap->array[0]; minHeap->array[0] = minHeap->array[minHeap->size - 1]; --minHeap->size; minHeapify(minHeap, 0); return temp; } // Insert a node into the MinHeap void insertMinHeap(struct MinHeap* minHeap, struct MinHeapNode* minHeapNode) { ++minHeap->size; int i = minHeap->size - 1; while (i && minHeapNode->freq < minHeap->array[(i - 1) / 2]->freq) { minHeap->array[i] = minHeap->array[(i - 1) / 2]; i = (i - 1) / 2; } minHeap->array[i] = minHeapNode; } // Build a MinHeap struct MinHeap* buildMinHeap(char data[], int freq[], int size) { struct MinHeap* minHeap = createMinHeap(size); for (int i = 0; i < size; ++i) minHeap->array[i] = newNode(data[i], freq[i]); minHeap->size = size; for (int i = (minHeap->size - 2) / 2; i >= 0; --i) minHeapify(minHeap, i); return minHeap; } // Build Huffman Tree struct MinHeapNode* buildHuffmanTree(char data[], int freq[], int size) { struct MinHeapNode *left, *right, *top; struct MinHeap* minHeap = buildMinHeap(data, freq, size); while (minHeap->size != 1) { left = extractMin(minHeap); right = extractMin(minHeap); top = newNode('$', left->freq + right->freq); top->left = left; top->right = right; insertMinHeap(minHeap, top); } return extractMin(minHeap); } // Generate Huffman Codes void generateCodes(struct MinHeapNode* root, char** codes, char* buffer, int top) { if (root->left) { buffer[top] = '0'; generateCodes(root->left, codes, buffer, top + 1); } if (root->right) { buffer[top] = '1'; generateCodes(root->right, codes, buffer, top + 1); } if (!root->left && !root->right) { buffer[top] = '\0'; codes[(unsigned char)root->data] = strdup(buffer); } } // Compress using Huffman encoding void compress_1(const char* input_file_name, const char* output_file_name) { FILE* input = fopen(input_file_name, "rb"); FILE* output = fopen(output_file_name, "wb"); if (!input || !output) { perror("File error"); exit(EXIT_FAILURE); } int freq[256] = {0}; char buffer[BUFSIZE]; size_t bytes_read; while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { for (size_t i = 0; i < bytes_read; i++) { freq[(unsigned char)buffer[i]]++; } } char data[256]; int freq_array[256]; int size = 0; for (int i = 0; i < 256; i++) { if (freq[i] > 0) { data[size] = (char)i; freq_array[size] = freq[i]; size++; } } struct MinHeapNode* root = buildHuffmanTree(data, freq_array, size); char* codes[256] = {NULL}; char code_buffer[256]; generateCodes(root, codes, code_buffer, 0); rewind(input); fwrite(&size, sizeof(int), 1, output); for (int i = 0; i < size; i++) { fputc(data[i], output); fwrite(&freq_array[i], sizeof(int), 1, output); } unsigned char bit_buffer = 0; int bit_count = 0; while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { for (size_t i = 0; i < bytes_read; i++) { char* code = codes[(unsigned char)buffer[i]]; for (char* p = code; *p; p++) { bit_buffer = (bit_buffer << 1) | (*p - '0'); bit_count++; if (bit_count == 8) { fputc(bit_buffer, output); bit_buffer = 0; bit_count = 0; } } } } if (bit_count > 0) { bit_buffer <<= (8 - bit_count); fputc(bit_buffer, output); } fclose(input); fclose(output); for (int i = 0; i < 256; i++) { free(codes[i]); } } // Decompress using Huffman encoding void decompress_1(const char* input_file_name, const char* output_file_name) { FILE* input = fopen(input_file_name, "rb"); FILE* output = fopen(output_file_name, "wb"); if (!input || !output) { perror("File error"); exit(EXIT_FAILURE); } int size; fread(&size, sizeof(int), 1, input); char data[256]; int freq[256]; for (int i = 0; i < size; i++) { data[i] = fgetc(input); fread(&freq[i], sizeof(int), 1, input); } struct MinHeapNode* root = buildHuffmanTree(data, freq, size); struct MinHeapNode* current = root; int bit_buffer; int bit_count = 0; int byte; while ((byte = fgetc(input)) != EOF) { for (int i = 7; i >= 0; i--) { int bit = (byte >> i) & 1; if (bit == 0) { current = current->left; } else { current = current->right; } if (!current->left && !current->right) { fputc(current->data, output); current = root; } } } fclose(input); fclose(output); } // Compress using RLE void compress_2(const char* input_file_name, const char* output_file_name) { FILE* input = fopen(input_file_name, "rb"); FILE* output = fopen(output_file_name, "wb"); if (!input || !output) { perror("File error"); exit(EXIT_FAILURE); } unsigned char buffer[BUFSIZE]; size_t bytes_read; while ((bytes_read = fread(buffer, 1, BUFSIZE, input)) > 0) { for (size_t i = 0; i < bytes_read; i++) { unsigned char current = buffer[i]; size_t count = 1; while (i + 1 < bytes_read && buffer[i + 1] == current) { count++; i++; } fputc(current, output); fputc(count, output); } } fclose(input); fclose(output); } // Decompress using RLE void decompress_2(const char* input_file_name, const char* output_file_name) { FILE* input = fopen(input_file_name, "rb"); FILE* output = fopen(output_file_name, "wb"); if (!input || !output) { perror("File error"); exit(EXIT_FAILURE); } int current; int count; while ((current = fgetc(input)) != EOF) { count = fgetc(input); for (int i = 0; i < count; i++) { fputc(current, output); } } fclose(input); fclose(output); }